* [Patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v2
[not found] <20090307045120.039324630@linux.vnet.ibm.com>
@ 2009-03-07 5:07 ` prasad
2009-03-07 14:53 ` KOSAKI Motohiro
0 siblings, 1 reply; 33+ messages in thread
From: prasad @ 2009-03-07 5:07 UTC (permalink / raw)
To: mingo
Cc: Andrew Morton, Linux Kernel Mailing List, Alan Stern,
Roland McGrath, K.Prasad
[-- Attachment #1: ftrace_hbkpt_12 --]
[-- Type: text/plain, Size: 16052 bytes --]
This patch adds an ftrace plugin to detect and profile memory access over
kernel variables. It uses HW Breakpoint interfaces to 'watch' memory
addresses.
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
---
kernel/trace/Kconfig | 6
kernel/trace/Makefile | 1
kernel/trace/trace.h | 16 +
kernel/trace/trace_ksym.c | 448 ++++++++++++++++++++++++++++++++++++++++++
kernel/trace/trace_selftest.c | 36 +++
5 files changed, 507 insertions(+)
Index: linux-2.6-tip/kernel/trace/Kconfig
===================================================================
--- linux-2.6-tip.orig/kernel/trace/Kconfig
+++ linux-2.6-tip/kernel/trace/Kconfig
@@ -249,6 +249,12 @@ config POWER_TRACER
power management decisions, specifically the C-state and P-state
behavior.
+config KSYM_TRACER
+ bool "Trace read and write access on kernel memory locations"
+ select TRACING
+ help
+ This tracer helps find read and write operations on any given kernel
+ symbol i.e. /proc/kallsyms.
config STACK_TRACER
bool "Trace max stack"
Index: linux-2.6-tip/kernel/trace/Makefile
===================================================================
--- linux-2.6-tip.orig/kernel/trace/Makefile
+++ linux-2.6-tip/kernel/trace/Makefile
@@ -41,5 +41,6 @@ obj-$(CONFIG_WORKQUEUE_TRACER) += trace_
obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
obj-$(CONFIG_EVENT_TRACER) += trace_events.o
obj-$(CONFIG_EVENT_TRACER) += events.o
+obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
libftrace-y := ftrace.o
Index: linux-2.6-tip/kernel/trace/trace.h
===================================================================
--- linux-2.6-tip.orig/kernel/trace/trace.h
+++ linux-2.6-tip/kernel/trace/trace.h
@@ -12,6 +12,8 @@
#include <trace/kmemtrace.h>
#include <trace/power.h>
+#include <asm/hw_breakpoint.h>
+
enum trace_type {
__TRACE_FIRST_TYPE = 0,
@@ -34,6 +36,7 @@ enum trace_type {
TRACE_KMEM_FREE,
TRACE_POWER,
TRACE_BLK,
+ TRACE_KSYM,
__TRACE_LAST_TYPE,
};
@@ -191,6 +194,16 @@ struct kmemtrace_free_entry {
const void *ptr;
};
+struct trace_ksym {
+ struct trace_entry ent;
+ struct hw_breakpoint *ksym_hbkpt;
+ unsigned long ksym_addr;
+ unsigned long ip;
+ struct hlist_node ksym_hlist;
+ char ksym_name[KSYM_NAME_LEN];
+ char p_name[TASK_COMM_LEN];
+};
+
/*
* trace_flag_type is an enumeration that holds different
* states when a trace occurs. These are:
@@ -303,6 +316,7 @@ extern void __ftrace_bad_type(void);
TRACE_KMEM_ALLOC); \
IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
TRACE_KMEM_FREE); \
+ IF_ASSIGN(var, ent, struct trace_ksym, TRACE_KSYM); \
__ftrace_bad_type(); \
} while (0)
@@ -564,6 +578,8 @@ extern int trace_selftest_startup_syspro
struct trace_array *tr);
extern int trace_selftest_startup_branch(struct tracer *trace,
struct trace_array *tr);
+extern int trace_selftest_startup_ksym(struct tracer *trace,
+ struct trace_array *tr);
#endif /* CONFIG_FTRACE_STARTUP_TEST */
extern void *head_page(struct trace_array_cpu *data);
Index: linux-2.6-tip/kernel/trace/trace_ksym.c
===================================================================
--- /dev/null
+++ linux-2.6-tip/kernel/trace/trace_ksym.c
@@ -0,0 +1,448 @@
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/ftrace.h>
+#include <linux/kallsyms.h>
+#include <linux/uaccess.h>
+
+#include "trace.h"
+#include "trace_output.h"
+
+/* For now, let us restrict the no. of symbols traced simultaneously to number
+ * of available hardware breakpoint registers.
+ */
+#define KSYM_TRACER_MAX HB_NUM
+
+#define KSYM_TRACER_OP_LEN 3 /* rw- */
+#define KSYM_FILTER_ENTRY_LEN (KSYM_NAME_LEN + KSYM_TRACER_OP_LEN + 1)
+
+#ifdef CONFIG_FTRACE_SELFTEST
+
+static int ksym_selftest_dummy;
+#define KSYM_SELFTEST_ENTRY "ksym_selftest_dummy"
+
+#endif /* CONFIG_FTRACE_SELFTEST */
+
+static struct trace_array *ksym_trace_array;
+
+DEFINE_MUTEX(ksym_tracer_mutex);
+
+static unsigned int ksym_filter_entry_count;
+static unsigned int ksym_tracing_enabled;
+
+static HLIST_HEAD(ksym_filter_head);
+
+/* HW Breakpoint related callback functions */
+void ksym_hbkpt_installed(struct hw_breakpoint *temp, struct pt_regs
+ *temp_regs)
+{
+}
+
+void ksym_hbkpt_uninstalled(struct hw_breakpoint *temp, struct
+ pt_regs * temp_regs)
+{
+}
+
+void ksym_hbkpt_handler(struct hw_breakpoint *hbkpt, struct pt_regs *regs)
+{
+ struct ring_buffer_event *event;
+ struct trace_array *tr;
+ struct trace_ksym *entry;
+ int pc;
+
+ if (!ksym_tracing_enabled)
+ return;
+
+ tr = ksym_trace_array;
+ pc = preempt_count();
+
+ event = trace_buffer_lock_reserve(tr, TRACE_KSYM,
+ sizeof(*entry), 0, pc);
+ if (!event)
+ return;
+
+ entry = ring_buffer_event_data(event);
+ strlcpy(entry->ksym_name, hbkpt->info.name, KSYM_SYMBOL_LEN);
+ entry->ksym_hbkpt = hbkpt;
+ entry->ip = instruction_pointer(regs);
+ strlcpy(entry->p_name, current->comm, TASK_COMM_LEN);
+
+ trace_buffer_unlock_commit(tr, event, 0, pc);
+}
+
+/* Valid access types are represented as
+ *
+ * rw- : Set Read/Write Access Breakpoint
+ * -w- : Set Write Access Breakpoint
+ * --- : Clear Breakpoints
+ * --x : Set Execution Break points (Not available yet)
+ *
+ */
+static int ksym_trace_get_access_type(char *access_str)
+{
+ int pos, access = 0;
+
+ for (pos = 0; pos < KSYM_TRACER_OP_LEN; pos++) {
+ switch (access_str[pos]) {
+ case 'r':
+ access += (pos == 0) ? 4 : -1;
+ break;
+ case 'w':
+ access += (pos == 1) ? 2 : -1;
+ break;
+ case '-':
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ switch (access) {
+ case 6:
+ access = HW_BREAKPOINT_RW;
+ break;
+ case 2:
+ access = HW_BREAKPOINT_WRITE;
+ break;
+ case 0:
+ access = 0;
+ }
+
+ return access;
+}
+
+/*
+ * There can be several possible malformed requests and we attempt to capture
+ * all of them. We enumerate some of the rules
+ * 1. We will not allow kernel symbols with ':' since it is used as a delimiter.
+ * i.e. multiple ':' symbols disallowed. Possible uses are of the form
+ * <module>:<ksym_name>:<op>.
+ * 2. No delimiter symbol ':' in the input string
+ * 3. Spurious operator symbols or symbols not in their respective positions
+ * 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file
+ * 5. Kernel symbol not a part of /proc/kallsyms
+ * 6. Duplicate requests
+ */
+static int parse_ksym_trace_str(char *input_string, char **ksymname,
+ unsigned long *addr)
+{
+ char *delimiter = ":";
+ int ret;
+
+ ret = -EINVAL;
+ *ksymname = strsep(&input_string, delimiter);
+ *addr = kallsyms_lookup_name(*ksymname);
+
+ /* Check for malformed request: (2), (1) and (5) */
+ if ((!input_string) ||
+ (strlen(input_string) != (KSYM_TRACER_OP_LEN + 1)) ||
+ (*addr == 0))
+ goto return_code;
+ ret = ksym_trace_get_access_type(input_string);
+
+return_code:
+ return ret;
+}
+
+static int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
+{
+ struct trace_ksym *entry;
+ int ret;
+
+ if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
+ printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No"
+ " new requests for tracing can be accepted now.\n",
+ KSYM_TRACER_MAX);
+ return -ENOSPC;
+ }
+
+ entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
+ if (!entry)
+ return -ENOMEM;
+
+ entry->ksym_hbkpt = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL);
+ if (!entry->ksym_hbkpt) {
+ kfree(entry);
+ return -ENOMEM;
+ }
+
+ entry->ksym_hbkpt->info.name = ksymname;
+ entry->ksym_hbkpt->info.type = op;
+ entry->ksym_addr = entry->ksym_hbkpt->info.address = addr;
+ entry->ksym_hbkpt->info.len = HW_BREAKPOINT_LEN_4;
+ entry->ksym_hbkpt->priority = HW_BREAKPOINT_PRIO_NORMAL;
+
+ entry->ksym_hbkpt->installed = (void *)ksym_hbkpt_installed;
+ entry->ksym_hbkpt->uninstalled = (void *)ksym_hbkpt_uninstalled;
+ entry->ksym_hbkpt->triggered = (void *)ksym_hbkpt_handler;
+
+ ret = register_kernel_hw_breakpoint(entry->ksym_hbkpt);
+ if (ret <= 0) {
+ printk(KERN_INFO "ksym_tracer request failed. Try again"
+ " later!!\n");
+ if (ret == 0)
+ unregister_kernel_hw_breakpoint(entry->ksym_hbkpt);
+ kfree(entry->ksym_hbkpt);
+ kfree(entry);
+ return -EAGAIN;
+ }
+ hlist_add_head(&(entry->ksym_hlist), &ksym_filter_head);
+ ksym_filter_entry_count++;
+
+ return 0;
+}
+
+static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ struct trace_ksym *entry;
+ struct hlist_node *node;
+ char buf[KSYM_FILTER_ENTRY_LEN * KSYM_TRACER_MAX];
+ ssize_t ret, cnt = 0;
+
+ mutex_lock(&ksym_tracer_mutex);
+
+ hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
+ cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt, "%s:",
+ entry->ksym_hbkpt->info.name);
+ if (entry->ksym_hbkpt->info.type == HW_BREAKPOINT_WRITE)
+ cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt,
+ "-w-\n");
+ else if (entry->ksym_hbkpt->info.type == HW_BREAKPOINT_RW)
+ cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt,
+ "rw-\n");
+ }
+ ret = simple_read_from_buffer(ubuf, count, ppos, buf, strlen(buf));
+ mutex_unlock(&ksym_tracer_mutex);
+
+ return ret;
+}
+
+static ssize_t ksym_trace_filter_write(struct file *file,
+ const char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ struct trace_ksym *entry;
+ struct hlist_node *node;
+ char *input_string, *ksymname = NULL;
+ unsigned long ksym_addr = 0;
+ int ret, op, changed = 0;
+
+ /* Ignore echo "" > ksym_trace_filter */
+ if (count == 0)
+ return 0;
+
+ input_string = kzalloc(count, GFP_KERNEL);
+ if (!input_string)
+ return -ENOMEM;
+
+ if (copy_from_user(input_string, buffer, count)) {
+ kfree(input_string);
+ return -EFAULT;
+ }
+
+ ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
+ if (ret < 0) {
+ kfree(input_string);
+ return ret;
+ }
+
+ mutex_lock(&ksym_tracer_mutex);
+
+ ret = -EINVAL;
+ hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
+ if (entry->ksym_addr == ksym_addr) {
+ /* Check for malformed request: (6) */
+ if (entry->ksym_hbkpt->info.type != op)
+ changed = 1;
+ else
+ goto err_ret;
+ break;
+ }
+ }
+ if (changed) {
+ unregister_kernel_hw_breakpoint(entry->ksym_hbkpt);
+ entry->ksym_hbkpt->info.type = op;
+ if (op > 0) {
+ ret = register_kernel_hw_breakpoint(entry->ksym_hbkpt);
+ if (ret > 0) {
+ ret = count;
+ goto unlock_ret_path;
+ }
+ if (ret == 0) {
+ ret = -ENOSPC;
+ unregister_kernel_hw_breakpoint(entry->\
+ ksym_hbkpt);
+ }
+ }
+ ksym_filter_entry_count--;
+ hlist_del(&(entry->ksym_hlist));
+ kfree(entry->ksym_hbkpt);
+ kfree(entry);
+ ret = count;
+ goto err_ret;
+ } else {
+ /* Check for malformed request: (4) */
+ if (op == 0)
+ goto err_ret;
+ ret = process_new_ksym_entry(ksymname, op, ksym_addr);
+ if (ret)
+ goto err_ret;
+ }
+ ret = count;
+ goto unlock_ret_path;
+
+err_ret:
+ kfree(input_string);
+
+unlock_ret_path:
+ mutex_unlock(&ksym_tracer_mutex);
+ return ret;
+}
+
+static const struct file_operations ksym_tracing_fops = {
+ .open = tracing_open_generic,
+ .read = ksym_trace_filter_read,
+ .write = ksym_trace_filter_write,
+};
+
+static void ksym_trace_reset(struct trace_array *tr)
+{
+ struct trace_ksym *entry;
+ struct hlist_node *node, *node1;
+
+ ksym_tracing_enabled = 0;
+
+ mutex_lock(&ksym_tracer_mutex);
+ hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
+ ksym_hlist) {
+ unregister_kernel_hw_breakpoint(entry->ksym_hbkpt);
+ ksym_filter_entry_count--;
+ hlist_del(&(entry->ksym_hlist));
+/* Free the 'input_string' only if reset after startup self-test */
+#ifdef CONFIG_FTRACE_SELFTEST
+ if (strncmp(entry->ksym_hbkpt->info.name, KSYM_SELFTEST_ENTRY,
+ strlen(KSYM_SELFTEST_ENTRY)) != 0)
+#endif /* CONFIG_FTRACE_SELFTEST*/
+ kfree(entry->ksym_hbkpt->info.name);
+ kfree(entry->ksym_hbkpt);
+ kfree(entry);
+ }
+ mutex_unlock(&ksym_tracer_mutex);
+
+}
+
+static int ksym_trace_init(struct trace_array *tr)
+{
+ int cpu, ret = 0;
+
+ for_each_online_cpu(cpu)
+ tracing_reset(tr, cpu);
+
+ ksym_tracing_enabled = 1;
+ ksym_trace_array = tr;
+
+#ifdef CONFIG_FTRACE_SELFTEST
+ /* Check if we are re-entering self-test code during initialisation */
+ if (ksym_selftest_dummy)
+ goto ret_path;
+
+ ksym_selftest_dummy = 0;
+
+ /* Register the read-write tracing request */
+ ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY, HW_BREAKPOINT_RW,
+ (unsigned long)(&ksym_selftest_dummy));
+
+ if (ret < 0) {
+ printk(KERN_CONT "ksym_trace read-write startup test failed\n");
+ goto ret_path;
+ }
+ /* Perform a read and a write operation over the dummy variable to
+ * trigger the tracer
+ */
+ if (ksym_selftest_dummy == 0)
+ ksym_selftest_dummy++;
+
+ret_path:
+#endif /* CONFIG_FTRACE_SELFTEST */
+
+ return ret;
+}
+
+static void ksym_trace_print_header(struct seq_file *m)
+{
+
+ seq_puts(m,
+ "# TASK-PID CPU# Symbol Type "
+ "Function \n");
+ seq_puts(m,
+ "# | | | | "
+ "| \n");
+}
+
+static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
+{
+ struct trace_entry *entry = iter->ent;
+ struct trace_seq *s = &iter->seq;
+ struct trace_ksym *field;
+ char str[KSYM_SYMBOL_LEN];
+ int ret;
+
+ trace_assign_type(field, entry);
+
+ ret = trace_seq_printf(s, "%-15s %-5d %-3d %-20s ", field->p_name,
+ entry->pid, iter->cpu, field->ksym_name);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ switch (field->ksym_hbkpt->info.type) {
+ case HW_BREAKPOINT_WRITE:
+ ret = trace_seq_printf(s, " W ");
+ break;
+ case HW_BREAKPOINT_RW:
+ ret = trace_seq_printf(s, " RW ");
+ break;
+ default:
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ sprint_symbol(str, field->ip);
+ ret = trace_seq_printf(s, "%-20s\n", str);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
+}
+
+struct tracer ksym_tracer __read_mostly =
+{
+ .name = "ksym_tracer",
+ .init = ksym_trace_init,
+ .reset = ksym_trace_reset,
+#ifdef CONFIG_FTRACE_SELFTEST
+ .selftest = trace_selftest_startup_ksym,
+#endif
+ .print_header = ksym_trace_print_header,
+ .print_line = ksym_trace_output
+};
+
+__init static int init_ksym_trace(void)
+{
+ struct dentry *d_tracer;
+ struct dentry *entry;
+
+ d_tracer = tracing_init_dentry();
+ ksym_filter_entry_count = 0;
+
+ entry = debugfs_create_file("ksym_trace_filter", 0666, d_tracer,
+ NULL, &ksym_tracing_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs "
+ "'ksym_trace_filter' file\n");
+
+ return register_tracer(&ksym_tracer);
+}
+device_initcall(init_ksym_trace);
Index: linux-2.6-tip/kernel/trace/trace_selftest.c
===================================================================
--- linux-2.6-tip.orig/kernel/trace/trace_selftest.c
+++ linux-2.6-tip/kernel/trace/trace_selftest.c
@@ -15,6 +15,7 @@ static inline int trace_valid_entry(stru
case TRACE_BRANCH:
case TRACE_GRAPH_ENT:
case TRACE_GRAPH_RET:
+ case TRACE_KSYM:
return 1;
}
return 0;
@@ -693,3 +694,38 @@ trace_selftest_startup_branch(struct tra
return ret;
}
#endif /* CONFIG_BRANCH_TRACER */
+
+#ifdef CONFIG_KSYM_TRACER
+int
+trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
+{
+ unsigned long count;
+ int ret;
+
+ /* start the tracing */
+ ret = tracer_init(trace, tr);
+ if (ret) {
+ warn_failed_init_tracer(trace, ret);
+ return ret;
+ }
+
+ /* Sleep for a 1/10 of a second */
+ msleep(100);
+ /* stop the tracing. */
+ tracing_stop();
+ /* check the trace buffer */
+ ret = trace_test_buffer(tr, &count);
+ trace->reset(tr);
+ tracing_start();
+
+ /* read & write operations - one each is performed on the dummy variable
+ * triggering two entries in the trace buffer
+ */
+ if (!ret && count != 2) {
+ printk(KERN_CONT "Ksym tracer startup test failed");
+ ret = -1;
+ }
+
+ return ret;
+}
+#endif /* CONFIG_KSYM_TRACER */
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v2
2009-03-07 5:07 ` prasad
@ 2009-03-07 14:53 ` KOSAKI Motohiro
2009-03-07 18:21 ` K.Prasad
2009-03-08 10:09 ` Ingo Molnar
0 siblings, 2 replies; 33+ messages in thread
From: KOSAKI Motohiro @ 2009-03-07 14:53 UTC (permalink / raw)
To: prasad
Cc: kosaki.motohiro, mingo, Andrew Morton, Linux Kernel Mailing List,
Alan Stern, Roland McGrath
Hi
> This patch adds an ftrace plugin to detect and profile memory access over
> kernel variables. It uses HW Breakpoint interfaces to 'watch memory
> addresses.
>
> Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
> ---
> kernel/trace/Kconfig | 6
> kernel/trace/Makefile | 1
> kernel/trace/trace.h | 16 +
> kernel/trace/trace_ksym.c | 448 ++++++++++++++++++++++++++++++++++++++++++
> kernel/trace/trace_selftest.c | 36 +++
> 5 files changed, 507 insertions(+)
Could you please update Documentation/ftrace.txt?
I guess many user interesting this patch. :)
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v2
2009-03-07 14:53 ` KOSAKI Motohiro
@ 2009-03-07 18:21 ` K.Prasad
2009-03-08 10:09 ` Ingo Molnar
1 sibling, 0 replies; 33+ messages in thread
From: K.Prasad @ 2009-03-07 18:21 UTC (permalink / raw)
To: KOSAKI Motohiro
Cc: mingo, Andrew Morton, Linux Kernel Mailing List, Alan Stern,
Roland McGrath
On Sat, Mar 07, 2009 at 11:53:46PM +0900, KOSAKI Motohiro wrote:
> Hi
>
> > This patch adds an ftrace plugin to detect and profile memory access over
> > kernel variables. It uses HW Breakpoint interfaces to 'watch memory
> > addresses.
> >
> > Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
> > ---
> > kernel/trace/Kconfig | 6
> > kernel/trace/Makefile | 1
> > kernel/trace/trace.h | 16 +
> > kernel/trace/trace_ksym.c | 448 ++++++++++++++++++++++++++++++++++++++++++
> > kernel/trace/trace_selftest.c | 36 +++
> > 5 files changed, 507 insertions(+)
>
> Could you please update Documentation/ftrace.txt?
> I guess many user interesting this patch. :)
>
Sure. We have plans for a documentation file for the hardware breakpoint
interfaces (something like Documentation/hw_breakpoint.txt). I shall
update the Documentation/ftrace.txt with usage details for the kernel
symbol tracing plugin along with them.
Thanks,
K.Prasad
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v2
2009-03-07 14:53 ` KOSAKI Motohiro
2009-03-07 18:21 ` K.Prasad
@ 2009-03-08 10:09 ` Ingo Molnar
2009-03-08 11:00 ` Frederic Weisbecker
2009-03-09 21:36 ` K.Prasad
1 sibling, 2 replies; 33+ messages in thread
From: Ingo Molnar @ 2009-03-08 10:09 UTC (permalink / raw)
To: KOSAKI Motohiro, Frédéric Weisbecker
Cc: prasad, Andrew Morton, Linux Kernel Mailing List, Alan Stern,
Roland McGrath
* KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> wrote:
> Hi
>
> > This patch adds an ftrace plugin to detect and profile memory access over
> > kernel variables. It uses HW Breakpoint interfaces to 'watch memory
> > addresses.
> >
> > Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
> > ---
> > kernel/trace/Kconfig | 6
> > kernel/trace/Makefile | 1
> > kernel/trace/trace.h | 16 +
> > kernel/trace/trace_ksym.c | 448 ++++++++++++++++++++++++++++++++++++++++++
> > kernel/trace/trace_selftest.c | 36 +++
> > 5 files changed, 507 insertions(+)
>
> Could you please update Documentation/ftrace.txt?
> I guess many user interesting this patch. :)
Yeah, it has become a really nice feature this way. As i told it
to K.Prasad before: we need this tracer because the data tracer
will likely become the most common usecase of this facility. We
will get the hw breakpoints facility tested and used.
And in fact we can go one step further: it would also be nice to
wire it up with the ftrace histogram code: so that we can get
usage histograms of kernel symbol read/write activities without
the overhead of tracing. (The branch tracer already has this.)
Especially frequently used variables generate a _lot_ of events.
Ingo
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v2
2009-03-08 10:09 ` Ingo Molnar
@ 2009-03-08 11:00 ` Frederic Weisbecker
2009-03-10 12:21 ` K.Prasad
2009-03-09 21:36 ` K.Prasad
1 sibling, 1 reply; 33+ messages in thread
From: Frederic Weisbecker @ 2009-03-08 11:00 UTC (permalink / raw)
To: Ingo Molnar
Cc: KOSAKI Motohiro, prasad, Andrew Morton, Linux Kernel Mailing List,
Alan Stern, Roland McGrath
On Sun, Mar 08, 2009 at 11:09:29AM +0100, Ingo Molnar wrote:
>
> * KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> wrote:
>
> > Hi
> >
> > > This patch adds an ftrace plugin to detect and profile memory access over
> > > kernel variables. It uses HW Breakpoint interfaces to 'watch memory
> > > addresses.
> > >
> > > Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
> > > ---
> > > kernel/trace/Kconfig | 6
> > > kernel/trace/Makefile | 1
> > > kernel/trace/trace.h | 16 +
> > > kernel/trace/trace_ksym.c | 448 ++++++++++++++++++++++++++++++++++++++++++
> > > kernel/trace/trace_selftest.c | 36 +++
> > > 5 files changed, 507 insertions(+)
> >
> > Could you please update Documentation/ftrace.txt?
> > I guess many user interesting this patch. :)
>
> Yeah, it has become a really nice feature this way. As i told it
> to K.Prasad before: we need this tracer because the data tracer
> will likely become the most common usecase of this facility. We
> will get the hw breakpoints facility tested and used.
>
> And in fact we can go one step further: it would also be nice to
> wire it up with the ftrace histogram code: so that we can get
> usage histograms of kernel symbol read/write activities without
> the overhead of tracing. (The branch tracer already has this.)
>
> Especially frequently used variables generate a _lot_ of events.
>
> Ingo
Right, it will even be an occasion to improve and further test
the histogram tracing.
K. Prasad if you need some help on how to use it, don't hesitate to tell.
Frederic.
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v2
2009-03-08 10:09 ` Ingo Molnar
2009-03-08 11:00 ` Frederic Weisbecker
@ 2009-03-09 21:36 ` K.Prasad
1 sibling, 0 replies; 33+ messages in thread
From: K.Prasad @ 2009-03-09 21:36 UTC (permalink / raw)
To: Ingo Molnar
Cc: KOSAKI Motohiro, Frédéric Weisbecker, Andrew Morton,
Linux Kernel Mailing List, Alan Stern, Roland McGrath
On Sun, Mar 08, 2009 at 11:09:29AM +0100, Ingo Molnar wrote:
>
> * KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> wrote:
>
> > Hi
> >
> > > This patch adds an ftrace plugin to detect and profile memory access over
> > > kernel variables. It uses HW Breakpoint interfaces to 'watch memory
> > > addresses.
> > >
> > > Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
> > > ---
> > > kernel/trace/Kconfig | 6
> > > kernel/trace/Makefile | 1
> > > kernel/trace/trace.h | 16 +
> > > kernel/trace/trace_ksym.c | 448 ++++++++++++++++++++++++++++++++++++++++++
> > > kernel/trace/trace_selftest.c | 36 +++
> > > 5 files changed, 507 insertions(+)
> >
> > Could you please update Documentation/ftrace.txt?
> > I guess many user interesting this patch. :)
>
> Yeah, it has become a really nice feature this way. As i told it
> to K.Prasad before: we need this tracer because the data tracer
> will likely become the most common usecase of this facility. We
> will get the hw breakpoints facility tested and used.
>
> And in fact we can go one step further: it would also be nice to
> wire it up with the ftrace histogram code: so that we can get
> usage histograms of kernel symbol read/write activities without
> the overhead of tracing. (The branch tracer already has this.)
>
> Especially frequently used variables generate a _lot_ of events.
>
> Ingo
I am looking at the histogram infrastructure, its implementation in
branch tracer for getting the same in ksym tracer. Meanwhile I'm thinking
if the patchset can be included in -tip tree for some good amount of
testing and feedback from the community before they make their way into
mainline (2.6.30?).
I will be glad if you can pick the patches to be a part of -tip tree.
Thanks,
K.Prasad
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v2
2009-03-08 11:00 ` Frederic Weisbecker
@ 2009-03-10 12:21 ` K.Prasad
2009-03-10 19:55 ` Frederic Weisbecker
0 siblings, 1 reply; 33+ messages in thread
From: K.Prasad @ 2009-03-10 12:21 UTC (permalink / raw)
To: Frederic Weisbecker
Cc: Ingo Molnar, KOSAKI Motohiro, Andrew Morton,
Linux Kernel Mailing List, Alan Stern, Roland McGrath,
Maneesh Soni
On Sun, Mar 08, 2009 at 12:00:40PM +0100, Frederic Weisbecker wrote:
> On Sun, Mar 08, 2009 at 11:09:29AM +0100, Ingo Molnar wrote:
> >
> > * KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> wrote:
> >
> > > Hi
> > >
> > > > This patch adds an ftrace plugin to detect and profile memory access over
> > > > kernel variables. It uses HW Breakpoint interfaces to 'watch memory
> > > > addresses.
> > > >
> > > > Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
> > > > ---
> > > > kernel/trace/Kconfig | 6
> > > > kernel/trace/Makefile | 1
> > > > kernel/trace/trace.h | 16 +
> > > > kernel/trace/trace_ksym.c | 448 ++++++++++++++++++++++++++++++++++++++++++
> > > > kernel/trace/trace_selftest.c | 36 +++
> > > > 5 files changed, 507 insertions(+)
> > >
> > > Could you please update Documentation/ftrace.txt?
> > > I guess many user interesting this patch. :)
> >
> > Yeah, it has become a really nice feature this way. As i told it
> > to K.Prasad before: we need this tracer because the data tracer
> > will likely become the most common usecase of this facility. We
> > will get the hw breakpoints facility tested and used.
> >
> > And in fact we can go one step further: it would also be nice to
> > wire it up with the ftrace histogram code: so that we can get
> > usage histograms of kernel symbol read/write activities without
> > the overhead of tracing. (The branch tracer already has this.)
> >
> > Especially frequently used variables generate a _lot_ of events.
> >
> > Ingo
>
> Right, it will even be an occasion to improve and further test
> the histogram tracing.
> K. Prasad if you need some help on how to use it, don't hesitate to tell.
>
> Frederic.
>
Hi Frederic,
Thanks for the offer of help.
As I try to get ksym tracer generate histogram information, I see a few
challenges and would like to know your thoughts about them.
- Unlike branch tracer which stores the branch stats in statically
declared data structures ('struct ftrace_branch_data'), ksym
generates data at runtime (during every memory access of interest on
the target variable) and would require dynamic memory allocation. Let's
consider the case where we want the information shown below in the
histogram:
Access_Type Symbol_name Function Counter
------------ ----------- -------- -------
W Sym_A Fn_A 10
W Sym_A Fn_B 15
RW Sym_C Fn_C 20
We need a data structure to store the above information and a new
instance of it for every new Fn_X that accesses Sym_X, while all this
information captured in the context of the hardware breakpoint
exception. I am not sure if dynamically allocating GFP_ATOMIC memory for
such a huge requirement is a good idea.
- Alternatively if we choose to statically declare a data section (as
done by the branch tracer). It should be accompanied by code to check
if we reached the end of section and wrap the pointer around. In effect, it
would become a ring-buffer containing only statistics about the
'snapshot' in the buffer, and not historically aggregated data.
- Removal of the 'Function' column to display only aggregate 'hit'
statistics would help reduce the complexity to a large extent, as the
counter can be embedded in the data structures containing
'ksym_trace_filter' information. But indeed, we are trading useful
information for simplicity.
- Perhaps your proposed enhancements to the 'trace_stat' infrastructure,
say - a generic buffering mechanism to store histogram related
information (or a framework to read through data in the ring-buffer)
would help solve many of the issues. Or is 'histogram' best done in
user-space?
Thanks,
K.Prasad
P.S.: You can refer me as 'Prasad' although I sign as above, which is a
patronymic nomenclature
(http://en.wikipedia.org/wiki/Patronymic_name#Indian_subcontinent).
Here's an illustration from another IBMer:
http://www.almaden.ibm.com/u/mohan/#name :-)
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v2
2009-03-10 12:21 ` K.Prasad
@ 2009-03-10 19:55 ` Frederic Weisbecker
0 siblings, 0 replies; 33+ messages in thread
From: Frederic Weisbecker @ 2009-03-10 19:55 UTC (permalink / raw)
To: K.Prasad
Cc: Ingo Molnar, KOSAKI Motohiro, Andrew Morton,
Linux Kernel Mailing List, Alan Stern, Roland McGrath,
Maneesh Soni
On Tue, Mar 10, 2009 at 05:51:02PM +0530, K.Prasad wrote:
> On Sun, Mar 08, 2009 at 12:00:40PM +0100, Frederic Weisbecker wrote:
> > On Sun, Mar 08, 2009 at 11:09:29AM +0100, Ingo Molnar wrote:
> > >
> > > * KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> wrote:
> > >
> > > > Hi
> > > >
> > > > > This patch adds an ftrace plugin to detect and profile memory access over
> > > > > kernel variables. It uses HW Breakpoint interfaces to 'watch memory
> > > > > addresses.
> > > > >
> > > > > Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
> > > > > ---
> > > > > kernel/trace/Kconfig | 6
> > > > > kernel/trace/Makefile | 1
> > > > > kernel/trace/trace.h | 16 +
> > > > > kernel/trace/trace_ksym.c | 448 ++++++++++++++++++++++++++++++++++++++++++
> > > > > kernel/trace/trace_selftest.c | 36 +++
> > > > > 5 files changed, 507 insertions(+)
> > > >
> > > > Could you please update Documentation/ftrace.txt?
> > > > I guess many user interesting this patch. :)
> > >
> > > Yeah, it has become a really nice feature this way. As i told it
> > > to K.Prasad before: we need this tracer because the data tracer
> > > will likely become the most common usecase of this facility. We
> > > will get the hw breakpoints facility tested and used.
> > >
> > > And in fact we can go one step further: it would also be nice to
> > > wire it up with the ftrace histogram code: so that we can get
> > > usage histograms of kernel symbol read/write activities without
> > > the overhead of tracing. (The branch tracer already has this.)
> > >
> > > Especially frequently used variables generate a _lot_ of events.
> > >
> > > Ingo
> >
> > Right, it will even be an occasion to improve and further test
> > the histogram tracing.
> > K. Prasad if you need some help on how to use it, don't hesitate to tell.
> >
> > Frederic.
> >
>
> Hi Frederic,
> Thanks for the offer of help.
>
> As I try to get ksym tracer generate histogram information, I see a few
> challenges and would like to know your thoughts about them.
>
> - Unlike branch tracer which stores the branch stats in statically
> declared data structures ('struct ftrace_branch_data'), ksym
> generates data at runtime (during every memory access of interest on
> the target variable) and would require dynamic memory allocation. Let's
> consider the case where we want the information shown below in the
> histogram:
>
> Access_Type Symbol_name Function Counter
> ------------ ----------- -------- -------
> W Sym_A Fn_A 10
> W Sym_A Fn_B 15
> RW Sym_C Fn_C 20
>
> We need a data structure to store the above information and a new
> instance of it for every new Fn_X that accesses Sym_X, while all this
> information captured in the context of the hardware breakpoint
> exception. I am not sure if dynamically allocating GFP_ATOMIC memory for
> such a huge requirement is a good idea.
Ah, I see... And the number of functions that will dereference it is not
predictable...
> - Alternatively if we choose to statically declare a data section (as
> done by the branch tracer). It should be accompanied by code to check
> if we reached the end of section and wrap the pointer around. In effect, it
> would become a ring-buffer containing only statistics about the
> 'snapshot' in the buffer, and not historically aggregated data.
That's possible, but we could lose interesting cases of functions that didn't
dereference a variable for a while but did it often, because they could be
overridden by another. Anyway, it seems a good idea.
> - Removal of the 'Function' column to display only aggregate 'hit'
> statistics would help reduce the complexity to a large extent, as the
> counter can be embedded in the data structures containing
> 'ksym_trace_filter' information. But indeed, we are trading useful
> information for simplicity.
As you said, it would be too much a loss of useful informations.
> - Perhaps your proposed enhancements to the 'trace_stat' infrastructure,
> say - a generic buffering mechanism to store histogram related
> information (or a framework to read through data in the ring-buffer)
> would help solve many of the issues. Or is 'histogram' best done in
> user-space?
For now it's just something that provides some abstracts over the seq file
and sorting facilities.
And if it can be enhanced in any way that can help, it would be great. But
I don't know how I could write something generic enough to support any kind
of problem that matches yours in its pattern.
In such case, the stat tracing looks a bit like regular events tracing: we
don't want to allocate memory for all entries because of the different context
sources and because of the allocation overhead, but we still want to store
all of the events.
Such thing could rely on the ring-buffer that we are using, but we would need to
create a sort of private instance of the ring buffer because we will store our hits
and then we will read all that from the ring buffer directly to get the stats sum.
I'm not sure we can create such private instances yet.
For now I would suggest to pre-allocate a set of entries for each breakpoints, using
a predefined number (I call it n here) of entries and count the hits for the n first
functions that were trapped on the breakpoint.
And if you missed some functions because n is too small, then increment an overrun
variable for the current breakpoint that you can display with the stats.
So that the user will know that he missed some things.
I guess it could be accompanied by a file to change the value of n.
It's just an opinion anyway.
> Thanks,
> K.Prasad
> P.S.: You can refer me as 'Prasad' although I sign as above, which is a
> patronymic nomenclature
> (http://en.wikipedia.org/wiki/Patronymic_name#Indian_subcontinent).
> Here's an illustration from another IBMer:
> http://www.almaden.ibm.com/u/mohan/#name :-)
Ok :-)
Thanks.
^ permalink raw reply [flat|nested] 33+ messages in thread
* [Patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v2
[not found] <20090319234044.410725944@K.Prasad>
@ 2009-03-19 23:50 ` K.Prasad
2009-03-20 9:04 ` Frederic Weisbecker
0 siblings, 1 reply; 33+ messages in thread
From: K.Prasad @ 2009-03-19 23:50 UTC (permalink / raw)
To: Ingo Molnar, Linux Kernel Mailing List
Cc: Alan Stern, Andrew Morton, Benjamin Herrenschmidt,
Frederic Weisbecker, Maneesh Soni, Roland McGrath, Steven Rostedt,
K.Prasad
[-- Attachment #1: ftrace_hbkpt_12 --]
[-- Type: text/plain, Size: 19482 bytes --]
This patch adds an ftrace plugin to detect and profile memory access over
kernel variables. It uses HW Breakpoint interfaces to 'watch' memory
addresses.
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
---
kernel/trace/Kconfig | 21 +
kernel/trace/Makefile | 1
kernel/trace/trace.h | 25 +
kernel/trace/trace_ksym.c | 555 ++++++++++++++++++++++++++++++++++++++++++
kernel/trace/trace_selftest.c | 36 ++
5 files changed, 638 insertions(+)
Index: linux-2.6-tip.hbkpt/kernel/trace/Kconfig
===================================================================
--- linux-2.6-tip.hbkpt.orig/kernel/trace/Kconfig
+++ linux-2.6-tip.hbkpt/kernel/trace/Kconfig
@@ -264,6 +264,27 @@ config POWER_TRACER
power management decisions, specifically the C-state and P-state
behavior.
+config KSYM_TRACER
+ bool "Trace read and write access on kernel memory locations"
+ depends on HAVE_HW_BREAKPOINT
+ select TRACING
+ help
+ This tracer helps find read and write operations on any given kernel
+ symbol i.e. /proc/kallsyms.
+
+config PROFILE_KSYM_TRACER
+ bool "Profile all kernel memory accesses on 'watched' variables"
+ depends on KSYM_TRACER
+ help
+ This tracer profiles kernel accesses on variables watched through the
+ ksym tracer ftrace plugin. Depending upon the hardware, all read
+ and write operations on kernel variables can be monitored for
+ accesses.
+
+ The results will be displayed in:
+ /debugfs/tracing/profile_ksym
+
+ Say N if unsure.
config STACK_TRACER
bool "Trace max stack"
Index: linux-2.6-tip.hbkpt/kernel/trace/Makefile
===================================================================
--- linux-2.6-tip.hbkpt.orig/kernel/trace/Makefile
+++ linux-2.6-tip.hbkpt/kernel/trace/Makefile
@@ -44,5 +44,6 @@ obj-$(CONFIG_EVENT_TRACER) += trace_even
obj-$(CONFIG_EVENT_TRACER) += events.o
obj-$(CONFIG_EVENT_TRACER) += trace_export.o
obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
+obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
libftrace-y := ftrace.o
Index: linux-2.6-tip.hbkpt/kernel/trace/trace.h
===================================================================
--- linux-2.6-tip.hbkpt.orig/kernel/trace/trace.h
+++ linux-2.6-tip.hbkpt/kernel/trace/trace.h
@@ -12,6 +12,10 @@
#include <trace/kmemtrace.h>
#include <trace/power.h>
+#ifdef CONFIG_KSYM_TRACER
+#include <asm/hw_breakpoint.h>
+#endif
+
enum trace_type {
__TRACE_FIRST_TYPE = 0,
@@ -37,6 +41,7 @@ enum trace_type {
TRACE_KMEM_FREE,
TRACE_POWER,
TRACE_BLK,
+ TRACE_KSYM,
__TRACE_LAST_TYPE,
};
@@ -214,6 +219,23 @@ struct syscall_trace_exit {
unsigned long ret;
};
+#ifdef CONFIG_KSYM_TRACER
+struct trace_ksym {
+ struct trace_entry ent;
+ struct hw_breakpoint *ksym_hbkpt;
+ unsigned long ksym_addr;
+ unsigned long ip;
+#ifdef CONFIG_PROFILE_KSYM_TRACER
+ unsigned long counter;
+#endif
+ struct hlist_node ksym_hlist;
+ char ksym_name[KSYM_NAME_LEN];
+ char p_name[TASK_COMM_LEN];
+};
+#else
+struct trace_ksym {
+};
+#endif /* CONFIG_KSYM_TRACER */
/*
* trace_flag_type is an enumeration that holds different
@@ -332,6 +354,7 @@ extern void __ftrace_bad_type(void);
TRACE_SYSCALL_ENTER); \
IF_ASSIGN(var, ent, struct syscall_trace_exit, \
TRACE_SYSCALL_EXIT); \
+ IF_ASSIGN(var, ent, struct trace_ksym, TRACE_KSYM); \
__ftrace_bad_type(); \
} while (0)
@@ -593,6 +616,8 @@ extern int trace_selftest_startup_syspro
struct trace_array *tr);
extern int trace_selftest_startup_branch(struct tracer *trace,
struct trace_array *tr);
+extern int trace_selftest_startup_ksym(struct tracer *trace,
+ struct trace_array *tr);
#endif /* CONFIG_FTRACE_STARTUP_TEST */
extern void *head_page(struct trace_array_cpu *data);
Index: linux-2.6-tip.hbkpt/kernel/trace/trace_ksym.c
===================================================================
--- /dev/null
+++ linux-2.6-tip.hbkpt/kernel/trace/trace_ksym.c
@@ -0,0 +1,555 @@
+/*
+ * trace_ksym.c - Kernel Symbol Tracer
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2009
+ */
+
+#include <linux/kallsyms.h>
+#include <linux/uaccess.h>
+#include <linux/debugfs.h>
+#include <linux/ftrace.h>
+#include <linux/module.h>
+#include <linux/jhash.h>
+#include <linux/fs.h>
+
+#include "trace_output.h"
+#include "trace_stat.h"
+#include "trace.h"
+
+/* For now, let us restrict the no. of symbols traced simultaneously to number
+ * of available hardware breakpoint registers.
+ */
+#define KSYM_TRACER_MAX HB_NUM
+
+#define KSYM_TRACER_OP_LEN 3 /* rw- */
+#define KSYM_FILTER_ENTRY_LEN (KSYM_NAME_LEN + KSYM_TRACER_OP_LEN + 1)
+
+#ifdef CONFIG_FTRACE_SELFTEST
+
+static int ksym_selftest_dummy;
+#define KSYM_SELFTEST_ENTRY "ksym_selftest_dummy"
+
+#endif /* CONFIG_FTRACE_SELFTEST */
+
+static struct trace_array *ksym_trace_array;
+
+DEFINE_MUTEX(ksym_tracer_mutex);
+
+static unsigned int ksym_filter_entry_count;
+static unsigned int ksym_tracing_enabled;
+
+static HLIST_HEAD(ksym_filter_head);
+
+#ifdef CONFIG_PROFILE_KSYM_TRACER
+
+#define MAX_UL_INT 0xffffffff
+DEFINE_SPINLOCK(ksym_stat_lock);
+
+void ksym_collect_stats(unsigned long hbkpt_hit_addr)
+{
+ struct hlist_node *node;
+ struct trace_ksym *entry;
+
+ spin_lock(&ksym_stat_lock);
+ hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
+ if ((entry->ksym_addr == hbkpt_hit_addr) &&
+ (entry->counter <= MAX_UL_INT)) {
+ entry->counter++;
+ break;
+ }
+ }
+ spin_unlock(&ksym_stat_lock);
+}
+#endif /* CONFIG_PROFILE_KSYM_TRACER */
+
+void ksym_hbkpt_handler(struct hw_breakpoint *hbkpt, struct pt_regs *regs)
+{
+ struct ring_buffer_event *event;
+ struct trace_array *tr;
+ struct trace_ksym *entry;
+ int pc;
+
+ if (!ksym_tracing_enabled)
+ return;
+
+ tr = ksym_trace_array;
+ pc = preempt_count();
+
+ event = trace_buffer_lock_reserve(tr, TRACE_KSYM,
+ sizeof(*entry), 0, pc);
+ if (!event)
+ return;
+
+ entry = ring_buffer_event_data(event);
+ strlcpy(entry->ksym_name, hbkpt->info.name, KSYM_SYMBOL_LEN);
+ entry->ksym_hbkpt = hbkpt;
+ entry->ip = instruction_pointer(regs);
+ strlcpy(entry->p_name, current->comm, TASK_COMM_LEN);
+#ifdef CONFIG_PROFILE_KSYM_TRACER
+ ksym_collect_stats(hbkpt->info.address);
+#endif /* CONFIG_PROFILE_KSYM_TRACER */
+
+ trace_buffer_unlock_commit(tr, event, 0, pc);
+}
+
+/* Valid access types are represented as
+ *
+ * rw- : Set Read/Write Access Breakpoint
+ * -w- : Set Write Access Breakpoint
+ * --- : Clear Breakpoints
+ * --x : Set Execution Break points (Not available yet)
+ *
+ */
+static int ksym_trace_get_access_type(char *access_str)
+{
+ int pos, access = 0;
+
+ for (pos = 0; pos < KSYM_TRACER_OP_LEN; pos++) {
+ switch (access_str[pos]) {
+ case 'r':
+ access += (pos == 0) ? 4 : -1;
+ break;
+ case 'w':
+ access += (pos == 1) ? 2 : -1;
+ break;
+ case '-':
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ switch (access) {
+ case 6:
+ access = HW_BREAKPOINT_RW;
+ break;
+ case 2:
+ access = HW_BREAKPOINT_WRITE;
+ break;
+ case 0:
+ access = 0;
+ }
+
+ return access;
+}
+
+/*
+ * There can be several possible malformed requests and we attempt to capture
+ * all of them. We enumerate some of the rules
+ * 1. We will not allow kernel symbols with ':' since it is used as a delimiter.
+ * i.e. multiple ':' symbols disallowed. Possible uses are of the form
+ * <module>:<ksym_name>:<op>.
+ * 2. No delimiter symbol ':' in the input string
+ * 3. Spurious operator symbols or symbols not in their respective positions
+ * 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file
+ * 5. Kernel symbol not a part of /proc/kallsyms
+ * 6. Duplicate requests
+ */
+static int parse_ksym_trace_str(char *input_string, char **ksymname,
+ unsigned long *addr)
+{
+ char *delimiter = ":";
+ int ret;
+
+ ret = -EINVAL;
+ *ksymname = strsep(&input_string, delimiter);
+ *addr = kallsyms_lookup_name(*ksymname);
+
+ /* Check for malformed request: (2), (1) and (5) */
+ if ((!input_string) ||
+ (strlen(input_string) != (KSYM_TRACER_OP_LEN + 1)) ||
+ (*addr == 0))
+ goto return_code;
+ ret = ksym_trace_get_access_type(input_string);
+
+return_code:
+ return ret;
+}
+
+static int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
+{
+ struct trace_ksym *entry;
+ int ret;
+
+ if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
+ printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No"
+ " new requests for tracing can be accepted now.\n",
+ KSYM_TRACER_MAX);
+ return -ENOSPC;
+ }
+
+ entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
+ if (!entry)
+ return -ENOMEM;
+
+ entry->ksym_hbkpt = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL);
+ if (!entry->ksym_hbkpt) {
+ kfree(entry);
+ return -ENOMEM;
+ }
+
+ entry->ksym_hbkpt->info.name = ksymname;
+ entry->ksym_hbkpt->info.type = op;
+ entry->ksym_addr = entry->ksym_hbkpt->info.address = addr;
+ entry->ksym_hbkpt->info.len = HW_BREAKPOINT_LEN_4;
+
+ entry->ksym_hbkpt->triggered = (void *)ksym_hbkpt_handler;
+
+ ret = register_kernel_hw_breakpoint(entry->ksym_hbkpt);
+ if (ret < 0) {
+ printk(KERN_INFO "ksym_tracer request failed. Try again"
+ " later!!\n");
+ kfree(entry->ksym_hbkpt);
+ kfree(entry);
+ return -EAGAIN;
+ }
+ hlist_add_head(&(entry->ksym_hlist), &ksym_filter_head);
+ ksym_filter_entry_count++;
+
+ return 0;
+}
+
+static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ struct trace_ksym *entry;
+ struct hlist_node *node;
+ char buf[KSYM_FILTER_ENTRY_LEN * KSYM_TRACER_MAX];
+ ssize_t ret, cnt = 0;
+
+ mutex_lock(&ksym_tracer_mutex);
+
+ hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
+ cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt, "%s:",
+ entry->ksym_hbkpt->info.name);
+ if (entry->ksym_hbkpt->info.type == HW_BREAKPOINT_WRITE)
+ cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt,
+ "-w-\n");
+ else if (entry->ksym_hbkpt->info.type == HW_BREAKPOINT_RW)
+ cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt,
+ "rw-\n");
+ }
+ ret = simple_read_from_buffer(ubuf, count, ppos, buf, strlen(buf));
+ mutex_unlock(&ksym_tracer_mutex);
+
+ return ret;
+}
+
+static ssize_t ksym_trace_filter_write(struct file *file,
+ const char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ struct trace_ksym *entry;
+ struct hlist_node *node;
+ char *input_string, *ksymname = NULL;
+ unsigned long ksym_addr = 0;
+ int ret, op, changed = 0;
+
+ /* Ignore echo "" > ksym_trace_filter */
+ if (count == 0)
+ return 0;
+
+ input_string = kzalloc(count, GFP_KERNEL);
+ if (!input_string)
+ return -ENOMEM;
+
+ if (copy_from_user(input_string, buffer, count)) {
+ kfree(input_string);
+ return -EFAULT;
+ }
+
+ ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
+ if (ret < 0) {
+ kfree(input_string);
+ return ret;
+ }
+
+ mutex_lock(&ksym_tracer_mutex);
+
+ ret = -EINVAL;
+ hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
+ if (entry->ksym_addr == ksym_addr) {
+ /* Check for malformed request: (6) */
+ if (entry->ksym_hbkpt->info.type != op)
+ changed = 1;
+ else
+ goto err_ret;
+ break;
+ }
+ }
+ if (changed) {
+ unregister_kernel_hw_breakpoint(entry->ksym_hbkpt);
+ entry->ksym_hbkpt->info.type = op;
+ if (op > 0) {
+ ret = register_kernel_hw_breakpoint(entry->ksym_hbkpt);
+ if (ret == 0) {
+ ret = count;
+ goto unlock_ret_path;
+ }
+ }
+ ksym_filter_entry_count--;
+ hlist_del(&(entry->ksym_hlist));
+ kfree(entry->ksym_hbkpt);
+ kfree(entry);
+ ret = count;
+ goto err_ret;
+ } else {
+ /* Check for malformed request: (4) */
+ if (op == 0)
+ goto err_ret;
+ ret = process_new_ksym_entry(ksymname, op, ksym_addr);
+ if (ret)
+ goto err_ret;
+ }
+ ret = count;
+ goto unlock_ret_path;
+
+err_ret:
+ kfree(input_string);
+
+unlock_ret_path:
+ mutex_unlock(&ksym_tracer_mutex);
+ return ret;
+}
+
+static const struct file_operations ksym_tracing_fops = {
+ .open = tracing_open_generic,
+ .read = ksym_trace_filter_read,
+ .write = ksym_trace_filter_write,
+};
+
+static void ksym_trace_reset(struct trace_array *tr)
+{
+ struct trace_ksym *entry;
+ struct hlist_node *node, *node1;
+
+ ksym_tracing_enabled = 0;
+
+ mutex_lock(&ksym_tracer_mutex);
+ hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
+ ksym_hlist) {
+ unregister_kernel_hw_breakpoint(entry->ksym_hbkpt);
+ ksym_filter_entry_count--;
+ hlist_del(&(entry->ksym_hlist));
+
+ /* Free the 'input_string' only if reset
+ * after startup self-test
+ */
+#ifdef CONFIG_FTRACE_SELFTEST
+ if (strncmp(entry->ksym_hbkpt->info.name, KSYM_SELFTEST_ENTRY,
+ strlen(KSYM_SELFTEST_ENTRY)) != 0)
+#endif /* CONFIG_FTRACE_SELFTEST*/
+ kfree(entry->ksym_hbkpt->info.name);
+ kfree(entry->ksym_hbkpt);
+ kfree(entry);
+ }
+ mutex_unlock(&ksym_tracer_mutex);
+
+}
+
+static int ksym_trace_init(struct trace_array *tr)
+{
+ int cpu, ret = 0;
+
+ for_each_online_cpu(cpu)
+ tracing_reset(tr, cpu);
+
+ ksym_tracing_enabled = 1;
+ ksym_trace_array = tr;
+
+#ifdef CONFIG_FTRACE_SELFTEST
+ /* Check if we are re-entering self-test code during initialisation */
+ if (ksym_selftest_dummy)
+ goto ret_path;
+
+ ksym_selftest_dummy = 0;
+
+ /* Register the read-write tracing request */
+ ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY, HW_BREAKPOINT_RW,
+ (unsigned long)(&ksym_selftest_dummy));
+
+ if (ret < 0) {
+ printk(KERN_CONT "ksym_trace read-write startup test failed\n");
+ goto ret_path;
+ }
+ /* Perform a read and a write operation over the dummy variable to
+ * trigger the tracer
+ */
+ if (ksym_selftest_dummy == 0)
+ ksym_selftest_dummy++;
+
+ret_path:
+#endif /* CONFIG_FTRACE_SELFTEST */
+
+ return ret;
+}
+
+static void ksym_trace_print_header(struct seq_file *m)
+{
+
+ seq_puts(m,
+ "# TASK-PID CPU# Symbol Type "
+ "Function \n");
+ seq_puts(m,
+ "# | | | | "
+ "| \n");
+}
+
+static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
+{
+ struct trace_entry *entry = iter->ent;
+ struct trace_seq *s = &iter->seq;
+ struct trace_ksym *field;
+ char str[KSYM_SYMBOL_LEN];
+ int ret;
+
+ trace_assign_type(field, entry);
+
+ ret = trace_seq_printf(s, "%-15s %-5d %-3d %-20s ", field->p_name,
+ entry->pid, iter->cpu, field->ksym_name);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ switch (field->ksym_hbkpt->info.type) {
+ case HW_BREAKPOINT_WRITE:
+ ret = trace_seq_printf(s, " W ");
+ break;
+ case HW_BREAKPOINT_RW:
+ ret = trace_seq_printf(s, " RW ");
+ break;
+ default:
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ sprint_symbol(str, field->ip);
+ ret = trace_seq_printf(s, "%-20s\n", str);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
+}
+
+struct tracer ksym_tracer __read_mostly =
+{
+ .name = "ksym_tracer",
+ .init = ksym_trace_init,
+ .reset = ksym_trace_reset,
+#ifdef CONFIG_FTRACE_SELFTEST
+ .selftest = trace_selftest_startup_ksym,
+#endif
+ .print_header = ksym_trace_print_header,
+ .print_line = ksym_trace_output
+};
+
+__init static int init_ksym_trace(void)
+{
+ struct dentry *d_tracer;
+ struct dentry *entry;
+
+ d_tracer = tracing_init_dentry();
+ ksym_filter_entry_count = 0;
+
+ entry = debugfs_create_file("ksym_trace_filter", 0666, d_tracer,
+ NULL, &ksym_tracing_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs "
+ "'ksym_trace_filter' file\n");
+
+ return register_tracer(&ksym_tracer);
+}
+device_initcall(init_ksym_trace);
+
+
+#ifdef CONFIG_PROFILE_KSYM_TRACER
+static int ksym_tracer_stat_headers(struct seq_file *m)
+{
+ seq_printf(m, " Access type ");
+ seq_printf(m, " Symbol Counter \n");
+ return 0;
+}
+
+static int ksym_tracer_stat_show(struct seq_file *m, void *v)
+{
+ struct hlist_node *stat = v;
+ struct trace_ksym *entry;
+ int access_type = 0;
+ char fn_name[KSYM_NAME_LEN];
+
+ entry = hlist_entry(stat, struct trace_ksym, ksym_hlist);
+
+ if (entry->ksym_hbkpt)
+ access_type = entry->ksym_hbkpt->info.type;
+
+ switch (access_type) {
+ case HW_BREAKPOINT_WRITE:
+ seq_printf(m, " W ");
+ break;
+ case HW_BREAKPOINT_RW:
+ seq_printf(m, " RW ");
+ break;
+ default:
+ seq_printf(m, " NA ");
+ }
+
+ if (lookup_symbol_name(entry->ksym_addr, fn_name) >= 0)
+ seq_printf(m, " %s ", fn_name);
+ else
+ seq_printf(m, " <NA> ");
+
+ seq_printf(m, "%15lu\n", entry->counter);
+ return 0;
+}
+
+static void *ksym_tracer_stat_start(void)
+{
+ return &(ksym_filter_head.first);
+}
+
+static void *
+ksym_tracer_stat_next(void *v, int idx)
+{
+ struct hlist_node *stat = v;
+
+ return stat->next;
+}
+
+static struct tracer_stat ksym_tracer_stats = {
+ .name = "ksym_tracer",
+ .stat_start = ksym_tracer_stat_start,
+ .stat_next = ksym_tracer_stat_next,
+ .stat_headers = ksym_tracer_stat_headers,
+ .stat_show = ksym_tracer_stat_show
+};
+
+__init static int ksym_tracer_stat_init(void)
+{
+ int ret;
+
+ ret = register_stat_tracer(&ksym_tracer_stats);
+ if (!ret) {
+ printk(KERN_WARNING "Warning: could not register "
+ "ksym tracer stats\n");
+ return 1;
+ }
+
+ return 0;
+}
+fs_initcall(ksym_tracer_stat_init);
+#endif /* CONFIG_PROFILE_KSYM_TRACER */
Index: linux-2.6-tip.hbkpt/kernel/trace/trace_selftest.c
===================================================================
--- linux-2.6-tip.hbkpt.orig/kernel/trace/trace_selftest.c
+++ linux-2.6-tip.hbkpt/kernel/trace/trace_selftest.c
@@ -16,6 +16,7 @@ static inline int trace_valid_entry(stru
case TRACE_BRANCH:
case TRACE_GRAPH_ENT:
case TRACE_GRAPH_RET:
+ case TRACE_KSYM:
return 1;
}
return 0;
@@ -687,3 +688,38 @@ trace_selftest_startup_branch(struct tra
return ret;
}
#endif /* CONFIG_BRANCH_TRACER */
+
+#ifdef CONFIG_KSYM_TRACER
+int
+trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
+{
+ unsigned long count;
+ int ret;
+
+ /* start the tracing */
+ ret = tracer_init(trace, tr);
+ if (ret) {
+ warn_failed_init_tracer(trace, ret);
+ return ret;
+ }
+
+ /* Sleep for a 1/10 of a second */
+ msleep(100);
+ /* stop the tracing. */
+ tracing_stop();
+ /* check the trace buffer */
+ ret = trace_test_buffer(tr, &count);
+ trace->reset(tr);
+ tracing_start();
+
+ /* read & write operations - one each is performed on the dummy variable
+ * triggering two entries in the trace buffer
+ */
+ if (!ret && count != 2) {
+ printk(KERN_CONT "Ksym tracer startup test failed");
+ ret = -1;
+ }
+
+ return ret;
+}
+#endif /* CONFIG_KSYM_TRACER */
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v2
2009-03-19 23:50 ` K.Prasad
@ 2009-03-20 9:04 ` Frederic Weisbecker
2009-03-21 16:24 ` K.Prasad
0 siblings, 1 reply; 33+ messages in thread
From: Frederic Weisbecker @ 2009-03-20 9:04 UTC (permalink / raw)
To: K.Prasad
Cc: Ingo Molnar, Linux Kernel Mailing List, Alan Stern, Andrew Morton,
Benjamin Herrenschmidt, Maneesh Soni, Roland McGrath,
Steven Rostedt
On Fri, Mar 20, 2009 at 05:20:32AM +0530, K.Prasad wrote:
> This patch adds an ftrace plugin to detect and profile memory access over
> kernel variables. It uses HW Breakpoint interfaces to 'watch' memory
> addresses.
>
> Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
> ---
> kernel/trace/Kconfig | 21 +
> kernel/trace/Makefile | 1
> kernel/trace/trace.h | 25 +
> kernel/trace/trace_ksym.c | 555 ++++++++++++++++++++++++++++++++++++++++++
> kernel/trace/trace_selftest.c | 36 ++
> 5 files changed, 638 insertions(+)
>
> Index: linux-2.6-tip.hbkpt/kernel/trace/Kconfig
> ===================================================================
> --- linux-2.6-tip.hbkpt.orig/kernel/trace/Kconfig
> +++ linux-2.6-tip.hbkpt/kernel/trace/Kconfig
> @@ -264,6 +264,27 @@ config POWER_TRACER
> power management decisions, specifically the C-state and P-state
> behavior.
>
> +config KSYM_TRACER
> + bool "Trace read and write access on kernel memory locations"
> + depends on HAVE_HW_BREAKPOINT
> + select TRACING
> + help
> + This tracer helps find read and write operations on any given kernel
> + symbol i.e. /proc/kallsyms.
> +
> +config PROFILE_KSYM_TRACER
> + bool "Profile all kernel memory accesses on 'watched' variables"
> + depends on KSYM_TRACER
> + help
> + This tracer profiles kernel accesses on variables watched through the
> + ksym tracer ftrace plugin. Depending upon the hardware, all read
> + and write operations on kernel variables can be monitored for
> + accesses.
> +
> + The results will be displayed in:
> + /debugfs/tracing/profile_ksym
> +
> + Say N if unsure.
>
> config STACK_TRACER
> bool "Trace max stack"
> Index: linux-2.6-tip.hbkpt/kernel/trace/Makefile
> ===================================================================
> --- linux-2.6-tip.hbkpt.orig/kernel/trace/Makefile
> +++ linux-2.6-tip.hbkpt/kernel/trace/Makefile
> @@ -44,5 +44,6 @@ obj-$(CONFIG_EVENT_TRACER) += trace_even
> obj-$(CONFIG_EVENT_TRACER) += events.o
> obj-$(CONFIG_EVENT_TRACER) += trace_export.o
> obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
> +obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
>
> libftrace-y := ftrace.o
> Index: linux-2.6-tip.hbkpt/kernel/trace/trace.h
> ===================================================================
> --- linux-2.6-tip.hbkpt.orig/kernel/trace/trace.h
> +++ linux-2.6-tip.hbkpt/kernel/trace/trace.h
> @@ -12,6 +12,10 @@
> #include <trace/kmemtrace.h>
> #include <trace/power.h>
>
> +#ifdef CONFIG_KSYM_TRACER
> +#include <asm/hw_breakpoint.h>
> +#endif
> +
> enum trace_type {
> __TRACE_FIRST_TYPE = 0,
>
> @@ -37,6 +41,7 @@ enum trace_type {
> TRACE_KMEM_FREE,
> TRACE_POWER,
> TRACE_BLK,
> + TRACE_KSYM,
>
> __TRACE_LAST_TYPE,
> };
> @@ -214,6 +219,23 @@ struct syscall_trace_exit {
> unsigned long ret;
> };
>
> +#ifdef CONFIG_KSYM_TRACER
> +struct trace_ksym {
> + struct trace_entry ent;
> + struct hw_breakpoint *ksym_hbkpt;
> + unsigned long ksym_addr;
> + unsigned long ip;
> +#ifdef CONFIG_PROFILE_KSYM_TRACER
> + unsigned long counter;
> +#endif
> + struct hlist_node ksym_hlist;
> + char ksym_name[KSYM_NAME_LEN];
> + char p_name[TASK_COMM_LEN];
> +};
> +#else
> +struct trace_ksym {
> +};
> +#endif /* CONFIG_KSYM_TRACER */
>
> /*
> * trace_flag_type is an enumeration that holds different
> @@ -332,6 +354,7 @@ extern void __ftrace_bad_type(void);
> TRACE_SYSCALL_ENTER); \
> IF_ASSIGN(var, ent, struct syscall_trace_exit, \
> TRACE_SYSCALL_EXIT); \
> + IF_ASSIGN(var, ent, struct trace_ksym, TRACE_KSYM); \
> __ftrace_bad_type(); \
> } while (0)
>
> @@ -593,6 +616,8 @@ extern int trace_selftest_startup_syspro
> struct trace_array *tr);
> extern int trace_selftest_startup_branch(struct tracer *trace,
> struct trace_array *tr);
> +extern int trace_selftest_startup_ksym(struct tracer *trace,
> + struct trace_array *tr);
> #endif /* CONFIG_FTRACE_STARTUP_TEST */
>
> extern void *head_page(struct trace_array_cpu *data);
> Index: linux-2.6-tip.hbkpt/kernel/trace/trace_ksym.c
> ===================================================================
> --- /dev/null
> +++ linux-2.6-tip.hbkpt/kernel/trace/trace_ksym.c
> @@ -0,0 +1,555 @@
> +/*
> + * trace_ksym.c - Kernel Symbol Tracer
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
> + *
> + * Copyright (C) IBM Corporation, 2009
> + */
> +
> +#include <linux/kallsyms.h>
> +#include <linux/uaccess.h>
> +#include <linux/debugfs.h>
> +#include <linux/ftrace.h>
> +#include <linux/module.h>
> +#include <linux/jhash.h>
> +#include <linux/fs.h>
> +
> +#include "trace_output.h"
> +#include "trace_stat.h"
> +#include "trace.h"
> +
> +/* For now, let us restrict the no. of symbols traced simultaneously to number
> + * of available hardware breakpoint registers.
> + */
> +#define KSYM_TRACER_MAX HB_NUM
> +
> +#define KSYM_TRACER_OP_LEN 3 /* rw- */
> +#define KSYM_FILTER_ENTRY_LEN (KSYM_NAME_LEN + KSYM_TRACER_OP_LEN + 1)
> +
> +#ifdef CONFIG_FTRACE_SELFTEST
> +
> +static int ksym_selftest_dummy;
> +#define KSYM_SELFTEST_ENTRY "ksym_selftest_dummy"
> +
> +#endif /* CONFIG_FTRACE_SELFTEST */
> +
> +static struct trace_array *ksym_trace_array;
> +
> +DEFINE_MUTEX(ksym_tracer_mutex);
> +
> +static unsigned int ksym_filter_entry_count;
> +static unsigned int ksym_tracing_enabled;
> +
> +static HLIST_HEAD(ksym_filter_head);
> +
> +#ifdef CONFIG_PROFILE_KSYM_TRACER
> +
> +#define MAX_UL_INT 0xffffffff
> +DEFINE_SPINLOCK(ksym_stat_lock);
> +
> +void ksym_collect_stats(unsigned long hbkpt_hit_addr)
> +{
> + struct hlist_node *node;
> + struct trace_ksym *entry;
> +
> + spin_lock(&ksym_stat_lock);
I see that can be called from ksym_hbkpt_handler which in turn
can be called from interrupt context, right?
You can cause a deadlock if you don't disable interrupts here.
Thanks,
Frederic.
> + hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
> + if ((entry->ksym_addr == hbkpt_hit_addr) &&
> + (entry->counter <= MAX_UL_INT)) {
> + entry->counter++;
> + break;
> + }
> + }
> + spin_unlock(&ksym_stat_lock);
> +}
> +#endif /* CONFIG_PROFILE_KSYM_TRACER */
> +
> +void ksym_hbkpt_handler(struct hw_breakpoint *hbkpt, struct pt_regs *regs)
> +{
> + struct ring_buffer_event *event;
> + struct trace_array *tr;
> + struct trace_ksym *entry;
> + int pc;
> +
> + if (!ksym_tracing_enabled)
> + return;
> +
> + tr = ksym_trace_array;
> + pc = preempt_count();
> +
> + event = trace_buffer_lock_reserve(tr, TRACE_KSYM,
> + sizeof(*entry), 0, pc);
> + if (!event)
> + return;
> +
> + entry = ring_buffer_event_data(event);
> + strlcpy(entry->ksym_name, hbkpt->info.name, KSYM_SYMBOL_LEN);
> + entry->ksym_hbkpt = hbkpt;
> + entry->ip = instruction_pointer(regs);
> + strlcpy(entry->p_name, current->comm, TASK_COMM_LEN);
> +#ifdef CONFIG_PROFILE_KSYM_TRACER
> + ksym_collect_stats(hbkpt->info.address);
> +#endif /* CONFIG_PROFILE_KSYM_TRACER */
> +
> + trace_buffer_unlock_commit(tr, event, 0, pc);
> +}
> +
> +/* Valid access types are represented as
> + *
> + * rw- : Set Read/Write Access Breakpoint
> + * -w- : Set Write Access Breakpoint
> + * --- : Clear Breakpoints
> + * --x : Set Execution Break points (Not available yet)
> + *
> + */
> +static int ksym_trace_get_access_type(char *access_str)
> +{
> + int pos, access = 0;
> +
> + for (pos = 0; pos < KSYM_TRACER_OP_LEN; pos++) {
> + switch (access_str[pos]) {
> + case 'r':
> + access += (pos == 0) ? 4 : -1;
> + break;
> + case 'w':
> + access += (pos == 1) ? 2 : -1;
> + break;
> + case '-':
> + break;
> + default:
> + return -EINVAL;
> + }
> + }
> +
> + switch (access) {
> + case 6:
> + access = HW_BREAKPOINT_RW;
> + break;
> + case 2:
> + access = HW_BREAKPOINT_WRITE;
> + break;
> + case 0:
> + access = 0;
> + }
> +
> + return access;
> +}
> +
> +/*
> + * There can be several possible malformed requests and we attempt to capture
> + * all of them. We enumerate some of the rules
> + * 1. We will not allow kernel symbols with ':' since it is used as a delimiter.
> + * i.e. multiple ':' symbols disallowed. Possible uses are of the form
> + * <module>:<ksym_name>:<op>.
> + * 2. No delimiter symbol ':' in the input string
> + * 3. Spurious operator symbols or symbols not in their respective positions
> + * 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file
> + * 5. Kernel symbol not a part of /proc/kallsyms
> + * 6. Duplicate requests
> + */
> +static int parse_ksym_trace_str(char *input_string, char **ksymname,
> + unsigned long *addr)
> +{
> + char *delimiter = ":";
> + int ret;
> +
> + ret = -EINVAL;
> + *ksymname = strsep(&input_string, delimiter);
> + *addr = kallsyms_lookup_name(*ksymname);
> +
> + /* Check for malformed request: (2), (1) and (5) */
> + if ((!input_string) ||
> + (strlen(input_string) != (KSYM_TRACER_OP_LEN + 1)) ||
> + (*addr == 0))
> + goto return_code;
> + ret = ksym_trace_get_access_type(input_string);
> +
> +return_code:
> + return ret;
> +}
> +
> +static int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
> +{
> + struct trace_ksym *entry;
> + int ret;
> +
> + if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
> + printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No"
> + " new requests for tracing can be accepted now.\n",
> + KSYM_TRACER_MAX);
> + return -ENOSPC;
> + }
> +
> + entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
> + if (!entry)
> + return -ENOMEM;
> +
> + entry->ksym_hbkpt = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL);
> + if (!entry->ksym_hbkpt) {
> + kfree(entry);
> + return -ENOMEM;
> + }
> +
> + entry->ksym_hbkpt->info.name = ksymname;
> + entry->ksym_hbkpt->info.type = op;
> + entry->ksym_addr = entry->ksym_hbkpt->info.address = addr;
> + entry->ksym_hbkpt->info.len = HW_BREAKPOINT_LEN_4;
> +
> + entry->ksym_hbkpt->triggered = (void *)ksym_hbkpt_handler;
> +
> + ret = register_kernel_hw_breakpoint(entry->ksym_hbkpt);
> + if (ret < 0) {
> + printk(KERN_INFO "ksym_tracer request failed. Try again"
> + " later!!\n");
> + kfree(entry->ksym_hbkpt);
> + kfree(entry);
> + return -EAGAIN;
> + }
> + hlist_add_head(&(entry->ksym_hlist), &ksym_filter_head);
> + ksym_filter_entry_count++;
> +
> + return 0;
> +}
> +
> +static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
> + size_t count, loff_t *ppos)
> +{
> + struct trace_ksym *entry;
> + struct hlist_node *node;
> + char buf[KSYM_FILTER_ENTRY_LEN * KSYM_TRACER_MAX];
> + ssize_t ret, cnt = 0;
> +
> + mutex_lock(&ksym_tracer_mutex);
> +
> + hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
> + cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt, "%s:",
> + entry->ksym_hbkpt->info.name);
> + if (entry->ksym_hbkpt->info.type == HW_BREAKPOINT_WRITE)
> + cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt,
> + "-w-\n");
> + else if (entry->ksym_hbkpt->info.type == HW_BREAKPOINT_RW)
> + cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt,
> + "rw-\n");
> + }
> + ret = simple_read_from_buffer(ubuf, count, ppos, buf, strlen(buf));
> + mutex_unlock(&ksym_tracer_mutex);
> +
> + return ret;
> +}
> +
> +static ssize_t ksym_trace_filter_write(struct file *file,
> + const char __user *buffer,
> + size_t count, loff_t *ppos)
> +{
> + struct trace_ksym *entry;
> + struct hlist_node *node;
> + char *input_string, *ksymname = NULL;
> + unsigned long ksym_addr = 0;
> + int ret, op, changed = 0;
> +
> + /* Ignore echo "" > ksym_trace_filter */
> + if (count == 0)
> + return 0;
> +
> + input_string = kzalloc(count, GFP_KERNEL);
> + if (!input_string)
> + return -ENOMEM;
> +
> + if (copy_from_user(input_string, buffer, count)) {
> + kfree(input_string);
> + return -EFAULT;
> + }
> +
> + ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
> + if (ret < 0) {
> + kfree(input_string);
> + return ret;
> + }
> +
> + mutex_lock(&ksym_tracer_mutex);
> +
> + ret = -EINVAL;
> + hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
> + if (entry->ksym_addr == ksym_addr) {
> + /* Check for malformed request: (6) */
> + if (entry->ksym_hbkpt->info.type != op)
> + changed = 1;
> + else
> + goto err_ret;
> + break;
> + }
> + }
> + if (changed) {
> + unregister_kernel_hw_breakpoint(entry->ksym_hbkpt);
> + entry->ksym_hbkpt->info.type = op;
> + if (op > 0) {
> + ret = register_kernel_hw_breakpoint(entry->ksym_hbkpt);
> + if (ret == 0) {
> + ret = count;
> + goto unlock_ret_path;
> + }
> + }
> + ksym_filter_entry_count--;
> + hlist_del(&(entry->ksym_hlist));
> + kfree(entry->ksym_hbkpt);
> + kfree(entry);
> + ret = count;
> + goto err_ret;
> + } else {
> + /* Check for malformed request: (4) */
> + if (op == 0)
> + goto err_ret;
> + ret = process_new_ksym_entry(ksymname, op, ksym_addr);
> + if (ret)
> + goto err_ret;
> + }
> + ret = count;
> + goto unlock_ret_path;
> +
> +err_ret:
> + kfree(input_string);
> +
> +unlock_ret_path:
> + mutex_unlock(&ksym_tracer_mutex);
> + return ret;
> +}
> +
> +static const struct file_operations ksym_tracing_fops = {
> + .open = tracing_open_generic,
> + .read = ksym_trace_filter_read,
> + .write = ksym_trace_filter_write,
> +};
> +
> +static void ksym_trace_reset(struct trace_array *tr)
> +{
> + struct trace_ksym *entry;
> + struct hlist_node *node, *node1;
> +
> + ksym_tracing_enabled = 0;
> +
> + mutex_lock(&ksym_tracer_mutex);
> + hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
> + ksym_hlist) {
> + unregister_kernel_hw_breakpoint(entry->ksym_hbkpt);
> + ksym_filter_entry_count--;
> + hlist_del(&(entry->ksym_hlist));
> +
> + /* Free the 'input_string' only if reset
> + * after startup self-test
> + */
> +#ifdef CONFIG_FTRACE_SELFTEST
> + if (strncmp(entry->ksym_hbkpt->info.name, KSYM_SELFTEST_ENTRY,
> + strlen(KSYM_SELFTEST_ENTRY)) != 0)
> +#endif /* CONFIG_FTRACE_SELFTEST*/
> + kfree(entry->ksym_hbkpt->info.name);
> + kfree(entry->ksym_hbkpt);
> + kfree(entry);
> + }
> + mutex_unlock(&ksym_tracer_mutex);
> +
> +}
> +
> +static int ksym_trace_init(struct trace_array *tr)
> +{
> + int cpu, ret = 0;
> +
> + for_each_online_cpu(cpu)
> + tracing_reset(tr, cpu);
> +
> + ksym_tracing_enabled = 1;
> + ksym_trace_array = tr;
> +
> +#ifdef CONFIG_FTRACE_SELFTEST
> + /* Check if we are re-entering self-test code during initialisation */
> + if (ksym_selftest_dummy)
> + goto ret_path;
> +
> + ksym_selftest_dummy = 0;
> +
> + /* Register the read-write tracing request */
> + ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY, HW_BREAKPOINT_RW,
> + (unsigned long)(&ksym_selftest_dummy));
> +
> + if (ret < 0) {
> + printk(KERN_CONT "ksym_trace read-write startup test failed\n");
> + goto ret_path;
> + }
> + /* Perform a read and a write operation over the dummy variable to
> + * trigger the tracer
> + */
> + if (ksym_selftest_dummy == 0)
> + ksym_selftest_dummy++;
> +
> +ret_path:
> +#endif /* CONFIG_FTRACE_SELFTEST */
> +
> + return ret;
> +}
> +
> +static void ksym_trace_print_header(struct seq_file *m)
> +{
> +
> + seq_puts(m,
> + "# TASK-PID CPU# Symbol Type "
> + "Function \n");
> + seq_puts(m,
> + "# | | | | "
> + "| \n");
> +}
> +
> +static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
> +{
> + struct trace_entry *entry = iter->ent;
> + struct trace_seq *s = &iter->seq;
> + struct trace_ksym *field;
> + char str[KSYM_SYMBOL_LEN];
> + int ret;
> +
> + trace_assign_type(field, entry);
> +
> + ret = trace_seq_printf(s, "%-15s %-5d %-3d %-20s ", field->p_name,
> + entry->pid, iter->cpu, field->ksym_name);
> + if (!ret)
> + return TRACE_TYPE_PARTIAL_LINE;
> +
> + switch (field->ksym_hbkpt->info.type) {
> + case HW_BREAKPOINT_WRITE:
> + ret = trace_seq_printf(s, " W ");
> + break;
> + case HW_BREAKPOINT_RW:
> + ret = trace_seq_printf(s, " RW ");
> + break;
> + default:
> + return TRACE_TYPE_PARTIAL_LINE;
> + }
> +
> + if (!ret)
> + return TRACE_TYPE_PARTIAL_LINE;
> +
> + sprint_symbol(str, field->ip);
> + ret = trace_seq_printf(s, "%-20s\n", str);
> + if (!ret)
> + return TRACE_TYPE_PARTIAL_LINE;
> +
> + return TRACE_TYPE_HANDLED;
> +}
> +
> +struct tracer ksym_tracer __read_mostly =
> +{
> + .name = "ksym_tracer",
> + .init = ksym_trace_init,
> + .reset = ksym_trace_reset,
> +#ifdef CONFIG_FTRACE_SELFTEST
> + .selftest = trace_selftest_startup_ksym,
> +#endif
> + .print_header = ksym_trace_print_header,
> + .print_line = ksym_trace_output
> +};
> +
> +__init static int init_ksym_trace(void)
> +{
> + struct dentry *d_tracer;
> + struct dentry *entry;
> +
> + d_tracer = tracing_init_dentry();
> + ksym_filter_entry_count = 0;
> +
> + entry = debugfs_create_file("ksym_trace_filter", 0666, d_tracer,
> + NULL, &ksym_tracing_fops);
> + if (!entry)
> + pr_warning("Could not create debugfs "
> + "'ksym_trace_filter' file\n");
> +
> + return register_tracer(&ksym_tracer);
> +}
> +device_initcall(init_ksym_trace);
> +
> +
> +#ifdef CONFIG_PROFILE_KSYM_TRACER
> +static int ksym_tracer_stat_headers(struct seq_file *m)
> +{
> + seq_printf(m, " Access type ");
> + seq_printf(m, " Symbol Counter \n");
> + return 0;
> +}
> +
> +static int ksym_tracer_stat_show(struct seq_file *m, void *v)
> +{
> + struct hlist_node *stat = v;
> + struct trace_ksym *entry;
> + int access_type = 0;
> + char fn_name[KSYM_NAME_LEN];
> +
> + entry = hlist_entry(stat, struct trace_ksym, ksym_hlist);
> +
> + if (entry->ksym_hbkpt)
> + access_type = entry->ksym_hbkpt->info.type;
> +
> + switch (access_type) {
> + case HW_BREAKPOINT_WRITE:
> + seq_printf(m, " W ");
> + break;
> + case HW_BREAKPOINT_RW:
> + seq_printf(m, " RW ");
> + break;
> + default:
> + seq_printf(m, " NA ");
> + }
> +
> + if (lookup_symbol_name(entry->ksym_addr, fn_name) >= 0)
> + seq_printf(m, " %s ", fn_name);
> + else
> + seq_printf(m, " <NA> ");
> +
> + seq_printf(m, "%15lu\n", entry->counter);
> + return 0;
> +}
> +
> +static void *ksym_tracer_stat_start(void)
> +{
> + return &(ksym_filter_head.first);
> +}
> +
> +static void *
> +ksym_tracer_stat_next(void *v, int idx)
> +{
> + struct hlist_node *stat = v;
> +
> + return stat->next;
> +}
> +
> +static struct tracer_stat ksym_tracer_stats = {
> + .name = "ksym_tracer",
> + .stat_start = ksym_tracer_stat_start,
> + .stat_next = ksym_tracer_stat_next,
> + .stat_headers = ksym_tracer_stat_headers,
> + .stat_show = ksym_tracer_stat_show
> +};
> +
> +__init static int ksym_tracer_stat_init(void)
> +{
> + int ret;
> +
> + ret = register_stat_tracer(&ksym_tracer_stats);
> + if (!ret) {
> + printk(KERN_WARNING "Warning: could not register "
> + "ksym tracer stats\n");
> + return 1;
> + }
> +
> + return 0;
> +}
> +fs_initcall(ksym_tracer_stat_init);
> +#endif /* CONFIG_PROFILE_KSYM_TRACER */
> Index: linux-2.6-tip.hbkpt/kernel/trace/trace_selftest.c
> ===================================================================
> --- linux-2.6-tip.hbkpt.orig/kernel/trace/trace_selftest.c
> +++ linux-2.6-tip.hbkpt/kernel/trace/trace_selftest.c
> @@ -16,6 +16,7 @@ static inline int trace_valid_entry(stru
> case TRACE_BRANCH:
> case TRACE_GRAPH_ENT:
> case TRACE_GRAPH_RET:
> + case TRACE_KSYM:
> return 1;
> }
> return 0;
> @@ -687,3 +688,38 @@ trace_selftest_startup_branch(struct tra
> return ret;
> }
> #endif /* CONFIG_BRANCH_TRACER */
> +
> +#ifdef CONFIG_KSYM_TRACER
> +int
> +trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
> +{
> + unsigned long count;
> + int ret;
> +
> + /* start the tracing */
> + ret = tracer_init(trace, tr);
> + if (ret) {
> + warn_failed_init_tracer(trace, ret);
> + return ret;
> + }
> +
> + /* Sleep for a 1/10 of a second */
> + msleep(100);
> + /* stop the tracing. */
> + tracing_stop();
> + /* check the trace buffer */
> + ret = trace_test_buffer(tr, &count);
> + trace->reset(tr);
> + tracing_start();
> +
> + /* read & write operations - one each is performed on the dummy variable
> + * triggering two entries in the trace buffer
> + */
> + if (!ret && count != 2) {
> + printk(KERN_CONT "Ksym tracer startup test failed");
> + ret = -1;
> + }
> +
> + return ret;
> +}
> +#endif /* CONFIG_KSYM_TRACER */
>
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v2
2009-03-20 9:04 ` Frederic Weisbecker
@ 2009-03-21 16:24 ` K.Prasad
2009-03-21 16:39 ` Steven Rostedt
0 siblings, 1 reply; 33+ messages in thread
From: K.Prasad @ 2009-03-21 16:24 UTC (permalink / raw)
To: Frederic Weisbecker
Cc: Ingo Molnar, Linux Kernel Mailing List, Alan Stern, Andrew Morton,
Benjamin Herrenschmidt, Maneesh Soni, Roland McGrath,
Steven Rostedt
On Fri, Mar 20, 2009 at 10:04:52AM +0100, Frederic Weisbecker wrote:
> On Fri, Mar 20, 2009 at 05:20:32AM +0530, K.Prasad wrote:
> > This patch adds an ftrace plugin to detect and profile memory access over
> > kernel variables. It uses HW Breakpoint interfaces to 'watch memory
> > addresses.
> >
> > Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
> > ---
> > kernel/trace/Kconfig | 21 +
> > kernel/trace/Makefile | 1
> > kernel/trace/trace.h | 25 +
> > kernel/trace/trace_ksym.c | 555 ++++++++++++++++++++++++++++++++++++++++++
> > kernel/trace/trace_selftest.c | 36 ++
> > 5 files changed, 638 insertions(+)
> >
> > +
> > +void ksym_collect_stats(unsigned long hbkpt_hit_addr)
> > +{
> > + struct hlist_node *node;
> > + struct trace_ksym *entry;
> > +
> > + spin_lock(&ksym_stat_lock);
>
>
> I see that can be called from ksym_hbkpt_handler which in turn
> can be called from interrupt context, right?
> You can issue a deadlock if you don't disable interrupts here.
>
> Thanks,
> Frederic.
>
ksym_collect_stats<--ksym_hbkpt_handler<--hw_breakpoint_handler<--do_debug
invocation happens with interrupts enabled (IF bit is set). I do find
that a few plugins in kernel/trace enclose the
trace_buffer_lock_reserve()--trace_buffer_unlock_commit() invocation
within interrupt-disabled code. Is that a requirement there?
The potential deadlock scenario you foresee isn't obvious to me. Can you
explain?
Thanks,
K.Prasad
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v2
2009-03-21 16:24 ` K.Prasad
@ 2009-03-21 16:39 ` Steven Rostedt
2009-03-23 19:08 ` K.Prasad
0 siblings, 1 reply; 33+ messages in thread
From: Steven Rostedt @ 2009-03-21 16:39 UTC (permalink / raw)
To: K.Prasad
Cc: Frederic Weisbecker, Ingo Molnar, Linux Kernel Mailing List,
Alan Stern, Andrew Morton, Benjamin Herrenschmidt, Maneesh Soni,
Roland McGrath
On Sat, 21 Mar 2009, K.Prasad wrote:
> > >
>
> > > +
> > > +void ksym_collect_stats(unsigned long hbkpt_hit_addr)
> > > +{
> > > + struct hlist_node *node;
> > > + struct trace_ksym *entry;
> > > +
> > > + spin_lock(&ksym_stat_lock);
> >
> >
> > I see that can be called from ksym_hbkpt_handler which in turn
> > can be called from interrupt context, right?
> > You can issue a deadlock if you don't disable interrupts here.
> >
> > Thanks,
> > Frederic.
> >
>
> ksym_collect_stats<--ksym_hbkpt_handler<--hw_breakpoint_handler<--do_debug
> invocation happens with interrupts enabled (IF bit is set). I do find
> that a few plugins in kernel/trace enclose the
> trace_buffer_lock_reserve()--trace_buffer_unlock_commit() invocation
> within interrupt-disabled code. Is that a requirement there?
>
> The potential deadlock scenario you foresee isn't obvious to me. Can you
> explain?
Can that lock ever be taken in an interrupt? If not, document that (and
perhaps add a WARN_ON(in_interrupt()); ). Otherwise you have a possible:
spin_lock(&ksym_stat_lock);
===> take interrupt ...
(from interrupt)
spin_lock(&ksym_stat_lock); <== deadlock.
-- Steve
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v2
2009-03-24 15:28 ` [Patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v2 K.Prasad
@ 2009-03-22 9:35 ` Pavel Machek
2009-03-25 3:03 ` Steven Rostedt
2009-03-25 6:35 ` [Patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v3 K.Prasad
2 siblings, 0 replies; 33+ messages in thread
From: Pavel Machek @ 2009-03-22 9:35 UTC (permalink / raw)
To: K.Prasad
Cc: Ingo Molnar, Linux Kernel Mailing List, Alan Stern, Andrew Morton,
Benjamin Herrenschmidt, Frederic Weisbecker, maneesh,
Roland McGrath, Steven Rostedt
Hi!
> This patch adds an ftrace plugin to detect and profile memory access over
> kernel variables. It uses HW Breakpoint interfaces to 'watch memory
> addresses.
> +static void ksym_trace_print_header(struct seq_file *m)
> +{
> +
> + seq_puts(m,
> + "# TASK-PID CPU# Symbol Type "
> + "Function \n");
> + seq_puts(m,
> + "# | | | | "
> + "| \n");
> +}
...
> + trace_assign_type(field, entry);
> +
> + ret = trace_seq_printf(s, "%-15s %-5d %-3d %-20s ", field->p_name,
> + entry->pid, iter->cpu, field->ksym_name);
> + if (!ret)
> + return TRACE_TYPE_PARTIAL_LINE;
> +
> + switch (field->ksym_hbp->info.type) {
> + case HW_BREAKPOINT_WRITE:
> + ret = trace_seq_printf(s, " W ");
> + break;
> + case HW_BREAKPOINT_RW:
> + ret = trace_seq_printf(s, " RW ");
> + break;
> + default:
> + return TRACE_TYPE_PARTIAL_LINE;
> + }
> +#ifdef CONFIG_PROFILE_KSYM_TRACER
> +static int ksym_tracer_stat_headers(struct seq_file *m)
> +{
> + seq_printf(m, " Access type ");
> + seq_printf(m, " Symbol Counter \n");
> + return 0;
> +}
...
> + switch (access_type) {
> + case HW_BREAKPOINT_WRITE:
> + seq_printf(m, " W ");
> + break;
> + case HW_BREAKPOINT_RW:
> + seq_printf(m, " RW ");
> + break;
> + default:
> + seq_printf(m, " NA ");
> + }
> +
> + if (lookup_symbol_name(entry->ksym_addr, fn_name) >= 0)
> + seq_printf(m, " %s ", fn_name);
> + else
> + seq_printf(m, " <NA> ");
> +
> + seq_printf(m, "%15lu\n", entry->counter);
> + return 0;
> +}
Quite a lot of ascii-art here. Is userland expected to parse it? If
so, how? (docs would be nice) What if you'll want to make fields wider one day?
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v2
2009-03-21 16:39 ` Steven Rostedt
@ 2009-03-23 19:08 ` K.Prasad
0 siblings, 0 replies; 33+ messages in thread
From: K.Prasad @ 2009-03-23 19:08 UTC (permalink / raw)
To: Steven Rostedt
Cc: Frederic Weisbecker, Ingo Molnar, Linux Kernel Mailing List,
Alan Stern, Andrew Morton, Benjamin Herrenschmidt, Maneesh Soni,
Roland McGrath
On Sat, Mar 21, 2009 at 12:39:08PM -0400, Steven Rostedt wrote:
>
> On Sat, 21 Mar 2009, K.Prasad wrote:
> > > >
> >
> > > > +
> > > > +void ksym_collect_stats(unsigned long hbkpt_hit_addr)
> > > > +{
> > > > + struct hlist_node *node;
> > > > + struct trace_ksym *entry;
> > > > +
> > > > + spin_lock(&ksym_stat_lock);
> > >
> > >
> > > I see that can be called from ksym_hbkpt_handler which in turn
> > > can be called from interrupt context, right?
> > > You can issue a deadlock if you don't disable interrupts here.
> > >
> > > Thanks,
> > > Frederic.
> > >
> >
> > ksym_collect_stats<--ksym_hbkpt_handler<--hw_breakpoint_handler<--do_debug
> > invocation happens with interrupts enabled (IF bit is set). I do find
> > that a few plugins in kernel/trace enclose the
> > trace_buffer_lock_reserve()--trace_buffer_unlock_commit() invocation
> > within interrupt-disabled code. Is that a requirement there?
> >
> > The potential deadlock scenario you foresee isn't obvious to me. Can you
> > explain?
>
> Can that lock ever be taken in an interrupt? If not, document that (and
> perhaps add a WARN_ON(in_interrupt()); ). Otherwise you have a possible:
>
> spin_lock(&ksym_stat_lock);
>
> ===> take interrupt ...
>
> (from interrupt)
> spin_lock(&ksym_stat_lock); <== deadlock.
>
>
> -- Steve
>
Given that the function pointed to by the trigger() routine is invoked with
breakpoints disabled on that CPU, I don't think we'd enter into a
cyclic dependency as above.
On the other hand, my observation w.r.t. IF bit being set was misplaced
in the sense that it corresponded to the saved stack and not when inside
the breakpoint handler in which case interrupts were disabled.
So we are safe either way.
Thanks,
K.Prasad
^ permalink raw reply [flat|nested] 33+ messages in thread
* [Patch 01/11] Introducing generic hardware breakpoint handler interfaces
[not found] <20090324152028.754123712@K.Prasad>
@ 2009-03-24 15:24 ` K.Prasad
2009-03-24 15:25 ` [Patch 02/11] x86 architecture implementation of Hardware Breakpoint interfaces K.Prasad
` (9 subsequent siblings)
10 siblings, 0 replies; 33+ messages in thread
From: K.Prasad @ 2009-03-24 15:24 UTC (permalink / raw)
To: Ingo Molnar, Linux Kernel Mailing List
Cc: Alan Stern, Andrew Morton, Benjamin Herrenschmidt,
Frederic Weisbecker, maneesh, Roland McGrath, Steven Rostedt,
K.Prasad
[-- Attachment #1: 1 --]
[-- Type: text/plain, Size: 16537 bytes --]
This patch introduces two new files hw_breakpoint.[ch] which defines the
generic interfaces to use hardware breakpoint infrastructure of the system.
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
---
arch/Kconfig | 4
include/asm-generic/hw_breakpoint.h | 138 +++++++++++++
kernel/Makefile | 1
kernel/hw_breakpoint.c | 367 ++++++++++++++++++++++++++++++++++++
4 files changed, 510 insertions(+)
Index: linux-2.6-tip/kernel/hw_breakpoint.c
===================================================================
--- /dev/null
+++ linux-2.6-tip/kernel/hw_breakpoint.c
@@ -0,0 +1,367 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) 2007 Alan Stern
+ * Copyright (C) IBM Corporation, 2009
+ */
+
+/*
+ * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
+ * using the CPU's debug registers.
+ *
+ * This file contains the arch-independent routines. It is not meant
+ * to be compiled as a standalone source file; rather it should be
+ * #include'd by the arch-specific implementation.
+ */
+
+#include <linux/irqflags.h>
+#include <linux/kallsyms.h>
+#include <linux/notifier.h>
+#include <linux/kprobes.h>
+#include <linux/kdebug.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+
+#include <asm/hw_breakpoint.h>
+#include <asm/processor.h>
+#include <asm/debugreg.h>
+
+/*
+ * Mutex that protects all (un)register operations over kernel/user-space
+ * breakpoint requests
+ */
+DEFINE_MUTEX(hw_breakpoint_mutex);
+
+/* Array of kernel-space breakpoint structures */
+struct hw_breakpoint *hbp_kernel[HB_NUM];
+
+/*
+ * Kernel breakpoints grow downwards, starting from HB_NUM
+ * 'hbp_kernel_pos' denotes lowest numbered breakpoint register occupied for
+ * kernel-space request
+ */
+unsigned int hbp_kernel_pos;
+
+/*
+ * An array containing refcount of threads using a given bkpt register
+ * Accesses are synchronised by acquiring hw_breakpoint_mutex
+ */
+unsigned int hbp_user_refcount[HB_NUM];
+
+struct task_struct *last_debugged_task;
+
+/*
+ * Install the debug register values for a new thread.
+ */
+void switch_to_thread_hw_breakpoint(struct task_struct *tsk)
+{
+ /* Set the debug register */
+ arch_install_thread_hw_breakpoint(tsk);
+ last_debugged_task = current;
+}
+
+/*
+ * Install the debug register values for just the kernel, no thread.
+ */
+void switch_to_none_hw_breakpoint(void)
+{
+ arch_install_none();
+}
+
+/*
+ * Load the debug registers during startup of a CPU.
+ */
+void load_debug_registers(void)
+{
+ unsigned long flags;
+ struct task_struct *tsk = current;
+
+ mutex_lock(&hw_breakpoint_mutex);
+
+ /* Prevent IPIs for new kernel breakpoint updates */
+ local_irq_save(flags);
+ arch_load_debug_registers();
+ local_irq_restore(flags);
+
+ if (test_tsk_thread_flag(tsk, TIF_DEBUG))
+ switch_to_thread_hw_breakpoint(tsk);
+
+ mutex_unlock(&hw_breakpoint_mutex);
+}
+
+/*
+ * Erase all the hardware breakpoint info associated with a thread.
+ *
+ * If tsk != current then tsk must not be usable (for example, a
+ * child being cleaned up from a failed fork).
+ */
+void flush_thread_hw_breakpoint(struct task_struct *tsk)
+{
+ int i;
+ struct thread_struct *thread = &(tsk->thread);
+
+ mutex_lock(&hw_breakpoint_mutex);
+
+ /* The thread no longer has any breakpoints associated with it */
+ clear_tsk_thread_flag(tsk, TIF_DEBUG);
+ for (i = 0; i < HB_NUM; i++) {
+ if (thread->hbp[i]) {
+ hbp_user_refcount[i]--;
+ kfree(thread->hbp[i]);
+ thread->hbp[i] = NULL;
+ }
+ }
+ thread->hbp_num_installed = 0;
+
+ /* Actually uninstall the breakpoints if necessary */
+ if (tsk == current)
+ switch_to_none_hw_breakpoint();
+ mutex_unlock(&hw_breakpoint_mutex);
+}
+
+/*
+ * Copy the hardware breakpoint info from a thread to its cloned child.
+ */
+int copy_thread_hw_breakpoint(struct task_struct *tsk,
+ struct task_struct *child, unsigned long clone_flags)
+{
+ /*
+ * We will assume that breakpoint settings are not inherited
+ * and the child starts out with no debug registers set.
+ * But what about CLONE_PTRACE?
+ */
+ clear_tsk_thread_flag(child, TIF_DEBUG);
+ return 0;
+}
+
+/*
+ * Validate the settings in a hw_breakpoint structure.
+ */
+static int validate_settings(struct hw_breakpoint *bp, struct task_struct *tsk)
+{
+ int ret;
+ unsigned int align;
+
+ if (!bp)
+ return -EINVAL;
+
+ ret = arch_validate_hwbkpt_settings(bp, &align, tsk);
+ if (ret < 0)
+ goto err;
+
+ /*
+ * Check that the low-order bits of the address are appropriate
+ * for the alignment implied by len.
+ */
+ if (bp->info.address & align)
+ return -EINVAL;
+
+ /* Check that the virtual address is in the proper range */
+ if (tsk) {
+ if (!arch_check_va_in_userspace(bp->info.address, bp->info.len))
+ return -EFAULT;
+ } else {
+ if (!arch_check_va_in_kernelspace(bp->info.address,
+ bp->info.len))
+ return -EFAULT;
+ }
+ err:
+ return ret;
+}
+
+int __register_user_hw_breakpoint(int pos, struct task_struct *tsk,
+ struct hw_breakpoint *bp)
+{
+ struct thread_struct *thread = &(tsk->thread);
+ int rc;
+
+ /* Do not overcommit. Fail if kernel has used the hbp registers */
+ if (pos >= hbp_kernel_pos)
+ return -ENOSPC;
+
+ rc = validate_settings(bp, tsk);
+ if (rc)
+ return rc;
+
+ thread->hbp[pos] = bp;
+ thread->hbp_num_installed++;
+ hbp_user_refcount[pos]++;
+
+ arch_register_user_hw_breakpoint(pos, bp, tsk);
+
+ /*
+ * Does it need to be installed right now?
+ * Otherwise it will get installed the next time tsk runs
+ */
+ if (tsk == current)
+ switch_to_thread_hw_breakpoint(tsk);
+ return rc;
+}
+
+/*
+ * Modify the address of a hbp register already in use by the task
+ * Do not invoke this in-lieu of a __unregister_user_hw_breakpoint()
+ */
+int __modify_user_hw_breakpoint(int pos, struct task_struct *tsk,
+ struct hw_breakpoint *bp)
+{
+ int rc;
+ struct thread_struct *thread = &(tsk->thread);
+
+ if ((pos >= hbp_kernel_pos) || (validate_settings(bp, tsk)))
+ return -EINVAL;
+
+ thread->hbp[pos] = bp;
+
+ /*
+ * 'pos' must be that of a hbp register already used by 'tsk'
+ * Otherwise arch_modify_user_hw_breakpoint() will fail
+ */
+ rc = arch_modify_user_hw_breakpoint(pos, bp, tsk);
+ if (rc)
+ return rc;
+
+ if (tsk == current)
+ switch_to_thread_hw_breakpoint(tsk);
+ return 0;
+}
+
+/*
+ * Actual implementation of unregister_user_hw_breakpoint.
+ */
+void __unregister_user_hw_breakpoint(int pos, struct task_struct *tsk,
+ struct hw_breakpoint *bp)
+{
+ struct thread_struct *thread = &(tsk->thread);
+
+ if (!bp)
+ return;
+
+ hbp_user_refcount[pos]--;
+ thread->hbp_num_installed--;
+
+ arch_unregister_user_hw_breakpoint(pos, bp, tsk);
+
+ if (tsk == current)
+ switch_to_thread_hw_breakpoint(tsk);
+ kfree(tsk->thread.hbp[pos]);
+ tsk->thread.hbp[pos] = NULL;
+}
+
+/**
+ * register_kernel_hw_breakpoint - register a hardware breakpoint for kernel space
+ * @bp: the breakpoint structure to register
+ *
+ * @bp.info->name or @bp.info->address, @bp.info->len, @bp.info->type and
+ * @bp->triggered must be set properly before invocation
+ *
+ */
+int register_kernel_hw_breakpoint(struct hw_breakpoint *bp)
+{
+ int rc;
+
+ rc = validate_settings(bp, NULL);
+ if (rc)
+ return rc;
+
+ mutex_lock(&hw_breakpoint_mutex);
+
+ rc = -EINVAL;
+ /* Check if we are over-committing */
+ if ((hbp_kernel_pos > 0) && (!hbp_user_refcount[hbp_kernel_pos-1])) {
+ hbp_kernel_pos--;
+ hbp_kernel[hbp_kernel_pos] = bp;
+ arch_register_kernel_hw_breakpoint(hbp_kernel_pos);
+ rc = 0;
+ }
+
+ mutex_unlock(&hw_breakpoint_mutex);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(register_kernel_hw_breakpoint);
+
+/**
+ * unregister_kernel_hw_breakpoint - unregister a HW breakpoint for kernel space
+ * @bp: the breakpoint structure to unregister
+ *
+ * Uninstalls and unregisters @bp.
+ */
+void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp)
+{
+ int i, j;
+
+ mutex_lock(&hw_breakpoint_mutex);
+
+ /* Find the 'bp' in our list of breakpoints for kernel */
+ for (i = hbp_kernel_pos; i < HB_NUM; i++)
+ if (bp == hbp_kernel[i])
+ break;
+ /*
+ * We'll shift the breakpoints one-level above to compact if
+ * unregistration creates a hole
+ */
+ if (i > hbp_kernel_pos)
+ for (j = i; j == hbp_kernel_pos; j--)
+ hbp_kernel[j] = hbp_kernel[j-1];
+
+ /*
+ * Delete the last kernel breakpoint entry after compaction and update
+ * the pointer to the lowest numbered kernel entry after updating its
+ * corresponding value in kdr7
+ */
+ hbp_kernel[hbp_kernel_pos] = 0;
+ arch_unregister_kernel_hw_breakpoint();
+ hbp_kernel_pos++;
+
+ mutex_unlock(&hw_breakpoint_mutex);
+}
+EXPORT_SYMBOL_GPL(unregister_kernel_hw_breakpoint);
+
+/*
+ * Handle debug exception notifications.
+ */
+static int __kprobes hw_breakpoint_exceptions_notify(
+ struct notifier_block *unused, unsigned long val, void *data)
+{
+ if (val != DIE_DEBUG)
+ return NOTIFY_DONE;
+
+ return hw_breakpoint_handler(data);
+}
+
+static struct notifier_block hw_breakpoint_exceptions_nb = {
+ .notifier_call = hw_breakpoint_exceptions_notify,
+ /* we need to be notified first */
+ .priority = 0x7fffffff
+};
+
+static int __init init_hw_breakpoint(void)
+{
+ int i;
+
+ hbp_kernel_pos = HB_NUM;
+ for (i = 0; i < HB_NUM; i++)
+ hbp_user_refcount[i] = 0;
+ load_debug_registers();
+
+ return register_die_notifier(&hw_breakpoint_exceptions_nb);
+}
+
+core_initcall(init_hw_breakpoint);
Index: linux-2.6-tip/kernel/Makefile
===================================================================
--- linux-2.6-tip.orig/kernel/Makefile
+++ linux-2.6-tip/kernel/Makefile
@@ -95,6 +95,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += trace/
obj-$(CONFIG_TRACING) += trace/
obj-$(CONFIG_SMP) += sched_cpupri.o
obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o
+obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
Index: linux-2.6-tip/include/asm-generic/hw_breakpoint.h
===================================================================
--- /dev/null
+++ linux-2.6-tip/include/asm-generic/hw_breakpoint.h
@@ -0,0 +1,138 @@
+#ifndef _ASM_GENERIC_HW_BREAKPOINT_H
+#define _ASM_GENERIC_HW_BREAKPOINT_H
+
+#ifndef __ARCH_HW_BREAKPOINT_H
+#error "Please don't include this file directly"
+#endif
+
+#ifdef __KERNEL__
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/kallsyms.h>
+
+/**
+ * struct hw_breakpoint - unified kernel/user-space hardware breakpoint
+ * @triggered: callback invoked after target address access
+ * @info: arch-specific breakpoint info (address, length, and type)
+ *
+ * %hw_breakpoint structures are the kernel's way of representing
+ * hardware breakpoints. These are data breakpoints
+ * (also known as "watchpoints", triggered on data access), and the breakpoint's
+ * target address can be located in either kernel space or user space.
+ *
+ * The breakpoint's address, length, and type are highly
+ * architecture-specific. The values are encoded in the @info field; you
+ * specify them when registering the breakpoint. To examine the encoded
+ * values use hw_breakpoint_get_{kaddress,uaddress,len,type}(), declared
+ * below.
+ *
+ * The address is specified as a regular kernel pointer (for kernel-space
+ * breakponts) or as an %__user pointer (for user-space breakpoints).
+ * With register_user_hw_breakpoint(), the address must refer to a
+ * location in user space. The breakpoint will be active only while the
+ * requested task is running. Conversely with
+ * register_kernel_hw_breakpoint(), the address must refer to a location
+ * in kernel space, and the breakpoint will be active on all CPUs
+ * regardless of the current task.
+ *
+ * The length is the breakpoint's extent in bytes, which is subject to
+ * certain limitations. include/asm/hw_breakpoint.h contains macros
+ * defining the available lengths for a specific architecture. Note that
+ * the address's alignment must match the length. The breakpoint will
+ * catch accesses to any byte in the range from address to address +
+ * (length - 1).
+ *
+ * The breakpoint's type indicates the sort of access that will cause it
+ * to trigger. Possible values may include:
+ *
+ * %HW_BREAKPOINT_RW (triggered on read or write access),
+ * %HW_BREAKPOINT_WRITE (triggered on write access), and
+ * %HW_BREAKPOINT_READ (triggered on read access).
+ *
+ * Appropriate macros are defined in include/asm/hw_breakpoint.h; not all
+ * possibilities are available on all architectures. Execute breakpoints
+ * must have length equal to the special value %HW_BREAKPOINT_LEN_EXECUTE.
+ *
+ * When a breakpoint gets hit, the @triggered callback is
+ * invoked in_interrupt with a pointer to the %hw_breakpoint structure and the
+ * processor registers.
+ * Data breakpoints occur after the memory access has taken place.
+ * Breakpoints are disabled during execution @triggered, to avoid
+ * recursive traps and allow unhindered access to breakpointed memory.
+ *
+ * This sample code sets a breakpoint on pid_max and registers a callback
+ * function for writes to that variable. Note that it is not portable
+ * as written, because not all architectures support HW_BREAKPOINT_LEN_4.
+ *
+ * ----------------------------------------------------------------------
+ *
+ * #include <asm/hw_breakpoint.h>
+ *
+ * struct hw_breakpoint my_bp;
+ *
+ * static void my_triggered(struct hw_breakpoint *bp, struct pt_regs *regs)
+ * {
+ * printk(KERN_DEBUG "Inside triggered routine of breakpoint exception\n");
+ * dump_stack();
+ * .......<more debugging output>........
+ * }
+ *
+ * static struct hw_breakpoint my_bp;
+ *
+ * static int init_module(void)
+ * {
+ * ..........<do anything>............
+ * my_bp.info.type = HW_BREAKPOINT_WRITE;
+ * my_bp.info.len = HW_BREAKPOINT_LEN_4;
+ *
+ * my_bp.installed = (void *)my_bp_installed;
+ *
+ * rc = register_kernel_hw_breakpoint(&my_bp);
+ * ..........<do anything>............
+ * }
+ *
+ * static void cleanup_module(void)
+ * {
+ * ..........<do anything>............
+ * unregister_kernel_hw_breakpoint(&my_bp);
+ * ..........<do anything>............
+ * }
+ *
+ * ----------------------------------------------------------------------
+ */
+struct hw_breakpoint {
+ void (*triggered)(struct hw_breakpoint *, struct pt_regs *);
+ struct arch_hw_breakpoint info;
+};
+
+/*
+ * len and type values are defined in include/asm/hw_breakpoint.h.
+ * Available values vary according to the architecture. On i386 the
+ * possibilities are:
+ *
+ * HW_BREAKPOINT_LEN_1
+ * HW_BREAKPOINT_LEN_2
+ * HW_BREAKPOINT_LEN_4
+ * HW_BREAKPOINT_LEN_EXECUTE
+ * HW_BREAKPOINT_RW
+ * HW_BREAKPOINT_READ
+ * HW_BREAKPOINT_EXECUTE
+ *
+ * On other architectures HW_BREAKPOINT_LEN_8 may be available, and the
+ * 1-, 2-, and 4-byte lengths may be unavailable. There also may be
+ * HW_BREAKPOINT_WRITE. You can use #ifdef to check at compile time.
+ */
+
+/*
+ * Kernel breakpoints are not associated with any particular thread.
+ */
+int register_kernel_hw_breakpoint(struct hw_breakpoint *bp);
+void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp);
+void switch_to_none_hw_breakpoint(void);
+
+extern unsigned int hbp_kernel_pos;
+extern struct task_struct *last_debugged_task;
+extern struct mutex hw_breakpoint_mutex;
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_GENERIC_HW_BREAKPOINT_H */
Index: linux-2.6-tip/arch/Kconfig
===================================================================
--- linux-2.6-tip.orig/arch/Kconfig
+++ linux-2.6-tip/arch/Kconfig
@@ -109,3 +109,7 @@ config HAVE_CLK
config HAVE_DMA_API_DEBUG
bool
+
+config HAVE_HW_BREAKPOINT
+ bool
+
^ permalink raw reply [flat|nested] 33+ messages in thread
* [Patch 02/11] x86 architecture implementation of Hardware Breakpoint interfaces
[not found] <20090324152028.754123712@K.Prasad>
2009-03-24 15:24 ` [Patch 01/11] Introducing generic hardware breakpoint handler interfaces K.Prasad
@ 2009-03-24 15:25 ` K.Prasad
2009-03-24 15:25 ` [Patch 03/11] Modifying generic debug exception to use thread-specific debug registers K.Prasad
` (8 subsequent siblings)
10 siblings, 0 replies; 33+ messages in thread
From: K.Prasad @ 2009-03-24 15:25 UTC (permalink / raw)
To: Ingo Molnar, Linux Kernel Mailing List
Cc: Alan Stern, Andrew Morton, Benjamin Herrenschmidt,
Frederic Weisbecker, maneesh, Roland McGrath, Steven Rostedt,
K.Prasad
[-- Attachment #1: 2 --]
[-- Type: text/plain, Size: 15088 bytes --]
This patch introduces two new files named hw_breakpoint.[ch] inside x86 specific
directories. They contain functions which help validate and serve requests for
using Hardware Breakpoint registers on x86 processors.
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
---
arch/x86/Kconfig | 1
arch/x86/include/asm/hw_breakpoint.h | 73 +++++
arch/x86/kernel/Makefile | 2
arch/x86/kernel/hw_breakpoint.c | 441 +++++++++++++++++++++++++++++++++++
4 files changed, 516 insertions(+), 1 deletion(-)
Index: linux-2.6-tip/arch/x86/kernel/hw_breakpoint.c
===================================================================
--- /dev/null
+++ linux-2.6-tip/arch/x86/kernel/hw_breakpoint.c
@@ -0,0 +1,441 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) 2007 Alan Stern
+ * Copyright (C) 2009 IBM Corporation
+ */
+
+/*
+ * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
+ * using the CPU's debug registers.
+ */
+
+#include <linux/irqflags.h>
+#include <linux/notifier.h>
+#include <linux/kallsyms.h>
+#include <linux/kprobes.h>
+#include <linux/percpu.h>
+#include <linux/kdebug.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+
+#include <asm/hw_breakpoint.h>
+#include <asm/processor.h>
+#include <asm/debugreg.h>
+
+/* Unmasked kernel DR7 value */
+/* (OR of the encode_dr7() bits for every registered kernel breakpoint) */
+static unsigned long kdr7;
+
+/*
+ * Masks for the bits corresponding to registers DR0 - DR3 in DR7 register.
+ * Used to clear and verify the status of bits corresponding to DR0 - DR3
+ */
+static const unsigned long dr7_masks[HB_NUM] = {
+ 0x000f0003, /* LEN0, R/W0, G0, L0 */
+ 0x00f0000c, /* LEN1, R/W1, G1, L1 */
+ 0x0f000030, /* LEN2, R/W2, G2, L2 */
+ 0xf00000c0 /* LEN3, R/W3, G3, L3 */
+};
+
+
+/*
+ * Encode the length, type, Exact, and Enable bits for a particular breakpoint
+ * as stored in debug register 7.
+ */
+static unsigned long encode_dr7(int drnum, unsigned len, unsigned type)
+{
+	unsigned long ctrl_bits = (unsigned long)((len | type) & 0xf)
+			<< (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE);
+	unsigned long enable_bits = (unsigned long)DR_GLOBAL_ENABLE
+			<< (drnum * DR_ENABLE_SIZE);
+
+	return ctrl_bits | enable_bits | DR_GLOBAL_SLOWDOWN;
+}
+
+/*
+ * Install the kernel breakpoints in their debug registers.
+ * If 0 <= pos < HB_NUM, then set the debug register corresponding to that number
+ * If 'pos' is negative, then all debug registers are updated
+ *
+ * Takes 'void *idx' (a pointer to the int position) so the function can be
+ * used directly as an on_each_cpu() callback (see
+ * arch_register_kernel_hw_breakpoint()).
+ */
+void arch_install_kernel_hw_breakpoint(void *idx)
+{
+ int pos = *(int *)idx;
+ unsigned long dr7;
+ int i;
+
+ get_debugreg(dr7, 7);
+
+ /* Don't allow debug exceptions while we update the registers */
+ set_debugreg(0UL, 7);
+
+ /* Kernel-owned slots occupy indices hbp_kernel_pos .. HB_NUM-1 */
+ for (i = hbp_kernel_pos; i < HB_NUM; i++) {
+ if ((pos >= 0) && (i != pos))
+ continue;
+ dr7 &= ~(dr7_masks[i]);
+ if (hbp_kernel[i])
+ set_debugreg(hbp_kernel[i]->info.address, i);
+ }
+
+ /* Merge in the kernel's control bits and re-enable exceptions */
+ dr7 |= kdr7;
+ /* No need to set DR6 */
+ set_debugreg(dr7, 7);
+}
+
+/*
+ * (Re)load all kernel breakpoints into this CPU's debug registers,
+ * e.g. when a CPU comes online or resumes.
+ *
+ * Fixed: use a '(void)' parameter list to match the prototype in
+ * <asm/hw_breakpoint.h>; an empty '()' is an old-style unspecified
+ * argument list in C and defeats compile-time type checking.
+ */
+void arch_load_debug_registers(void)
+{
+	int pos = -1;
+	/*
+	 * We want all debug registers to be initialised for this
+	 * CPU so pos = -1
+	 */
+	arch_install_kernel_hw_breakpoint((void *)&pos);
+}
+
+/*
+ * Install the thread breakpoints in their debug registers.
+ */
+void arch_install_thread_hw_breakpoint(struct task_struct *tsk)
+{
+ int i;
+ struct thread_struct *thread = &(tsk->thread);
+
+ /*
+ * NOTE(review): the loop stops at the *first* slot with a zero
+ * hbp_user_refcount[], rather than skipping it - presumably user
+ * slots are allocated contiguously from 0; confirm against the
+ * slot-allocation code in the generic layer.
+ */
+ for (i = 0; (i < hbp_kernel_pos) && hbp_user_refcount[i]; i++)
+ if (thread->hbp[i])
+ set_debugreg(thread->hbp[i]->info.address, i);
+
+ /* No need to set DR6 */
+
+ /* Combine kernel-wide and per-thread control bits in DR7 */
+ set_debugreg((kdr7 | thread->dr7), 7);
+}
+
+/*
+ * Install the debug register values for just the kernel, no thread.
+ *
+ * Fixed: use a '(void)' parameter list to match the prototype in
+ * <asm/hw_breakpoint.h> instead of an old-style empty '()'.
+ */
+void arch_install_none(void)
+{
+	/* Clear the user-space portion of dr7 by setting only kdr7 */
+	set_debugreg(kdr7, 7);
+}
+
+/*
+ * Translate a DR7 length encoding into the breakpoint's size in bytes.
+ * Returns 0 for an encoding that is not recognised.
+ */
+static int get_hbp_len(u8 hbp_len)
+{
+	switch (hbp_len) {
+	case HW_BREAKPOINT_LEN_1:
+		return 1;
+	case HW_BREAKPOINT_LEN_2:
+		return 2;
+	case HW_BREAKPOINT_LEN_4:
+		return 4;
+#ifdef CONFIG_X86_64
+	case HW_BREAKPOINT_LEN_8:
+		return 8;
+#endif
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Check for virtual address in user space.
+ * Returns nonzero when the whole [va, va+len) range lies below TASK_SIZE.
+ */
+int arch_check_va_in_userspace(unsigned long va, u8 hbp_len)
+{
+	unsigned int nbytes = get_hbp_len(hbp_len);
+
+	return va <= TASK_SIZE - nbytes;
+}
+
+/*
+ * Check for virtual address in kernel space.
+ */
+int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len)
+{
+ unsigned int len;
+
+ len = get_hbp_len(hbp_len);
+
+ /*
+ * NOTE(review): the second clause only rejects 'va + len' wrapping
+ * back below TASK_SIZE (address-space overflow); confirm that is the
+ * intent, since for non-wrapping va >= TASK_SIZE it is always true.
+ */
+ return ((va >= TASK_SIZE) && ((va + len) >= TASK_SIZE));
+}
+
+/*
+ * Store a breakpoint's encoded address, length, and type.
+ *
+ * User-space requests always arrive with the address field populated.
+ * Kernel requests may instead carry a symbol name, which is resolved
+ * here through kallsyms.
+ */
+void arch_store_info(struct hw_breakpoint *bp)
+{
+	if (!bp->info.address && bp->info.name)
+		bp->info.address = (unsigned long)
+				kallsyms_lookup_name(bp->info.name);
+}
+
+/*
+ * Validate the arch-specific HW Breakpoint register settings.
+ *
+ * Returns 0 (and resolves the address via arch_store_info()) when the
+ * request is acceptable, -EINVAL otherwise.  '*align' is set to the
+ * address-alignment mask implied by the requested length.
+ */
+int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp,
+		unsigned int *align, struct task_struct *tsk)
+{
+	int ret = -EINVAL;
+
+	switch (bp->info.type) {
+	/*
+	 * Ptrace-refactoring code
+	 * For now, we'll allow instruction breakpoint only for user-space
+	 * addresses
+	 */
+	case HW_BREAKPOINT_EXECUTE:
+		/*
+		 * Fixed: reject when the address is outside user-space OR
+		 * the length is not LEN_EXECUTE.  The previous '&&' let a
+		 * kernel-space execute breakpoint through whenever its len
+		 * happened to be HW_BREAKPOINT_LEN_EXECUTE, contradicting
+		 * the comment above.
+		 */
+		if (!arch_check_va_in_userspace(bp->info.address,
+						bp->info.len) ||
+		    bp->info.len != HW_BREAKPOINT_LEN_EXECUTE)
+			return ret;
+		break;
+	case HW_BREAKPOINT_WRITE:
+		break;
+	case HW_BREAKPOINT_RW:
+		break;
+	default:
+		return ret;
+	}
+
+	/* Translate the length encoding into an alignment mask */
+	switch (bp->info.len) {
+	case HW_BREAKPOINT_LEN_1:
+		*align = 0;
+		break;
+	case HW_BREAKPOINT_LEN_2:
+		*align = 1;
+		break;
+	case HW_BREAKPOINT_LEN_4:
+		*align = 3;
+		break;
+#ifdef CONFIG_X86_64
+	case HW_BREAKPOINT_LEN_8:
+		*align = 7;
+		break;
+#endif
+	default:
+		return ret;
+	}
+
+	/* A trigger callback is mandatory */
+	if (bp->triggered) {
+		ret = 0;
+		arch_store_info(bp);
+	}
+	return ret;
+}
+
+/*
+ * Register a new user breakpoint structure: fold the breakpoint's control
+ * bits into the thread's virtualized DR7 value.
+ */
+void arch_register_user_hw_breakpoint(int pos, struct hw_breakpoint *bp,
+		struct task_struct *tsk)
+{
+	unsigned long dr7 = tsk->thread.dr7;
+
+	dr7 &= ~dr7_masks[pos];
+	dr7 |= encode_dr7(pos, bp->info.len, bp->info.type);
+	tsk->thread.dr7 = dr7;
+}
+
+/*
+ * Modify an existing user breakpoint structure.
+ * Returns -EINVAL when the register was never enabled by this thread.
+ */
+int arch_modify_user_hw_breakpoint(int pos, struct hw_breakpoint *bp,
+		struct task_struct *tsk)
+{
+	struct thread_struct *thread = &(tsk->thread);
+
+	/*
+	 * Check if the register to be modified was enabled by the thread.
+	 * Fixed: encode_dr7() sets the *global* enable bit
+	 * (DR_GLOBAL_ENABLE, bit pos*2+1), but the old test looked only at
+	 * the local-enable bit (1 << (pos * DR_ENABLE_SIZE)), which is
+	 * never set - so every modification failed with -EINVAL.  Test
+	 * both enable bits of the pair instead.
+	 */
+	if (!(thread->dr7 & ((DR_LOCAL_ENABLE | DR_GLOBAL_ENABLE) <<
+			(pos * DR_ENABLE_SIZE))))
+		return -EINVAL;
+
+	thread->dr7 &= ~dr7_masks[pos];
+	thread->dr7 |= encode_dr7(pos, bp->info.len, bp->info.type);
+
+	return 0;
+}
+
+/*
+ * Unregister a user breakpoint structure: wipe its stored address and
+ * clear its control bits from the thread's virtualized DR7 value.
+ */
+void arch_unregister_user_hw_breakpoint(int pos, struct hw_breakpoint *bp,
+		struct task_struct *tsk)
+{
+	struct thread_struct *t = &(tsk->thread);
+
+	if (t->hbp[pos] == NULL)
+		return;
+
+	t->hbp[pos]->info.address = 0;
+	t->dr7 &= ~dr7_masks[pos];
+}
+
+/*
+ * Register a kernel breakpoint structure.
+ * Updates kdr7 for slot 'pos' and pushes the change to every CPU.
+ */
+void arch_register_kernel_hw_breakpoint(int pos)
+{
+ struct hw_breakpoint *bp;
+
+ bp = hbp_kernel[pos];
+
+ /* Clear the slot's old control bits, then encode the new ones */
+ kdr7 &= ~(dr7_masks[pos]);
+ if (bp)
+ kdr7 |= encode_dr7(pos, bp->info.len, bp->info.type);
+
+ /*
+ * NOTE(review): '&pos' lives on this stack frame and the last
+ * on_each_cpu() argument is 0 (don't wait) - confirm remote CPUs
+ * cannot still be dereferencing it after this function returns.
+ */
+ on_each_cpu(arch_install_kernel_hw_breakpoint, (void *)&pos, 0);
+}
+
+/*
+ * Unregister a kernel breakpoint structure.
+ *
+ * Fixed: use a '(void)' parameter list to match the prototype in
+ * <asm/hw_breakpoint.h>; an empty '()' is an old-style unspecified
+ * argument list in C and defeats compile-time type checking.
+ */
+void arch_unregister_kernel_hw_breakpoint(void)
+{
+	int i, pos = -1;
+
+	/*
+	 * Modify kdr7 to reflect the new layout of kernel-space breakpoints
+	 * and invoke the routine to write breakpoint addresses onto the
+	 * physical registers
+	 */
+	for (i = hbp_kernel_pos; i < HB_NUM; i++) {
+		kdr7 &= ~(dr7_masks[i]);
+		if (hbp_kernel[i]) {
+			struct hw_breakpoint *bp = hbp_kernel[i];
+			kdr7 |= encode_dr7(i, bp->info.len, bp->info.type);
+		}
+	}
+	/* pos == -1: rewrite every kernel-owned debug register on all CPUs */
+	on_each_cpu(arch_install_kernel_hw_breakpoint, (void *)&pos, 0);
+}
+
+
+/*
+ * Copy out the debug register information for a core dump.
+ *
+ * tsk must be equal to current.
+ */
+void dump_thread_hw_breakpoint(struct task_struct *tsk, int u_debugreg[8])
+{
+ struct thread_struct *thread = &(tsk->thread);
+ int i;
+
+ memset(u_debugreg, 0, sizeof u_debugreg);
+ for (i = 0; i < thread->hbp_num_installed && thread->hbp[i]; ++i)
+ u_debugreg[i] = thread->hbp[i]->info.address;
+ u_debugreg[7] = thread->dr7;
+ u_debugreg[6] = thread->dr6;
+}
+
+/*
+ * Handle debug exception notifications.
+ *
+ * Returns NOTIFY_STOP when the exception was fully consumed here,
+ * NOTIFY_DONE when further processing (e.g. a SIGTRAP delivered by
+ * do_debug() for user-space breakpoints) is still required.
+ */
+int __kprobes hw_breakpoint_handler(struct die_args *args)
+{
+	int i, rc = NOTIFY_DONE;
+	struct hw_breakpoint *bp;
+	/* The DR6 value is stored in args->err */
+	unsigned long dr7, dr6 = args->err;
+
+	/* Single-step traps are not ours to handle */
+	if (dr6 & DR_STEP)
+		return NOTIFY_DONE;
+
+	get_debugreg(dr7, 7);
+
+	/* Disable breakpoints during exception handling */
+	set_debugreg(0UL, 7);
+
+	/*
+	 * Assert that local interrupts are disabled
+	 * Reset the DRn bits in the virtualized register value.
+	 * The ptrace trigger routine will add in whatever is needed.
+	 */
+	current->thread.dr6 &= ~(DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3);
+
+	/* Lazy debug register switching */
+	if (last_debugged_task != current)
+		switch_to_none_hw_breakpoint();
+
+	/* Handle all the breakpoints that were triggered */
+	for (i = 0; i < HB_NUM; ++i) {
+		if (likely(!(dr6 & (DR_TRAP0 << i))))
+			continue;
+		/*
+		 * Find the corresponding hw_breakpoint structure and
+		 * invoke its triggered callback.
+		 */
+		if (hbp_user_refcount[i])
+			bp = current->thread.hbp[i];
+		else if (i >= hbp_kernel_pos)
+			bp = hbp_kernel[i];
+		else /* False alarm due to lazy DR switching */
+			continue;
+
+		/*
+		 * NOTE(review): 'break' abandons any remaining TRAP bits;
+		 * confirm an empty slot really implies nothing else fired.
+		 */
+		if (!bp)
+			break;
+
+		switch (bp->info.type) {
+		case HW_BREAKPOINT_WRITE:
+		case HW_BREAKPOINT_RW:
+			if (bp->triggered)
+				(bp->triggered)(bp, args->regs);
+
+			if (arch_check_va_in_userspace(bp->info.address,
+							bp->info.len))
+				rc = NOTIFY_DONE;
+			else
+				rc = NOTIFY_STOP;	/* fixed stray ';;' */
+			goto exit;
+		case HW_BREAKPOINT_EXECUTE:
+			/*
+			 * Presently we allow instruction breakpoints only in
+			 * user-space when requested through ptrace.
+			 */
+			if (arch_check_va_in_userspace(bp->info.address,
+							bp->info.len)) {
+				/* Fixed: check 'triggered' before calling,
+				 * as the WRITE/RW cases above already do */
+				if (bp->triggered)
+					(bp->triggered)(bp, args->regs);
+				/*
+				 * do_debug will notify user through a SIGTRAP
+				 * signal So we are not requesting a
+				 * NOTIFY_STOP here
+				 */
+				rc = NOTIFY_DONE;
+				goto exit;
+			}
+		}
+	}
+
+	/* Stop processing further if the exception is a stray one */
+	if (!(dr6 & ~(DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)))
+		rc = NOTIFY_STOP;
+exit:
+	/* Restore the DR7 value that was live at entry */
+	set_debugreg(dr7, 7);
+	return rc;
+}
Index: linux-2.6-tip/arch/x86/include/asm/hw_breakpoint.h
===================================================================
--- /dev/null
+++ linux-2.6-tip/arch/x86/include/asm/hw_breakpoint.h
@@ -0,0 +1,73 @@
+#ifndef _I386_HW_BREAKPOINT_H
+#define _I386_HW_BREAKPOINT_H
+
+#ifdef __KERNEL__
+#define __ARCH_HW_BREAKPOINT_H
+
+struct arch_hw_breakpoint {
+ char *name; /* Contains name of the symbol to set bkpt */
+ unsigned long address;
+ u8 len;
+ u8 type;
+};
+
+#include <linux/kdebug.h>
+#include <asm-generic/hw_breakpoint.h>
+
+/* Available HW breakpoint length encodings */
+#define HW_BREAKPOINT_LEN_1 0x40
+#define HW_BREAKPOINT_LEN_2 0x44
+#define HW_BREAKPOINT_LEN_4 0x4c
+#define HW_BREAKPOINT_LEN_EXECUTE 0x40
+
+#ifdef CONFIG_X86_64
+#define HW_BREAKPOINT_LEN_8 0x48
+#endif
+
+/* Available HW breakpoint type encodings */
+
+/* trigger on instruction execute */
+#define HW_BREAKPOINT_EXECUTE 0x80
+/* trigger on memory write */
+#define HW_BREAKPOINT_WRITE 0x81
+/* trigger on memory read or write */
+#define HW_BREAKPOINT_RW 0x83
+
+/* Total number of available HW breakpoint registers */
+#define HB_NUM 4
+
+extern struct hw_breakpoint *hbp_kernel[HB_NUM];
+extern unsigned int hbp_user_refcount[HB_NUM];
+
+/*
+ * Ptrace support: breakpoint trigger routine.
+ */
+int __register_user_hw_breakpoint(int pos, struct task_struct *tsk,
+ struct hw_breakpoint *bp);
+int __modify_user_hw_breakpoint(int pos, struct task_struct *tsk,
+ struct hw_breakpoint *bp);
+void __unregister_user_hw_breakpoint(int pos, struct task_struct *tsk,
+ struct hw_breakpoint *bp);
+
+void arch_install_thread_hw_breakpoint(struct task_struct *tsk);
+void arch_install_none(void);
+void arch_install_kernel_hw_breakpoint(void *);
+int arch_check_va_in_userspace(unsigned long va, u8 hbp_len);
+int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len);
+void arch_store_info(struct hw_breakpoint *bp);
+int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp,
+ unsigned int *align, struct task_struct *tsk);
+void arch_register_user_hw_breakpoint(int pos, struct hw_breakpoint *bp,
+ struct task_struct *tsk);
+int arch_modify_user_hw_breakpoint(int pos, struct hw_breakpoint *bp,
+ struct task_struct *tsk);
+void arch_unregister_user_hw_breakpoint(int pos, struct hw_breakpoint *bp,
+ struct task_struct *tsk);
+void arch_load_debug_registers(void);
+void arch_register_kernel_hw_breakpoint(int pos);
+void arch_unregister_kernel_hw_breakpoint(void);
+int hw_breakpoint_handler(struct die_args *args);
+
+#endif /* __KERNEL__ */
+#endif /* _I386_HW_BREAKPOINT_H */
+
Index: linux-2.6-tip/arch/x86/kernel/Makefile
===================================================================
--- linux-2.6-tip.orig/arch/x86/kernel/Makefile
+++ linux-2.6-tip/arch/x86/kernel/Makefile
@@ -36,7 +36,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x86
obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o
obj-y += bootflag.o e820.o
obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
-obj-y += alternative.o i8253.o pci-nommu.o
+obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
obj-y += tsc.o io_delay.o rtc.o
obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
Index: linux-2.6-tip/arch/x86/Kconfig
===================================================================
--- linux-2.6-tip.orig/arch/x86/Kconfig
+++ linux-2.6-tip/arch/x86/Kconfig
@@ -47,6 +47,7 @@ config X86
select HAVE_KERNEL_LZMA
select HAVE_ARCH_KMEMCHECK
select HAVE_DMA_API_DEBUG
+ select HAVE_HW_BREAKPOINT
config ARCH_DEFCONFIG
string
^ permalink raw reply [flat|nested] 33+ messages in thread
* [Patch 03/11] Modifying generic debug exception to use thread-specific debug registers
[not found] <20090324152028.754123712@K.Prasad>
2009-03-24 15:24 ` [Patch 01/11] Introducing generic hardware breakpoint handler interfaces K.Prasad
2009-03-24 15:25 ` [Patch 02/11] x86 architecture implementation of Hardware Breakpoint interfaces K.Prasad
@ 2009-03-24 15:25 ` K.Prasad
2009-03-24 15:26 ` [Patch 04/11] Introduce user-space " K.Prasad
` (7 subsequent siblings)
10 siblings, 0 replies; 33+ messages in thread
From: K.Prasad @ 2009-03-24 15:25 UTC (permalink / raw)
To: Ingo Molnar, Linux Kernel Mailing List
Cc: Alan Stern, Andrew Morton, Benjamin Herrenschmidt,
Frederic Weisbecker, maneesh, Roland McGrath, Steven Rostedt,
K.Prasad
[-- Attachment #1: 3 --]
[-- Type: text/plain, Size: 3643 bytes --]
From: Alan Stern <stern@rowland.harvard.edu>
This patch modifies the breakpoint exception handler code to use the abstract
register names.
[K.Prasad: Split-out from the bigger patch and minor changes following
re-basing]
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
---
arch/x86/kernel/traps.c | 73 ++++++++++++++++--------------------------------
1 file changed, 25 insertions(+), 48 deletions(-)
Index: linux-2.6-tip/arch/x86/kernel/traps.c
===================================================================
--- linux-2.6-tip.orig/arch/x86/kernel/traps.c
+++ linux-2.6-tip/arch/x86/kernel/traps.c
@@ -530,13 +530,14 @@ asmlinkage __kprobes struct pt_regs *syn
dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
{
struct task_struct *tsk = current;
- unsigned long condition;
+ unsigned long dr6;
int si_code;
- get_debugreg(condition, 6);
+ get_debugreg(dr6, 6);
+ set_debugreg(0, 6); /* DR6 may or may not be cleared by the CPU */
/* Catch kmemcheck conditions first of all! */
- if (condition & DR_STEP && kmemcheck_trap(regs))
+ if (dr6 & DR_STEP && kmemcheck_trap(regs))
return;
/*
@@ -545,61 +546,37 @@ dotraplinkage void __kprobes do_debug(st
clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
tsk->thread.debugctlmsr = 0;
- if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
+ /* Store the virtualized DR6 value */
+ tsk->thread.dr6 = dr6;
+
+ if (notify_die(DIE_DEBUG, "debug", regs, dr6, error_code,
SIGTRAP) == NOTIFY_STOP)
return;
/* It's safe to allow irq's after DR6 has been saved */
preempt_conditional_sti(regs);
- /* Mask out spurious debug traps due to lazy DR7 setting */
- if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
- if (!tsk->thread.debugreg7)
- goto clear_dr7;
- }
-
-#ifdef CONFIG_X86_32
- if (regs->flags & X86_VM_MASK)
- goto debug_vm86;
-#endif
-
- /* Save debug status register where ptrace can see it */
- tsk->thread.debugreg6 = condition;
-
- /*
- * Single-stepping through TF: make sure we ignore any events in
- * kernel space (but re-enable TF when returning to user mode).
- */
- if (condition & DR_STEP) {
- if (!user_mode(regs))
- goto clear_TF_reenable;
+ if (regs->flags & X86_VM_MASK) {
+ handle_vm86_trap((struct kernel_vm86_regs *) regs,
+ error_code, 1);
+ return;
}
- si_code = get_si_code(condition);
- /* Ok, finally something we can handle */
- send_sigtrap(tsk, regs, error_code, si_code);
-
/*
- * Disable additional traps. They'll be re-enabled when
- * the signal is delivered.
+ * Single-stepping through system calls: ignore any exceptions in
+ * kernel space, but re-enable TF when returning to user mode.
+ *
+ * We already checked v86 mode above, so we can check for kernel mode
+ * by just checking the CPL of CS.
*/
-clear_dr7:
- set_debugreg(0, 7);
- preempt_conditional_cli(regs);
- return;
-
-#ifdef CONFIG_X86_32
-debug_vm86:
- /* reenable preemption: handle_vm86_trap() might sleep */
- dec_preempt_count();
- handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
- conditional_cli(regs);
- return;
-#endif
-
-clear_TF_reenable:
- set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
- regs->flags &= ~X86_EFLAGS_TF;
+ if ((dr6 & DR_STEP) && !user_mode(regs)) {
+ tsk->thread.dr6 &= ~DR_STEP;
+ set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
+ regs->flags &= ~X86_EFLAGS_TF;
+ }
+ si_code = get_si_code(dr6);
+ if (tsk->thread.dr6 & (DR_STEP|DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3))
+ send_sigtrap(tsk, regs, error_code, si_code);
preempt_conditional_cli(regs);
return;
}
^ permalink raw reply [flat|nested] 33+ messages in thread
* [Patch 04/11] Introduce user-space debug registers
[not found] <20090324152028.754123712@K.Prasad>
` (2 preceding siblings ...)
2009-03-24 15:25 ` [Patch 03/11] Modifying generic debug exception to use thread-specific debug registers K.Prasad
@ 2009-03-24 15:26 ` K.Prasad
2009-03-24 15:26 ` [Patch 05/11] Use wrapper routines around debug registers in processor related functions K.Prasad
` (6 subsequent siblings)
10 siblings, 0 replies; 33+ messages in thread
From: K.Prasad @ 2009-03-24 15:26 UTC (permalink / raw)
To: Ingo Molnar, Linux Kernel Mailing List
Cc: Alan Stern, Andrew Morton, Benjamin Herrenschmidt,
Frederic Weisbecker, maneesh, Roland McGrath, Steven Rostedt,
K.Prasad
[-- Attachment #1: 4 --]
[-- Type: text/plain, Size: 2929 bytes --]
This patch introduces debug registers to used by the per-thread
structure and wrapper routines to manage debug registers by process-related
functions.
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
---
arch/x86/include/asm/debugreg.h | 23 +++++++++++++++++++++++
arch/x86/include/asm/processor.h | 16 +++++++++-------
2 files changed, 32 insertions(+), 7 deletions(-)
Index: linux-2.6-tip/arch/x86/include/asm/debugreg.h
===================================================================
--- linux-2.6-tip.orig/arch/x86/include/asm/debugreg.h
+++ linux-2.6-tip/arch/x86/include/asm/debugreg.h
@@ -49,6 +49,8 @@
#define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */
#define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */
+#define DR_LOCAL_ENABLE (0x1) /* Local enable for reg 0 */
+#define DR_GLOBAL_ENABLE (0x2) /* Global enable for reg 0 */
#define DR_ENABLE_SIZE 2 /* 2 enable bits per register */
#define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */
@@ -67,4 +69,25 @@
#define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */
#define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */
+/*
+ * HW breakpoint additions
+ */
+#ifdef __KERNEL__
+
+/* For process management */
+void flush_thread_hw_breakpoint(struct task_struct *tsk);
+int copy_thread_hw_breakpoint(struct task_struct *tsk,
+ struct task_struct *child, unsigned long clone_flags);
+void dump_thread_hw_breakpoint(struct task_struct *tsk, int u_debugreg[8]);
+void switch_to_thread_hw_breakpoint(struct task_struct *tsk);
+
+/* For CPU management */
+void load_debug_registers(void);
+static inline void hw_breakpoint_disable(void)
+{
+ set_debugreg(0UL, 7);
+}
+
+#endif /* __KERNEL__ */
+
#endif /* _ASM_X86_DEBUGREG_H */
Index: linux-2.6-tip/arch/x86/include/asm/processor.h
===================================================================
--- linux-2.6-tip.orig/arch/x86/include/asm/processor.h
+++ linux-2.6-tip/arch/x86/include/asm/processor.h
@@ -29,6 +29,7 @@ struct mm_struct;
#include <linux/threads.h>
#include <linux/init.h>
+#define HB_NUM 4
/*
* Default implementation of macro that returns current
* instruction pointer ("program counter").
@@ -424,13 +425,14 @@ struct thread_struct {
unsigned long ip;
unsigned long fs;
unsigned long gs;
- /* Hardware debugging registers: */
- unsigned long debugreg0;
- unsigned long debugreg1;
- unsigned long debugreg2;
- unsigned long debugreg3;
- unsigned long debugreg6;
- unsigned long debugreg7;
+ /* Hardware breakpoint info */
+ struct hw_breakpoint *hbp[HB_NUM];
+ unsigned int hbp_num_installed;
+ /* Thread's view of debug reg 6 */
+ unsigned long dr6;
+ /* Thread's view of debug reg 7 */
+ unsigned long dr7;
+
/* Fault info: */
unsigned long cr2;
unsigned long trap_no;
^ permalink raw reply [flat|nested] 33+ messages in thread
* [Patch 05/11] Use wrapper routines around debug registers in processor related functions
[not found] <20090324152028.754123712@K.Prasad>
` (3 preceding siblings ...)
2009-03-24 15:26 ` [Patch 04/11] Introduce user-space " K.Prasad
@ 2009-03-24 15:26 ` K.Prasad
2009-03-24 15:26 ` [Patch 06/11] Use the new wrapper routines to access debug registers in process/thread code K.Prasad
` (5 subsequent siblings)
10 siblings, 0 replies; 33+ messages in thread
From: K.Prasad @ 2009-03-24 15:26 UTC (permalink / raw)
To: Ingo Molnar, Linux Kernel Mailing List
Cc: Alan Stern, Andrew Morton, Benjamin Herrenschmidt,
Frederic Weisbecker, maneesh, Roland McGrath, Steven Rostedt,
K.Prasad
[-- Attachment #1: 5 --]
[-- Type: text/plain, Size: 3648 bytes --]
From: Alan Stern <stern@rowland.harvard.edu>
This patch enables the use of wrapper routines to access the debug/breakpoint
registers.
[K.Prasad: Split-out from the bigger patch and minor changes following
re-basing]
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
---
arch/x86/kernel/smpboot.c | 3 +++
arch/x86/power/cpu_32.c | 16 +++-------------
arch/x86/power/cpu_64.c | 15 +++------------
3 files changed, 9 insertions(+), 25 deletions(-)
Index: linux-2.6-tip/arch/x86/power/cpu_32.c
===================================================================
--- linux-2.6-tip.orig/arch/x86/power/cpu_32.c
+++ linux-2.6-tip/arch/x86/power/cpu_32.c
@@ -12,6 +12,7 @@
#include <asm/mtrr.h>
#include <asm/mce.h>
#include <asm/xcr.h>
+#include <asm/debugreg.h>
static struct saved_context saved_context;
@@ -47,6 +48,7 @@ static void __save_processor_state(struc
ctxt->cr2 = read_cr2();
ctxt->cr3 = read_cr3();
ctxt->cr4 = read_cr4_safe();
+ hw_breakpoint_disable();
}
/* Needed by apm.c */
@@ -79,19 +81,7 @@ static void fix_processor_context(void)
load_TR_desc(); /* This does ltr */
load_LDT(&current->active_mm->context); /* This does lldt */
- /*
- * Now maybe reload the debug registers
- */
- if (current->thread.debugreg7) {
- set_debugreg(current->thread.debugreg0, 0);
- set_debugreg(current->thread.debugreg1, 1);
- set_debugreg(current->thread.debugreg2, 2);
- set_debugreg(current->thread.debugreg3, 3);
- /* no 4 and 5 */
- set_debugreg(current->thread.debugreg6, 6);
- set_debugreg(current->thread.debugreg7, 7);
- }
-
+ load_debug_registers();
}
static void __restore_processor_state(struct saved_context *ctxt)
Index: linux-2.6-tip/arch/x86/power/cpu_64.c
===================================================================
--- linux-2.6-tip.orig/arch/x86/power/cpu_64.c
+++ linux-2.6-tip/arch/x86/power/cpu_64.c
@@ -15,6 +15,7 @@
#include <asm/pgtable.h>
#include <asm/mtrr.h>
#include <asm/xcr.h>
+#include <asm/debugreg.h>
static void fix_processor_context(void);
@@ -70,6 +71,7 @@ static void __save_processor_state(struc
ctxt->cr3 = read_cr3();
ctxt->cr4 = read_cr4();
ctxt->cr8 = read_cr8();
+ hw_breakpoint_disable();
}
void save_processor_state(void)
@@ -158,16 +160,5 @@ static void fix_processor_context(void)
load_TR_desc(); /* This does ltr */
load_LDT(&current->active_mm->context); /* This does lldt */
- /*
- * Now maybe reload the debug registers
- */
- if (current->thread.debugreg7){
- loaddebug(&current->thread, 0);
- loaddebug(&current->thread, 1);
- loaddebug(&current->thread, 2);
- loaddebug(&current->thread, 3);
- /* no 4 and 5 */
- loaddebug(&current->thread, 6);
- loaddebug(&current->thread, 7);
- }
+ load_debug_registers();
}
Index: linux-2.6-tip/arch/x86/kernel/smpboot.c
===================================================================
--- linux-2.6-tip.orig/arch/x86/kernel/smpboot.c
+++ linux-2.6-tip/arch/x86/kernel/smpboot.c
@@ -63,6 +63,7 @@
#include <asm/apic.h>
#include <asm/setup.h>
#include <asm/uv/uv.h>
+#include <asm/debugreg.h>
#include <linux/mc146818rtc.h>
#include <asm/smpboot_hooks.h>
@@ -326,6 +327,7 @@ notrace static void __cpuinit start_seco
setup_secondary_clock();
wmb();
+ load_debug_registers();
cpu_idle();
}
@@ -1250,6 +1252,7 @@ void cpu_disable_common(void)
remove_cpu_from_maps(cpu);
unlock_vector_lock();
fixup_irqs();
+ hw_breakpoint_disable();
}
int native_cpu_disable(void)
^ permalink raw reply [flat|nested] 33+ messages in thread
* [Patch 06/11] Use the new wrapper routines to access debug registers in process/thread code
[not found] <20090324152028.754123712@K.Prasad>
` (4 preceding siblings ...)
2009-03-24 15:26 ` [Patch 05/11] Use wrapper routines around debug registers in processor related functions K.Prasad
@ 2009-03-24 15:26 ` K.Prasad
2009-03-24 15:26 ` [Patch 07/11] Modify signal handling code to refrain from re-enabling HW Breakpoints K.Prasad
` (4 subsequent siblings)
10 siblings, 0 replies; 33+ messages in thread
From: K.Prasad @ 2009-03-24 15:26 UTC (permalink / raw)
To: Ingo Molnar, Linux Kernel Mailing List
Cc: Alan Stern, Andrew Morton, Benjamin Herrenschmidt,
Frederic Weisbecker, maneesh, Roland McGrath, Steven Rostedt,
K.Prasad
[-- Attachment #1: 6.new --]
[-- Type: text/plain, Size: 7306 bytes --]
From: Alan Stern <stern@rowland.harvard.edu>
This patch enables the use of abstract debug registers in
process-handling routines.
[K.Prasad: Split-out from the bigger patch and minor changes following
re-basing]
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
---
arch/x86/kernel/process.c | 23 ++++++-----------------
arch/x86/kernel/process_32.c | 31 +++++++++++++++++++++++++++++++
arch/x86/kernel/process_64.c | 33 +++++++++++++++++++++++++++++++++
3 files changed, 70 insertions(+), 17 deletions(-)
Index: linux-2.6-tip/arch/x86/kernel/process.c
===================================================================
--- linux-2.6-tip.orig/arch/x86/kernel/process.c
+++ linux-2.6-tip/arch/x86/kernel/process.c
@@ -14,6 +14,8 @@
#include <asm/idle.h>
#include <asm/uaccess.h>
#include <asm/i387.h>
+#include <asm/debugreg.h>
+#include <asm/hw_breakpoint.h>
unsigned long idle_halt;
EXPORT_SYMBOL(idle_halt);
@@ -83,6 +85,8 @@ void exit_thread(void)
put_cpu();
kfree(bp);
}
+ if (unlikely(t->dr7))
+ flush_thread_hw_breakpoint(me);
ds_exit_thread(current);
}
@@ -103,14 +107,9 @@ void flush_thread(void)
}
#endif
- clear_tsk_thread_flag(tsk, TIF_DEBUG);
+ if (unlikely(tsk->thread.dr7))
+ flush_thread_hw_breakpoint(tsk);
- tsk->thread.debugreg0 = 0;
- tsk->thread.debugreg1 = 0;
- tsk->thread.debugreg2 = 0;
- tsk->thread.debugreg3 = 0;
- tsk->thread.debugreg6 = 0;
- tsk->thread.debugreg7 = 0;
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
/*
* Forget coprocessor state..
@@ -192,16 +191,6 @@ void __switch_to_xtra(struct task_struct
else if (next->debugctlmsr != prev->debugctlmsr)
update_debugctlmsr(next->debugctlmsr);
- if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
- set_debugreg(next->debugreg0, 0);
- set_debugreg(next->debugreg1, 1);
- set_debugreg(next->debugreg2, 2);
- set_debugreg(next->debugreg3, 3);
- /* no 4 and 5 */
- set_debugreg(next->debugreg6, 6);
- set_debugreg(next->debugreg7, 7);
- }
-
if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
test_tsk_thread_flag(next_p, TIF_NOTSC)) {
/* prev and next are different */
Index: linux-2.6-tip/arch/x86/kernel/process_32.c
===================================================================
--- linux-2.6-tip.orig/arch/x86/kernel/process_32.c
+++ linux-2.6-tip/arch/x86/kernel/process_32.c
@@ -61,6 +61,8 @@
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>
+#include <asm/debugreg.h>
+#include <asm/hw_breakpoint.h>
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
@@ -265,7 +267,14 @@ int copy_thread(int nr, unsigned long cl
task_user_gs(p) = get_user_gs(regs);
+ p->thread.io_bitmap_ptr = NULL;
+
tsk = current;
+ err = -ENOMEM;
+ if (unlikely(tsk->thread.dr7)) {
+ if (copy_thread_hw_breakpoint(tsk, p, clone_flags))
+ goto out;
+ }
if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
IO_BITMAP_BYTES, GFP_KERNEL);
@@ -285,10 +294,13 @@ int copy_thread(int nr, unsigned long cl
err = do_set_thread_area(p, -1,
(struct user_desc __user *)childregs->si, 0);
+out:
if (err && p->thread.io_bitmap_ptr) {
kfree(p->thread.io_bitmap_ptr);
p->thread.io_bitmap_max = 0;
}
+ if (err)
+ flush_thread_hw_breakpoint(p);
ds_copy_thread(p, current);
@@ -426,6 +438,25 @@ __switch_to(struct task_struct *prev_p,
lazy_load_gs(next->gs);
percpu_write(current_task, next_p);
+ /*
+ * There's a problem with moving the switch_to_thread_hw_breakpoint()
+ * call before current is updated. Suppose a kernel breakpoint is
+ * triggered in between the two. The hw-breakpoint handler will see
+ * that current is different from the task pointer stored in the chbi
+ * area, so it will think the task pointer is leftover from an old task
+ * (lazy switching) and will erase it. Then until the next context
+ * switch, no user-breakpoints will be installed.
+ *
+ * The real problem is that it's impossible to update both current and
+ * chbi->bp_task at the same instant, so there will always be a window
+ * in which they disagree and a breakpoint might get triggered. Since
+ * we use lazy switching, we are forced to assume that a disagreement
+ * means that current is correct and chbi->bp_task is old. But if you
+ * move the code above then you'll create a window in which current is
+ * old and chbi->bp_task is correct.
+ */
+ if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
+ switch_to_thread_hw_breakpoint(next_p);
return prev_p;
}
Index: linux-2.6-tip/arch/x86/kernel/process_64.c
===================================================================
--- linux-2.6-tip.orig/arch/x86/kernel/process_64.c
+++ linux-2.6-tip/arch/x86/kernel/process_64.c
@@ -55,6 +55,8 @@
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>
+#include <asm/debugreg.h>
+#include <asm/hw_breakpoint.h>
asmlinkage extern void ret_from_fork(void);
@@ -248,6 +250,8 @@ void release_thread(struct task_struct *
BUG();
}
}
+ if (unlikely(dead_task->thread.dr7))
+ flush_thread_hw_breakpoint(dead_task);
}
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
@@ -303,12 +307,18 @@ int copy_thread(int nr, unsigned long cl
p->thread.fs = me->thread.fs;
p->thread.gs = me->thread.gs;
+ p->thread.io_bitmap_ptr = NULL;
savesegment(gs, p->thread.gsindex);
savesegment(fs, p->thread.fsindex);
savesegment(es, p->thread.es);
savesegment(ds, p->thread.ds);
+ err = -ENOMEM;
+ if (unlikely(me->thread.dr7)) {
+ if (copy_thread_hw_breakpoint(me, p, clone_flags))
+ goto out;
+ }
if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
if (!p->thread.io_bitmap_ptr) {
@@ -346,6 +356,9 @@ out:
kfree(p->thread.io_bitmap_ptr);
p->thread.io_bitmap_max = 0;
}
+ if (err)
+ flush_thread_hw_breakpoint(p);
+
return err;
}
@@ -491,6 +504,26 @@ __switch_to(struct task_struct *prev_p,
*/
if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
math_state_restore();
+ /*
+ * There's a problem with moving the switch_to_thread_hw_breakpoint()
+ * call before current is updated. Suppose a kernel breakpoint is
+ * triggered in between the two. The hw-breakpoint handler will see
+ * that current is different from the task pointer stored in the chbi
+ * area, so it will think the task pointer is leftover from an old task
+ * (lazy switching) and will erase it. Then until the next context
+ * switch, no user-breakpoints will be installed.
+ *
+ * The real problem is that it's impossible to update both current and
+ * chbi->bp_task at the same instant, so there will always be a window
+ * in which they disagree and a breakpoint might get triggered. Since
+ * we use lazy switching, we are forced to assume that a disagreement
+ * means that current is correct and chbi->bp_task is old. But if you
+ * move the code above then you'll create a window in which current is
+ * old and chbi->bp_task is correct.
+ */
+ if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
+ switch_to_thread_hw_breakpoint(next_p);
+
return prev_p;
}
^ permalink raw reply [flat|nested] 33+ messages in thread
* [Patch 07/11] Modify signal handling code to refrain from re-enabling HW Breakpoints
[not found] <20090324152028.754123712@K.Prasad>
` (5 preceding siblings ...)
2009-03-24 15:26 ` [Patch 06/11] Use the new wrapper routines to access debug registers in process/thread code K.Prasad
@ 2009-03-24 15:26 ` K.Prasad
2009-03-24 15:26 ` [Patch 08/11] Modify Ptrace routines to access breakpoint registers K.Prasad
` (3 subsequent siblings)
10 siblings, 0 replies; 33+ messages in thread
From: K.Prasad @ 2009-03-24 15:26 UTC (permalink / raw)
To: Ingo Molnar, Linux Kernel Mailing List
Cc: Alan Stern, Andrew Morton, Benjamin Herrenschmidt,
Frederic Weisbecker, maneesh, Roland McGrath, Steven Rostedt,
K.Prasad
[-- Attachment #1: 7 --]
[-- Type: text/plain, Size: 1126 bytes --]
From: Alan Stern <stern@rowland.harvard.edu>
This patch disables re-enabling of Hardware Breakpoint registers through
the signal handling code. This is now done during
hw_breakpoint_handler().
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
---
arch/x86/kernel/signal.c | 9 ---------
1 file changed, 9 deletions(-)
Index: linux-2.6-tip/arch/x86/kernel/signal.c
===================================================================
--- linux-2.6-tip.orig/arch/x86/kernel/signal.c
+++ linux-2.6-tip/arch/x86/kernel/signal.c
@@ -800,15 +800,6 @@ static void do_signal(struct pt_regs *re
signr = get_signal_to_deliver(&info, &ka, regs, NULL);
if (signr > 0) {
- /*
- * Re-enable any watchpoints before delivering the
- * signal to user space. The processor register will
- * have been cleared if the watchpoint triggered
- * inside the kernel.
- */
- if (current->thread.debugreg7)
- set_debugreg(current->thread.debugreg7, 7);
-
/* Whee! Actually deliver the signal. */
if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
/*
^ permalink raw reply [flat|nested] 33+ messages in thread
* [Patch 08/11] Modify Ptrace routines to access breakpoint registers
[not found] <20090324152028.754123712@K.Prasad>
` (6 preceding siblings ...)
2009-03-24 15:26 ` [Patch 07/11] Modify signal handling code to refrain from re-enabling HW Breakpoints K.Prasad
@ 2009-03-24 15:26 ` K.Prasad
2009-03-24 15:27 ` [Patch 09/11] Cleanup HW Breakpoint registers before kexec K.Prasad
` (2 subsequent siblings)
10 siblings, 0 replies; 33+ messages in thread
From: K.Prasad @ 2009-03-24 15:26 UTC (permalink / raw)
To: Ingo Molnar, Linux Kernel Mailing List
Cc: Alan Stern, Andrew Morton, Benjamin Herrenschmidt,
Frederic Weisbecker, maneesh, Roland McGrath, Steven Rostedt,
K.Prasad
[-- Attachment #1: 8 --]
[-- Type: text/plain, Size: 8696 bytes --]
From: Alan Stern <stern@rowland.harvard.edu>
This patch modifies the ptrace code to use the new wrapper routines around the
debug/breakpoint registers.
[K.Prasad: Adapted the ptrace routines and to changes post x86/x86_64 merger,
split the minor patch from bigger patch. Re-wrote ptrace_write_dr7()
and ptrace_set_debugreg() functions to use new data-structures]
[K.Prasad: Changed code to suit the simplified HW breakpoint implementation]
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
---
arch/x86/kernel/ptrace.c | 229 ++++++++++++++++++++++++++++-------------------
1 file changed, 138 insertions(+), 91 deletions(-)
Index: linux-2.6-tip/arch/x86/kernel/ptrace.c
===================================================================
--- linux-2.6-tip.orig/arch/x86/kernel/ptrace.c
+++ linux-2.6-tip/arch/x86/kernel/ptrace.c
@@ -34,6 +34,7 @@
#include <asm/prctl.h>
#include <asm/proto.h>
#include <asm/ds.h>
+#include <asm/hw_breakpoint.h>
#include "tls.h"
@@ -134,11 +135,6 @@ static int set_segment_reg(struct task_s
return 0;
}
-static unsigned long debugreg_addr_limit(struct task_struct *task)
-{
- return TASK_SIZE - 3;
-}
-
#else /* CONFIG_X86_64 */
#define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT)
@@ -263,15 +259,6 @@ static int set_segment_reg(struct task_s
return 0;
}
-static unsigned long debugreg_addr_limit(struct task_struct *task)
-{
-#ifdef CONFIG_IA32_EMULATION
- if (test_tsk_thread_flag(task, TIF_IA32))
- return IA32_PAGE_OFFSET - 3;
-#endif
- return TASK_SIZE_MAX - 7;
-}
-
#endif /* CONFIG_X86_32 */
static unsigned long get_flags(struct task_struct *task)
@@ -462,95 +449,155 @@ static int genregs_set(struct task_struc
}
/*
- * This function is trivial and will be inlined by the compiler.
- * Having it separates the implementation details of debug
- * registers from the interface details of ptrace.
+ * Decode the length and type bits for a particular breakpoint as
+ * stored in debug register 7. Return the "enabled" status.
*/
-static unsigned long ptrace_get_debugreg(struct task_struct *child, int n)
+static int decode_dr7(unsigned long dr7, int bpnum, unsigned *len,
+ unsigned *type)
{
- switch (n) {
- case 0: return child->thread.debugreg0;
- case 1: return child->thread.debugreg1;
- case 2: return child->thread.debugreg2;
- case 3: return child->thread.debugreg3;
- case 6: return child->thread.debugreg6;
- case 7: return child->thread.debugreg7;
- }
- return 0;
+ int temp = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);
+
+ *len = (temp & 0xc) | 0x40;
+ *type = (temp & 0x3) | 0x80;
+ return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
}
-static int ptrace_set_debugreg(struct task_struct *child,
- int n, unsigned long data)
+static void ptrace_triggered(struct hw_breakpoint *bp, struct pt_regs *regs)
{
+ struct thread_struct *thread = &(current->thread);
int i;
- if (unlikely(n == 4 || n == 5))
- return -EIO;
+ /* Store in the virtual DR6 register the fact that the breakpoint
+ * was hit so the thread's debugger will see it.
+ */
+ for (i = 0; hbp_user_refcount[i]; i++)
+ if (bp->info.address == thread->hbp[i]->info.address)
+ break;
- if (n < 4 && unlikely(data >= debugreg_addr_limit(child)))
- return -EIO;
+ thread->dr6 |= (DR_TRAP0 << i);
+}
- switch (n) {
- case 0: child->thread.debugreg0 = data; break;
- case 1: child->thread.debugreg1 = data; break;
- case 2: child->thread.debugreg2 = data; break;
- case 3: child->thread.debugreg3 = data; break;
+/*
+ * Handle ptrace writes to debug register 7.
+ */
+static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
+{
+ struct hw_breakpoint *bp;
+ struct thread_struct *thread = &(tsk->thread);
+ int i;
+ int rc = 0;
+ unsigned long old_dr7 = thread->dr7;
- case 6:
- if ((data & ~0xffffffffUL) != 0)
- return -EIO;
- child->thread.debugreg6 = data;
- break;
+ data &= ~DR_CONTROL_RESERVED;
+ /* Loop through all the hardware breakpoints, making the
+ * appropriate changes to each.
+ */
+ thread->dr7 = data;
+ for (i = 0; i < HB_NUM; i++) {
+ int enabled;
+ unsigned len, type;
+
+ bp = thread->hbp[i];
+ if (!bp)
+ continue;
+
+ enabled = decode_dr7(data, i, &len, &type);
+ if (!enabled) {
+ if (bp->triggered)
+ __unregister_user_hw_breakpoint(i, tsk, bp);
+ continue;
+ }
- case 7:
- /*
- * Sanity-check data. Take one half-byte at once with
- * check = (val >> (16 + 4*i)) & 0xf. It contains the
- * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits
- * 2 and 3 are LENi. Given a list of invalid values,
- * we do mask |= 1 << invalid_value, so that
- * (mask >> check) & 1 is a correct test for invalid
- * values.
- *
- * R/Wi contains the type of the breakpoint /
- * watchpoint, LENi contains the length of the watched
- * data in the watchpoint case.
- *
- * The invalid values are:
- * - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit]
- * - R/Wi == 0x10 (break on I/O reads or writes), so
- * mask |= 0x4444.
- * - R/Wi == 0x00 && LENi != 0x00, so we have mask |=
- * 0x1110.
- *
- * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54.
- *
- * See the Intel Manual "System Programming Guide",
- * 15.2.4
- *
- * Note that LENi == 0x10 is defined on x86_64 in long
- * mode (i.e. even for 32-bit userspace software, but
- * 64-bit kernel), so the x86_64 mask value is 0x5454.
- * See the AMD manual no. 24593 (AMD64 System Programming)
- */
-#ifdef CONFIG_X86_32
-#define DR7_MASK 0x5f54
-#else
-#define DR7_MASK 0x5554
-#endif
- data &= ~DR_CONTROL_RESERVED;
- for (i = 0; i < 4; i++)
- if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1)
- return -EIO;
- child->thread.debugreg7 = data;
- if (data)
- set_tsk_thread_flag(child, TIF_DEBUG);
+ if (bp->triggered)
+ rc = __modify_user_hw_breakpoint(i, tsk, bp);
+ else {
+ bp->triggered = ptrace_triggered;
+ bp->info.len = len;
+ bp->info.type = type;
+ rc = __register_user_hw_breakpoint(i, tsk, bp);
+ }
+ if (rc < 0)
+ break;
else
- clear_tsk_thread_flag(child, TIF_DEBUG);
- break;
+ set_tsk_thread_flag(tsk, TIF_DEBUG);
}
+ /* If anything above failed, restore the original settings.
+ * The original settings will take effect the next time the thread
+ * is scheduled
+ */
+ if (rc < 0)
+ data = old_dr7;
+ return rc;
+}
- return 0;
+/*
+ * Handle PTRACE_PEEKUSR calls for the debug register area.
+ */
+unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
+{
+ struct thread_struct *thread = &(tsk->thread);
+ unsigned long val = 0;
+
+ mutex_lock(&hw_breakpoint_mutex);
+ if (n < HB_NUM) {
+ if (thread->hbp[n])
+ val = thread->hbp[n]->info.address;
+ } else if (n == 6) {
+ val = thread->dr6;
+ } else if (n == 7) {
+ val = thread->dr7;
+ }
+ mutex_unlock(&hw_breakpoint_mutex);
+ return val;
+}
+
+/*
+ * Handle PTRACE_POKEUSR calls for the debug register area.
+ */
+int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val)
+{
+ struct thread_struct *thread = &(tsk->thread);
+ int rc = -EIO;
+
+ mutex_lock(&hw_breakpoint_mutex);
+
+ /* There are no DR4 or DR5 registers */
+ if (n == 4 || n == 5)
+ goto ret_path;
+
+ /* Writes to DR6 modify the virtualized value */
+ if (n == 6) {
+ tsk->thread.dr6 = val;
+ rc = 0;
+ goto ret_path;
+ }
+
+ /* Writes to DR0 - DR3 change a breakpoint address */
+ rc = 0;
+ if (n < HB_NUM) {
+ if (!val)
+ goto ret_path;
+ if (thread->hbp[n]) {
+ thread->hbp[n]->info.address = val;
+ rc = __modify_user_hw_breakpoint(n, tsk,
+ thread->hbp[n]);
+ goto ret_path;
+ }
+ thread->hbp[n] = kzalloc(sizeof(struct hw_breakpoint),
+ GFP_KERNEL);
+ if (!thread->hbp[n]) {
+ rc = -ENOMEM;
+ goto ret_path;
+ } else
+ thread->hbp[n]->info.address = val;
+ }
+ /* All that's left is DR7 */
+ if (n == 7)
+ rc = ptrace_write_dr7(tsk, val);
+
+ret_path:
+ mutex_unlock(&hw_breakpoint_mutex);
+ return rc;
}
/*
@@ -871,7 +918,7 @@ long arch_ptrace(struct task_struct *chi
else if (addr >= offsetof(struct user, u_debugreg[0]) &&
addr <= offsetof(struct user, u_debugreg[7])) {
addr -= offsetof(struct user, u_debugreg[0]);
- tmp = ptrace_get_debugreg(child, addr / sizeof(data));
+ tmp = ptrace_get_debugreg(child, addr/sizeof(data));
}
ret = put_user(tmp, datap);
break;
@@ -889,7 +936,7 @@ long arch_ptrace(struct task_struct *chi
addr <= offsetof(struct user, u_debugreg[7])) {
addr -= offsetof(struct user, u_debugreg[0]);
ret = ptrace_set_debugreg(child,
- addr / sizeof(data), data);
+ addr/sizeof(data), data);
}
break;
^ permalink raw reply [flat|nested] 33+ messages in thread
* [Patch 09/11] Cleanup HW Breakpoint registers before kexec
[not found] <20090324152028.754123712@K.Prasad>
` (7 preceding siblings ...)
2009-03-24 15:26 ` [Patch 08/11] Modify Ptrace routines to access breakpoint registers K.Prasad
@ 2009-03-24 15:27 ` K.Prasad
2009-03-24 15:27 ` [Patch 10/11] Sample HW breakpoint over kernel data address K.Prasad
2009-03-24 15:28 ` [Patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v2 K.Prasad
10 siblings, 0 replies; 33+ messages in thread
From: K.Prasad @ 2009-03-24 15:27 UTC (permalink / raw)
To: Ingo Molnar, Linux Kernel Mailing List
Cc: Alan Stern, Andrew Morton, Benjamin Herrenschmidt,
Frederic Weisbecker, maneesh, Roland McGrath, Steven Rostedt,
K.Prasad
[-- Attachment #1: 9 --]
[-- Type: text/plain, Size: 1782 bytes --]
From: Alan Stern <stern@rowland.harvard.edu>
This patch disables Hardware breakpoints before doing a 'kexec' on the machine.
[K.Prasad: Split-out from the bigger patch and minor changes following
re-basing]
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
---
arch/x86/kernel/machine_kexec_32.c | 2 ++
arch/x86/kernel/machine_kexec_64.c | 2 ++
2 files changed, 4 insertions(+)
Index: linux-2.6-tip/arch/x86/kernel/machine_kexec_32.c
===================================================================
--- linux-2.6-tip.orig/arch/x86/kernel/machine_kexec_32.c
+++ linux-2.6-tip/arch/x86/kernel/machine_kexec_32.c
@@ -25,6 +25,7 @@
#include <asm/desc.h>
#include <asm/system.h>
#include <asm/cacheflush.h>
+#include <asm/debugreg.h>
static void set_idt(void *newidt, __u16 limit)
{
@@ -202,6 +203,7 @@ void machine_kexec(struct kimage *image)
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();
+ hw_breakpoint_disable();
if (image->preserve_context) {
#ifdef CONFIG_X86_IO_APIC
Index: linux-2.6-tip/arch/x86/kernel/machine_kexec_64.c
===================================================================
--- linux-2.6-tip.orig/arch/x86/kernel/machine_kexec_64.c
+++ linux-2.6-tip/arch/x86/kernel/machine_kexec_64.c
@@ -18,6 +18,7 @@
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
+#include <asm/debugreg.h>
static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
unsigned long addr)
@@ -282,6 +283,7 @@ void machine_kexec(struct kimage *image)
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();
+ hw_breakpoint_disable();
if (image->preserve_context) {
#ifdef CONFIG_X86_IO_APIC
^ permalink raw reply [flat|nested] 33+ messages in thread
* [Patch 10/11] Sample HW breakpoint over kernel data address
[not found] <20090324152028.754123712@K.Prasad>
` (8 preceding siblings ...)
2009-03-24 15:27 ` [Patch 09/11] Cleanup HW Breakpoint registers before kexec K.Prasad
@ 2009-03-24 15:27 ` K.Prasad
2009-03-24 15:28 ` [Patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v2 K.Prasad
10 siblings, 0 replies; 33+ messages in thread
From: K.Prasad @ 2009-03-24 15:27 UTC (permalink / raw)
To: Ingo Molnar, Linux Kernel Mailing List
Cc: Alan Stern, Andrew Morton, Benjamin Herrenschmidt,
Frederic Weisbecker, maneesh, Roland McGrath, Steven Rostedt,
K.Prasad
[-- Attachment #1: 10 --]
[-- Type: text/plain, Size: 4574 bytes --]
This patch introduces a sample kernel module to demonstrate the use of Hardware
Breakpoint feature. It places a breakpoint over the kernel variable 'pid_max'
to monitor all write operations and emits a function-backtrace when done.
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
---
samples/Kconfig | 6 ++
samples/Makefile | 4 +
samples/hw_breakpoint/Makefile | 1
samples/hw_breakpoint/data_breakpoint.c | 79 ++++++++++++++++++++++++++++++++
4 files changed, 89 insertions(+), 1 deletion(-)
Index: linux-2.6-tip/samples/Kconfig
===================================================================
--- linux-2.6-tip.orig/samples/Kconfig
+++ linux-2.6-tip/samples/Kconfig
@@ -39,5 +39,11 @@ config SAMPLE_KRETPROBES
default m
depends on SAMPLE_KPROBES && KRETPROBES
+config SAMPLE_HW_BREAKPOINT
+ tristate "Build kernel hardware breakpoint examples -- loadable modules only"
+ depends on HAVE_HW_BREAKPOINT && m
+ help
+ This builds kernel hardware breakpoint example modules.
+
endif # SAMPLES
Index: linux-2.6-tip/samples/Makefile
===================================================================
--- linux-2.6-tip.orig/samples/Makefile
+++ linux-2.6-tip/samples/Makefile
@@ -1,3 +1,5 @@
# Makefile for Linux samples code
-obj-$(CONFIG_SAMPLES) += markers/ kobject/ kprobes/ tracepoints/
+obj-$(CONFIG_SAMPLES) += markers/ kobject/ kprobes/ tracepoints/ \
+ hw_breakpoint/
+
Index: linux-2.6-tip/samples/hw_breakpoint/Makefile
===================================================================
--- /dev/null
+++ linux-2.6-tip/samples/hw_breakpoint/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_SAMPLE_HW_BREAKPOINT) += data_breakpoint.o
Index: linux-2.6-tip/samples/hw_breakpoint/data_breakpoint.c
===================================================================
--- /dev/null
+++ linux-2.6-tip/samples/hw_breakpoint/data_breakpoint.c
@@ -0,0 +1,79 @@
+/*
+ * data_breakpoint.c - Sample HW Breakpoint file to watch kernel data address
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * This file is a kernel module that places a breakpoint over 'pid_max' kernel
+ * variable using Hardware Breakpoint register. The corresponding handler which
+ * prints a backtrace is invoked everytime a write operation is performed on
+ * that variable.
+ *
+ * After inserting this module, invoke a write operation using
+ * 'echo <desired_value> > /proc/sys/kernel/pid_max'
+ * to find the function-call backtrace.
+ *
+ * Copyright (C) IBM Corporation, 2009
+ */
+#include <linux/module.h> /* Needed by all modules */
+#include <linux/kernel.h> /* Needed for KERN_INFO */
+#include <linux/init.h> /* Needed for the macros */
+
+#include <asm/hw_breakpoint.h>
+
+struct hw_breakpoint pid_max_hbp;
+
+void pid_max_hbp_handler(struct hw_breakpoint *temp, struct pt_regs
+ *temp_regs)
+{
+ printk(KERN_INFO "pid_max value is changed\n");
+ dump_stack();
+ printk(KERN_INFO "Dump stack from pid_max_hbp_handler\n");
+}
+
+static int __init hw_break_module_init(void)
+{
+ int ret;
+
+#ifdef CONFIG_X86
+ pid_max_hbp.info.name = "pid_max";
+ pid_max_hbp.info.type = HW_BREAKPOINT_WRITE;
+ pid_max_hbp.info.len = HW_BREAKPOINT_LEN_4;
+
+ pid_max_hbp.triggered = (void *)pid_max_hbp_handler;
+#endif /* CONFIG_X86 */
+
+ ret = register_kernel_hw_breakpoint(&pid_max_hbp);
+
+ if (ret < 0) {
+ printk(KERN_INFO "Breakpoint registration failed\n");
+ return ret;
+ } else
+ printk(KERN_INFO "HW Breakpoint for pid_max write installed\n");
+
+ return 0;
+}
+
+static void __exit hw_break_module_exit(void)
+{
+ unregister_kernel_hw_breakpoint(&pid_max_hbp);
+ printk(KERN_INFO "HW Breakpoint for pid_max write uninstalled\n");
+}
+
+module_init(hw_break_module_init);
+module_exit(hw_break_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("K.Prasad");
+MODULE_DESCRIPTION("pid_max breakpoint");
^ permalink raw reply [flat|nested] 33+ messages in thread
* [Patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v2
[not found] <20090324152028.754123712@K.Prasad>
` (9 preceding siblings ...)
2009-03-24 15:27 ` [Patch 10/11] Sample HW breakpoint over kernel data address K.Prasad
@ 2009-03-24 15:28 ` K.Prasad
2009-03-22 9:35 ` Pavel Machek
` (2 more replies)
10 siblings, 3 replies; 33+ messages in thread
From: K.Prasad @ 2009-03-24 15:28 UTC (permalink / raw)
To: Ingo Molnar, Linux Kernel Mailing List
Cc: Alan Stern, Andrew Morton, Benjamin Herrenschmidt,
Frederic Weisbecker, maneesh, Roland McGrath, Steven Rostedt,
K.Prasad
[-- Attachment #1: ftrace_hbkpt_12 --]
[-- Type: text/plain, Size: 19350 bytes --]
This patch adds an ftrace plugin to detect and profile memory access over
kernel variables. It uses HW Breakpoint interfaces to 'watch' memory
addresses.
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
---
kernel/trace/Kconfig | 21 +
kernel/trace/Makefile | 1
kernel/trace/trace.h | 25 +
kernel/trace/trace_ksym.c | 555 ++++++++++++++++++++++++++++++++++++++++++
kernel/trace/trace_selftest.c | 36 ++
5 files changed, 638 insertions(+)
Index: linux-2.6-tip/kernel/trace/Kconfig
===================================================================
--- linux-2.6-tip.orig/kernel/trace/Kconfig
+++ linux-2.6-tip/kernel/trace/Kconfig
@@ -268,6 +268,27 @@ config POWER_TRACER
power management decisions, specifically the C-state and P-state
behavior.
+config KSYM_TRACER
+ bool "Trace read and write access on kernel memory locations"
+ depends on HAVE_HW_BREAKPOINT
+ select TRACING
+ help
+ This tracer helps find read and write operations on any given kernel
+ symbol i.e. /proc/kallsyms.
+
+config PROFILE_KSYM_TRACER
+ bool "Profile all kernel memory accesses on 'watched' variables"
+ depends on KSYM_TRACER
+ help
+ This tracer profiles kernel accesses on variables watched through the
+ ksym tracer ftrace plugin. Depending upon the hardware, all read
+ and write operations on kernel variables can be monitored for
+ accesses.
+
+ The results will be displayed in:
+ /debugfs/tracing/profile_ksym
+
+ Say N if unsure.
config STACK_TRACER
bool "Trace max stack"
Index: linux-2.6-tip/kernel/trace/Makefile
===================================================================
--- linux-2.6-tip.orig/kernel/trace/Makefile
+++ linux-2.6-tip/kernel/trace/Makefile
@@ -46,5 +46,6 @@ obj-$(CONFIG_EVENT_TRACER) += trace_expo
obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
obj-$(CONFIG_EVENT_TRACER) += trace_events_filter.o
+obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
libftrace-y := ftrace.o
Index: linux-2.6-tip/kernel/trace/trace.h
===================================================================
--- linux-2.6-tip.orig/kernel/trace/trace.h
+++ linux-2.6-tip/kernel/trace/trace.h
@@ -12,6 +12,10 @@
#include <trace/kmemtrace.h>
#include <trace/power.h>
+#ifdef CONFIG_KSYM_TRACER
+#include <asm/hw_breakpoint.h>
+#endif
+
enum trace_type {
__TRACE_FIRST_TYPE = 0,
@@ -37,6 +41,7 @@ enum trace_type {
TRACE_KMEM_FREE,
TRACE_POWER,
TRACE_BLK,
+ TRACE_KSYM,
__TRACE_LAST_TYPE,
};
@@ -212,6 +217,23 @@ struct syscall_trace_exit {
unsigned long ret;
};
+#ifdef CONFIG_KSYM_TRACER
+struct trace_ksym {
+ struct trace_entry ent;
+ struct hw_breakpoint *ksym_hbp;
+ unsigned long ksym_addr;
+ unsigned long ip;
+#ifdef CONFIG_PROFILE_KSYM_TRACER
+ unsigned long counter;
+#endif
+ struct hlist_node ksym_hlist;
+ char ksym_name[KSYM_NAME_LEN];
+ char p_name[TASK_COMM_LEN];
+};
+#else
+struct trace_ksym {
+};
+#endif /* CONFIG_KSYM_TRACER */
/*
* trace_flag_type is an enumeration that holds different
@@ -330,6 +352,7 @@ extern void __ftrace_bad_type(void);
TRACE_SYSCALL_ENTER); \
IF_ASSIGN(var, ent, struct syscall_trace_exit, \
TRACE_SYSCALL_EXIT); \
+ IF_ASSIGN(var, ent, struct trace_ksym, TRACE_KSYM); \
__ftrace_bad_type(); \
} while (0)
@@ -593,6 +616,8 @@ extern int trace_selftest_startup_syspro
struct trace_array *tr);
extern int trace_selftest_startup_branch(struct tracer *trace,
struct trace_array *tr);
+extern int trace_selftest_startup_ksym(struct tracer *trace,
+ struct trace_array *tr);
#endif /* CONFIG_FTRACE_STARTUP_TEST */
extern void *head_page(struct trace_array_cpu *data);
Index: linux-2.6-tip/kernel/trace/trace_ksym.c
===================================================================
--- /dev/null
+++ linux-2.6-tip/kernel/trace/trace_ksym.c
@@ -0,0 +1,555 @@
+/*
+ * trace_ksym.c - Kernel Symbol Tracer
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2009
+ */
+
+#include <linux/kallsyms.h>
+#include <linux/uaccess.h>
+#include <linux/debugfs.h>
+#include <linux/ftrace.h>
+#include <linux/module.h>
+#include <linux/jhash.h>
+#include <linux/fs.h>
+
+#include "trace_output.h"
+#include "trace_stat.h"
+#include "trace.h"
+
+/* For now, let us restrict the no. of symbols traced simultaneously to number
+ * of available hardware breakpoint registers.
+ */
+#define KSYM_TRACER_MAX HB_NUM
+
+#define KSYM_TRACER_OP_LEN 3 /* rw- */
+#define KSYM_FILTER_ENTRY_LEN (KSYM_NAME_LEN + KSYM_TRACER_OP_LEN + 1)
+
+#ifdef CONFIG_FTRACE_SELFTEST
+
+static int ksym_selftest_dummy;
+#define KSYM_SELFTEST_ENTRY "ksym_selftest_dummy"
+
+#endif /* CONFIG_FTRACE_SELFTEST */
+
+static struct trace_array *ksym_trace_array;
+
+DEFINE_MUTEX(ksym_tracer_mutex);
+
+static unsigned int ksym_filter_entry_count;
+static unsigned int ksym_tracing_enabled;
+
+static HLIST_HEAD(ksym_filter_head);
+
+#ifdef CONFIG_PROFILE_KSYM_TRACER
+
+#define MAX_UL_INT 0xffffffff
+DEFINE_SPINLOCK(ksym_stat_lock);
+
+void ksym_collect_stats(unsigned long hbp_hit_addr)
+{
+ struct hlist_node *node;
+ struct trace_ksym *entry;
+
+ spin_lock(&ksym_stat_lock);
+ hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
+ if ((entry->ksym_addr == hbp_hit_addr) &&
+ (entry->counter <= MAX_UL_INT)) {
+ entry->counter++;
+ break;
+ }
+ }
+ spin_unlock(&ksym_stat_lock);
+}
+#endif /* CONFIG_PROFILE_KSYM_TRACER */
+
+void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs)
+{
+ struct ring_buffer_event *event;
+ struct trace_array *tr;
+ struct trace_ksym *entry;
+ int pc;
+
+ if (!ksym_tracing_enabled)
+ return;
+
+ tr = ksym_trace_array;
+ pc = preempt_count();
+
+ event = trace_buffer_lock_reserve(tr, TRACE_KSYM,
+ sizeof(*entry), 0, pc);
+ if (!event)
+ return;
+
+ entry = ring_buffer_event_data(event);
+ strlcpy(entry->ksym_name, hbp->info.name, KSYM_SYMBOL_LEN);
+ entry->ksym_hbp = hbp;
+ entry->ip = instruction_pointer(regs);
+ strlcpy(entry->p_name, current->comm, TASK_COMM_LEN);
+#ifdef CONFIG_PROFILE_KSYM_TRACER
+ ksym_collect_stats(hbp->info.address);
+#endif /* CONFIG_PROFILE_KSYM_TRACER */
+
+ trace_buffer_unlock_commit(tr, event, 0, pc);
+}
+
+/* Valid access types are represented as
+ *
+ * rw- : Set Read/Write Access Breakpoint
+ * -w- : Set Write Access Breakpoint
+ * --- : Clear Breakpoints
+ * --x : Set Execution Break points (Not available yet)
+ *
+ */
+static int ksym_trace_get_access_type(char *access_str)
+{
+ int pos, access = 0;
+
+ for (pos = 0; pos < KSYM_TRACER_OP_LEN; pos++) {
+ switch (access_str[pos]) {
+ case 'r':
+ access += (pos == 0) ? 4 : -1;
+ break;
+ case 'w':
+ access += (pos == 1) ? 2 : -1;
+ break;
+ case '-':
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ switch (access) {
+ case 6:
+ access = HW_BREAKPOINT_RW;
+ break;
+ case 2:
+ access = HW_BREAKPOINT_WRITE;
+ break;
+ case 0:
+ access = 0;
+ }
+
+ return access;
+}
+
+/*
+ * There can be several possible malformed requests and we attempt to capture
+ * all of them. We enumerate some of the rules
+ * 1. We will not allow kernel symbols with ':' since it is used as a delimiter.
+ * i.e. multiple ':' symbols disallowed. Possible uses are of the form
+ * <module>:<ksym_name>:<op>.
+ * 2. No delimiter symbol ':' in the input string
+ * 3. Spurious operator symbols or symbols not in their respective positions
+ * 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file
+ * 5. Kernel symbol not a part of /proc/kallsyms
+ * 6. Duplicate requests
+ */
+static int parse_ksym_trace_str(char *input_string, char **ksymname,
+ unsigned long *addr)
+{
+ char *delimiter = ":";
+ int ret;
+
+ ret = -EINVAL;
+ *ksymname = strsep(&input_string, delimiter);
+ *addr = kallsyms_lookup_name(*ksymname);
+
+ /* Check for malformed request: (2), (1) and (5) */
+ if ((!input_string) ||
+ (strlen(input_string) != (KSYM_TRACER_OP_LEN + 1)) ||
+ (*addr == 0))
+ goto return_code;
+ ret = ksym_trace_get_access_type(input_string);
+
+return_code:
+ return ret;
+}
+
+static int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
+{
+ struct trace_ksym *entry;
+ int ret;
+
+ if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
+ printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No"
+ " new requests for tracing can be accepted now.\n",
+ KSYM_TRACER_MAX);
+ return -ENOSPC;
+ }
+
+ entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
+ if (!entry)
+ return -ENOMEM;
+
+ entry->ksym_hbp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL);
+ if (!entry->ksym_hbp) {
+ kfree(entry);
+ return -ENOMEM;
+ }
+
+ entry->ksym_hbp->info.name = ksymname;
+ entry->ksym_hbp->info.type = op;
+ entry->ksym_addr = entry->ksym_hbp->info.address = addr;
+ entry->ksym_hbp->info.len = HW_BREAKPOINT_LEN_4;
+
+ entry->ksym_hbp->triggered = (void *)ksym_hbp_handler;
+
+ ret = register_kernel_hw_breakpoint(entry->ksym_hbp);
+ if (ret < 0) {
+ printk(KERN_INFO "ksym_tracer request failed. Try again"
+ " later!!\n");
+ kfree(entry->ksym_hbp);
+ kfree(entry);
+ return -EAGAIN;
+ }
+ hlist_add_head(&(entry->ksym_hlist), &ksym_filter_head);
+ ksym_filter_entry_count++;
+
+ return 0;
+}
+
+static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ struct trace_ksym *entry;
+ struct hlist_node *node;
+ char buf[KSYM_FILTER_ENTRY_LEN * KSYM_TRACER_MAX];
+ ssize_t ret, cnt = 0;
+
+ mutex_lock(&ksym_tracer_mutex);
+
+ hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
+ cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt, "%s:",
+ entry->ksym_hbp->info.name);
+ if (entry->ksym_hbp->info.type == HW_BREAKPOINT_WRITE)
+ cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt,
+ "-w-\n");
+ else if (entry->ksym_hbp->info.type == HW_BREAKPOINT_RW)
+ cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt,
+ "rw-\n");
+ }
+ ret = simple_read_from_buffer(ubuf, count, ppos, buf, strlen(buf));
+ mutex_unlock(&ksym_tracer_mutex);
+
+ return ret;
+}
+
+static ssize_t ksym_trace_filter_write(struct file *file,
+ const char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ struct trace_ksym *entry;
+ struct hlist_node *node;
+ char *input_string, *ksymname = NULL;
+ unsigned long ksym_addr = 0;
+ int ret, op, changed = 0;
+
+ /* Ignore echo "" > ksym_trace_filter */
+ if (count == 0)
+ return 0;
+
+ input_string = kzalloc(count, GFP_KERNEL);
+ if (!input_string)
+ return -ENOMEM;
+
+ if (copy_from_user(input_string, buffer, count)) {
+ kfree(input_string);
+ return -EFAULT;
+ }
+
+ ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
+ if (ret < 0) {
+ kfree(input_string);
+ return ret;
+ }
+
+ mutex_lock(&ksym_tracer_mutex);
+
+ ret = -EINVAL;
+ hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
+ if (entry->ksym_addr == ksym_addr) {
+ /* Check for malformed request: (6) */
+ if (entry->ksym_hbp->info.type != op)
+ changed = 1;
+ else
+ goto err_ret;
+ break;
+ }
+ }
+ if (changed) {
+ unregister_kernel_hw_breakpoint(entry->ksym_hbp);
+ entry->ksym_hbp->info.type = op;
+ if (op > 0) {
+ ret = register_kernel_hw_breakpoint(entry->ksym_hbp);
+ if (ret == 0) {
+ ret = count;
+ goto unlock_ret_path;
+ }
+ }
+ ksym_filter_entry_count--;
+ hlist_del(&(entry->ksym_hlist));
+ kfree(entry->ksym_hbp);
+ kfree(entry);
+ ret = count;
+ goto err_ret;
+ } else {
+ /* Check for malformed request: (4) */
+ if (op == 0)
+ goto err_ret;
+ ret = process_new_ksym_entry(ksymname, op, ksym_addr);
+ if (ret)
+ goto err_ret;
+ }
+ ret = count;
+ goto unlock_ret_path;
+
+err_ret:
+ kfree(input_string);
+
+unlock_ret_path:
+ mutex_unlock(&ksym_tracer_mutex);
+ return ret;
+}
+
+static const struct file_operations ksym_tracing_fops = {
+ .open = tracing_open_generic,
+ .read = ksym_trace_filter_read,
+ .write = ksym_trace_filter_write,
+};
+
+static void ksym_trace_reset(struct trace_array *tr)
+{
+ struct trace_ksym *entry;
+ struct hlist_node *node, *node1;
+
+ ksym_tracing_enabled = 0;
+
+ mutex_lock(&ksym_tracer_mutex);
+ hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
+ ksym_hlist) {
+ unregister_kernel_hw_breakpoint(entry->ksym_hbp);
+ ksym_filter_entry_count--;
+ hlist_del(&(entry->ksym_hlist));
+
+ /* Free the 'input_string' only if reset
+ * after startup self-test
+ */
+#ifdef CONFIG_FTRACE_SELFTEST
+ if (strncmp(entry->ksym_hbp->info.name, KSYM_SELFTEST_ENTRY,
+ strlen(KSYM_SELFTEST_ENTRY)) != 0)
+#endif /* CONFIG_FTRACE_SELFTEST*/
+ kfree(entry->ksym_hbp->info.name);
+ kfree(entry->ksym_hbp);
+ kfree(entry);
+ }
+ mutex_unlock(&ksym_tracer_mutex);
+
+}
+
+static int ksym_trace_init(struct trace_array *tr)
+{
+ int cpu, ret = 0;
+
+ for_each_online_cpu(cpu)
+ tracing_reset(tr, cpu);
+
+ ksym_tracing_enabled = 1;
+ ksym_trace_array = tr;
+
+#ifdef CONFIG_FTRACE_SELFTEST
+ /* Check if we are re-entering self-test code during initialisation */
+ if (ksym_selftest_dummy)
+ goto ret_path;
+
+ ksym_selftest_dummy = 0;
+
+ /* Register the read-write tracing request */
+ ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY, HW_BREAKPOINT_RW,
+ (unsigned long)(&ksym_selftest_dummy));
+
+ if (ret < 0) {
+ printk(KERN_CONT "ksym_trace read-write startup test failed\n");
+ goto ret_path;
+ }
+ /* Perform a read and a write operation over the dummy variable to
+ * trigger the tracer
+ */
+ if (ksym_selftest_dummy == 0)
+ ksym_selftest_dummy++;
+
+ret_path:
+#endif /* CONFIG_FTRACE_SELFTEST */
+
+ return ret;
+}
+
+static void ksym_trace_print_header(struct seq_file *m)
+{
+
+ seq_puts(m,
+ "# TASK-PID CPU# Symbol Type "
+ "Function \n");
+ seq_puts(m,
+ "# | | | | "
+ "| \n");
+}
+
+static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
+{
+ struct trace_entry *entry = iter->ent;
+ struct trace_seq *s = &iter->seq;
+ struct trace_ksym *field;
+ char str[KSYM_SYMBOL_LEN];
+ int ret;
+
+ trace_assign_type(field, entry);
+
+ ret = trace_seq_printf(s, "%-15s %-5d %-3d %-20s ", field->p_name,
+ entry->pid, iter->cpu, field->ksym_name);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ switch (field->ksym_hbp->info.type) {
+ case HW_BREAKPOINT_WRITE:
+ ret = trace_seq_printf(s, " W ");
+ break;
+ case HW_BREAKPOINT_RW:
+ ret = trace_seq_printf(s, " RW ");
+ break;
+ default:
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ sprint_symbol(str, field->ip);
+ ret = trace_seq_printf(s, "%-20s\n", str);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
+}
+
+struct tracer ksym_tracer __read_mostly =
+{
+ .name = "ksym_tracer",
+ .init = ksym_trace_init,
+ .reset = ksym_trace_reset,
+#ifdef CONFIG_FTRACE_SELFTEST
+ .selftest = trace_selftest_startup_ksym,
+#endif
+ .print_header = ksym_trace_print_header,
+ .print_line = ksym_trace_output
+};
+
+__init static int init_ksym_trace(void)
+{
+ struct dentry *d_tracer;
+ struct dentry *entry;
+
+ d_tracer = tracing_init_dentry();
+ ksym_filter_entry_count = 0;
+
+ entry = debugfs_create_file("ksym_trace_filter", 0666, d_tracer,
+ NULL, &ksym_tracing_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs "
+ "'ksym_trace_filter' file\n");
+
+ return register_tracer(&ksym_tracer);
+}
+device_initcall(init_ksym_trace);
+
+
+#ifdef CONFIG_PROFILE_KSYM_TRACER
+static int ksym_tracer_stat_headers(struct seq_file *m)
+{
+ seq_printf(m, " Access type ");
+ seq_printf(m, " Symbol Counter \n");
+ return 0;
+}
+
+static int ksym_tracer_stat_show(struct seq_file *m, void *v)
+{
+ struct hlist_node *stat = v;
+ struct trace_ksym *entry;
+ int access_type = 0;
+ char fn_name[KSYM_NAME_LEN];
+
+ entry = hlist_entry(stat, struct trace_ksym, ksym_hlist);
+
+ if (entry->ksym_hbp)
+ access_type = entry->ksym_hbp->info.type;
+
+ switch (access_type) {
+ case HW_BREAKPOINT_WRITE:
+ seq_printf(m, " W ");
+ break;
+ case HW_BREAKPOINT_RW:
+ seq_printf(m, " RW ");
+ break;
+ default:
+ seq_printf(m, " NA ");
+ }
+
+ if (lookup_symbol_name(entry->ksym_addr, fn_name) >= 0)
+ seq_printf(m, " %s ", fn_name);
+ else
+ seq_printf(m, " <NA> ");
+
+ seq_printf(m, "%15lu\n", entry->counter);
+ return 0;
+}
+
+static void *ksym_tracer_stat_start(void)
+{
+ return &(ksym_filter_head.first);
+}
+
+static void *
+ksym_tracer_stat_next(void *v, int idx)
+{
+ struct hlist_node *stat = v;
+
+ return stat->next;
+}
+
+static struct tracer_stat ksym_tracer_stats = {
+ .name = "ksym_tracer",
+ .stat_start = ksym_tracer_stat_start,
+ .stat_next = ksym_tracer_stat_next,
+ .stat_headers = ksym_tracer_stat_headers,
+ .stat_show = ksym_tracer_stat_show
+};
+
+__init static int ksym_tracer_stat_init(void)
+{
+ int ret;
+
+ ret = register_stat_tracer(&ksym_tracer_stats);
+ if (ret) {
+ printk(KERN_WARNING "Warning: could not register "
+ "ksym tracer stats\n");
+ return 1;
+ }
+
+ return 0;
+}
+fs_initcall(ksym_tracer_stat_init);
+#endif /* CONFIG_PROFILE_KSYM_TRACER */
Index: linux-2.6-tip/kernel/trace/trace_selftest.c
===================================================================
--- linux-2.6-tip.orig/kernel/trace/trace_selftest.c
+++ linux-2.6-tip/kernel/trace/trace_selftest.c
@@ -16,6 +16,7 @@ static inline int trace_valid_entry(stru
case TRACE_BRANCH:
case TRACE_GRAPH_ENT:
case TRACE_GRAPH_RET:
+ case TRACE_KSYM:
return 1;
}
return 0;
@@ -749,3 +750,38 @@ trace_selftest_startup_branch(struct tra
return ret;
}
#endif /* CONFIG_BRANCH_TRACER */
+
+#ifdef CONFIG_KSYM_TRACER
+int
+trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
+{
+ unsigned long count;
+ int ret;
+
+ /* start the tracing */
+ ret = tracer_init(trace, tr);
+ if (ret) {
+ warn_failed_init_tracer(trace, ret);
+ return ret;
+ }
+
+ /* Sleep for a 1/10 of a second */
+ msleep(100);
+ /* stop the tracing. */
+ tracing_stop();
+ /* check the trace buffer */
+ ret = trace_test_buffer(tr, &count);
+ trace->reset(tr);
+ tracing_start();
+
+ /* read & write operations - one each is performed on the dummy variable
+ * triggering two entries in the trace buffer
+ */
+ if (!ret && count != 2) {
+ printk(KERN_CONT "Ksym tracer startup test failed");
+ ret = -1;
+ }
+
+ return ret;
+}
+#endif /* CONFIG_KSYM_TRACER */
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v2
2009-03-24 15:28 ` [Patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v2 K.Prasad
2009-03-22 9:35 ` Pavel Machek
@ 2009-03-25 3:03 ` Steven Rostedt
2009-03-25 3:30 ` K.Prasad
2009-03-25 6:35 ` [Patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v3 K.Prasad
2 siblings, 1 reply; 33+ messages in thread
From: Steven Rostedt @ 2009-03-25 3:03 UTC (permalink / raw)
To: K.Prasad
Cc: Ingo Molnar, Linux Kernel Mailing List, Alan Stern, Andrew Morton,
Benjamin Herrenschmidt, Frederic Weisbecker, maneesh,
Roland McGrath
Hi Prasad,
On Tue, 24 Mar 2009, K.Prasad wrote:
> +
> +static void ksym_trace_print_header(struct seq_file *m)
> +{
> +
> + seq_puts(m,
> + "# TASK-PID CPU# Symbol Type "
> + "Function \n");
> + seq_puts(m,
> + "# | | | | "
> + "| \n");
> +}
> +
> +static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
> +{
> + struct trace_entry *entry = iter->ent;
> + struct trace_seq *s = &iter->seq;
> + struct trace_ksym *field;
> + char str[KSYM_SYMBOL_LEN];
> + int ret;
> +
> + trace_assign_type(field, entry);
Looking at this, you do not test the iter->ent->type. If events or
trace_printk is running, this will fail your tracer.
You want to test for your types first, because all trace types will be
called for "print_line" function below.
Thus you want something like
if (entry->type != TRACE_KSYM)
return TRACE_TYPE_UNHANDLED;
Then the tracer infrastructure could print out the trace event or
trace_printk.
If you only want to print your events and ignore all others, return
TRACE_TYPE_HANDLED.
Note, the if needs to be before the trace_assign_type.
-- Steve
> +
> + ret = trace_seq_printf(s, "%-15s %-5d %-3d %-20s ", field->p_name,
> + entry->pid, iter->cpu, field->ksym_name);
> + if (!ret)
> + return TRACE_TYPE_PARTIAL_LINE;
> +
> + switch (field->ksym_hbp->info.type) {
> + case HW_BREAKPOINT_WRITE:
> + ret = trace_seq_printf(s, " W ");
> + break;
> + case HW_BREAKPOINT_RW:
> + ret = trace_seq_printf(s, " RW ");
> + break;
> + default:
> + return TRACE_TYPE_PARTIAL_LINE;
> + }
> +
> + if (!ret)
> + return TRACE_TYPE_PARTIAL_LINE;
> +
> + sprint_symbol(str, field->ip);
> + ret = trace_seq_printf(s, "%-20s\n", str);
> + if (!ret)
> + return TRACE_TYPE_PARTIAL_LINE;
> +
> + return TRACE_TYPE_HANDLED;
> +}
> +
> +struct tracer ksym_tracer __read_mostly =
> +{
> + .name = "ksym_tracer",
> + .init = ksym_trace_init,
> + .reset = ksym_trace_reset,
> +#ifdef CONFIG_FTRACE_SELFTEST
> + .selftest = trace_selftest_startup_ksym,
> +#endif
> + .print_header = ksym_trace_print_header,
> + .print_line = ksym_trace_output
> +};
> +
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v2
2009-03-25 3:03 ` Steven Rostedt
@ 2009-03-25 3:30 ` K.Prasad
2009-03-25 3:48 ` Steven Rostedt
0 siblings, 1 reply; 33+ messages in thread
From: K.Prasad @ 2009-03-25 3:30 UTC (permalink / raw)
To: Steven Rostedt
Cc: Ingo Molnar, Linux Kernel Mailing List, Alan Stern, Andrew Morton,
Benjamin Herrenschmidt, Frederic Weisbecker, maneesh,
Roland McGrath
On Tue, Mar 24, 2009 at 11:03:18PM -0400, Steven Rostedt wrote:
>
> Hi Prasad,
>
> On Tue, 24 Mar 2009, K.Prasad wrote:
>
> > +
> > +static void ksym_trace_print_header(struct seq_file *m)
> > +{
> > +
> > + seq_puts(m,
> > + "# TASK-PID CPU# Symbol Type "
> > + "Function \n");
> > + seq_puts(m,
> > + "# | | | | "
> > + "| \n");
> > +}
> > +
> > +static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
> > +{
> > + struct trace_entry *entry = iter->ent;
> > + struct trace_seq *s = &iter->seq;
> > + struct trace_ksym *field;
> > + char str[KSYM_SYMBOL_LEN];
> > + int ret;
> > +
> > + trace_assign_type(field, entry);
>
> Looking at this, you do not test the iter->ent->type. If events or
> trace_printk is running, this will fail your tracer.
>
> You want to test for your types first, because all trace types will be
> called for "print_line" function below.
>
> Thus you want something like
>
> if (entry->type != TRACE_KSYM)
> return TRACE_TYPE_UNHANDLED;
>
> Then the tracer infrastructure could print out the trace event or
> trace_printk.
>
> If you only want to print your events and ignore all others, return
> TRACE_TYPE_HANDLED.
>
> Note, the if needs to be before the trace_assign_type.
>
> -- Steve
>
Thanks for pointing that out. I will make those changes and send out a
new patchset.
However, looking at some of the existing plugins, I find that many of
them do this check after trace_assign_type() or do not do it at all (e.g.
trace_branch_print()). They may need similar changes too.
Thanks,
K.Prasad
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v2
2009-03-25 3:30 ` K.Prasad
@ 2009-03-25 3:48 ` Steven Rostedt
0 siblings, 0 replies; 33+ messages in thread
From: Steven Rostedt @ 2009-03-25 3:48 UTC (permalink / raw)
To: K.Prasad
Cc: Ingo Molnar, Linux Kernel Mailing List, Alan Stern, Andrew Morton,
Benjamin Herrenschmidt, Frederic Weisbecker, maneesh,
Roland McGrath
On Wed, 25 Mar 2009, K.Prasad wrote:
>
> Thanks for pointing that out. I will make those changes and send out a
> new patchset.
>
> However, looking at some of the existing plugins I find that many of
> them do this check after trace_assign_type() or not do at all (e.g.
> trace_branch_print()). They may need similar changes too.
Ah, that was the other option I was going to mention. The
trace_branch_print is not called by tracer->print_line, like you have.
What you did is not wrong, but there are other options.
The other option is to register an event callback. That is, the ftrace
infrastructure will call the event registered to an id. I register
the trace_branch_event with the TRACE_BRANCH id. Then the ftrace
infrastructure will call the trace_branch_print if it encounters the
TRACE_BRANCH entry.
This means you can do the UNHANDLED return as I suggested. Or you remove
the print_line from you tracer and register an event for TRACE_KSYM
instead.
The choice is up to you ;-)
-- Steve
^ permalink raw reply [flat|nested] 33+ messages in thread
* [Patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v3
2009-03-24 15:28 ` [Patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v2 K.Prasad
2009-03-22 9:35 ` Pavel Machek
2009-03-25 3:03 ` Steven Rostedt
@ 2009-03-25 6:35 ` K.Prasad
2009-03-25 13:24 ` Steven Rostedt
2 siblings, 1 reply; 33+ messages in thread
From: K.Prasad @ 2009-03-25 6:35 UTC (permalink / raw)
To: Ingo Molnar, Linux Kernel Mailing List
Cc: Alan Stern, Andrew Morton, Benjamin Herrenschmidt,
Frederic Weisbecker, maneesh, Roland McGrath, Steven Rostedt
On Tue, Mar 24, 2009 at 08:58:11PM +0530, K.Prasad wrote:
> This patch adds an ftrace plugin to detect and profile memory access over
> kernel variables. It uses HW Breakpoint interfaces to 'watch' memory
> addresses.
>
> Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
> ---
> kernel/trace/Kconfig | 21 +
> kernel/trace/Makefile | 1
> kernel/trace/trace.h | 25 +
> kernel/trace/trace_ksym.c | 555 ++++++++++++++++++++++++++++++++++++++++++
> kernel/trace/trace_selftest.c | 36 ++
> 5 files changed, 638 insertions(+)
>
Submitting the revised patch 11/11 and not reposting the entire
patchset.
Changelog
---------
- Checking for the appropriate tracer type in the .print_line handler function
Please find the new patch below.
Thanks,
K.Prasad
ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v3
This patch adds an ftrace plugin to detect and profile memory access over
kernel variables. It uses HW Breakpoint interfaces to 'watch' memory
addresses.
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
---
kernel/trace/Kconfig | 21 +
kernel/trace/Makefile | 1
kernel/trace/trace.h | 25 +
kernel/trace/trace_ksym.c | 558 ++++++++++++++++++++++++++++++++++++++++++
kernel/trace/trace_selftest.c | 36 ++
5 files changed, 641 insertions(+)
Index: linux-2.6-tip/kernel/trace/Kconfig
===================================================================
--- linux-2.6-tip.orig/kernel/trace/Kconfig
+++ linux-2.6-tip/kernel/trace/Kconfig
@@ -268,6 +268,27 @@ config POWER_TRACER
power management decisions, specifically the C-state and P-state
behavior.
+config KSYM_TRACER
+ bool "Trace read and write access on kernel memory locations"
+ depends on HAVE_HW_BREAKPOINT
+ select TRACING
+ help
+ This tracer helps find read and write operations on any given kernel
+ symbol i.e. /proc/kallsyms.
+
+config PROFILE_KSYM_TRACER
+ bool "Profile all kernel memory accesses on 'watched' variables"
+ depends on KSYM_TRACER
+ help
+ This tracer profiles kernel accesses on variables watched through the
+ ksym tracer ftrace plugin. Depending upon the hardware, all read
+ and write operations on kernel variables can be monitored for
+ accesses.
+
+ The results will be displayed in:
+ /debugfs/tracing/profile_ksym
+
+ Say N if unsure.
config STACK_TRACER
bool "Trace max stack"
Index: linux-2.6-tip/kernel/trace/Makefile
===================================================================
--- linux-2.6-tip.orig/kernel/trace/Makefile
+++ linux-2.6-tip/kernel/trace/Makefile
@@ -46,5 +46,6 @@ obj-$(CONFIG_EVENT_TRACER) += trace_expo
obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
obj-$(CONFIG_EVENT_TRACER) += trace_events_filter.o
+obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
libftrace-y := ftrace.o
Index: linux-2.6-tip/kernel/trace/trace.h
===================================================================
--- linux-2.6-tip.orig/kernel/trace/trace.h
+++ linux-2.6-tip/kernel/trace/trace.h
@@ -12,6 +12,10 @@
#include <trace/kmemtrace.h>
#include <trace/power.h>
+#ifdef CONFIG_KSYM_TRACER
+#include <asm/hw_breakpoint.h>
+#endif
+
enum trace_type {
__TRACE_FIRST_TYPE = 0,
@@ -37,6 +41,7 @@ enum trace_type {
TRACE_KMEM_FREE,
TRACE_POWER,
TRACE_BLK,
+ TRACE_KSYM,
__TRACE_LAST_TYPE,
};
@@ -212,6 +217,23 @@ struct syscall_trace_exit {
unsigned long ret;
};
+#ifdef CONFIG_KSYM_TRACER
+struct trace_ksym {
+ struct trace_entry ent;
+ struct hw_breakpoint *ksym_hbp;
+ unsigned long ksym_addr;
+ unsigned long ip;
+#ifdef CONFIG_PROFILE_KSYM_TRACER
+ unsigned long counter;
+#endif
+ struct hlist_node ksym_hlist;
+ char ksym_name[KSYM_NAME_LEN];
+ char p_name[TASK_COMM_LEN];
+};
+#else
+struct trace_ksym {
+};
+#endif /* CONFIG_KSYM_TRACER */
/*
* trace_flag_type is an enumeration that holds different
@@ -330,6 +352,7 @@ extern void __ftrace_bad_type(void);
TRACE_SYSCALL_ENTER); \
IF_ASSIGN(var, ent, struct syscall_trace_exit, \
TRACE_SYSCALL_EXIT); \
+ IF_ASSIGN(var, ent, struct trace_ksym, TRACE_KSYM); \
__ftrace_bad_type(); \
} while (0)
@@ -593,6 +616,8 @@ extern int trace_selftest_startup_syspro
struct trace_array *tr);
extern int trace_selftest_startup_branch(struct tracer *trace,
struct trace_array *tr);
+extern int trace_selftest_startup_ksym(struct tracer *trace,
+ struct trace_array *tr);
#endif /* CONFIG_FTRACE_STARTUP_TEST */
extern void *head_page(struct trace_array_cpu *data);
Index: linux-2.6-tip/kernel/trace/trace_ksym.c
===================================================================
--- /dev/null
+++ linux-2.6-tip/kernel/trace/trace_ksym.c
@@ -0,0 +1,558 @@
+/*
+ * trace_ksym.c - Kernel Symbol Tracer
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2009
+ */
+
+#include <linux/kallsyms.h>
+#include <linux/uaccess.h>
+#include <linux/debugfs.h>
+#include <linux/ftrace.h>
+#include <linux/module.h>
+#include <linux/jhash.h>
+#include <linux/fs.h>
+
+#include "trace_output.h"
+#include "trace_stat.h"
+#include "trace.h"
+
+/* For now, let us restrict the no. of symbols traced simultaneously to number
+ * of available hardware breakpoint registers.
+ */
+#define KSYM_TRACER_MAX HB_NUM
+
+#define KSYM_TRACER_OP_LEN 3 /* rw- */
+#define KSYM_FILTER_ENTRY_LEN (KSYM_NAME_LEN + KSYM_TRACER_OP_LEN + 1)
+
+#ifdef CONFIG_FTRACE_SELFTEST
+
+static int ksym_selftest_dummy;
+#define KSYM_SELFTEST_ENTRY "ksym_selftest_dummy"
+
+#endif /* CONFIG_FTRACE_SELFTEST */
+
+static struct trace_array *ksym_trace_array;
+
+DEFINE_MUTEX(ksym_tracer_mutex);
+
+static unsigned int ksym_filter_entry_count;
+static unsigned int ksym_tracing_enabled;
+
+static HLIST_HEAD(ksym_filter_head);
+
+#ifdef CONFIG_PROFILE_KSYM_TRACER
+
+#define MAX_UL_INT 0xffffffff
+DEFINE_SPINLOCK(ksym_stat_lock);
+
+void ksym_collect_stats(unsigned long hbp_hit_addr)
+{
+ struct hlist_node *node;
+ struct trace_ksym *entry;
+
+ spin_lock(&ksym_stat_lock);
+ hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
+ if ((entry->ksym_addr == hbp_hit_addr) &&
+ (entry->counter <= MAX_UL_INT)) {
+ entry->counter++;
+ break;
+ }
+ }
+ spin_unlock(&ksym_stat_lock);
+}
+#endif /* CONFIG_PROFILE_KSYM_TRACER */
+
+void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs)
+{
+ struct ring_buffer_event *event;
+ struct trace_array *tr;
+ struct trace_ksym *entry;
+ int pc;
+
+ if (!ksym_tracing_enabled)
+ return;
+
+ tr = ksym_trace_array;
+ pc = preempt_count();
+
+ event = trace_buffer_lock_reserve(tr, TRACE_KSYM,
+ sizeof(*entry), 0, pc);
+ if (!event)
+ return;
+
+ entry = ring_buffer_event_data(event);
+ strlcpy(entry->ksym_name, hbp->info.name, KSYM_SYMBOL_LEN);
+ entry->ksym_hbp = hbp;
+ entry->ip = instruction_pointer(regs);
+ strlcpy(entry->p_name, current->comm, TASK_COMM_LEN);
+#ifdef CONFIG_PROFILE_KSYM_TRACER
+ ksym_collect_stats(hbp->info.address);
+#endif /* CONFIG_PROFILE_KSYM_TRACER */
+
+ trace_buffer_unlock_commit(tr, event, 0, pc);
+}
+
+/* Valid access types are represented as
+ *
+ * rw- : Set Read/Write Access Breakpoint
+ * -w- : Set Write Access Breakpoint
+ * --- : Clear Breakpoints
+ * --x : Set Execution Break points (Not available yet)
+ *
+ */
+static int ksym_trace_get_access_type(char *access_str)
+{
+ int pos, access = 0;
+
+ for (pos = 0; pos < KSYM_TRACER_OP_LEN; pos++) {
+ switch (access_str[pos]) {
+ case 'r':
+ access += (pos == 0) ? 4 : -1;
+ break;
+ case 'w':
+ access += (pos == 1) ? 2 : -1;
+ break;
+ case '-':
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ switch (access) {
+ case 6:
+ access = HW_BREAKPOINT_RW;
+ break;
+ case 2:
+ access = HW_BREAKPOINT_WRITE;
+ break;
+ case 0:
+ access = 0;
+ }
+
+ return access;
+}
+
+/*
+ * There can be several possible malformed requests and we attempt to capture
+ * all of them. We enumerate some of the rules
+ * 1. We will not allow kernel symbols with ':' since it is used as a delimiter.
+ * i.e. multiple ':' symbols disallowed. Possible uses are of the form
+ * <module>:<ksym_name>:<op>.
+ * 2. No delimiter symbol ':' in the input string
+ * 3. Spurious operator symbols or symbols not in their respective positions
+ * 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file
+ * 5. Kernel symbol not a part of /proc/kallsyms
+ * 6. Duplicate requests
+ */
+static int parse_ksym_trace_str(char *input_string, char **ksymname,
+ unsigned long *addr)
+{
+ char *delimiter = ":";
+ int ret;
+
+ ret = -EINVAL;
+ *ksymname = strsep(&input_string, delimiter);
+ *addr = kallsyms_lookup_name(*ksymname);
+
+ /* Check for malformed request: (2), (1) and (5) */
+ if ((!input_string) ||
+ (strlen(input_string) != (KSYM_TRACER_OP_LEN + 1)) ||
+ (*addr == 0))
+ goto return_code;
+ ret = ksym_trace_get_access_type(input_string);
+
+return_code:
+ return ret;
+}
+
+static int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
+{
+ struct trace_ksym *entry;
+ int ret;
+
+ if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
+ printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No"
+ " new requests for tracing can be accepted now.\n",
+ KSYM_TRACER_MAX);
+ return -ENOSPC;
+ }
+
+ entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
+ if (!entry)
+ return -ENOMEM;
+
+ entry->ksym_hbp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL);
+ if (!entry->ksym_hbp) {
+ kfree(entry);
+ return -ENOMEM;
+ }
+
+ entry->ksym_hbp->info.name = ksymname;
+ entry->ksym_hbp->info.type = op;
+ entry->ksym_addr = entry->ksym_hbp->info.address = addr;
+ entry->ksym_hbp->info.len = HW_BREAKPOINT_LEN_4;
+
+ entry->ksym_hbp->triggered = (void *)ksym_hbp_handler;
+
+ ret = register_kernel_hw_breakpoint(entry->ksym_hbp);
+ if (ret < 0) {
+ printk(KERN_INFO "ksym_tracer request failed. Try again"
+ " later!!\n");
+ kfree(entry->ksym_hbp);
+ kfree(entry);
+ return -EAGAIN;
+ }
+ hlist_add_head(&(entry->ksym_hlist), &ksym_filter_head);
+ ksym_filter_entry_count++;
+
+ return 0;
+}
+
+static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ struct trace_ksym *entry;
+ struct hlist_node *node;
+ char buf[KSYM_FILTER_ENTRY_LEN * KSYM_TRACER_MAX];
+ ssize_t ret, cnt = 0;
+
+ mutex_lock(&ksym_tracer_mutex);
+
+ hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
+ cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt, "%s:",
+ entry->ksym_hbp->info.name);
+ if (entry->ksym_hbp->info.type == HW_BREAKPOINT_WRITE)
+ cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt,
+ "-w-\n");
+ else if (entry->ksym_hbp->info.type == HW_BREAKPOINT_RW)
+ cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt,
+ "rw-\n");
+ }
+ ret = simple_read_from_buffer(ubuf, count, ppos, buf, strlen(buf));
+ mutex_unlock(&ksym_tracer_mutex);
+
+ return ret;
+}
+
+static ssize_t ksym_trace_filter_write(struct file *file,
+ const char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ struct trace_ksym *entry;
+ struct hlist_node *node;
+ char *input_string, *ksymname = NULL;
+ unsigned long ksym_addr = 0;
+ int ret, op, changed = 0;
+
+ /* Ignore echo "" > ksym_trace_filter */
+ if (count == 0)
+ return 0;
+
+ input_string = kzalloc(count, GFP_KERNEL);
+ if (!input_string)
+ return -ENOMEM;
+
+ if (copy_from_user(input_string, buffer, count)) {
+ kfree(input_string);
+ return -EFAULT;
+ }
+
+ ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
+ if (ret < 0) {
+ kfree(input_string);
+ return ret;
+ }
+
+ mutex_lock(&ksym_tracer_mutex);
+
+ ret = -EINVAL;
+ hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
+ if (entry->ksym_addr == ksym_addr) {
+ /* Check for malformed request: (6) */
+ if (entry->ksym_hbp->info.type != op)
+ changed = 1;
+ else
+ goto err_ret;
+ break;
+ }
+ }
+ if (changed) {
+ unregister_kernel_hw_breakpoint(entry->ksym_hbp);
+ entry->ksym_hbp->info.type = op;
+ if (op > 0) {
+ ret = register_kernel_hw_breakpoint(entry->ksym_hbp);
+ if (ret == 0) {
+ ret = count;
+ goto unlock_ret_path;
+ }
+ }
+ ksym_filter_entry_count--;
+ hlist_del(&(entry->ksym_hlist));
+ kfree(entry->ksym_hbp);
+ kfree(entry);
+ ret = count;
+ goto err_ret;
+ } else {
+ /* Check for malformed request: (4) */
+ if (op == 0)
+ goto err_ret;
+ ret = process_new_ksym_entry(ksymname, op, ksym_addr);
+ if (ret)
+ goto err_ret;
+ }
+ ret = count;
+ goto unlock_ret_path;
+
+err_ret:
+ kfree(input_string);
+
+unlock_ret_path:
+ mutex_unlock(&ksym_tracer_mutex);
+ return ret;
+}
+
+static const struct file_operations ksym_tracing_fops = {
+ .open = tracing_open_generic,
+ .read = ksym_trace_filter_read,
+ .write = ksym_trace_filter_write,
+};
+
+static void ksym_trace_reset(struct trace_array *tr)
+{
+ struct trace_ksym *entry;
+ struct hlist_node *node, *node1;
+
+ ksym_tracing_enabled = 0;
+
+ mutex_lock(&ksym_tracer_mutex);
+ hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
+ ksym_hlist) {
+ unregister_kernel_hw_breakpoint(entry->ksym_hbp);
+ ksym_filter_entry_count--;
+ hlist_del(&(entry->ksym_hlist));
+
+ /* Free the 'input_string' only if reset
+ * after startup self-test
+ */
+#ifdef CONFIG_FTRACE_SELFTEST
+ if (strncmp(entry->ksym_hbp->info.name, KSYM_SELFTEST_ENTRY,
+ strlen(KSYM_SELFTEST_ENTRY)) != 0)
+#endif /* CONFIG_FTRACE_SELFTEST*/
+ kfree(entry->ksym_hbp->info.name);
+ kfree(entry->ksym_hbp);
+ kfree(entry);
+ }
+ mutex_unlock(&ksym_tracer_mutex);
+
+}
+
+static int ksym_trace_init(struct trace_array *tr)
+{
+ int cpu, ret = 0;
+
+ for_each_online_cpu(cpu)
+ tracing_reset(tr, cpu);
+
+ ksym_tracing_enabled = 1;
+ ksym_trace_array = tr;
+
+#ifdef CONFIG_FTRACE_SELFTEST
+ /* Check if we are re-entering self-test code during initialisation */
+ if (ksym_selftest_dummy)
+ goto ret_path;
+
+ ksym_selftest_dummy = 0;
+
+ /* Register the read-write tracing request */
+ ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY, HW_BREAKPOINT_RW,
+ (unsigned long)(&ksym_selftest_dummy));
+
+ if (ret < 0) {
+ printk(KERN_CONT "ksym_trace read-write startup test failed\n");
+ goto ret_path;
+ }
+ /* Perform a read and a write operation over the dummy variable to
+ * trigger the tracer
+ */
+ if (ksym_selftest_dummy == 0)
+ ksym_selftest_dummy++;
+
+ret_path:
+#endif /* CONFIG_FTRACE_SELFTEST */
+
+ return ret;
+}
+
+static void ksym_trace_print_header(struct seq_file *m)
+{
+
+ seq_puts(m,
+ "# TASK-PID CPU# Symbol Type "
+ "Function \n");
+ seq_puts(m,
+ "# | | | | "
+ "| \n");
+}
+
+static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
+{
+ struct trace_entry *entry = iter->ent;
+ struct trace_seq *s = &iter->seq;
+ struct trace_ksym *field;
+ char str[KSYM_SYMBOL_LEN];
+ int ret;
+
+ if (entry->type != TRACE_KSYM)
+ return TRACE_TYPE_UNHANDLED;
+
+ trace_assign_type(field, entry);
+
+ ret = trace_seq_printf(s, "%-15s %-5d %-3d %-20s ", field->p_name,
+ entry->pid, iter->cpu, field->ksym_name);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ switch (field->ksym_hbp->info.type) {
+ case HW_BREAKPOINT_WRITE:
+ ret = trace_seq_printf(s, " W ");
+ break;
+ case HW_BREAKPOINT_RW:
+ ret = trace_seq_printf(s, " RW ");
+ break;
+ default:
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ sprint_symbol(str, field->ip);
+ ret = trace_seq_printf(s, "%-20s\n", str);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
+}
+
+struct tracer ksym_tracer __read_mostly =
+{
+ .name = "ksym_tracer",
+ .init = ksym_trace_init,
+ .reset = ksym_trace_reset,
+#ifdef CONFIG_FTRACE_SELFTEST
+ .selftest = trace_selftest_startup_ksym,
+#endif
+ .print_header = ksym_trace_print_header,
+ .print_line = ksym_trace_output
+};
+
+__init static int init_ksym_trace(void)
+{
+ struct dentry *d_tracer;
+ struct dentry *entry;
+
+ d_tracer = tracing_init_dentry();
+ ksym_filter_entry_count = 0;
+
+ entry = debugfs_create_file("ksym_trace_filter", 0666, d_tracer,
+ NULL, &ksym_tracing_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs "
+ "'ksym_trace_filter' file\n");
+
+ return register_tracer(&ksym_tracer);
+}
+device_initcall(init_ksym_trace);
+
+
+#ifdef CONFIG_PROFILE_KSYM_TRACER
+static int ksym_tracer_stat_headers(struct seq_file *m)
+{
+ seq_printf(m, " Access type ");
+ seq_printf(m, " Symbol Counter \n");
+ return 0;
+}
+
+static int ksym_tracer_stat_show(struct seq_file *m, void *v)
+{
+ struct hlist_node *stat = v;
+ struct trace_ksym *entry;
+ int access_type = 0;
+ char fn_name[KSYM_NAME_LEN];
+
+ entry = hlist_entry(stat, struct trace_ksym, ksym_hlist);
+
+ if (entry->ksym_hbp)
+ access_type = entry->ksym_hbp->info.type;
+
+ switch (access_type) {
+ case HW_BREAKPOINT_WRITE:
+ seq_printf(m, " W ");
+ break;
+ case HW_BREAKPOINT_RW:
+ seq_printf(m, " RW ");
+ break;
+ default:
+ seq_printf(m, " NA ");
+ }
+
+ if (lookup_symbol_name(entry->ksym_addr, fn_name) >= 0)
+ seq_printf(m, " %s ", fn_name);
+ else
+ seq_printf(m, " <NA> ");
+
+ seq_printf(m, "%15lu\n", entry->counter);
+ return 0;
+}
+
+static void *ksym_tracer_stat_start(void)
+{
+ return &(ksym_filter_head.first);
+}
+
+static void *
+ksym_tracer_stat_next(void *v, int idx)
+{
+ struct hlist_node *stat = v;
+
+ return stat->next;
+}
+
+static struct tracer_stat ksym_tracer_stats = {
+ .name = "ksym_tracer",
+ .stat_start = ksym_tracer_stat_start,
+ .stat_next = ksym_tracer_stat_next,
+ .stat_headers = ksym_tracer_stat_headers,
+ .stat_show = ksym_tracer_stat_show
+};
+
+__init static int ksym_tracer_stat_init(void)
+{
+ int ret;
+
+ ret = register_stat_tracer(&ksym_tracer_stats);
+ if (ret) {
+ printk(KERN_WARNING "Warning: could not register "
+ "ksym tracer stats\n");
+ return 1;
+ }
+
+ return 0;
+}
+fs_initcall(ksym_tracer_stat_init);
+#endif /* CONFIG_PROFILE_KSYM_TRACER */
Index: linux-2.6-tip/kernel/trace/trace_selftest.c
===================================================================
--- linux-2.6-tip.orig/kernel/trace/trace_selftest.c
+++ linux-2.6-tip/kernel/trace/trace_selftest.c
@@ -16,6 +16,7 @@ static inline int trace_valid_entry(stru
case TRACE_BRANCH:
case TRACE_GRAPH_ENT:
case TRACE_GRAPH_RET:
+ case TRACE_KSYM:
return 1;
}
return 0;
@@ -749,3 +750,38 @@ trace_selftest_startup_branch(struct tra
return ret;
}
#endif /* CONFIG_BRANCH_TRACER */
+
+#ifdef CONFIG_KSYM_TRACER
+int
+trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
+{
+ unsigned long count;
+ int ret;
+
+ /* start the tracing */
+ ret = tracer_init(trace, tr);
+ if (ret) {
+ warn_failed_init_tracer(trace, ret);
+ return ret;
+ }
+
+ /* Sleep for a 1/10 of a second */
+ msleep(100);
+ /* stop the tracing. */
+ tracing_stop();
+ /* check the trace buffer */
+ ret = trace_test_buffer(tr, &count);
+ trace->reset(tr);
+ tracing_start();
+
+ /* read & write operations - one each is performed on the dummy variable
+ * triggering two entries in the trace buffer
+ */
+ if (!ret && count != 2) {
+ printk(KERN_CONT "Ksym tracer startup test failed");
+ ret = -1;
+ }
+
+ return ret;
+}
+#endif /* CONFIG_KSYM_TRACER */
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v3
2009-03-25 6:35 ` [Patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v3 K.Prasad
@ 2009-03-25 13:24 ` Steven Rostedt
0 siblings, 0 replies; 33+ messages in thread
From: Steven Rostedt @ 2009-03-25 13:24 UTC (permalink / raw)
To: K.Prasad
Cc: Ingo Molnar, Linux Kernel Mailing List, Alan Stern, Andrew Morton,
Benjamin Herrenschmidt, Frederic Weisbecker, maneesh,
Roland McGrath
On Wed, 25 Mar 2009, K.Prasad wrote:
> On Tue, Mar 24, 2009 at 08:58:11PM +0530, K.Prasad wrote:
> > This patch adds an ftrace plugin to detect and profile memory access over
> > kernel variables. It uses HW Breakpoint interfaces to 'watch memory
> > addresses.
> >
> > Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
> > ---
> > kernel/trace/Kconfig | 21 +
> > kernel/trace/Makefile | 1
> > kernel/trace/trace.h | 25 +
> > kernel/trace/trace_ksym.c | 555 ++++++++++++++++++++++++++++++++++++++++++
> > kernel/trace/trace_selftest.c | 36 ++
> > 5 files changed, 638 insertions(+)
> >
>
> Submitting the revised patch 11/11 and not reposting the entire
> patchset.
>
> Changelog
> ---------
> - Checking for the appropriate tracer type in the .print_line handler function
>
> Please find the new patch below.
>
> Thanks,
> K.Prasad
>
>
> ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v2
>
> This patch adds an ftrace plugin to detect and profile memory access over
> kernel variables. It uses HW Breakpoint interfaces to 'watch memory
> addresses.
>
> Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
> ---
> kernel/trace/Kconfig | 21 +
> kernel/trace/Makefile | 1
> kernel/trace/trace.h | 25 +
> kernel/trace/trace_ksym.c | 558 ++++++++++++++++++++++++++++++++++++++++++
> kernel/trace/trace_selftest.c | 36 ++
> 5 files changed, 641 insertions(+)
>
> +static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
> +{
> + struct trace_entry *entry = iter->ent;
> + struct trace_seq *s = &iter->seq;
> + struct trace_ksym *field;
> + char str[KSYM_SYMBOL_LEN];
> + int ret;
> +
> + if (entry->type != TRACE_KSYM)
> + return TRACE_TYPE_UNHANDLED;
Looks good.
Acked-by: Steven Rostedt <srostedt@redhat.com>
Thanks,
-- Steve
> +
> + trace_assign_type(field, entry);
> +
> + ret = trace_seq_printf(s, "%-15s %-5d %-3d %-20s ", field->p_name,
> + entry->pid, iter->cpu, field->ksym_name);
> + if (!ret)
> + return TRACE_TYPE_PARTIAL_LINE;
> +
> + switch (field->ksym_hbp->info.type) {
> + case HW_BREAKPOINT_WRITE:
> + ret = trace_seq_printf(s, " W ");
> + break;
> + case HW_BREAKPOINT_RW:
> + ret = trace_seq_printf(s, " RW ");
> + break;
> + default:
> + return TRACE_TYPE_PARTIAL_LINE;
> + }
> +
> + if (!ret)
> + return TRACE_TYPE_PARTIAL_LINE;
> +
> + sprint_symbol(str, field->ip);
> + ret = trace_seq_printf(s, "%-20s\n", str);
> + if (!ret)
> + return TRACE_TYPE_PARTIAL_LINE;
> +
> + return TRACE_TYPE_HANDLED;
> +}
> +
^ permalink raw reply [flat|nested] 33+ messages in thread
* [Patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v2
[not found] <20090407063058.301701787@prasadkr_t60p.in.ibm.com>
@ 2009-04-07 6:37 ` K.Prasad
2009-04-08 8:02 ` Frederic Weisbecker
0 siblings, 1 reply; 33+ messages in thread
From: K.Prasad @ 2009-04-07 6:37 UTC (permalink / raw)
To: Alan Stern
Cc: Ingo Molnar, Linux Kernel Mailing List, Andrew Morton,
Benjamin Herrenschmidt, Frederic Weisbecker, maneesh,
Roland McGrath, Steven Rostedt, K.Prasad, Steven Rostedt
[-- Attachment #1: ftrace_ksym_tracer_11 --]
[-- Type: text/plain, Size: 19267 bytes --]
This patch adds an ftrace plugin to detect and profile memory access over kernel
variables. It uses HW Breakpoint interfaces to 'watch' memory addresses.
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
Acked-by: Steven Rostedt <srostedt@redhat.com>
---
kernel/trace/Kconfig | 21 +
kernel/trace/Makefile | 1
kernel/trace/trace.h | 25 +
kernel/trace/trace_ksym.c | 558 ++++++++++++++++++++++++++++++++++++++++++
kernel/trace/trace_selftest.c | 36 ++
5 files changed, 641 insertions(+)
Index: kernel/trace/Kconfig
===================================================================
--- kernel/trace/Kconfig.orig
+++ kernel/trace/Kconfig
@@ -268,6 +268,27 @@ config POWER_TRACER
power management decisions, specifically the C-state and P-state
behavior.
+config KSYM_TRACER
+ bool "Trace read and write access on kernel memory locations"
+ depends on HAVE_HW_BREAKPOINT
+ select TRACING
+ help
+ This tracer helps find read and write operations on any given kernel
+ symbol i.e. /proc/kallsyms.
+
+config PROFILE_KSYM_TRACER
+ bool "Profile all kernel memory accesses on 'watched' variables"
+ depends on KSYM_TRACER
+ help
+ This tracer profiles kernel accesses on variables watched through the
+ ksym tracer ftrace plugin. Depending upon the hardware, all read
+ and write operations on kernel variables can be monitored for
+ accesses.
+
+ The results will be displayed in:
+ /debugfs/tracing/profile_ksym
+
+ Say N if unsure.
config STACK_TRACER
bool "Trace max stack"
Index: kernel/trace/Makefile
===================================================================
--- kernel/trace/Makefile.orig
+++ kernel/trace/Makefile
@@ -46,5 +46,6 @@ obj-$(CONFIG_EVENT_TRACER) += trace_expo
obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
obj-$(CONFIG_EVENT_TRACER) += trace_events_filter.o
+obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
libftrace-y := ftrace.o
Index: kernel/trace/trace.h
===================================================================
--- kernel/trace/trace.h.orig
+++ kernel/trace/trace.h
@@ -12,6 +12,10 @@
#include <trace/kmemtrace.h>
#include <trace/power.h>
+#ifdef CONFIG_KSYM_TRACER
+#include <asm/hw_breakpoint.h>
+#endif
+
enum trace_type {
__TRACE_FIRST_TYPE = 0,
@@ -37,6 +41,7 @@ enum trace_type {
TRACE_KMEM_FREE,
TRACE_POWER,
TRACE_BLK,
+ TRACE_KSYM,
__TRACE_LAST_TYPE,
};
@@ -212,6 +217,23 @@ struct syscall_trace_exit {
unsigned long ret;
};
+#ifdef CONFIG_KSYM_TRACER
+struct trace_ksym {
+ struct trace_entry ent;
+ struct hw_breakpoint *ksym_hbp;
+ unsigned long ksym_addr;
+ unsigned long ip;
+#ifdef CONFIG_PROFILE_KSYM_TRACER
+ unsigned long counter;
+#endif
+ struct hlist_node ksym_hlist;
+ char ksym_name[KSYM_NAME_LEN];
+ char p_name[TASK_COMM_LEN];
+};
+#else
+struct trace_ksym {
+};
+#endif /* CONFIG_KSYM_TRACER */
/*
* trace_flag_type is an enumeration that holds different
@@ -330,6 +352,7 @@ extern void __ftrace_bad_type(void);
TRACE_SYSCALL_ENTER); \
IF_ASSIGN(var, ent, struct syscall_trace_exit, \
TRACE_SYSCALL_EXIT); \
+ IF_ASSIGN(var, ent, struct trace_ksym, TRACE_KSYM); \
__ftrace_bad_type(); \
} while (0)
@@ -593,6 +616,8 @@ extern int trace_selftest_startup_syspro
struct trace_array *tr);
extern int trace_selftest_startup_branch(struct tracer *trace,
struct trace_array *tr);
+extern int trace_selftest_startup_ksym(struct tracer *trace,
+ struct trace_array *tr);
#endif /* CONFIG_FTRACE_STARTUP_TEST */
extern void *head_page(struct trace_array_cpu *data);
Index: kernel/trace/trace_ksym.c
===================================================================
--- /dev/null
+++ kernel/trace/trace_ksym.c
@@ -0,0 +1,558 @@
+/*
+ * trace_ksym.c - Kernel Symbol Tracer
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2009
+ */
+
+#include <linux/kallsyms.h>
+#include <linux/uaccess.h>
+#include <linux/debugfs.h>
+#include <linux/ftrace.h>
+#include <linux/module.h>
+#include <linux/jhash.h>
+#include <linux/fs.h>
+
+#include "trace_output.h"
+#include "trace_stat.h"
+#include "trace.h"
+
+/* For now, let us restrict the no. of symbols traced simultaneously to number
+ * of available hardware breakpoint registers.
+ */
+#define KSYM_TRACER_MAX HB_NUM
+
+#define KSYM_TRACER_OP_LEN 3 /* rw- */
+#define KSYM_FILTER_ENTRY_LEN (KSYM_NAME_LEN + KSYM_TRACER_OP_LEN + 1)
+
+#ifdef CONFIG_FTRACE_SELFTEST
+
+static int ksym_selftest_dummy;
+#define KSYM_SELFTEST_ENTRY "ksym_selftest_dummy"
+
+#endif /* CONFIG_FTRACE_SELFTEST */
+
+static struct trace_array *ksym_trace_array;
+
+DEFINE_MUTEX(ksym_tracer_mutex);
+
+static unsigned int ksym_filter_entry_count;
+static unsigned int ksym_tracing_enabled;
+
+static HLIST_HEAD(ksym_filter_head);
+
+#ifdef CONFIG_PROFILE_KSYM_TRACER
+
+#define MAX_UL_INT 0xffffffff
+DEFINE_SPINLOCK(ksym_stat_lock);
+
+void ksym_collect_stats(unsigned long hbp_hit_addr)
+{
+ struct hlist_node *node;
+ struct trace_ksym *entry;
+
+ spin_lock(&ksym_stat_lock);
+ hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
+ if ((entry->ksym_addr == hbp_hit_addr) &&
+ (entry->counter <= MAX_UL_INT)) {
+ entry->counter++;
+ break;
+ }
+ }
+ spin_unlock(&ksym_stat_lock);
+}
+#endif /* CONFIG_PROFILE_KSYM_TRACER */
+
+void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs)
+{
+ struct ring_buffer_event *event;
+ struct trace_array *tr;
+ struct trace_ksym *entry;
+ int pc;
+
+ if (!ksym_tracing_enabled)
+ return;
+
+ tr = ksym_trace_array;
+ pc = preempt_count();
+
+ event = trace_buffer_lock_reserve(tr, TRACE_KSYM,
+ sizeof(*entry), 0, pc);
+ if (!event)
+ return;
+
+ entry = ring_buffer_event_data(event);
+ strlcpy(entry->ksym_name, hbp->info.name, KSYM_SYMBOL_LEN);
+ entry->ksym_hbp = hbp;
+ entry->ip = instruction_pointer(regs);
+ strlcpy(entry->p_name, current->comm, TASK_COMM_LEN);
+#ifdef CONFIG_PROFILE_KSYM_TRACER
+ ksym_collect_stats(hbp->info.address);
+#endif /* CONFIG_PROFILE_KSYM_TRACER */
+
+ trace_buffer_unlock_commit(tr, event, 0, pc);
+}
+
+/* Valid access types are represented as
+ *
+ * rw- : Set Read/Write Access Breakpoint
+ * -w- : Set Write Access Breakpoint
+ * --- : Clear Breakpoints
+ * --x : Set Execution Break points (Not available yet)
+ *
+ */
+static int ksym_trace_get_access_type(char *access_str)
+{
+ int pos, access = 0;
+
+ for (pos = 0; pos < KSYM_TRACER_OP_LEN; pos++) {
+ switch (access_str[pos]) {
+ case 'r':
+ access += (pos == 0) ? 4 : -1;
+ break;
+ case 'w':
+ access += (pos == 1) ? 2 : -1;
+ break;
+ case '-':
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ switch (access) {
+ case 6:
+ access = HW_BREAKPOINT_RW;
+ break;
+ case 2:
+ access = HW_BREAKPOINT_WRITE;
+ break;
+ case 0:
+ access = 0;
+ }
+
+ return access;
+}
+
+/*
+ * There can be several possible malformed requests and we attempt to capture
+ * all of them. We enumerate some of the rules
+ * 1. We will not allow kernel symbols with ':' since it is used as a delimiter.
+ * i.e. multiple ':' symbols disallowed. Possible uses are of the form
+ * <module>:<ksym_name>:<op>.
+ * 2. No delimiter symbol ':' in the input string
+ * 3. Spurious operator symbols or symbols not in their respective positions
+ * 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file
+ * 5. Kernel symbol not a part of /proc/kallsyms
+ * 6. Duplicate requests
+ */
+static int parse_ksym_trace_str(char *input_string, char **ksymname,
+ unsigned long *addr)
+{
+ char *delimiter = ":";
+ int ret;
+
+ ret = -EINVAL;
+ *ksymname = strsep(&input_string, delimiter);
+ *addr = kallsyms_lookup_name(*ksymname);
+
+ /* Check for malformed request: (2), (1) and (5) */
+ if ((!input_string) ||
+ (strlen(input_string) != (KSYM_TRACER_OP_LEN + 1)) ||
+ (*addr == 0))
+ goto return_code;
+ ret = ksym_trace_get_access_type(input_string);
+
+return_code:
+ return ret;
+}
+
+static int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
+{
+ struct trace_ksym *entry;
+ int ret;
+
+ if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
+ printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No"
+ " new requests for tracing can be accepted now.\n",
+ KSYM_TRACER_MAX);
+ return -ENOSPC;
+ }
+
+ entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
+ if (!entry)
+ return -ENOMEM;
+
+ entry->ksym_hbp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL);
+ if (!entry->ksym_hbp) {
+ kfree(entry);
+ return -ENOMEM;
+ }
+
+ entry->ksym_hbp->info.name = ksymname;
+ entry->ksym_hbp->info.type = op;
+ entry->ksym_addr = entry->ksym_hbp->info.address = addr;
+ entry->ksym_hbp->info.len = HW_BREAKPOINT_LEN_4;
+
+ entry->ksym_hbp->triggered = (void *)ksym_hbp_handler;
+
+ ret = register_kernel_hw_breakpoint(entry->ksym_hbp);
+ if (ret < 0) {
+ printk(KERN_INFO "ksym_tracer request failed. Try again"
+ " later!!\n");
+ kfree(entry->ksym_hbp);
+ kfree(entry);
+ return -EAGAIN;
+ }
+ hlist_add_head(&(entry->ksym_hlist), &ksym_filter_head);
+ ksym_filter_entry_count++;
+
+ return 0;
+}
+
+static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ struct trace_ksym *entry;
+ struct hlist_node *node;
+ char buf[KSYM_FILTER_ENTRY_LEN * KSYM_TRACER_MAX];
+ ssize_t ret, cnt = 0;
+
+ mutex_lock(&ksym_tracer_mutex);
+
+ hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
+ cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt, "%s:",
+ entry->ksym_hbp->info.name);
+ if (entry->ksym_hbp->info.type == HW_BREAKPOINT_WRITE)
+ cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt,
+ "-w-\n");
+ else if (entry->ksym_hbp->info.type == HW_BREAKPOINT_RW)
+ cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt,
+ "rw-\n");
+ }
+ ret = simple_read_from_buffer(ubuf, count, ppos, buf, strlen(buf));
+ mutex_unlock(&ksym_tracer_mutex);
+
+ return ret;
+}
+
+static ssize_t ksym_trace_filter_write(struct file *file,
+ const char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ struct trace_ksym *entry;
+ struct hlist_node *node;
+ char *input_string, *ksymname = NULL;
+ unsigned long ksym_addr = 0;
+ int ret, op, changed = 0;
+
+ /* Ignore echo "" > ksym_trace_filter */
+ if (count == 0)
+ return 0;
+
+ input_string = kzalloc(count, GFP_KERNEL);
+ if (!input_string)
+ return -ENOMEM;
+
+ if (copy_from_user(input_string, buffer, count)) {
+ kfree(input_string);
+ return -EFAULT;
+ }
+
+ ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
+ if (ret < 0) {
+ kfree(input_string);
+ return ret;
+ }
+
+ mutex_lock(&ksym_tracer_mutex);
+
+ ret = -EINVAL;
+ hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
+ if (entry->ksym_addr == ksym_addr) {
+ /* Check for malformed request: (6) */
+ if (entry->ksym_hbp->info.type != op)
+ changed = 1;
+ else
+ goto err_ret;
+ break;
+ }
+ }
+ if (changed) {
+ unregister_kernel_hw_breakpoint(entry->ksym_hbp);
+ entry->ksym_hbp->info.type = op;
+ if (op > 0) {
+ ret = register_kernel_hw_breakpoint(entry->ksym_hbp);
+ if (ret == 0) {
+ ret = count;
+ goto unlock_ret_path;
+ }
+ }
+ ksym_filter_entry_count--;
+ hlist_del(&(entry->ksym_hlist));
+ kfree(entry->ksym_hbp);
+ kfree(entry);
+ ret = count;
+ goto err_ret;
+ } else {
+ /* Check for malformed request: (4) */
+ if (op == 0)
+ goto err_ret;
+ ret = process_new_ksym_entry(ksymname, op, ksym_addr);
+ if (ret)
+ goto err_ret;
+ }
+ ret = count;
+ goto unlock_ret_path;
+
+err_ret:
+ kfree(input_string);
+
+unlock_ret_path:
+ mutex_unlock(&ksym_tracer_mutex);
+ return ret;
+}
+
+static const struct file_operations ksym_tracing_fops = {
+ .open = tracing_open_generic,
+ .read = ksym_trace_filter_read,
+ .write = ksym_trace_filter_write,
+};
+
+static void ksym_trace_reset(struct trace_array *tr)
+{
+ struct trace_ksym *entry;
+ struct hlist_node *node, *node1;
+
+ ksym_tracing_enabled = 0;
+
+ mutex_lock(&ksym_tracer_mutex);
+ hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
+ ksym_hlist) {
+ unregister_kernel_hw_breakpoint(entry->ksym_hbp);
+ ksym_filter_entry_count--;
+ hlist_del(&(entry->ksym_hlist));
+
+ /* Free the 'input_string' only if reset
+ * after startup self-test
+ */
+#ifdef CONFIG_FTRACE_SELFTEST
+ if (strncmp(entry->ksym_hbp->info.name, KSYM_SELFTEST_ENTRY,
+ strlen(KSYM_SELFTEST_ENTRY)) != 0)
+#endif /* CONFIG_FTRACE_SELFTEST*/
+ kfree(entry->ksym_hbp->info.name);
+ kfree(entry->ksym_hbp);
+ kfree(entry);
+ }
+ mutex_unlock(&ksym_tracer_mutex);
+
+}
+
+static int ksym_trace_init(struct trace_array *tr)
+{
+ int cpu, ret = 0;
+
+ for_each_online_cpu(cpu)
+ tracing_reset(tr, cpu);
+
+ ksym_tracing_enabled = 1;
+ ksym_trace_array = tr;
+
+#ifdef CONFIG_FTRACE_SELFTEST
+ /* Check if we are re-entering self-test code during initialisation */
+ if (ksym_selftest_dummy)
+ goto ret_path;
+
+ ksym_selftest_dummy = 0;
+
+ /* Register the read-write tracing request */
+ ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY, HW_BREAKPOINT_RW,
+ (unsigned long)(&ksym_selftest_dummy));
+
+ if (ret < 0) {
+ printk(KERN_CONT "ksym_trace read-write startup test failed\n");
+ goto ret_path;
+ }
+ /* Perform a read and a write operation over the dummy variable to
+ * trigger the tracer
+ */
+ if (ksym_selftest_dummy == 0)
+ ksym_selftest_dummy++;
+
+ret_path:
+#endif /* CONFIG_FTRACE_SELFTEST */
+
+ return ret;
+}
+
+static void ksym_trace_print_header(struct seq_file *m)
+{
+
+ seq_puts(m,
+ "# TASK-PID CPU# Symbol Type "
+ "Function \n");
+ seq_puts(m,
+ "# | | | | "
+ "| \n");
+}
+
+static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
+{
+ struct trace_entry *entry = iter->ent;
+ struct trace_seq *s = &iter->seq;
+ struct trace_ksym *field;
+ char str[KSYM_SYMBOL_LEN];
+ int ret;
+
+ if (entry->type != TRACE_KSYM)
+ return TRACE_TYPE_UNHANDLED;
+
+ trace_assign_type(field, entry);
+
+ ret = trace_seq_printf(s, "%-15s %-5d %-3d %-20s ", field->p_name,
+ entry->pid, iter->cpu, field->ksym_name);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ switch (field->ksym_hbp->info.type) {
+ case HW_BREAKPOINT_WRITE:
+ ret = trace_seq_printf(s, " W ");
+ break;
+ case HW_BREAKPOINT_RW:
+ ret = trace_seq_printf(s, " RW ");
+ break;
+ default:
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ sprint_symbol(str, field->ip);
+ ret = trace_seq_printf(s, "%-20s\n", str);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
+}
+
+struct tracer ksym_tracer __read_mostly =
+{
+ .name = "ksym_tracer",
+ .init = ksym_trace_init,
+ .reset = ksym_trace_reset,
+#ifdef CONFIG_FTRACE_SELFTEST
+ .selftest = trace_selftest_startup_ksym,
+#endif
+ .print_header = ksym_trace_print_header,
+ .print_line = ksym_trace_output
+};
+
+__init static int init_ksym_trace(void)
+{
+ struct dentry *d_tracer;
+ struct dentry *entry;
+
+ d_tracer = tracing_init_dentry();
+ ksym_filter_entry_count = 0;
+
+ entry = debugfs_create_file("ksym_trace_filter", 0666, d_tracer,
+ NULL, &ksym_tracing_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs "
+ "'ksym_trace_filter' file\n");
+
+ return register_tracer(&ksym_tracer);
+}
+device_initcall(init_ksym_trace);
+
+
+#ifdef CONFIG_PROFILE_KSYM_TRACER
+static int ksym_tracer_stat_headers(struct seq_file *m)
+{
+ seq_printf(m, " Access type ");
+ seq_printf(m, " Symbol Counter \n");
+ return 0;
+}
+
+static int ksym_tracer_stat_show(struct seq_file *m, void *v)
+{
+ struct hlist_node *stat = v;
+ struct trace_ksym *entry;
+ int access_type = 0;
+ char fn_name[KSYM_NAME_LEN];
+
+ entry = hlist_entry(stat, struct trace_ksym, ksym_hlist);
+
+ if (entry->ksym_hbp)
+ access_type = entry->ksym_hbp->info.type;
+
+ switch (access_type) {
+ case HW_BREAKPOINT_WRITE:
+ seq_printf(m, " W ");
+ break;
+ case HW_BREAKPOINT_RW:
+ seq_printf(m, " RW ");
+ break;
+ default:
+ seq_printf(m, " NA ");
+ }
+
+ if (lookup_symbol_name(entry->ksym_addr, fn_name) >= 0)
+ seq_printf(m, " %s ", fn_name);
+ else
+ seq_printf(m, " <NA> ");
+
+ seq_printf(m, "%15lu\n", entry->counter);
+ return 0;
+}
+
+static void *ksym_tracer_stat_start(void)
+{
+ return &(ksym_filter_head.first);
+}
+
+static void *
+ksym_tracer_stat_next(void *v, int idx)
+{
+ struct hlist_node *stat = v;
+
+ return stat->next;
+}
+
+static struct tracer_stat ksym_tracer_stats = {
+ .name = "ksym_tracer",
+ .stat_start = ksym_tracer_stat_start,
+ .stat_next = ksym_tracer_stat_next,
+ .stat_headers = ksym_tracer_stat_headers,
+ .stat_show = ksym_tracer_stat_show
+};
+
+__init static int ksym_tracer_stat_init(void)
+{
+ int ret;
+
+ ret = register_stat_tracer(&ksym_tracer_stats);
+ if (ret) {
+ printk(KERN_WARNING "Warning: could not register "
+ "ksym tracer stats\n");
+ return 1;
+ }
+
+ return 0;
+}
+fs_initcall(ksym_tracer_stat_init);
+#endif /* CONFIG_PROFILE_KSYM_TRACER */
Index: kernel/trace/trace_selftest.c
===================================================================
--- kernel/trace/trace_selftest.c.orig
+++ kernel/trace/trace_selftest.c
@@ -16,6 +16,7 @@ static inline int trace_valid_entry(stru
case TRACE_BRANCH:
case TRACE_GRAPH_ENT:
case TRACE_GRAPH_RET:
+ case TRACE_KSYM:
return 1;
}
return 0;
@@ -749,3 +750,38 @@ trace_selftest_startup_branch(struct tra
return ret;
}
#endif /* CONFIG_BRANCH_TRACER */
+
+#ifdef CONFIG_KSYM_TRACER
+int
+trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
+{
+ unsigned long count;
+ int ret;
+
+ /* start the tracing */
+ ret = tracer_init(trace, tr);
+ if (ret) {
+ warn_failed_init_tracer(trace, ret);
+ return ret;
+ }
+
+ /* Sleep for a 1/10 of a second */
+ msleep(100);
+ /* stop the tracing. */
+ tracing_stop();
+ /* check the trace buffer */
+ ret = trace_test_buffer(tr, &count);
+ trace->reset(tr);
+ tracing_start();
+
+ /* read & write operations - one each is performed on the dummy variable
+ * triggering two entries in the trace buffer
+ */
+ if (!ret && count != 2) {
+ printk(KERN_CONT "Ksym tracer startup test failed");
+ ret = -1;
+ }
+
+ return ret;
+}
+#endif /* CONFIG_KSYM_TRACER */
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v2
2009-04-07 6:37 ` [Patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v2 K.Prasad
@ 2009-04-08 8:02 ` Frederic Weisbecker
2009-04-08 11:12 ` K.Prasad
0 siblings, 1 reply; 33+ messages in thread
From: Frederic Weisbecker @ 2009-04-08 8:02 UTC (permalink / raw)
To: K.Prasad
Cc: Alan Stern, Ingo Molnar, Linux Kernel Mailing List, Andrew Morton,
Benjamin Herrenschmidt, maneesh, Roland McGrath, Steven Rostedt,
Steven Rostedt
Hi Prasad,
On Tue, Apr 07, 2009 at 12:07:14PM +0530, K.Prasad wrote:
> This patch adds an ftrace plugin to detect and profile memory access over kernel
> variables. It uses HW Breakpoint interfaces to 'watch' memory addresses.
>
> Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
> Acked-by: Steven Rostedt <srostedt@redhat.com>
> ---
> kernel/trace/Kconfig | 21 +
> kernel/trace/Makefile | 1
> kernel/trace/trace.h | 25 +
> kernel/trace/trace_ksym.c | 558 ++++++++++++++++++++++++++++++++++++++++++
> kernel/trace/trace_selftest.c | 36 ++
> 5 files changed, 641 insertions(+)
>
> Index: kernel/trace/Kconfig
> ===================================================================
> --- kernel/trace/Kconfig.orig
> +++ kernel/trace/Kconfig
> @@ -268,6 +268,27 @@ config POWER_TRACER
> power management decisions, specifically the C-state and P-state
> behavior.
>
> +config KSYM_TRACER
> + bool "Trace read and write access on kernel memory locations"
> + depends on HAVE_HW_BREAKPOINT
> + select TRACING
> + help
> + This tracer helps find read and write operations on any given kernel
> + symbol i.e. /proc/kallsyms.
> +
> +config PROFILE_KSYM_TRACER
> + bool "Profile all kernel memory accesses on 'watched' variables"
> + depends on KSYM_TRACER
> + help
> + This tracer profiles kernel accesses on variables watched through the
> + ksym tracer ftrace plugin. Depending upon the hardware, all read
> + and write operations on kernel variables can be monitored for
> + accesses.
> +
> + The results will be displayed in:
> + /debugfs/tracing/profile_ksym
> +
> + Say N if unsure.
>
> config STACK_TRACER
> bool "Trace max stack"
> Index: kernel/trace/Makefile
> ===================================================================
> --- kernel/trace/Makefile.orig
> +++ kernel/trace/Makefile
> @@ -46,5 +46,6 @@ obj-$(CONFIG_EVENT_TRACER) += trace_expo
> obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
> obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
> obj-$(CONFIG_EVENT_TRACER) += trace_events_filter.o
> +obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
>
> libftrace-y := ftrace.o
> Index: kernel/trace/trace.h
> ===================================================================
> --- kernel/trace/trace.h.orig
> +++ kernel/trace/trace.h
> @@ -12,6 +12,10 @@
> #include <trace/kmemtrace.h>
> #include <trace/power.h>
>
> +#ifdef CONFIG_KSYM_TRACER
> +#include <asm/hw_breakpoint.h>
> +#endif
> +
> enum trace_type {
> __TRACE_FIRST_TYPE = 0,
>
> @@ -37,6 +41,7 @@ enum trace_type {
> TRACE_KMEM_FREE,
> TRACE_POWER,
> TRACE_BLK,
> + TRACE_KSYM,
>
> __TRACE_LAST_TYPE,
> };
> @@ -212,6 +217,23 @@ struct syscall_trace_exit {
> unsigned long ret;
> };
>
> +#ifdef CONFIG_KSYM_TRACER
> +struct trace_ksym {
> + struct trace_entry ent;
> + struct hw_breakpoint *ksym_hbp;
> + unsigned long ksym_addr;
> + unsigned long ip;
> +#ifdef CONFIG_PROFILE_KSYM_TRACER
> + unsigned long counter;
> +#endif
> + struct hlist_node ksym_hlist;
> + char ksym_name[KSYM_NAME_LEN];
> + char p_name[TASK_COMM_LEN];
> +};
> +#else
> +struct trace_ksym {
> +};
> +#endif /* CONFIG_KSYM_TRACER */
> /*
Is this #ifdef CONFIG_KSYM_TRACER necessary?
In the off case it wouldn't hurt, I guess; nor would it
take up any significant space.
> * trace_flag_type is an enumeration that holds different
> @@ -330,6 +352,7 @@ extern void __ftrace_bad_type(void);
> TRACE_SYSCALL_ENTER); \
> IF_ASSIGN(var, ent, struct syscall_trace_exit, \
> TRACE_SYSCALL_EXIT); \
> + IF_ASSIGN(var, ent, struct trace_ksym, TRACE_KSYM); \
> __ftrace_bad_type(); \
> } while (0)
>
> @@ -593,6 +616,8 @@ extern int trace_selftest_startup_syspro
> struct trace_array *tr);
> extern int trace_selftest_startup_branch(struct tracer *trace,
> struct trace_array *tr);
> +extern int trace_selftest_startup_ksym(struct tracer *trace,
> + struct trace_array *tr);
> #endif /* CONFIG_FTRACE_STARTUP_TEST */
>
> extern void *head_page(struct trace_array_cpu *data);
> Index: kernel/trace/trace_ksym.c
> ===================================================================
> --- /dev/null
> +++ kernel/trace/trace_ksym.c
> @@ -0,0 +1,558 @@
> +/*
> + * trace_ksym.c - Kernel Symbol Tracer
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
> + *
> + * Copyright (C) IBM Corporation, 2009
> + */
> +
> +#include <linux/kallsyms.h>
> +#include <linux/uaccess.h>
> +#include <linux/debugfs.h>
> +#include <linux/ftrace.h>
> +#include <linux/module.h>
> +#include <linux/jhash.h>
> +#include <linux/fs.h>
> +
> +#include "trace_output.h"
> +#include "trace_stat.h"
> +#include "trace.h"
> +
> +/* For now, let us restrict the no. of symbols traced simultaneously to number
> + * of available hardware breakpoint registers.
> + */
> +#define KSYM_TRACER_MAX HB_NUM
> +
> +#define KSYM_TRACER_OP_LEN 3 /* rw- */
> +#define KSYM_FILTER_ENTRY_LEN (KSYM_NAME_LEN + KSYM_TRACER_OP_LEN + 1)
> +
> +#ifdef CONFIG_FTRACE_SELFTEST
> +
> +static int ksym_selftest_dummy;
> +#define KSYM_SELFTEST_ENTRY "ksym_selftest_dummy"
> +
> +#endif /* CONFIG_FTRACE_SELFTEST */
> +
> +static struct trace_array *ksym_trace_array;
> +
> +DEFINE_MUTEX(ksym_tracer_mutex);
> +
> +static unsigned int ksym_filter_entry_count;
> +static unsigned int ksym_tracing_enabled;
> +
> +static HLIST_HEAD(ksym_filter_head);
> +
> +#ifdef CONFIG_PROFILE_KSYM_TRACER
> +
> +#define MAX_UL_INT 0xffffffff
> +DEFINE_SPINLOCK(ksym_stat_lock);
> +
> +void ksym_collect_stats(unsigned long hbp_hit_addr)
> +{
> + struct hlist_node *node;
> + struct trace_ksym *entry;
> +
> + spin_lock(&ksym_stat_lock);
Does this spinlock protect your list iteration against concurrent removal
or inserts? Other readers and writers of this list use ksym_tracer_mutex
to synchronize; it looks like this site overrides that rule.
> + hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
> + if ((entry->ksym_addr == hbp_hit_addr) &&
> + (entry->counter <= MAX_UL_INT)) {
> + entry->counter++;
> + break;
> + }
> + }
> + spin_unlock(&ksym_stat_lock);
> +}
> +#endif /* CONFIG_PROFILE_KSYM_TRACER */
> +
> +void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs)
> +{
> + struct ring_buffer_event *event;
> + struct trace_array *tr;
> + struct trace_ksym *entry;
> + int pc;
> +
> + if (!ksym_tracing_enabled)
> + return;
> +
> + tr = ksym_trace_array;
> + pc = preempt_count();
> +
> + event = trace_buffer_lock_reserve(tr, TRACE_KSYM,
> + sizeof(*entry), 0, pc);
> + if (!event)
> + return;
> +
> + entry = ring_buffer_event_data(event);
> + strlcpy(entry->ksym_name, hbp->info.name, KSYM_SYMBOL_LEN);
> + entry->ksym_hbp = hbp;
> + entry->ip = instruction_pointer(regs);
> + strlcpy(entry->p_name, current->comm, TASK_COMM_LEN);
> +#ifdef CONFIG_PROFILE_KSYM_TRACER
> + ksym_collect_stats(hbp->info.address);
> +#endif /* CONFIG_PROFILE_KSYM_TRACER */
> +
> + trace_buffer_unlock_commit(tr, event, 0, pc);
> +}
> +
> +/* Valid access types are represented as
> + *
> + * rw- : Set Read/Write Access Breakpoint
> + * -w- : Set Write Access Breakpoint
> + * --- : Clear Breakpoints
> + * --x : Set Execution Break points (Not available yet)
> + *
> + */
> +static int ksym_trace_get_access_type(char *access_str)
> +{
> + int pos, access = 0;
> +
> + for (pos = 0; pos < KSYM_TRACER_OP_LEN; pos++) {
> + switch (access_str[pos]) {
> + case 'r':
> + access += (pos == 0) ? 4 : -1;
> + break;
> + case 'w':
> + access += (pos == 1) ? 2 : -1;
> + break;
> + case '-':
> + break;
> + default:
> + return -EINVAL;
> + }
> + }
> +
> + switch (access) {
> + case 6:
> + access = HW_BREAKPOINT_RW;
> + break;
> + case 2:
> + access = HW_BREAKPOINT_WRITE;
> + break;
> + case 0:
> + access = 0;
> + }
> +
> + return access;
> +}
> +
> +/*
> + * There can be several possible malformed requests and we attempt to capture
> + * all of them. We enumerate some of the rules
> + * 1. We will not allow kernel symbols with ':' since it is used as a delimiter.
> + * i.e. multiple ':' symbols disallowed. Possible uses are of the form
> + * <module>:<ksym_name>:<op>.
> + * 2. No delimiter symbol ':' in the input string
> + * 3. Spurious operator symbols or symbols not in their respective positions
> + * 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file
> + * 5. Kernel symbol not a part of /proc/kallsyms
> + * 6. Duplicate requests
> + */
> +static int parse_ksym_trace_str(char *input_string, char **ksymname,
> + unsigned long *addr)
> +{
> + char *delimiter = ":";
> + int ret;
> +
> + ret = -EINVAL;
> + *ksymname = strsep(&input_string, delimiter);
> + *addr = kallsyms_lookup_name(*ksymname);
> +
> + /* Check for malformed request: (2), (1) and (5) */
> + if ((!input_string) ||
> + (strlen(input_string) != (KSYM_TRACER_OP_LEN + 1)) ||
> + (*addr == 0))
> + goto return_code;
> + ret = ksym_trace_get_access_type(input_string);
> +
> +return_code:
> + return ret;
> +}
> +
> +static int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
> +{
> + struct trace_ksym *entry;
> + int ret;
> +
> + if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
> + printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No"
> + " new requests for tracing can be accepted now.\n",
> + KSYM_TRACER_MAX);
> + return -ENOSPC;
> + }
> +
> + entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
> + if (!entry)
> + return -ENOMEM;
> +
> + entry->ksym_hbp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL);
> + if (!entry->ksym_hbp) {
> + kfree(entry);
> + return -ENOMEM;
> + }
> +
> + entry->ksym_hbp->info.name = ksymname;
> + entry->ksym_hbp->info.type = op;
> + entry->ksym_addr = entry->ksym_hbp->info.address = addr;
> + entry->ksym_hbp->info.len = HW_BREAKPOINT_LEN_4;
> +
> + entry->ksym_hbp->triggered = (void *)ksym_hbp_handler;
> +
> + ret = register_kernel_hw_breakpoint(entry->ksym_hbp);
> + if (ret < 0) {
> + printk(KERN_INFO "ksym_tracer request failed. Try again"
> + " later!!\n");
> + kfree(entry->ksym_hbp);
> + kfree(entry);
> + return -EAGAIN;
> + }
> + hlist_add_head(&(entry->ksym_hlist), &ksym_filter_head);
> + ksym_filter_entry_count++;
> +
> + return 0;
> +}
> +
> +static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
> + size_t count, loff_t *ppos)
> +{
> + struct trace_ksym *entry;
> + struct hlist_node *node;
> + char buf[KSYM_FILTER_ENTRY_LEN * KSYM_TRACER_MAX];
> + ssize_t ret, cnt = 0;
> +
> + mutex_lock(&ksym_tracer_mutex);
> +
> + hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
> + cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt, "%s:",
> + entry->ksym_hbp->info.name);
> + if (entry->ksym_hbp->info.type == HW_BREAKPOINT_WRITE)
> + cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt,
> + "-w-\n");
> + else if (entry->ksym_hbp->info.type == HW_BREAKPOINT_RW)
> + cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt,
> + "rw-\n");
> + }
> + ret = simple_read_from_buffer(ubuf, count, ppos, buf, strlen(buf));
> + mutex_unlock(&ksym_tracer_mutex);
> +
> + return ret;
> +}
> +
> +static ssize_t ksym_trace_filter_write(struct file *file,
> + const char __user *buffer,
> + size_t count, loff_t *ppos)
> +{
> + struct trace_ksym *entry;
> + struct hlist_node *node;
> + char *input_string, *ksymname = NULL;
> + unsigned long ksym_addr = 0;
> + int ret, op, changed = 0;
> +
> + /* Ignore echo "" > ksym_trace_filter */
> + if (count == 0)
> + return 0;
> +
> + input_string = kzalloc(count, GFP_KERNEL);
> + if (!input_string)
> + return -ENOMEM;
> +
> + if (copy_from_user(input_string, buffer, count)) {
> + kfree(input_string);
> + return -EFAULT;
> + }
> +
> + ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
> + if (ret < 0) {
> + kfree(input_string);
> + return ret;
> + }
> +
> + mutex_lock(&ksym_tracer_mutex);
> +
> + ret = -EINVAL;
> + hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
> + if (entry->ksym_addr == ksym_addr) {
> + /* Check for malformed request: (6) */
> + if (entry->ksym_hbp->info.type != op)
> + changed = 1;
> + else
> + goto err_ret;
> + break;
> + }
> + }
> + if (changed) {
> + unregister_kernel_hw_breakpoint(entry->ksym_hbp);
> + entry->ksym_hbp->info.type = op;
> + if (op > 0) {
> + ret = register_kernel_hw_breakpoint(entry->ksym_hbp);
> + if (ret == 0) {
> + ret = count;
> + goto unlock_ret_path;
> + }
> + }
> + ksym_filter_entry_count--;
> + hlist_del(&(entry->ksym_hlist));
> + kfree(entry->ksym_hbp);
> + kfree(entry);
> + ret = count;
> + goto err_ret;
> + } else {
> + /* Check for malformed request: (4) */
> + if (op == 0)
> + goto err_ret;
> + ret = process_new_ksym_entry(ksymname, op, ksym_addr);
> + if (ret)
> + goto err_ret;
> + }
> + ret = count;
> + goto unlock_ret_path;
> +
> +err_ret:
> + kfree(input_string);
> +
> +unlock_ret_path:
> + mutex_unlock(&ksym_tracer_mutex);
> + return ret;
> +}
> +
> +static const struct file_operations ksym_tracing_fops = {
> + .open = tracing_open_generic,
> + .read = ksym_trace_filter_read,
> + .write = ksym_trace_filter_write,
> +};
> +
> +static void ksym_trace_reset(struct trace_array *tr)
> +{
> + struct trace_ksym *entry;
> + struct hlist_node *node, *node1;
> +
> + ksym_tracing_enabled = 0;
> +
> + mutex_lock(&ksym_tracer_mutex);
> + hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
> + ksym_hlist) {
> + unregister_kernel_hw_breakpoint(entry->ksym_hbp);
> + ksym_filter_entry_count--;
> + hlist_del(&(entry->ksym_hlist));
> +
> + /* Free the 'input_string' only if reset
> + * after startup self-test
> + */
> +#ifdef CONFIG_FTRACE_SELFTEST
> + if (strncmp(entry->ksym_hbp->info.name, KSYM_SELFTEST_ENTRY,
> + strlen(KSYM_SELFTEST_ENTRY)) != 0)
> +#endif /* CONFIG_FTRACE_SELFTEST*/
> + kfree(entry->ksym_hbp->info.name);
> + kfree(entry->ksym_hbp);
> + kfree(entry);
> + }
> + mutex_unlock(&ksym_tracer_mutex);
> +
> +}
> +
> +static int ksym_trace_init(struct trace_array *tr)
> +{
> + int cpu, ret = 0;
> +
> + for_each_online_cpu(cpu)
> + tracing_reset(tr, cpu);
> +
> + ksym_tracing_enabled = 1;
> + ksym_trace_array = tr;
> +
> +#ifdef CONFIG_FTRACE_SELFTEST
> + /* Check if we are re-entering self-test code during initialisation */
> + if (ksym_selftest_dummy)
> + goto ret_path;
> +
> + ksym_selftest_dummy = 0;
> +
> + /* Register the read-write tracing request */
> + ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY, HW_BREAKPOINT_RW,
> + (unsigned long)(&ksym_selftest_dummy));
> +
> + if (ret < 0) {
> + printk(KERN_CONT "ksym_trace read-write startup test failed\n");
> + goto ret_path;
> + }
> + /* Perform a read and a write operation over the dummy variable to
> + * trigger the tracer
> + */
> + if (ksym_selftest_dummy == 0)
> + ksym_selftest_dummy++;
> +
> +ret_path:
> +#endif /* CONFIG_FTRACE_SELFTEST */
> +
> + return ret;
> +}
> +
> +static void ksym_trace_print_header(struct seq_file *m)
> +{
> +
> + seq_puts(m,
> + "# TASK-PID CPU# Symbol Type "
> + "Function \n");
> + seq_puts(m,
> + "# | | | | "
> + "| \n");
> +}
> +
> +static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
> +{
> + struct trace_entry *entry = iter->ent;
> + struct trace_seq *s = &iter->seq;
> + struct trace_ksym *field;
> + char str[KSYM_SYMBOL_LEN];
> + int ret;
> +
> + if (entry->type != TRACE_KSYM)
> + return TRACE_TYPE_UNHANDLED;
> +
> + trace_assign_type(field, entry);
> +
> + ret = trace_seq_printf(s, "%-15s %-5d %-3d %-20s ", field->p_name,
> + entry->pid, iter->cpu, field->ksym_name);
> + if (!ret)
> + return TRACE_TYPE_PARTIAL_LINE;
> +
> + switch (field->ksym_hbp->info.type) {
> + case HW_BREAKPOINT_WRITE:
> + ret = trace_seq_printf(s, " W ");
> + break;
> + case HW_BREAKPOINT_RW:
> + ret = trace_seq_printf(s, " RW ");
> + break;
> + default:
> + return TRACE_TYPE_PARTIAL_LINE;
> + }
> +
> + if (!ret)
> + return TRACE_TYPE_PARTIAL_LINE;
> +
> + sprint_symbol(str, field->ip);
> + ret = trace_seq_printf(s, "%-20s\n", str);
> + if (!ret)
> + return TRACE_TYPE_PARTIAL_LINE;
> +
> + return TRACE_TYPE_HANDLED;
> +}
> +
> +struct tracer ksym_tracer __read_mostly =
> +{
> + .name = "ksym_tracer",
> + .init = ksym_trace_init,
> + .reset = ksym_trace_reset,
> +#ifdef CONFIG_FTRACE_SELFTEST
> + .selftest = trace_selftest_startup_ksym,
> +#endif
> + .print_header = ksym_trace_print_header,
> + .print_line = ksym_trace_output
> +};
> +
> +__init static int init_ksym_trace(void)
> +{
> + struct dentry *d_tracer;
> + struct dentry *entry;
> +
> + d_tracer = tracing_init_dentry();
> + ksym_filter_entry_count = 0;
> +
> + entry = debugfs_create_file("ksym_trace_filter", 0666, d_tracer,
> + NULL, &ksym_tracing_fops);
> + if (!entry)
> + pr_warning("Could not create debugfs "
> + "'ksym_trace_filter' file\n");
> +
> + return register_tracer(&ksym_tracer);
> +}
> +device_initcall(init_ksym_trace);
> +
> +
> +#ifdef CONFIG_PROFILE_KSYM_TRACER
> +static int ksym_tracer_stat_headers(struct seq_file *m)
> +{
> + seq_printf(m, " Access type ");
> + seq_printf(m, " Symbol Counter \n");
> + return 0;
> +}
> +
> +static int ksym_tracer_stat_show(struct seq_file *m, void *v)
> +{
> + struct hlist_node *stat = v;
> + struct trace_ksym *entry;
> + int access_type = 0;
> + char fn_name[KSYM_NAME_LEN];
> +
> + entry = hlist_entry(stat, struct trace_ksym, ksym_hlist);
> +
> + if (entry->ksym_hbp)
> + access_type = entry->ksym_hbp->info.type;
> +
> + switch (access_type) {
> + case HW_BREAKPOINT_WRITE:
> + seq_printf(m, " W ");
> + break;
> + case HW_BREAKPOINT_RW:
> + seq_printf(m, " RW ");
> + break;
> + default:
> + seq_printf(m, " NA ");
> + }
> +
> + if (lookup_symbol_name(entry->ksym_addr, fn_name) >= 0)
> + seq_printf(m, " %s ", fn_name);
> + else
> + seq_printf(m, " <NA> ");
> +
> + seq_printf(m, "%15lu\n", entry->counter);
> + return 0;
> +}
> +
> +static void *ksym_tracer_stat_start(void)
> +{
> + return &(ksym_filter_head.first);
> +}
> +
> +static void *
> +ksym_tracer_stat_next(void *v, int idx)
> +{
> + struct hlist_node *stat = v;
> +
> + return stat->next;
> +}
> +
> +static struct tracer_stat ksym_tracer_stats = {
> + .name = "ksym_tracer",
> + .stat_start = ksym_tracer_stat_start,
> + .stat_next = ksym_tracer_stat_next,
> + .stat_headers = ksym_tracer_stat_headers,
> + .stat_show = ksym_tracer_stat_show
> +};
> +
> +__init static int ksym_tracer_stat_init(void)
> +{
> + int ret;
> +
> + ret = register_stat_tracer(&ksym_tracer_stats);
> + if (ret) {
> + printk(KERN_WARNING "Warning: could not register "
> + "ksym tracer stats\n");
> + return 1;
> + }
> +
> + return 0;
> +}
> +fs_initcall(ksym_tracer_stat_init);
> +#endif /* CONFIG_PROFILE_KSYM_TRACER */
> Index: kernel/trace/trace_selftest.c
> ===================================================================
> --- kernel/trace/trace_selftest.c.orig
> +++ kernel/trace/trace_selftest.c
> @@ -16,6 +16,7 @@ static inline int trace_valid_entry(stru
> case TRACE_BRANCH:
> case TRACE_GRAPH_ENT:
> case TRACE_GRAPH_RET:
> + case TRACE_KSYM:
> return 1;
> }
> return 0;
> @@ -749,3 +750,38 @@ trace_selftest_startup_branch(struct tra
> return ret;
> }
> #endif /* CONFIG_BRANCH_TRACER */
> +
> +#ifdef CONFIG_KSYM_TRACER
> +int
> +trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
> +{
> + unsigned long count;
> + int ret;
> +
> + /* start the tracing */
> + ret = tracer_init(trace, tr);
> + if (ret) {
> + warn_failed_init_tracer(trace, ret);
> + return ret;
> + }
> +
> + /* Sleep for a 1/10 of a second */
> + msleep(100);
> + /* stop the tracing. */
> + tracing_stop();
> + /* check the trace buffer */
> + ret = trace_test_buffer(tr, &count);
> + trace->reset(tr);
> + tracing_start();
> +
> + /* read & write operations - one each is performed on the dummy variable
> + * triggering two entries in the trace buffer
> + */
> + if (!ret && count != 2) {
> + printk(KERN_CONT "Ksym tracer startup test failed");
> + ret = -1;
> + }
> +
> + return ret;
> +}
> +#endif /* CONFIG_KSYM_TRACER */
>
The rest looks good.
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v2
2009-04-08 8:02 ` Frederic Weisbecker
@ 2009-04-08 11:12 ` K.Prasad
0 siblings, 0 replies; 33+ messages in thread
From: K.Prasad @ 2009-04-08 11:12 UTC (permalink / raw)
To: Frederic Weisbecker
Cc: Alan Stern, Ingo Molnar, Linux Kernel Mailing List, Andrew Morton,
Benjamin Herrenschmidt, maneesh, Roland McGrath, Steven Rostedt,
Steven Rostedt
On Wed, Apr 08, 2009 at 10:02:03AM +0200, Frederic Weisbecker wrote:
> Hi Prasad,
>
>
> On Tue, Apr 07, 2009 at 12:07:14PM +0530, K.Prasad wrote:
> > This patch adds an ftrace plugin to detect and profile memory access over kernel
> > variables. It uses HW Breakpoint interfaces to 'watch' memory addresses.
> >
> > Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
> > Acked-by: Steven Rostedt <srostedt@redhat.com>
> > ---
> > kernel/trace/Kconfig | 21 +
> > kernel/trace/Makefile | 1
> > kernel/trace/trace.h | 25 +
> > kernel/trace/trace_ksym.c | 558 ++++++++++++++++++++++++++++++++++++++++++
> > kernel/trace/trace_selftest.c | 36 ++
> > 5 files changed, 641 insertions(+)
> >
> > Index: kernel/trace/Kconfig
> > ===================================================================
> > --- kernel/trace/Kconfig.orig
> > +++ kernel/trace/Kconfig
> > @@ -268,6 +268,27 @@ config POWER_TRACER
> > power management decisions, specifically the C-state and P-state
> > behavior.
> >
> > +config KSYM_TRACER
> > + bool "Trace read and write access on kernel memory locations"
> > + depends on HAVE_HW_BREAKPOINT
> > + select TRACING
> > + help
> > + This tracer helps find read and write operations on any given kernel
> > + symbol i.e. /proc/kallsyms.
> > +
> > +config PROFILE_KSYM_TRACER
> > + bool "Profile all kernel memory accesses on 'watched' variables"
> > + depends on KSYM_TRACER
> > + help
> > + This tracer profiles kernel accesses on variables watched through the
> > + ksym tracer ftrace plugin. Depending upon the hardware, all read
> > + and write operations on kernel variables can be monitored for
> > + accesses.
> > +
> > + The results will be displayed in:
> > + /debugfs/tracing/profile_ksym
> > +
> > + Say N if unsure.
> >
> > config STACK_TRACER
> > bool "Trace max stack"
> > Index: kernel/trace/Makefile
> > ===================================================================
> > --- kernel/trace/Makefile.orig
> > +++ kernel/trace/Makefile
> > @@ -46,5 +46,6 @@ obj-$(CONFIG_EVENT_TRACER) += trace_expo
> > obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
> > obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
> > obj-$(CONFIG_EVENT_TRACER) += trace_events_filter.o
> > +obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
> >
> > libftrace-y := ftrace.o
> > Index: kernel/trace/trace.h
> > ===================================================================
> > --- kernel/trace/trace.h.orig
> > +++ kernel/trace/trace.h
> > @@ -12,6 +12,10 @@
> > #include <trace/kmemtrace.h>
> > #include <trace/power.h>
> >
> > +#ifdef CONFIG_KSYM_TRACER
> > +#include <asm/hw_breakpoint.h>
> > +#endif
> > +
> > enum trace_type {
> > __TRACE_FIRST_TYPE = 0,
> >
> > @@ -37,6 +41,7 @@ enum trace_type {
> > TRACE_KMEM_FREE,
> > TRACE_POWER,
> > TRACE_BLK,
> > + TRACE_KSYM,
> >
> > __TRACE_LAST_TYPE,
> > };
> > @@ -212,6 +217,23 @@ struct syscall_trace_exit {
> > unsigned long ret;
> > };
> >
> > +#ifdef CONFIG_KSYM_TRACER
> > +struct trace_ksym {
> > + struct trace_entry ent;
> > + struct hw_breakpoint *ksym_hbp;
> > + unsigned long ksym_addr;
> > + unsigned long ip;
> > +#ifdef CONFIG_PROFILE_KSYM_TRACER
> > + unsigned long counter;
> > +#endif
> > + struct hlist_node ksym_hlist;
> > + char ksym_name[KSYM_NAME_LEN];
> > + char p_name[TASK_COMM_LEN];
> > +};
> > +#else
> > +struct trace_ksym {
> > +};
> > +#endif /* CONFIG_KSYM_TRACER */
> > /*
>
>
> Is this #ifdef CONFIG_KSYM_TRACER necessary?
> On the off-case it wouldn't hurt I guess, neither
> would it fill any irrelevant space.
>
>
>
I agree. The other structures used by various plugins are also defined
unconditionally. I will remove them when submitting the patch again for
inclusion.
> > * trace_flag_type is an enumeration that holds different
> > @@ -330,6 +352,7 @@ extern void __ftrace_bad_type(void);
> > TRACE_SYSCALL_ENTER); \
> > IF_ASSIGN(var, ent, struct syscall_trace_exit, \
> > TRACE_SYSCALL_EXIT); \
> > + IF_ASSIGN(var, ent, struct trace_ksym, TRACE_KSYM); \
> > __ftrace_bad_type(); \
> > } while (0)
> >
> > @@ -593,6 +616,8 @@ extern int trace_selftest_startup_syspro
> > struct trace_array *tr);
> > extern int trace_selftest_startup_branch(struct tracer *trace,
> > struct trace_array *tr);
> > +extern int trace_selftest_startup_ksym(struct tracer *trace,
> > + struct trace_array *tr);
> > #endif /* CONFIG_FTRACE_STARTUP_TEST */
> >
> > extern void *head_page(struct trace_array_cpu *data);
> > Index: kernel/trace/trace_ksym.c
> > ===================================================================
> > --- /dev/null
> > +++ kernel/trace/trace_ksym.c
> > @@ -0,0 +1,558 @@
> > +/*
> > + * trace_ksym.c - Kernel Symbol Tracer
> > + *
> > + * This program is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License as published by
> > + * the Free Software Foundation; either version 2 of the License, or
> > + * (at your option) any later version.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> > + * GNU General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU General Public License
> > + * along with this program; if not, write to the Free Software
> > + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
> > + *
> > + * Copyright (C) IBM Corporation, 2009
> > + */
> > +
> > +#include <linux/kallsyms.h>
> > +#include <linux/uaccess.h>
> > +#include <linux/debugfs.h>
> > +#include <linux/ftrace.h>
> > +#include <linux/module.h>
> > +#include <linux/jhash.h>
> > +#include <linux/fs.h>
> > +
> > +#include "trace_output.h"
> > +#include "trace_stat.h"
> > +#include "trace.h"
> > +
> > +/* For now, let us restrict the no. of symbols traced simultaneously to number
> > + * of available hardware breakpoint registers.
> > + */
> > +#define KSYM_TRACER_MAX HB_NUM
> > +
> > +#define KSYM_TRACER_OP_LEN 3 /* rw- */
> > +#define KSYM_FILTER_ENTRY_LEN (KSYM_NAME_LEN + KSYM_TRACER_OP_LEN + 1)
> > +
> > +#ifdef CONFIG_FTRACE_SELFTEST
> > +
> > +static int ksym_selftest_dummy;
> > +#define KSYM_SELFTEST_ENTRY "ksym_selftest_dummy"
> > +
> > +#endif /* CONFIG_FTRACE_SELFTEST */
> > +
> > +static struct trace_array *ksym_trace_array;
> > +
> > +DEFINE_MUTEX(ksym_tracer_mutex);
> > +
> > +static unsigned int ksym_filter_entry_count;
> > +static unsigned int ksym_tracing_enabled;
> > +
> > +static HLIST_HEAD(ksym_filter_head);
> > +
> > +#ifdef CONFIG_PROFILE_KSYM_TRACER
> > +
> > +#define MAX_UL_INT 0xffffffff
> > +DEFINE_SPINLOCK(ksym_stat_lock);
> > +
> > +void ksym_collect_stats(unsigned long hbp_hit_addr)
> > +{
> > + struct hlist_node *node;
> > + struct trace_ksym *entry;
> > +
> > + spin_lock(&ksym_stat_lock);
>
>
>
> Does this spinlock protect your list iteration against concurrent removal
> or inserts? Other readers and writers of this list use ksym_tracer_mutex
> to synchronize, it looks like this site override this rule.
>
>
>
While the ksym_stat_lock spinlock was meant to protect the counter
update, you've unearthed the fact that the hlist whose head is
ksym_filter_head needs to be protected from simultaneous read/write
operations that can happen through ksym_trace_filter_write().
Since the same hlist is updated/read from both exception context and
otherwise (in the control plane of ftrace), ksym_stat_lock spinlock
will be used universally after replacing the ksym_tracer_mutex (this
could potentially be treated with RCU too, but choosing spinlock for
now).
I will send out another version of this patch that includes this change,
and would welcome your acknowledgement. Thanks for pointing out the
potential issue.
-- K.Prasad
^ permalink raw reply [flat|nested] 33+ messages in thread
end of thread, other threads:[~2009-04-08 11:12 UTC | newest]
Thread overview: 33+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <20090324152028.754123712@K.Prasad>
2009-03-24 15:24 ` [Patch 01/11] Introducing generic hardware breakpoint handler interfaces K.Prasad
2009-03-24 15:25 ` [Patch 02/11] x86 architecture implementation of Hardware Breakpoint interfaces K.Prasad
2009-03-24 15:25 ` [Patch 03/11] Modifying generic debug exception to use thread-specific debug registers K.Prasad
2009-03-24 15:26 ` [Patch 04/11] Introduce user-space " K.Prasad
2009-03-24 15:26 ` [Patch 05/11] Use wrapper routines around debug registers in processor related functions K.Prasad
2009-03-24 15:26 ` [Patch 06/11] Use the new wrapper routines to access debug registers in process/thread code K.Prasad
2009-03-24 15:26 ` [Patch 07/11] Modify signal handling code to refrain from re-enabling HW Breakpoints K.Prasad
2009-03-24 15:26 ` [Patch 08/11] Modify Ptrace routines to access breakpoint registers K.Prasad
2009-03-24 15:27 ` [Patch 09/11] Cleanup HW Breakpoint registers before kexec K.Prasad
2009-03-24 15:27 ` [Patch 10/11] Sample HW breakpoint over kernel data address K.Prasad
2009-03-24 15:28 ` [Patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v2 K.Prasad
2009-03-22 9:35 ` Pavel Machek
2009-03-25 3:03 ` Steven Rostedt
2009-03-25 3:30 ` K.Prasad
2009-03-25 3:48 ` Steven Rostedt
2009-03-25 6:35 ` [Patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v3 K.Prasad
2009-03-25 13:24 ` Steven Rostedt
[not found] <20090407063058.301701787@prasadkr_t60p.in.ibm.com>
2009-04-07 6:37 ` [Patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v2 K.Prasad
2009-04-08 8:02 ` Frederic Weisbecker
2009-04-08 11:12 ` K.Prasad
[not found] <20090319234044.410725944@K.Prasad>
2009-03-19 23:50 ` K.Prasad
2009-03-20 9:04 ` Frederic Weisbecker
2009-03-21 16:24 ` K.Prasad
2009-03-21 16:39 ` Steven Rostedt
2009-03-23 19:08 ` K.Prasad
[not found] <20090307045120.039324630@linux.vnet.ibm.com>
2009-03-07 5:07 ` prasad
2009-03-07 14:53 ` KOSAKI Motohiro
2009-03-07 18:21 ` K.Prasad
2009-03-08 10:09 ` Ingo Molnar
2009-03-08 11:00 ` Frederic Weisbecker
2009-03-10 12:21 ` K.Prasad
2009-03-10 19:55 ` Frederic Weisbecker
2009-03-09 21:36 ` K.Prasad
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox