* [Patch 06/12] Use the new wrapper routines to access debug registers in process/thread code
[not found] <20090424055710.764502564@prasadkr_t60p.in.ibm.com>
@ 2009-04-24 6:17 ` K.Prasad
0 siblings, 0 replies; 40+ messages in thread
From: K.Prasad @ 2009-04-24 6:17 UTC (permalink / raw)
To: Alan Stern, Steven Rostedt, Frederic Weisbecker
Cc: Ingo Molnar, Linux Kernel Mailing List, Andrew Morton,
Benjamin Herrenschmidt, maneesh, Roland McGrath, K.Prasad
[-- Attachment #1: modify_process_related_06 --]
[-- Type: text/plain, Size: 8023 bytes --]
From: Alan Stern <stern@rowland.harvard.edu>
This patch enables the use of abstract debug registers in
process-handling routines.
[K.Prasad: Split-out from the bigger patch and minor changes following
re-basing]
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
---
arch/x86/include/asm/processor.h | 4 ----
arch/x86/kernel/process.c | 23 ++++++-----------------
arch/x86/kernel/process_32.c | 31 +++++++++++++++++++++++++++++++
arch/x86/kernel/process_64.c | 33 +++++++++++++++++++++++++++++++++
4 files changed, 70 insertions(+), 21 deletions(-)
Index: arch/x86/kernel/process.c
===================================================================
--- arch/x86/kernel/process.c.orig
+++ arch/x86/kernel/process.c
@@ -16,6 +16,8 @@
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/ds.h>
+#include <asm/debugreg.h>
+#include <asm/hw_breakpoint.h>
unsigned long idle_halt;
EXPORT_SYMBOL(idle_halt);
@@ -47,6 +49,8 @@ void free_thread_xstate(struct task_stru
kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
tsk->thread.xstate = NULL;
}
+ if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
+ flush_thread_hw_breakpoint(tsk);
WARN(tsk->thread.ds_ctx, "leaking DS context\n");
}
@@ -105,14 +109,9 @@ void flush_thread(void)
}
#endif
- clear_tsk_thread_flag(tsk, TIF_DEBUG);
+ if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
+ flush_thread_hw_breakpoint(tsk);
- tsk->thread.debugreg0 = 0;
- tsk->thread.debugreg1 = 0;
- tsk->thread.debugreg2 = 0;
- tsk->thread.debugreg3 = 0;
- tsk->thread.debugreg6 = 0;
- tsk->thread.debugreg7 = 0;
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
/*
* Forget coprocessor state..
@@ -194,16 +193,6 @@ void __switch_to_xtra(struct task_struct
else if (next->debugctlmsr != prev->debugctlmsr)
update_debugctlmsr(next->debugctlmsr);
- if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
- set_debugreg(next->debugreg0, 0);
- set_debugreg(next->debugreg1, 1);
- set_debugreg(next->debugreg2, 2);
- set_debugreg(next->debugreg3, 3);
- /* no 4 and 5 */
- set_debugreg(next->debugreg6, 6);
- set_debugreg(next->debugreg7, 7);
- }
-
if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
test_tsk_thread_flag(next_p, TIF_NOTSC)) {
/* prev and next are different */
Index: arch/x86/kernel/process_32.c
===================================================================
--- arch/x86/kernel/process_32.c.orig
+++ arch/x86/kernel/process_32.c
@@ -61,6 +61,8 @@
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>
+#include <asm/debugreg.h>
+#include <asm/hw_breakpoint.h>
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
@@ -265,7 +267,14 @@ int copy_thread(unsigned long clone_flag
task_user_gs(p) = get_user_gs(regs);
+ p->thread.io_bitmap_ptr = NULL;
+
tsk = current;
+ err = -ENOMEM;
+ if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
+ if (copy_thread_hw_breakpoint(tsk, p, clone_flags))
+ goto out;
+
if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
IO_BITMAP_BYTES, GFP_KERNEL);
@@ -285,10 +294,13 @@ int copy_thread(unsigned long clone_flag
err = do_set_thread_area(p, -1,
(struct user_desc __user *)childregs->si, 0);
+out:
if (err && p->thread.io_bitmap_ptr) {
kfree(p->thread.io_bitmap_ptr);
p->thread.io_bitmap_max = 0;
}
+ if (err)
+ flush_thread_hw_breakpoint(p);
clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
p->thread.ds_ctx = NULL;
@@ -427,6 +439,25 @@ __switch_to(struct task_struct *prev_p,
lazy_load_gs(next->gs);
percpu_write(current_task, next_p);
+ /*
+ * There's a problem with moving the switch_to_thread_hw_breakpoint()
+ * call before current is updated. Suppose a kernel breakpoint is
+ * triggered in between the two. The hw-breakpoint handler will see
+ * that current is different from the task pointer stored in
+ * last_debugged_task, so it will think the task pointer is leftover
+ * from an old task (lazy switching) and will erase it. Then until the
+ * next context switch, no user-breakpoints will be installed.
+ *
+ * The real problem is that it's impossible to update both current and
+ * last_debugged_task at the same instant, so there will always be a
+ * window in which they disagree and a breakpoint might get triggered.
+ * Since we use lazy switching, we are forced to assume that a
+ * disagreement means that current is correct and last_debugged_task is
+ * old. But if you move the code above then you'll create a window in
+ * which current is old and last_debugged_task is correct.
+ */
+ if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
+ switch_to_thread_hw_breakpoint(next_p);
return prev_p;
}
Index: arch/x86/kernel/process_64.c
===================================================================
--- arch/x86/kernel/process_64.c.orig
+++ arch/x86/kernel/process_64.c
@@ -55,6 +55,8 @@
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>
+#include <asm/debugreg.h>
+#include <asm/hw_breakpoint.h>
asmlinkage extern void ret_from_fork(void);
@@ -248,6 +250,8 @@ void release_thread(struct task_struct *
BUG();
}
}
+ if (unlikely(dead_task->thread.debugreg7))
+ flush_thread_hw_breakpoint(dead_task);
}
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
@@ -303,12 +307,18 @@ int copy_thread(unsigned long clone_flag
p->thread.fs = me->thread.fs;
p->thread.gs = me->thread.gs;
+ p->thread.io_bitmap_ptr = NULL;
savesegment(gs, p->thread.gsindex);
savesegment(fs, p->thread.fsindex);
savesegment(es, p->thread.es);
savesegment(ds, p->thread.ds);
+ err = -ENOMEM;
+ if (unlikely(test_tsk_thread_flag(me, TIF_DEBUG)))
+ if (copy_thread_hw_breakpoint(me, p, clone_flags))
+ goto out;
+
if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
if (!p->thread.io_bitmap_ptr) {
@@ -347,6 +357,9 @@ out:
kfree(p->thread.io_bitmap_ptr);
p->thread.io_bitmap_max = 0;
}
+ if (err)
+ flush_thread_hw_breakpoint(p);
+
return err;
}
@@ -492,6 +505,26 @@ __switch_to(struct task_struct *prev_p,
*/
if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
math_state_restore();
+ /*
+ * There's a problem with moving the switch_to_thread_hw_breakpoint()
+ * call before current is updated. Suppose a kernel breakpoint is
+ * triggered in between the two. The hw-breakpoint handler will see
+ * that current is different from the task pointer stored in
+ * last_debugged_task, so it will think the task pointer is leftover
+ * from an old task (lazy switching) and will erase it. Then until the
+ * next context switch, no user-breakpoints will be installed.
+ *
+ * The real problem is that it's impossible to update both current and
+ * last_debugged_task at the same instant, so there will always be a
+ * window in which they disagree and a breakpoint might get triggered.
+ * Since we use lazy switching, we are forced to assume that a
+ * disagreement means that current is correct and last_debugged_task is
+ * old. But if you move the code above then you'll create a window in
+ * which current is old and last_debugged_task is correct.
+ */
+ if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
+ switch_to_thread_hw_breakpoint(next_p);
+
return prev_p;
}
Index: arch/x86/include/asm/processor.h
===================================================================
--- arch/x86/include/asm/processor.h.orig
+++ arch/x86/include/asm/processor.h
@@ -432,10 +432,6 @@ struct thread_struct {
unsigned long fs;
unsigned long gs;
/* Hardware debugging registers: */
- unsigned long debugreg0;
- unsigned long debugreg1;
- unsigned long debugreg2;
- unsigned long debugreg3;
unsigned long debugreg[HB_NUM];
unsigned long debugreg6;
unsigned long debugreg7;
^ permalink raw reply [flat|nested] 40+ messages in thread
* [Patch 01/12] Prepare the code for Hardware Breakpoint interfaces
[not found] <20090511114422.133566343@prasadkr_t60p.in.ibm.com>
@ 2009-05-11 11:52 ` K.Prasad
2009-05-28 5:28 ` David Gibson
2009-05-11 11:52 ` [Patch 02/12] Introducing generic hardware breakpoint handler interfaces K.Prasad
` (10 subsequent siblings)
11 siblings, 1 reply; 40+ messages in thread
From: K.Prasad @ 2009-05-11 11:52 UTC (permalink / raw)
To: Alan Stern, Steven Rostedt, Frederic Weisbecker
Cc: Ingo Molnar, Linux Kernel Mailing List, Andrew Morton,
Benjamin Herrenschmidt, maneesh, Roland McGrath, Masami Hiramatsu
[-- Attachment #1: definitions_to_prepare_for_hwbkpt_01 --]
[-- Type: text/plain, Size: 11044 bytes --]
This patch introduces header files containing constants, structure definitions
and declaration of functions used by the hardware breakpoint interface code.
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
---
arch/x86/include/asm/debugreg.h | 31 +++++++
arch/x86/include/asm/hw_breakpoint.h | 66 ++++++++++++++++
arch/x86/include/asm/processor.h | 4 +
include/asm-generic/hw_breakpoint.h | 139 +++++++++++++++++++++++++++++++++++
4 files changed, 240 insertions(+)
Index: include/asm-generic/hw_breakpoint.h
===================================================================
--- /dev/null
+++ include/asm-generic/hw_breakpoint.h
@@ -0,0 +1,139 @@
+#ifndef _ASM_GENERIC_HW_BREAKPOINT_H
+#define _ASM_GENERIC_HW_BREAKPOINT_H
+
+#ifndef __ARCH_HW_BREAKPOINT_H
+#error "Please don't include this file directly"
+#endif
+
+#ifdef __KERNEL__
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/kallsyms.h>
+
+/**
+ * struct hw_breakpoint - unified kernel/user-space hardware breakpoint
+ * @triggered: callback invoked after target address access
+ * @info: arch-specific breakpoint info (address, length, and type)
+ *
+ * %hw_breakpoint structures are the kernel's way of representing
+ * hardware breakpoints. These are data breakpoints
+ * (also known as "watchpoints", triggered on data access), and the breakpoint's
+ * target address can be located in either kernel space or user space.
+ *
+ * The breakpoint's address, length, and type are highly
+ * architecture-specific. The values are encoded in the @info field; you
+ * specify them when registering the breakpoint. To examine the encoded
+ * values use hw_breakpoint_get_{kaddress,uaddress,len,type}(), declared
+ * below.
+ *
+ * The address is specified as a regular kernel pointer (for kernel-space
+ * breakponts) or as an %__user pointer (for user-space breakpoints).
+ * With register_user_hw_breakpoint(), the address must refer to a
+ * location in user space. The breakpoint will be active only while the
+ * requested task is running. Conversely with
+ * register_kernel_hw_breakpoint(), the address must refer to a location
+ * in kernel space, and the breakpoint will be active on all CPUs
+ * regardless of the current task.
+ *
+ * The length is the breakpoint's extent in bytes, which is subject to
+ * certain limitations. include/asm/hw_breakpoint.h contains macros
+ * defining the available lengths for a specific architecture. Note that
+ * the address's alignment must match the length. The breakpoint will
+ * catch accesses to any byte in the range from address to address +
+ * (length - 1).
+ *
+ * The breakpoint's type indicates the sort of access that will cause it
+ * to trigger. Possible values may include:
+ *
+ * %HW_BREAKPOINT_RW (triggered on read or write access),
+ * %HW_BREAKPOINT_WRITE (triggered on write access), and
+ * %HW_BREAKPOINT_READ (triggered on read access).
+ *
+ * Appropriate macros are defined in include/asm/hw_breakpoint.h; not all
+ * possibilities are available on all architectures. Execute breakpoints
+ * must have length equal to the special value %HW_BREAKPOINT_LEN_EXECUTE.
+ *
+ * When a breakpoint gets hit, the @triggered callback is
+ * invoked in_interrupt with a pointer to the %hw_breakpoint structure and the
+ * processor registers.
+ * Data breakpoints occur after the memory access has taken place.
+ * Breakpoints are disabled during execution @triggered, to avoid
+ * recursive traps and allow unhindered access to breakpointed memory.
+ *
+ * This sample code sets a breakpoint on pid_max and registers a callback
+ * function for writes to that variable. Note that it is not portable
+ * as written, because not all architectures support HW_BREAKPOINT_LEN_4.
+ *
+ * ----------------------------------------------------------------------
+ *
+ * #include <asm/hw_breakpoint.h>
+ *
+ * struct hw_breakpoint my_bp;
+ *
+ * static void my_triggered(struct hw_breakpoint *bp, struct pt_regs *regs)
+ * {
+ * printk(KERN_DEBUG "Inside triggered routine of breakpoint exception\n");
+ * dump_stack();
+ * .......<more debugging output>........
+ * }
+ *
+ * static struct hw_breakpoint my_bp;
+ *
+ * static int init_module(void)
+ * {
+ * ..........<do anything>............
+ * my_bp.info.type = HW_BREAKPOINT_WRITE;
+ * my_bp.info.len = HW_BREAKPOINT_LEN_4;
+ *
+ * my_bp.installed = (void *)my_bp_installed;
+ *
+ * rc = register_kernel_hw_breakpoint(&my_bp);
+ * ..........<do anything>............
+ * }
+ *
+ * static void cleanup_module(void)
+ * {
+ * ..........<do anything>............
+ * unregister_kernel_hw_breakpoint(&my_bp);
+ * ..........<do anything>............
+ * }
+ *
+ * ----------------------------------------------------------------------
+ */
+struct hw_breakpoint {
+ void (*triggered)(struct hw_breakpoint *, struct pt_regs *);
+ struct arch_hw_breakpoint info;
+};
+
+/*
+ * len and type values are defined in include/asm/hw_breakpoint.h.
+ * Available values vary according to the architecture. On i386 the
+ * possibilities are:
+ *
+ * HW_BREAKPOINT_LEN_1
+ * HW_BREAKPOINT_LEN_2
+ * HW_BREAKPOINT_LEN_4
+ * HW_BREAKPOINT_RW
+ * HW_BREAKPOINT_READ
+ *
+ * On other architectures HW_BREAKPOINT_LEN_8 may be available, and the
+ * 1-, 2-, and 4-byte lengths may be unavailable. There also may be
+ * HW_BREAKPOINT_WRITE. You can use #ifdef to check at compile time.
+ */
+
+int register_user_hw_breakpoint(struct task_struct *tsk,
+ struct hw_breakpoint *bp);
+void unregister_user_hw_breakpoint(struct task_struct *tsk,
+ struct hw_breakpoint *bp);
+/*
+ * Kernel breakpoints are not associated with any particular thread.
+ */
+int register_kernel_hw_breakpoint(struct hw_breakpoint *bp);
+void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp);
+void switch_to_none_hw_breakpoint(void);
+
+extern unsigned int hbp_kernel_pos;
+extern spinlock_t hw_breakpoint_lock;
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_GENERIC_HW_BREAKPOINT_H */
Index: arch/x86/include/asm/hw_breakpoint.h
===================================================================
--- /dev/null
+++ arch/x86/include/asm/hw_breakpoint.h
@@ -0,0 +1,66 @@
+#ifndef _I386_HW_BREAKPOINT_H
+#define _I386_HW_BREAKPOINT_H
+
+#ifdef __KERNEL__
+#define __ARCH_HW_BREAKPOINT_H
+
+struct arch_hw_breakpoint {
+ char *name; /* Contains name of the symbol to set bkpt */
+ unsigned long address;
+ u8 len;
+ u8 type;
+};
+
+#include <linux/kdebug.h>
+#include <asm-generic/hw_breakpoint.h>
+
+/* Available HW breakpoint length encodings */
+#define HW_BREAKPOINT_LEN_1 0x40
+#define HW_BREAKPOINT_LEN_2 0x44
+#define HW_BREAKPOINT_LEN_4 0x4c
+#define HW_BREAKPOINT_LEN_EXECUTE 0x40
+
+#ifdef CONFIG_X86_64
+#define HW_BREAKPOINT_LEN_8 0x48
+#endif
+
+/* Available HW breakpoint type encodings */
+
+/* trigger on instruction execute */
+#define HW_BREAKPOINT_EXECUTE 0x80
+/* trigger on memory write */
+#define HW_BREAKPOINT_WRITE 0x81
+/* trigger on memory read or write */
+#define HW_BREAKPOINT_RW 0x83
+
+/* Total number of available HW breakpoint registers */
+#define HB_NUM 4
+
+extern struct hw_breakpoint *hbp_kernel[HB_NUM];
+DECLARE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HB_NUM]);
+extern unsigned int hbp_user_refcount[HB_NUM];
+
+/*
+ * Ptrace support: breakpoint trigger routine.
+ */
+int __register_user_hw_breakpoint(int pos, struct task_struct *tsk,
+ struct hw_breakpoint *bp);
+int __modify_user_hw_breakpoint(int pos, struct task_struct *tsk,
+ struct hw_breakpoint *bp);
+void __unregister_user_hw_breakpoint(int pos, struct task_struct *tsk);
+
+void arch_install_thread_hw_breakpoint(struct task_struct *tsk);
+void arch_uninstall_thread_hw_breakpoint(void);
+int arch_check_va_in_userspace(unsigned long va, u8 hbp_len);
+int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len);
+int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp,
+ struct task_struct *tsk);
+void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk);
+void arch_flush_thread_hw_breakpoint(struct task_struct *tsk);
+void arch_update_kernel_hw_breakpoint(void *);
+int hw_breakpoint_handler(struct die_args *args);
+int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
+ unsigned long val, void *data);
+#endif /* __KERNEL__ */
+#endif /* _I386_HW_BREAKPOINT_H */
+
Index: arch/x86/include/asm/debugreg.h
===================================================================
--- arch/x86/include/asm/debugreg.h.orig
+++ arch/x86/include/asm/debugreg.h
@@ -18,6 +18,7 @@
#define DR_TRAP1 (0x2) /* db1 */
#define DR_TRAP2 (0x4) /* db2 */
#define DR_TRAP3 (0x8) /* db3 */
+#define DR_TRAP_BITS (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)
#define DR_STEP (0x4000) /* single-step */
#define DR_SWITCH (0x8000) /* task switch */
@@ -49,6 +50,8 @@
#define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */
#define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */
+#define DR_LOCAL_ENABLE (0x1) /* Local enable for reg 0 */
+#define DR_GLOBAL_ENABLE (0x2) /* Global enable for reg 0 */
#define DR_ENABLE_SIZE 2 /* 2 enable bits per register */
#define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */
@@ -67,4 +70,32 @@
#define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */
#define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */
+/*
+ * HW breakpoint additions
+ */
+#ifdef __KERNEL__
+
+/* For process management */
+void flush_thread_hw_breakpoint(struct task_struct *tsk);
+int copy_thread_hw_breakpoint(struct task_struct *tsk,
+ struct task_struct *child, unsigned long clone_flags);
+void dump_thread_hw_breakpoint(struct task_struct *tsk, int u_debugreg[8]);
+void switch_to_thread_hw_breakpoint(struct task_struct *tsk);
+
+/* For CPU management */
+void load_debug_registers(void);
+static inline void hw_breakpoint_disable(void)
+{
+ /* Zero the control register for HW Breakpoint */
+ set_debugreg(0UL, 7);
+
+ /* Zero-out the individual HW breakpoint address registers */
+ set_debugreg(0UL, 0);
+ set_debugreg(0UL, 1);
+ set_debugreg(0UL, 2);
+ set_debugreg(0UL, 3);
+}
+
+#endif /* __KERNEL__ */
+
#endif /* _ASM_X86_DEBUGREG_H */
Index: arch/x86/include/asm/processor.h
===================================================================
--- arch/x86/include/asm/processor.h.orig
+++ arch/x86/include/asm/processor.h
@@ -29,6 +29,7 @@ struct mm_struct;
#include <linux/threads.h>
#include <linux/init.h>
+#define HB_NUM 4
/*
* Default implementation of macro that returns current
* instruction pointer ("program counter").
@@ -435,8 +436,11 @@ struct thread_struct {
unsigned long debugreg1;
unsigned long debugreg2;
unsigned long debugreg3;
+ unsigned long debugreg[HB_NUM];
unsigned long debugreg6;
unsigned long debugreg7;
+ /* Hardware breakpoint info */
+ struct hw_breakpoint *hbp[HB_NUM];
/* Fault info: */
unsigned long cr2;
unsigned long trap_no;
^ permalink raw reply [flat|nested] 40+ messages in thread
* [Patch 02/12] Introducing generic hardware breakpoint handler interfaces
[not found] <20090511114422.133566343@prasadkr_t60p.in.ibm.com>
2009-05-11 11:52 ` [Patch 01/12] Prepare the code for Hardware Breakpoint interfaces K.Prasad
@ 2009-05-11 11:52 ` K.Prasad
2009-05-11 12:12 ` Bharata B Rao
2009-05-28 6:15 ` David Gibson
2009-05-11 11:53 ` [Patch 03/12] x86 architecture implementation of Hardware Breakpoint interfaces K.Prasad
` (9 subsequent siblings)
11 siblings, 2 replies; 40+ messages in thread
From: K.Prasad @ 2009-05-11 11:52 UTC (permalink / raw)
To: Alan Stern, Steven Rostedt, Frederic Weisbecker
Cc: Ingo Molnar, Linux Kernel Mailing List, Andrew Morton,
Benjamin Herrenschmidt, maneesh, Roland McGrath, Masami Hiramatsu
[-- Attachment #1: generic_hwbkpt_02 --]
[-- Type: text/plain, Size: 11479 bytes --]
This patch introduces the generic Hardware Breakpoint interfaces for both user
and kernel space requests.
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
---
arch/Kconfig | 4
kernel/Makefile | 1
kernel/hw_breakpoint.c | 367 +++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 372 insertions(+)
Index: arch/Kconfig
===================================================================
--- arch/Kconfig.orig
+++ arch/Kconfig
@@ -112,3 +112,7 @@ config HAVE_DMA_API_DEBUG
config HAVE_DEFAULT_NO_SPIN_MUTEXES
bool
+
+config HAVE_HW_BREAKPOINT
+ bool
+
Index: kernel/Makefile
===================================================================
--- kernel/Makefile.orig
+++ kernel/Makefile
@@ -97,6 +97,7 @@ obj-$(CONFIG_X86_DS) += trace/
obj-$(CONFIG_SMP) += sched_cpupri.o
obj-$(CONFIG_SLOW_WORK) += slow-work.o
obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o
+obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
Index: kernel/hw_breakpoint.c
===================================================================
--- /dev/null
+++ kernel/hw_breakpoint.c
@@ -0,0 +1,367 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) 2007 Alan Stern
+ * Copyright (C) IBM Corporation, 2009
+ */
+
+/*
+ * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
+ * using the CPU's debug registers.
+ *
+ * This file contains the arch-independent routines. It is not meant
+ * to be compiled as a standalone source file; rather it should be
+ * #include'd by the arch-specific implementation.
+ */
+
+#include <linux/irqflags.h>
+#include <linux/kallsyms.h>
+#include <linux/notifier.h>
+#include <linux/kprobes.h>
+#include <linux/kdebug.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+
+#include <asm/hw_breakpoint.h>
+#include <asm/processor.h>
+
+#ifdef CONFIG_X86
+#include <asm/debugreg.h>
+#endif
+/*
+ * Spinlock that protects all (un)register operations over kernel/user-space
+ * breakpoint requests
+ */
+DEFINE_SPINLOCK(hw_breakpoint_lock);
+
+/* Array of kernel-space breakpoint structures */
+struct hw_breakpoint *hbp_kernel[HB_NUM];
+
+/*
+ * Per-processor copy of hbp_kernel[]. Used only when hbp_kernel is being
+ * modified but we need the older copy to handle any hbp exceptions. It will
+ * sync with hbp_kernel[] value after updation is done through IPIs.
+ */
+DEFINE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HB_NUM]);
+
+/*
+ * Kernel breakpoints grow downwards, starting from HB_NUM
+ * 'hbp_kernel_pos' denotes lowest numbered breakpoint register occupied for
+ * kernel-space request. We will initialise it here and not in an __init
+ * routine because load_debug_registers(), which uses this variable can be
+ * called very early during CPU initialisation.
+ */
+unsigned int hbp_kernel_pos = HB_NUM;
+
+/*
+ * An array containing refcount of threads using a given bkpt register
+ * Accesses are synchronised by acquiring hw_breakpoint_lock
+ */
+unsigned int hbp_user_refcount[HB_NUM];
+
+/*
+ * Install the debug register values for a new thread.
+ */
+void switch_to_thread_hw_breakpoint(struct task_struct *tsk)
+{
+ /* Set the debug register */
+ arch_install_thread_hw_breakpoint(tsk);
+}
+
+/*
+ * Install the debug register values for just the kernel, no thread.
+ */
+void switch_to_none_hw_breakpoint(void)
+{
+ arch_uninstall_thread_hw_breakpoint();
+}
+
+/*
+ * Load the debug registers during startup of a CPU.
+ */
+void load_debug_registers(void)
+{
+ unsigned long flags;
+ struct task_struct *tsk = current;
+
+ spin_lock_bh(&hw_breakpoint_lock);
+
+ /* Prevent IPIs for new kernel breakpoint updates */
+ local_irq_save(flags);
+ arch_update_kernel_hw_breakpoint(NULL);
+ local_irq_restore(flags);
+
+ if (test_tsk_thread_flag(tsk, TIF_DEBUG))
+ switch_to_thread_hw_breakpoint(tsk);
+
+ spin_unlock_bh(&hw_breakpoint_lock);
+}
+
+/*
+ * Erase all the hardware breakpoint info associated with a thread.
+ *
+ * If tsk != current then tsk must not be usable (for example, a
+ * child being cleaned up from a failed fork).
+ */
+void flush_thread_hw_breakpoint(struct task_struct *tsk)
+{
+ int i;
+ struct thread_struct *thread = &(tsk->thread);
+
+ spin_lock_bh(&hw_breakpoint_lock);
+
+ /* The thread no longer has any breakpoints associated with it */
+ clear_tsk_thread_flag(tsk, TIF_DEBUG);
+ for (i = 0; i < HB_NUM; i++) {
+ if (thread->hbp[i]) {
+ hbp_user_refcount[i]--;
+ kfree(thread->hbp[i]);
+ thread->hbp[i] = NULL;
+ }
+ }
+
+ arch_flush_thread_hw_breakpoint(tsk);
+
+ /* Actually uninstall the breakpoints if necessary */
+ if (tsk == current)
+ switch_to_none_hw_breakpoint();
+ spin_unlock_bh(&hw_breakpoint_lock);
+}
+
+/*
+ * Copy the hardware breakpoint info from a thread to its cloned child.
+ */
+int copy_thread_hw_breakpoint(struct task_struct *tsk,
+ struct task_struct *child, unsigned long clone_flags)
+{
+ /*
+ * We will assume that breakpoint settings are not inherited
+ * and the child starts out with no debug registers set.
+ * But what about CLONE_PTRACE?
+ */
+ clear_tsk_thread_flag(child, TIF_DEBUG);
+
+ /* We will call flush routine since the debugregs are not inherited */
+ arch_flush_thread_hw_breakpoint(child);
+
+ return 0;
+}
+
+int __register_user_hw_breakpoint(int pos, struct task_struct *tsk,
+ struct hw_breakpoint *bp)
+{
+ struct thread_struct *thread = &(tsk->thread);
+ int rc;
+
+ /* Do not overcommit. Fail if kernel has used the hbp registers */
+ if (pos >= hbp_kernel_pos)
+ return -ENOSPC;
+
+ rc = arch_validate_hwbkpt_settings(bp, tsk);
+ if (rc)
+ return rc;
+
+ thread->hbp[pos] = bp;
+ hbp_user_refcount[pos]++;
+
+ arch_update_user_hw_breakpoint(pos, tsk);
+ /*
+ * Does it need to be installed right now?
+ * Otherwise it will get installed the next time tsk runs
+ */
+ if (tsk == current)
+ switch_to_thread_hw_breakpoint(tsk);
+
+ return rc;
+}
+
+/*
+ * Modify the address of a hbp register already in use by the task
+ * Do not invoke this in-lieu of a __unregister_user_hw_breakpoint()
+ */
+int __modify_user_hw_breakpoint(int pos, struct task_struct *tsk,
+ struct hw_breakpoint *bp)
+{
+ struct thread_struct *thread = &(tsk->thread);
+
+ if ((pos >= hbp_kernel_pos) || (arch_validate_hwbkpt_settings(bp, tsk)))
+ return -EINVAL;
+
+ if (thread->hbp[pos] == NULL)
+ return -EINVAL;
+
+ thread->hbp[pos] = bp;
+ /*
+ * 'pos' must be that of a hbp register already used by 'tsk'
+ * Otherwise arch_modify_user_hw_breakpoint() will fail
+ */
+ arch_update_user_hw_breakpoint(pos, tsk);
+
+ if (tsk == current)
+ switch_to_thread_hw_breakpoint(tsk);
+
+ return 0;
+}
+
+void __unregister_user_hw_breakpoint(int pos, struct task_struct *tsk)
+{
+ hbp_user_refcount[pos]--;
+ tsk->thread.hbp[pos] = NULL;
+
+ arch_update_user_hw_breakpoint(pos, tsk);
+
+ if (tsk == current)
+ switch_to_thread_hw_breakpoint(tsk);
+}
+
+/**
+ * register_user_hw_breakpoint - register a hardware breakpoint for user space
+ * @tsk: pointer to 'task_struct' of the process to which the address belongs
+ * @bp: the breakpoint structure to register
+ *
+ * @bp.info->name or @bp.info->address, @bp.info->len, @bp.info->type and
+ * @bp->triggered must be set properly before invocation
+ *
+ */
+int register_user_hw_breakpoint(struct task_struct *tsk,
+ struct hw_breakpoint *bp)
+{
+ struct thread_struct *thread = &(tsk->thread);
+ int i, rc = -ENOSPC;
+
+ spin_lock_bh(&hw_breakpoint_lock);
+
+ for (i = 0; i < hbp_kernel_pos; i++) {
+ if (!thread->hbp[i]) {
+ rc = __register_user_hw_breakpoint(i, tsk, bp);
+ break;
+ }
+ }
+
+ spin_unlock_bh(&hw_breakpoint_lock);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
+
+/**
+ * unregister_user_hw_breakpoint - unregister a user-space hardware breakpoint
+ * @tsk: pointer to 'task_struct' of the process to which the address belongs
+ * @bp: the breakpoint structure to unregister
+ *
+ */
+void unregister_user_hw_breakpoint(struct task_struct *tsk,
+ struct hw_breakpoint *bp)
+{
+ struct thread_struct *thread = &(tsk->thread);
+ int i;
+
+ spin_lock_bh(&hw_breakpoint_lock);
+ for (i = 0; i < hbp_kernel_pos; i++) {
+ if (bp == thread->hbp[i]) {
+ __unregister_user_hw_breakpoint(i, tsk);
+ break;
+ }
+ }
+ spin_unlock_bh(&hw_breakpoint_lock);
+}
+EXPORT_SYMBOL_GPL(unregister_user_hw_breakpoint);
+
+/**
+ * register_kernel_hw_breakpoint - register a hardware breakpoint for kernel space
+ * @bp: the breakpoint structure to register
+ *
+ * @bp.info->name or @bp.info->address, @bp.info->len, @bp.info->type and
+ * @bp->triggered must be set properly before invocation
+ *
+ */
+int register_kernel_hw_breakpoint(struct hw_breakpoint *bp)
+{
+ int rc;
+
+ rc = arch_validate_hwbkpt_settings(bp, NULL);
+ if (rc)
+ return rc;
+
+ spin_lock_bh(&hw_breakpoint_lock);
+
+ rc = -EINVAL;
+ /* Check if we are over-committing */
+ if ((hbp_kernel_pos > 0) && (!hbp_user_refcount[hbp_kernel_pos-1])) {
+ hbp_kernel_pos--;
+ hbp_kernel[hbp_kernel_pos] = bp;
+ on_each_cpu(arch_update_kernel_hw_breakpoint, NULL, 1);
+ rc = 0;
+ }
+
+ spin_unlock_bh(&hw_breakpoint_lock);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(register_kernel_hw_breakpoint);
+
+/**
+ * unregister_kernel_hw_breakpoint - unregister a HW breakpoint for kernel space
+ * @bp: the breakpoint structure to unregister
+ *
+ * Uninstalls and unregisters @bp.
+ */
+void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp)
+{
+ int i, j;
+
+ spin_lock_bh(&hw_breakpoint_lock);
+
+ /* Find the 'bp' in our list of breakpoints for kernel */
+ for (i = hbp_kernel_pos; i < HB_NUM; i++)
+ if (bp == hbp_kernel[i])
+ break;
+
+ /* Check if we did not find a match for 'bp'. If so return early */
+ if (i == HB_NUM) {
+ spin_unlock_bh(&hw_breakpoint_lock);
+ return;
+ }
+
+ /*
+ * We'll shift the breakpoints one-level above to compact if
+ * unregistration creates a hole
+ */
+ for (j = i; j > hbp_kernel_pos; j--)
+ hbp_kernel[j] = hbp_kernel[j-1];
+
+ hbp_kernel[hbp_kernel_pos] = NULL;
+ on_each_cpu(arch_update_kernel_hw_breakpoint, NULL, 1);
+ hbp_kernel_pos++;
+
+ spin_unlock_bh(&hw_breakpoint_lock);
+}
+EXPORT_SYMBOL_GPL(unregister_kernel_hw_breakpoint);
+
+static struct notifier_block hw_breakpoint_exceptions_nb = {
+ .notifier_call = hw_breakpoint_exceptions_notify,
+ /* we need to be notified first */
+ .priority = 0x7fffffff
+};
+
+static int __init init_hw_breakpoint(void)
+{
+ return register_die_notifier(&hw_breakpoint_exceptions_nb);
+}
+
+core_initcall(init_hw_breakpoint);
^ permalink raw reply [flat|nested] 40+ messages in thread
* [Patch 03/12] x86 architecture implementation of Hardware Breakpoint interfaces
[not found] <20090511114422.133566343@prasadkr_t60p.in.ibm.com>
2009-05-11 11:52 ` [Patch 01/12] Prepare the code for Hardware Breakpoint interfaces K.Prasad
2009-05-11 11:52 ` [Patch 02/12] Introducing generic hardware breakpoint handler interfaces K.Prasad
@ 2009-05-11 11:53 ` K.Prasad
2009-05-28 6:35 ` David Gibson
2009-05-11 11:53 ` [Patch 04/12] Modifying generic debug exception to use thread-specific debug registers K.Prasad
` (8 subsequent siblings)
11 siblings, 1 reply; 40+ messages in thread
From: K.Prasad @ 2009-05-11 11:53 UTC (permalink / raw)
To: Alan Stern, Steven Rostedt, Frederic Weisbecker
Cc: Ingo Molnar, Linux Kernel Mailing List, Andrew Morton,
Benjamin Herrenschmidt, maneesh, Roland McGrath, Masami Hiramatsu
[-- Attachment #1: arch_x86_hwbkpt_03 --]
[-- Type: text/plain, Size: 10412 bytes --]
This patch introduces the arch-specific implementation of hw_breakpoint.c
inside x86 specific directories. They contain functions which help validate and
serve requests for using Hardware Breakpoint registers on x86 processors.
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
---
arch/x86/Kconfig | 1
arch/x86/kernel/Makefile | 2
arch/x86/kernel/hw_breakpoint.c | 358 ++++++++++++++++++++++++++++++++++++++++
3 files changed, 360 insertions(+), 1 deletion(-)
Index: arch/x86/Kconfig
===================================================================
--- arch/x86/Kconfig.orig
+++ arch/x86/Kconfig
@@ -47,6 +47,7 @@ config X86
select HAVE_KERNEL_BZIP2
select HAVE_KERNEL_LZMA
select HAVE_ARCH_KMEMCHECK
+ select HAVE_HW_BREAKPOINT
config OUTPUT_FORMAT
string
Index: arch/x86/kernel/Makefile
===================================================================
--- arch/x86/kernel/Makefile.orig
+++ arch/x86/kernel/Makefile
@@ -36,7 +36,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x86
obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o
obj-y += bootflag.o e820.o
obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
-obj-y += alternative.o i8253.o pci-nommu.o
+obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
obj-y += tsc.o io_delay.o rtc.o
obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
Index: arch/x86/kernel/hw_breakpoint.c
===================================================================
--- /dev/null
+++ arch/x86/kernel/hw_breakpoint.c
@@ -0,0 +1,358 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) 2007 Alan Stern
+ * Copyright (C) 2009 IBM Corporation
+ */
+
+/*
+ * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
+ * using the CPU's debug registers.
+ */
+
+#include <linux/irqflags.h>
+#include <linux/notifier.h>
+#include <linux/kallsyms.h>
+#include <linux/kprobes.h>
+#include <linux/percpu.h>
+#include <linux/kdebug.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+
+#include <asm/hw_breakpoint.h>
+#include <asm/processor.h>
+#include <asm/debugreg.h>
+
+/* Unmasked kernel DR7 value */
+static unsigned long kdr7;
+
+/*
+ * Masks for the bits corresponding to registers DR0 - DR3 in DR7 register.
+ * Used to clear and verify the status of bits corresponding to DR0 - DR3
+ */
+static const unsigned long dr7_masks[HB_NUM] = {
+ 0x000f0003, /* LEN0, R/W0, G0, L0 */
+ 0x00f0000c, /* LEN1, R/W1, G1, L1 */
+ 0x0f000030, /* LEN2, R/W2, G2, L2 */
+ 0xf00000c0 /* LEN3, R/W3, G3, L3 */
+};
+
+
+/*
+ * Encode the length, type, Exact, and Enable bits for a particular breakpoint
+ * as stored in debug register 7.
+ */
+static unsigned long encode_dr7(int drnum, unsigned len, unsigned type)
+{
+ unsigned long temp;
+
+ temp = (len | type) & 0xf;
+ temp <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE);
+ temp |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE)) |
+ DR_GLOBAL_SLOWDOWN;
+ return temp;
+}
+
+void arch_update_kernel_hw_breakpoints(void *unused)
+{
+ struct hw_breakpoint *bp;
+ int i, cpu = get_cpu();
+
+ /* Don't allow debug exceptions while we update the registers */
+ set_debugreg(0UL, 7);
+
+ /* Clear all kernel-space breakpoints in kdr7 */
+ kdr7 = 0;
+ for (i = hbp_kernel_pos; i < HB_NUM; i++) {
+ per_cpu(this_hbp_kernel[i], cpu) = bp = hbp_kernel[i];
+ if (bp) {
+ kdr7 |= encode_dr7(i, bp->info.len, bp->info.type);
+ set_debugreg(hbp_kernel[i]->info.address, i);
+ }
+ }
+
+ /* No need to set DR6. Update the debug registers with kernel-space
+ * breakpoint values from kdr7 and user-space requests from the
+ * current process
+ */
+ set_debugreg(kdr7 | current->thread.debugreg7, 7);
+ put_cpu_no_resched();
+}
+
+/*
+ * Install the thread breakpoints in their debug registers.
+ */
+void arch_install_thread_hw_breakpoint(struct task_struct *tsk)
+{
+ struct thread_struct *thread = &(tsk->thread);
+
+ switch (hbp_kernel_pos) {
+ case 4:
+ set_debugreg(thread->debugreg[3], 3);
+ case 3:
+ set_debugreg(thread->debugreg[2], 2);
+ case 2:
+ set_debugreg(thread->debugreg[1], 1);
+ case 1:
+ set_debugreg(thread->debugreg[0], 0);
+ default:
+ break;
+ }
+
+ /* No need to set DR6 */
+ set_debugreg((kdr7 | thread->debugreg7), 7);
+}
+
+/*
+ * Install the debug register values for just the kernel, no thread.
+ */
+void arch_uninstall_thread_hw_breakpoint()
+{
+ /* Clear the user-space portion of debugreg7 by setting only kdr7 */
+ set_debugreg(kdr7, 7);
+
+}
+
+static int get_hbp_len(u8 hbp_len)
+{
+ unsigned int len_in_bytes = 0;
+
+ switch (hbp_len) {
+ case HW_BREAKPOINT_LEN_1:
+ len_in_bytes = 1;
+ break;
+ case HW_BREAKPOINT_LEN_2:
+ len_in_bytes = 2;
+ break;
+ case HW_BREAKPOINT_LEN_4:
+ len_in_bytes = 4;
+ break;
+#ifdef CONFIG_X86_64
+ case HW_BREAKPOINT_LEN_8:
+ len_in_bytes = 8;
+ break;
+#endif
+ }
+ return len_in_bytes;
+}
+
+/*
+ * Check for virtual address in user space.
+ */
+int arch_check_va_in_userspace(unsigned long va, u8 hbp_len)
+{
+ unsigned int len;
+
+ len = get_hbp_len(hbp_len);
+
+ return (va <= TASK_SIZE - len);
+}
+
+/*
+ * Check for virtual address in kernel space.
+ */
+int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len)
+{
+ unsigned int len;
+
+ len = get_hbp_len(hbp_len);
+
+ return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
+}
+
+/*
+ * Store a breakpoint's encoded address, length, and type.
+ */
+static int arch_store_info(struct hw_breakpoint *bp)
+{
+ /*
+ * User-space requests will always have the address field populated
+ * For kernel-addresses, either the address or symbol name can be
+ * specified.
+ */
+ if (bp->info.name)
+ bp->info.address = (unsigned long)
+ kallsyms_lookup_name(bp->info.name);
+ if (bp->info.address)
+ return 0;
+ return -EINVAL;
+}
+
+/*
+ * Validate the arch-specific HW Breakpoint register settings
+ */
+int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp,
+ struct task_struct *tsk)
+{
+ unsigned int align;
+ int ret = -EINVAL;
+
+ switch (bp->info.type) {
+ /*
+ * Ptrace-refactoring code
+ * For now, we'll allow instruction breakpoint only for user-space
+ * addresses
+ */
+ case HW_BREAKPOINT_EXECUTE:
+ if ((!arch_check_va_in_userspace(bp->info.address,
+ bp->info.len)) &&
+ bp->info.len != HW_BREAKPOINT_LEN_EXECUTE)
+ return ret;
+ break;
+ case HW_BREAKPOINT_WRITE:
+ break;
+ case HW_BREAKPOINT_RW:
+ break;
+ default:
+ return ret;
+ }
+
+ switch (bp->info.len) {
+ case HW_BREAKPOINT_LEN_1:
+ align = 0;
+ break;
+ case HW_BREAKPOINT_LEN_2:
+ align = 1;
+ break;
+ case HW_BREAKPOINT_LEN_4:
+ align = 3;
+ break;
+#ifdef CONFIG_X86_64
+ case HW_BREAKPOINT_LEN_8:
+ align = 7;
+ break;
+#endif
+ default:
+ return ret;
+ }
+
+ if (bp->triggered)
+ ret = arch_store_info(bp);
+
+ if (ret < 0)
+ return ret;
+ /*
+ * Check that the low-order bits of the address are appropriate
+ * for the alignment implied by len.
+ */
+ if (bp->info.address & align)
+ return -EINVAL;
+
+ /* Check that the virtual address is in the proper range */
+ if (tsk) {
+ if (!arch_check_va_in_userspace(bp->info.address, bp->info.len))
+ return -EFAULT;
+ } else {
+ if (!arch_check_va_in_kernelspace(bp->info.address,
+ bp->info.len))
+ return -EFAULT;
+ }
+ return 0;
+}
+
+void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk)
+{
+ struct thread_struct *thread = &(tsk->thread);
+ struct hw_breakpoint *bp = thread->hbp[pos];
+
+ thread->debugreg7 &= ~dr7_masks[pos];
+ if (bp) {
+ thread->debugreg[pos] = bp->info.address;
+ thread->debugreg7 |= encode_dr7(pos, bp->info.len,
+ bp->info.type);
+ } else
+ thread->debugreg[pos] = 0;
+}
+
+void arch_flush_thread_hw_breakpoint(struct task_struct *tsk)
+{
+ int i;
+ struct thread_struct *thread = &(tsk->thread);
+
+ thread->debugreg7 = 0;
+ for (i = 0; i < HB_NUM; i++)
+ thread->debugreg[i] = 0;
+}
+
+/*
+ * Handle debug exception notifications.
+ */
+int __kprobes hw_breakpoint_handler(struct die_args *args)
+{
+ int i, cpu, rc = NOTIFY_STOP;
+ struct hw_breakpoint *bp;
+ /* The DR6 value is stored in args->err */
+ unsigned long dr7, dr6 = args->err;
+
+ /* Do an early return if no trap bits are set in DR6 */
+ if ((dr6 & DR_TRAP_BITS) == 0)
+ return NOTIFY_DONE;
+
+ /* Lazy debug register switching */
+ if (!test_tsk_thread_flag(current, TIF_DEBUG))
+ switch_to_none_hw_breakpoint();
+
+ get_debugreg(dr7, 7);
+ /* Disable breakpoints during exception handling */
+ set_debugreg(0UL, 7);
+ /*
+ * Assert that local interrupts are disabled
+ * Reset the DRn bits in the virtualized register value.
+ * The ptrace trigger routine will add in whatever is needed.
+ */
+ current->thread.debugreg6 &= ~DR_TRAP_BITS;
+ cpu = get_cpu();
+
+ /* Handle all the breakpoints that were triggered */
+ for (i = 0; i < HB_NUM; ++i) {
+ if (likely(!(dr6 & (DR_TRAP0 << i))))
+ continue;
+ /*
+ * Find the corresponding hw_breakpoint structure and
+ * invoke its triggered callback.
+ */
+ if (i >= hbp_kernel_pos)
+ bp = per_cpu(this_hbp_kernel[i], cpu);
+ else {
+ bp = current->thread.hbp[i];
+ /* False alarm due to lazy DR switching */
+ if (!bp)
+ continue;
+ rc = NOTIFY_DONE;
+ }
+ (bp->triggered)(bp, args->regs);
+ }
+ if (dr6 & (~DR_TRAP_BITS))
+ rc = NOTIFY_DONE;
+
+ set_debugreg(dr7, 7);
+ put_cpu_no_resched();
+ return rc;
+}
+
+/*
+ * Handle debug exception notifications.
+ */
+int __kprobes hw_breakpoint_exceptions_notify(
+ struct notifier_block *unused, unsigned long val, void *data)
+{
+ if (val != DIE_DEBUG)
+ return NOTIFY_DONE;
+
+ return hw_breakpoint_handler(data);
+}
^ permalink raw reply [flat|nested] 40+ messages in thread
* [Patch 04/12] Modifying generic debug exception to use thread-specific debug registers
[not found] <20090511114422.133566343@prasadkr_t60p.in.ibm.com>
` (2 preceding siblings ...)
2009-05-11 11:53 ` [Patch 03/12] x86 architecture implementation of Hardware Breakpoint interfaces K.Prasad
@ 2009-05-11 11:53 ` K.Prasad
2009-05-11 11:53 ` [Patch 05/12] Use wrapper routines around debug registers in processor related functions K.Prasad
` (7 subsequent siblings)
11 siblings, 0 replies; 40+ messages in thread
From: K.Prasad @ 2009-05-11 11:53 UTC (permalink / raw)
To: Alan Stern, Steven Rostedt, Frederic Weisbecker
Cc: Ingo Molnar, Linux Kernel Mailing List, Andrew Morton,
Benjamin Herrenschmidt, maneesh, Roland McGrath, Masami Hiramatsu
[-- Attachment #1: modify_debug_exception_handler_04 --]
[-- Type: text/plain, Size: 3660 bytes --]
From: Alan Stern <stern@rowland.harvard.edu>
This patch modifies the breakpoint exception handler code to use the abstract
register names.
[K.Prasad: Split-out from the bigger patch and minor changes following
re-basing]
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
---
arch/x86/kernel/traps.c | 76 +++++++++++++++++-------------------------------
1 file changed, 27 insertions(+), 49 deletions(-)
Index: arch/x86/kernel/traps.c
===================================================================
--- arch/x86/kernel/traps.c.orig
+++ arch/x86/kernel/traps.c
@@ -530,76 +530,54 @@ asmlinkage __kprobes struct pt_regs *syn
dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
{
struct task_struct *tsk = current;
- unsigned long condition;
+ unsigned long dr6;
int si_code;
- get_debugreg(condition, 6);
+ get_debugreg(dr6, 6);
/* Catch kmemcheck conditions first of all! */
- if (condition & DR_STEP && kmemcheck_trap(regs))
+ if (((dr6 & DR_STEP) && kmemcheck_trap(regs)) &&
+ !(dr6 & DR_TRAP_BITS))
return;
-
+ /* DR6 may or may not be cleared by the CPU */
+ set_debugreg(0, 6);
/*
* The processor cleared BTF, so don't mark that we need it set.
*/
clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
tsk->thread.debugctlmsr = 0;
- if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
+ /* Store the virtualized DR6 value */
+ tsk->thread.debugreg6 = dr6;
+
+ if (notify_die(DIE_DEBUG, "debug", regs, dr6, error_code,
SIGTRAP) == NOTIFY_STOP)
return;
/* It's safe to allow irq's after DR6 has been saved */
preempt_conditional_sti(regs);
- /* Mask out spurious debug traps due to lazy DR7 setting */
- if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
- if (!tsk->thread.debugreg7)
- goto clear_dr7;
- }
-
-#ifdef CONFIG_X86_32
- if (regs->flags & X86_VM_MASK)
- goto debug_vm86;
-#endif
-
- /* Save debug status register where ptrace can see it */
- tsk->thread.debugreg6 = condition;
-
- /*
- * Single-stepping through TF: make sure we ignore any events in
- * kernel space (but re-enable TF when returning to user mode).
- */
- if (condition & DR_STEP) {
- if (!user_mode(regs))
- goto clear_TF_reenable;
+ if (regs->flags & X86_VM_MASK) {
+ handle_vm86_trap((struct kernel_vm86_regs *) regs,
+ error_code, 1);
+ return;
}
- si_code = get_si_code(condition);
- /* Ok, finally something we can handle */
- send_sigtrap(tsk, regs, error_code, si_code);
-
/*
- * Disable additional traps. They'll be re-enabled when
- * the signal is delivered.
+ * Single-stepping through system calls: ignore any exceptions in
+ * kernel space, but re-enable TF when returning to user mode.
+ *
+ * We already checked v86 mode above, so we can check for kernel mode
+ * by just checking the CPL of CS.
*/
-clear_dr7:
- set_debugreg(0, 7);
- preempt_conditional_cli(regs);
- return;
-
-#ifdef CONFIG_X86_32
-debug_vm86:
- /* reenable preemption: handle_vm86_trap() might sleep */
- dec_preempt_count();
- handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
- conditional_cli(regs);
- return;
-#endif
-
-clear_TF_reenable:
- set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
- regs->flags &= ~X86_EFLAGS_TF;
+ if ((dr6 & DR_STEP) && !user_mode(regs)) {
+ tsk->thread.debugreg6 &= ~DR_STEP;
+ set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
+ regs->flags &= ~X86_EFLAGS_TF;
+ }
+ si_code = get_si_code(tsk->thread.debugreg6);
+ if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS))
+ send_sigtrap(tsk, regs, error_code, si_code);
preempt_conditional_cli(regs);
return;
}
^ permalink raw reply [flat|nested] 40+ messages in thread
* [Patch 05/12] Use wrapper routines around debug registers in processor related functions
[not found] <20090511114422.133566343@prasadkr_t60p.in.ibm.com>
` (3 preceding siblings ...)
2009-05-11 11:53 ` [Patch 04/12] Modifying generic debug exception to use thread-specific debug registers K.Prasad
@ 2009-05-11 11:53 ` K.Prasad
2009-05-11 11:53 ` [Patch 06/12] Use the new wrapper routines to access debug registers in process/thread code K.Prasad
` (6 subsequent siblings)
11 siblings, 0 replies; 40+ messages in thread
From: K.Prasad @ 2009-05-11 11:53 UTC (permalink / raw)
To: Alan Stern, Steven Rostedt, Frederic Weisbecker
Cc: Ingo Molnar, Linux Kernel Mailing List, Andrew Morton,
Benjamin Herrenschmidt, maneesh, Roland McGrath, Masami Hiramatsu
[-- Attachment #1: modify_processor_related_05 --]
[-- Type: text/plain, Size: 3525 bytes --]
From: Alan Stern <stern@rowland.harvard.edu>
This patch enables the use of wrapper routines to access the debug/breakpoint
registers.
[K.Prasad: Split-out from the bigger patch and minor changes following
re-basing]
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
---
arch/x86/kernel/smpboot.c | 3 +++
arch/x86/power/cpu_32.c | 16 +++-------------
arch/x86/power/cpu_64.c | 15 +++------------
3 files changed, 9 insertions(+), 25 deletions(-)
Index: arch/x86/kernel/smpboot.c
===================================================================
--- arch/x86/kernel/smpboot.c.orig
+++ arch/x86/kernel/smpboot.c
@@ -63,6 +63,7 @@
#include <asm/apic.h>
#include <asm/setup.h>
#include <asm/uv/uv.h>
+#include <asm/debugreg.h>
#include <linux/mc146818rtc.h>
#include <asm/smpboot_hooks.h>
@@ -326,6 +327,7 @@ notrace static void __cpuinit start_seco
setup_secondary_clock();
wmb();
+ load_debug_registers();
cpu_idle();
}
@@ -1252,6 +1254,7 @@ void cpu_disable_common(void)
remove_cpu_from_maps(cpu);
unlock_vector_lock();
fixup_irqs();
+ hw_breakpoint_disable();
}
int native_cpu_disable(void)
Index: arch/x86/power/cpu_32.c
===================================================================
--- arch/x86/power/cpu_32.c.orig
+++ arch/x86/power/cpu_32.c
@@ -13,6 +13,7 @@
#include <asm/mce.h>
#include <asm/xcr.h>
#include <asm/suspend.h>
+#include <asm/debugreg.h>
static struct saved_context saved_context;
@@ -48,6 +49,7 @@ static void __save_processor_state(struc
ctxt->cr2 = read_cr2();
ctxt->cr3 = read_cr3();
ctxt->cr4 = read_cr4_safe();
+ hw_breakpoint_disable();
}
/* Needed by apm.c */
@@ -80,19 +82,7 @@ static void fix_processor_context(void)
load_TR_desc(); /* This does ltr */
load_LDT(¤t->active_mm->context); /* This does lldt */
- /*
- * Now maybe reload the debug registers
- */
- if (current->thread.debugreg7) {
- set_debugreg(current->thread.debugreg0, 0);
- set_debugreg(current->thread.debugreg1, 1);
- set_debugreg(current->thread.debugreg2, 2);
- set_debugreg(current->thread.debugreg3, 3);
- /* no 4 and 5 */
- set_debugreg(current->thread.debugreg6, 6);
- set_debugreg(current->thread.debugreg7, 7);
- }
-
+ load_debug_registers();
}
static void __restore_processor_state(struct saved_context *ctxt)
Index: arch/x86/power/cpu_64.c
===================================================================
--- arch/x86/power/cpu_64.c.orig
+++ arch/x86/power/cpu_64.c
@@ -16,6 +16,7 @@
#include <asm/mtrr.h>
#include <asm/xcr.h>
#include <asm/suspend.h>
+#include <asm/debugreg.h>
static void fix_processor_context(void);
@@ -71,6 +72,7 @@ static void __save_processor_state(struc
ctxt->cr3 = read_cr3();
ctxt->cr4 = read_cr4();
ctxt->cr8 = read_cr8();
+ hw_breakpoint_disable();
}
void save_processor_state(void)
@@ -159,16 +161,5 @@ static void fix_processor_context(void)
load_TR_desc(); /* This does ltr */
load_LDT(¤t->active_mm->context); /* This does lldt */
- /*
- * Now maybe reload the debug registers
- */
- if (current->thread.debugreg7){
- loaddebug(¤t->thread, 0);
- loaddebug(¤t->thread, 1);
- loaddebug(¤t->thread, 2);
- loaddebug(¤t->thread, 3);
- /* no 4 and 5 */
- loaddebug(¤t->thread, 6);
- loaddebug(¤t->thread, 7);
- }
+ load_debug_registers();
}
^ permalink raw reply [flat|nested] 40+ messages in thread
* [Patch 06/12] Use the new wrapper routines to access debug registers in process/thread code
[not found] <20090511114422.133566343@prasadkr_t60p.in.ibm.com>
` (4 preceding siblings ...)
2009-05-11 11:53 ` [Patch 05/12] Use wrapper routines around debug registers in processor related functions K.Prasad
@ 2009-05-11 11:53 ` K.Prasad
2009-05-28 6:42 ` David Gibson
2009-05-11 11:53 ` [Patch 07/12] Modify signal handling code to refrain from re-enabling HW Breakpoints K.Prasad
` (5 subsequent siblings)
11 siblings, 1 reply; 40+ messages in thread
From: K.Prasad @ 2009-05-11 11:53 UTC (permalink / raw)
To: Alan Stern, Steven Rostedt, Frederic Weisbecker
Cc: Ingo Molnar, Linux Kernel Mailing List, Andrew Morton,
Benjamin Herrenschmidt, maneesh, Roland McGrath, Masami Hiramatsu
[-- Attachment #1: modify_process_related_06 --]
[-- Type: text/plain, Size: 8023 bytes --]
From: Alan Stern <stern@rowland.harvard.edu>
This patch enables the use of abstract debug registers in
process-handling routines.
[K.Prasad: Split-out from the bigger patch and minor changes following
re-basing]
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
---
arch/x86/include/asm/processor.h | 4 ----
arch/x86/kernel/process.c | 23 ++++++-----------------
arch/x86/kernel/process_32.c | 31 +++++++++++++++++++++++++++++++
arch/x86/kernel/process_64.c | 33 +++++++++++++++++++++++++++++++++
4 files changed, 70 insertions(+), 21 deletions(-)
Index: arch/x86/kernel/process.c
===================================================================
--- arch/x86/kernel/process.c.orig
+++ arch/x86/kernel/process.c
@@ -16,6 +16,8 @@
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/ds.h>
+#include <asm/debugreg.h>
+#include <asm/hw_breakpoint.h>
unsigned long idle_halt;
EXPORT_SYMBOL(idle_halt);
@@ -47,6 +49,8 @@ void free_thread_xstate(struct task_stru
kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
tsk->thread.xstate = NULL;
}
+ if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
+ flush_thread_hw_breakpoint(tsk);
WARN(tsk->thread.ds_ctx, "leaking DS context\n");
}
@@ -105,14 +109,9 @@ void flush_thread(void)
}
#endif
- clear_tsk_thread_flag(tsk, TIF_DEBUG);
+ if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
+ flush_thread_hw_breakpoint(tsk);
- tsk->thread.debugreg0 = 0;
- tsk->thread.debugreg1 = 0;
- tsk->thread.debugreg2 = 0;
- tsk->thread.debugreg3 = 0;
- tsk->thread.debugreg6 = 0;
- tsk->thread.debugreg7 = 0;
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
/*
* Forget coprocessor state..
@@ -194,16 +193,6 @@ void __switch_to_xtra(struct task_struct
else if (next->debugctlmsr != prev->debugctlmsr)
update_debugctlmsr(next->debugctlmsr);
- if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
- set_debugreg(next->debugreg0, 0);
- set_debugreg(next->debugreg1, 1);
- set_debugreg(next->debugreg2, 2);
- set_debugreg(next->debugreg3, 3);
- /* no 4 and 5 */
- set_debugreg(next->debugreg6, 6);
- set_debugreg(next->debugreg7, 7);
- }
-
if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
test_tsk_thread_flag(next_p, TIF_NOTSC)) {
/* prev and next are different */
Index: arch/x86/kernel/process_32.c
===================================================================
--- arch/x86/kernel/process_32.c.orig
+++ arch/x86/kernel/process_32.c
@@ -61,6 +61,8 @@
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>
+#include <asm/debugreg.h>
+#include <asm/hw_breakpoint.h>
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
@@ -265,7 +267,14 @@ int copy_thread(unsigned long clone_flag
task_user_gs(p) = get_user_gs(regs);
+ p->thread.io_bitmap_ptr = NULL;
+
tsk = current;
+ err = -ENOMEM;
+ if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
+ if (copy_thread_hw_breakpoint(tsk, p, clone_flags))
+ goto out;
+
if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
IO_BITMAP_BYTES, GFP_KERNEL);
@@ -285,10 +294,13 @@ int copy_thread(unsigned long clone_flag
err = do_set_thread_area(p, -1,
(struct user_desc __user *)childregs->si, 0);
+out:
if (err && p->thread.io_bitmap_ptr) {
kfree(p->thread.io_bitmap_ptr);
p->thread.io_bitmap_max = 0;
}
+ if (err)
+ flush_thread_hw_breakpoint(p);
clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
p->thread.ds_ctx = NULL;
@@ -427,6 +439,25 @@ __switch_to(struct task_struct *prev_p,
lazy_load_gs(next->gs);
percpu_write(current_task, next_p);
+ /*
+ * There's a problem with moving the switch_to_thread_hw_breakpoint()
+ * call before current is updated. Suppose a kernel breakpoint is
+ * triggered in between the two. The hw-breakpoint handler will see
+ * that current is different from the task pointer stored in
+ * last_debugged_task, so it will think the task pointer is leftover
+ * from an old task (lazy switching) and will erase it. Then until the
+ * next context switch, no user-breakpoints will be installed.
+ *
+ * The real problem is that it's impossible to update both current and
+ * last_debugged_task at the same instant, so there will always be a
+ * window in which they disagree and a breakpoint might get triggered.
+ * Since we use lazy switching, we are forced to assume that a
+ * disagreement means that current is correct and last_debugged_task is
+ * old. But if you move the code above then you'll create a window in
+ * which current is old and last_debugged_task is correct.
+ */
+ if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
+ switch_to_thread_hw_breakpoint(next_p);
return prev_p;
}
Index: arch/x86/kernel/process_64.c
===================================================================
--- arch/x86/kernel/process_64.c.orig
+++ arch/x86/kernel/process_64.c
@@ -55,6 +55,8 @@
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>
+#include <asm/debugreg.h>
+#include <asm/hw_breakpoint.h>
asmlinkage extern void ret_from_fork(void);
@@ -248,6 +250,8 @@ void release_thread(struct task_struct *
BUG();
}
}
+ if (unlikely(dead_task->thread.debugreg7))
+ flush_thread_hw_breakpoint(dead_task);
}
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
@@ -303,12 +307,18 @@ int copy_thread(unsigned long clone_flag
p->thread.fs = me->thread.fs;
p->thread.gs = me->thread.gs;
+ p->thread.io_bitmap_ptr = NULL;
savesegment(gs, p->thread.gsindex);
savesegment(fs, p->thread.fsindex);
savesegment(es, p->thread.es);
savesegment(ds, p->thread.ds);
+ err = -ENOMEM;
+ if (unlikely(test_tsk_thread_flag(me, TIF_DEBUG)))
+ if (copy_thread_hw_breakpoint(me, p, clone_flags))
+ goto out;
+
if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
if (!p->thread.io_bitmap_ptr) {
@@ -347,6 +357,9 @@ out:
kfree(p->thread.io_bitmap_ptr);
p->thread.io_bitmap_max = 0;
}
+ if (err)
+ flush_thread_hw_breakpoint(p);
+
return err;
}
@@ -492,6 +505,26 @@ __switch_to(struct task_struct *prev_p,
*/
if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
math_state_restore();
+ /*
+ * There's a problem with moving the switch_to_thread_hw_breakpoint()
+ * call before current is updated. Suppose a kernel breakpoint is
+ * triggered in between the two. The hw-breakpoint handler will see
+ * that current is different from the task pointer stored in
+ * last_debugged_task, so it will think the task pointer is leftover
+ * from an old task (lazy switching) and will erase it. Then until the
+ * next context switch, no user-breakpoints will be installed.
+ *
+ * The real problem is that it's impossible to update both current and
+ * last_debugged_task at the same instant, so there will always be a
+ * window in which they disagree and a breakpoint might get triggered.
+ * Since we use lazy switching, we are forced to assume that a
+ * disagreement means that current is correct and last_debugged_task is
+ * old. But if you move the code above then you'll create a window in
+ * which current is old and last_debugged_task is correct.
+ */
+ if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
+ switch_to_thread_hw_breakpoint(next_p);
+
return prev_p;
}
Index: arch/x86/include/asm/processor.h
===================================================================
--- arch/x86/include/asm/processor.h.orig
+++ arch/x86/include/asm/processor.h
@@ -432,10 +432,6 @@ struct thread_struct {
unsigned long fs;
unsigned long gs;
/* Hardware debugging registers: */
- unsigned long debugreg0;
- unsigned long debugreg1;
- unsigned long debugreg2;
- unsigned long debugreg3;
unsigned long debugreg[HB_NUM];
unsigned long debugreg6;
unsigned long debugreg7;
^ permalink raw reply [flat|nested] 40+ messages in thread
* [Patch 07/12] Modify signal handling code to refrain from re-enabling HW Breakpoints
[not found] <20090511114422.133566343@prasadkr_t60p.in.ibm.com>
` (5 preceding siblings ...)
2009-05-11 11:53 ` [Patch 06/12] Use the new wrapper routines to access debug registers in process/thread code K.Prasad
@ 2009-05-11 11:53 ` K.Prasad
2009-05-11 11:54 ` [Patch 08/12] Modify Ptrace routines to access breakpoint registers K.Prasad
` (4 subsequent siblings)
11 siblings, 0 replies; 40+ messages in thread
From: K.Prasad @ 2009-05-11 11:53 UTC (permalink / raw)
To: Alan Stern, Steven Rostedt, Frederic Weisbecker
Cc: Ingo Molnar, Linux Kernel Mailing List, Andrew Morton,
Benjamin Herrenschmidt, maneesh, Roland McGrath, Masami Hiramatsu
[-- Attachment #1: modify_signal_07 --]
[-- Type: text/plain, Size: 1084 bytes --]
From: Alan Stern <stern@rowland.harvard.edu>
This patch disables re-enabling of Hardware Breakpoint registers through
the signal handling code. This is now done during
hw_breakpoint_handler().
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
---
arch/x86/kernel/signal.c | 9 ---------
1 file changed, 9 deletions(-)
Index: arch/x86/kernel/signal.c
===================================================================
--- arch/x86/kernel/signal.c.orig
+++ arch/x86/kernel/signal.c
@@ -799,15 +799,6 @@ static void do_signal(struct pt_regs *re
signr = get_signal_to_deliver(&info, &ka, regs, NULL);
if (signr > 0) {
- /*
- * Re-enable any watchpoints before delivering the
- * signal to user space. The processor register will
- * have been cleared if the watchpoint triggered
- * inside the kernel.
- */
- if (current->thread.debugreg7)
- set_debugreg(current->thread.debugreg7, 7);
-
/* Whee! Actually deliver the signal. */
if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
/*
^ permalink raw reply [flat|nested] 40+ messages in thread
* [Patch 08/12] Modify Ptrace routines to access breakpoint registers
[not found] <20090511114422.133566343@prasadkr_t60p.in.ibm.com>
` (6 preceding siblings ...)
2009-05-11 11:53 ` [Patch 07/12] Modify signal handling code to refrain from re-enabling HW Breakpoints K.Prasad
@ 2009-05-11 11:54 ` K.Prasad
2009-05-11 11:54 ` [Patch 09/12] Cleanup HW Breakpoint registers before kexec K.Prasad
` (3 subsequent siblings)
11 siblings, 0 replies; 40+ messages in thread
From: K.Prasad @ 2009-05-11 11:54 UTC (permalink / raw)
To: Alan Stern, Steven Rostedt, Frederic Weisbecker
Cc: Ingo Molnar, Linux Kernel Mailing List, Andrew Morton,
Benjamin Herrenschmidt, maneesh, Roland McGrath, Masami Hiramatsu
[-- Attachment #1: modify_ptrace_08 --]
[-- Type: text/plain, Size: 7678 bytes --]
This patch modifies the ptrace code to use the new wrapper routines around the
debug/breakpoint registers.
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
Signed-off-by: Maneesh Soni <maneesh@linux.vnet.ibm.com>
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
---
arch/x86/kernel/ptrace.c | 226 ++++++++++++++++++++++++++++-------------------
1 file changed, 136 insertions(+), 90 deletions(-)
Index: arch/x86/kernel/ptrace.c
===================================================================
--- arch/x86/kernel/ptrace.c.orig
+++ arch/x86/kernel/ptrace.c
@@ -34,6 +34,7 @@
#include <asm/prctl.h>
#include <asm/proto.h>
#include <asm/ds.h>
+#include <asm/hw_breakpoint.h>
#include <trace/syscall.h>
@@ -136,11 +137,6 @@ static int set_segment_reg(struct task_s
return 0;
}
-static unsigned long debugreg_addr_limit(struct task_struct *task)
-{
- return TASK_SIZE - 3;
-}
-
#else /* CONFIG_X86_64 */
#define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT)
@@ -265,15 +261,6 @@ static int set_segment_reg(struct task_s
return 0;
}
-static unsigned long debugreg_addr_limit(struct task_struct *task)
-{
-#ifdef CONFIG_IA32_EMULATION
- if (test_tsk_thread_flag(task, TIF_IA32))
- return IA32_PAGE_OFFSET - 3;
-#endif
- return TASK_SIZE_MAX - 7;
-}
-
#endif /* CONFIG_X86_32 */
static unsigned long get_flags(struct task_struct *task)
@@ -464,95 +451,154 @@ static int genregs_set(struct task_struc
}
/*
- * This function is trivial and will be inlined by the compiler.
- * Having it separates the implementation details of debug
- * registers from the interface details of ptrace.
+ * Decode the length and type bits for a particular breakpoint as
+ * stored in debug register 7. Return the "enabled" status.
*/
-static unsigned long ptrace_get_debugreg(struct task_struct *child, int n)
+static int decode_dr7(unsigned long dr7, int bpnum, unsigned *len,
+ unsigned *type)
{
- switch (n) {
- case 0: return child->thread.debugreg0;
- case 1: return child->thread.debugreg1;
- case 2: return child->thread.debugreg2;
- case 3: return child->thread.debugreg3;
- case 6: return child->thread.debugreg6;
- case 7: return child->thread.debugreg7;
- }
- return 0;
+ int temp = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);
+
+ *len = (temp & 0xc) | 0x40;
+ *type = (temp & 0x3) | 0x80;
+ return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
}
-static int ptrace_set_debugreg(struct task_struct *child,
- int n, unsigned long data)
+static void ptrace_triggered(struct hw_breakpoint *bp, struct pt_regs *regs)
{
+ struct thread_struct *thread = &(current->thread);
int i;
- if (unlikely(n == 4 || n == 5))
- return -EIO;
+ /* Store in the virtual DR6 register the fact that the breakpoint
+ * was hit so the thread's debugger will see it.
+ */
+ for (i = 0; i < hbp_kernel_pos; i++)
+ /*
+ * We will check bp->info.address against the address stored in
+ * thread's hbp structure and not debugreg[i]. This is to ensure
+ * that the corresponding bit for 'i' in DR7 register is enabled
+ */
+ if (bp->info.address == thread->hbp[i]->info.address)
+ break;
- if (n < 4 && unlikely(data >= debugreg_addr_limit(child)))
- return -EIO;
+ thread->debugreg6 |= (DR_TRAP0 << i);
+}
- switch (n) {
- case 0: child->thread.debugreg0 = data; break;
- case 1: child->thread.debugreg1 = data; break;
- case 2: child->thread.debugreg2 = data; break;
- case 3: child->thread.debugreg3 = data; break;
+/*
+ * Handle ptrace writes to debug register 7.
+ */
+static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
+{
+ struct thread_struct *thread = &(tsk->thread);
+ unsigned long old_dr7 = thread->debugreg7;
+ int i, rc = 0;
+ int enabled;
+ unsigned len, type;
+ struct hw_breakpoint *bp;
- case 6:
- if ((data & ~0xffffffffUL) != 0)
- return -EIO;
- child->thread.debugreg6 = data;
- break;
+ data &= ~DR_CONTROL_RESERVED;
+restore:
+ /*
+ * Loop through all the hardware breakpoints, making the
+ * appropriate changes to each.
+ */
+ for (i = 0; i < HB_NUM; i++) {
+ enabled = decode_dr7(data, i, &len, &type);
+ bp = thread->hbp[i];
+
+ if (!enabled) {
+ if (bp) {
+ unregister_user_hw_breakpoint(tsk, bp);
+ kfree(bp);
+ }
+ continue;
+ }
- case 7:
- /*
- * Sanity-check data. Take one half-byte at once with
- * check = (val >> (16 + 4*i)) & 0xf. It contains the
- * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits
- * 2 and 3 are LENi. Given a list of invalid values,
- * we do mask |= 1 << invalid_value, so that
- * (mask >> check) & 1 is a correct test for invalid
- * values.
- *
- * R/Wi contains the type of the breakpoint /
- * watchpoint, LENi contains the length of the watched
- * data in the watchpoint case.
- *
- * The invalid values are:
- * - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit]
- * - R/Wi == 0x10 (break on I/O reads or writes), so
- * mask |= 0x4444.
- * - R/Wi == 0x00 && LENi != 0x00, so we have mask |=
- * 0x1110.
- *
- * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54.
- *
- * See the Intel Manual "System Programming Guide",
- * 15.2.4
- *
- * Note that LENi == 0x10 is defined on x86_64 in long
- * mode (i.e. even for 32-bit userspace software, but
- * 64-bit kernel), so the x86_64 mask value is 0x5454.
- * See the AMD manual no. 24593 (AMD64 System Programming)
- */
-#ifdef CONFIG_X86_32
-#define DR7_MASK 0x5f54
-#else
-#define DR7_MASK 0x5554
-#endif
- data &= ~DR_CONTROL_RESERVED;
- for (i = 0; i < 4; i++)
- if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1)
- return -EIO;
- child->thread.debugreg7 = data;
- if (data)
- set_tsk_thread_flag(child, TIF_DEBUG);
- else
- clear_tsk_thread_flag(child, TIF_DEBUG);
- break;
+ if (!bp) {
+ rc = -ENOMEM;
+ bp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL);
+ if (bp) {
+ bp->info.address = thread->debugreg[i];
+ bp->triggered = ptrace_triggered;
+ bp->info.len = len;
+ bp->info.type = type;
+ rc = register_user_hw_breakpoint(tsk, bp);
+ if (!rc)
+ set_tsk_thread_flag(tsk, TIF_DEBUG);
+ else
+ kfree(bp);
+ }
+ } else {
+ spin_lock_bh(&hw_breakpoint_lock);
+ rc = __modify_user_hw_breakpoint(i, tsk, bp);
+ spin_unlock_bh(&hw_breakpoint_lock);
+ }
+
+ if (rc)
+ break;
}
- return 0;
+ /* If anything above failed, restore the original settings */
+ if (rc < 0) {
+ data = old_dr7;
+ goto restore;
+ }
+ return rc;
+}
+
+/*
+ * Handle PTRACE_PEEKUSR calls for the debug register area.
+ */
+unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
+{
+ struct thread_struct *thread = &(tsk->thread);
+ unsigned long val = 0;
+
+ spin_lock_bh(&hw_breakpoint_lock);
+ if (n < HB_NUM)
+ val = thread->debugreg[n];
+ else if (n == 6)
+ val = thread->debugreg6;
+ else if (n == 7)
+ val = thread->debugreg7;
+
+ spin_unlock_bh(&hw_breakpoint_lock);
+ return val;
+}
+
+/*
+ * Handle PTRACE_POKEUSR calls for the debug register area.
+ */
+int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val)
+{
+ struct thread_struct *thread = &(tsk->thread);
+ int rc = 0;
+
+ /* There are no DR4 or DR5 registers */
+ if (n == 4 || n == 5)
+ return -EIO;
+
+ if (n == 6) {
+ tsk->thread.debugreg6 = val;
+ goto ret_path;
+ }
+ if (n < HB_NUM) {
+ if (thread->hbp[n]) {
+ if (arch_check_va_in_userspace(val,
+ thread->hbp[n]->info.len) == 0) {
+ rc = -EIO;
+ goto ret_path;
+ }
+ thread->hbp[n]->info.address = val;
+ }
+ thread->debugreg[n] = val;
+ }
+ /* All that's left is DR7 */
+ if (n == 7)
+ rc = ptrace_write_dr7(tsk, val);
+
+ret_path:
+ return rc;
}
/*
^ permalink raw reply [flat|nested] 40+ messages in thread
* [Patch 09/12] Cleanup HW Breakpoint registers before kexec
[not found] <20090511114422.133566343@prasadkr_t60p.in.ibm.com>
` (7 preceding siblings ...)
2009-05-11 11:54 ` [Patch 08/12] Modify Ptrace routines to access breakpoint registers K.Prasad
@ 2009-05-11 11:54 ` K.Prasad
2009-05-11 11:54 ` [Patch 10/12] Sample HW breakpoint over kernel data address K.Prasad
` (2 subsequent siblings)
11 siblings, 0 replies; 40+ messages in thread
From: K.Prasad @ 2009-05-11 11:54 UTC (permalink / raw)
To: Alan Stern, Steven Rostedt, Frederic Weisbecker
Cc: Ingo Molnar, Linux Kernel Mailing List, Andrew Morton,
Benjamin Herrenschmidt, maneesh, Roland McGrath, Masami Hiramatsu
[-- Attachment #1: modify_kexec_09 --]
[-- Type: text/plain, Size: 1698 bytes --]
From: Alan Stern <stern@rowland.harvard.edu>
This patch disables Hardware breakpoints before doing a 'kexec' on the machine.
[K.Prasad: Split-out from the bigger patch and minor changes following
re-basing]
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
---
arch/x86/kernel/machine_kexec_32.c | 2 ++
arch/x86/kernel/machine_kexec_64.c | 2 ++
2 files changed, 4 insertions(+)
Index: arch/x86/kernel/machine_kexec_32.c
===================================================================
--- arch/x86/kernel/machine_kexec_32.c.orig
+++ arch/x86/kernel/machine_kexec_32.c
@@ -25,6 +25,7 @@
#include <asm/desc.h>
#include <asm/system.h>
#include <asm/cacheflush.h>
+#include <asm/debugreg.h>
static void set_idt(void *newidt, __u16 limit)
{
@@ -202,6 +203,7 @@ void machine_kexec(struct kimage *image)
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();
+ hw_breakpoint_disable();
if (image->preserve_context) {
#ifdef CONFIG_X86_IO_APIC
Index: arch/x86/kernel/machine_kexec_64.c
===================================================================
--- arch/x86/kernel/machine_kexec_64.c.orig
+++ arch/x86/kernel/machine_kexec_64.c
@@ -18,6 +18,7 @@
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
+#include <asm/debugreg.h>
static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
unsigned long addr)
@@ -282,6 +283,7 @@ void machine_kexec(struct kimage *image)
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();
+ hw_breakpoint_disable();
if (image->preserve_context) {
#ifdef CONFIG_X86_IO_APIC
^ permalink raw reply [flat|nested] 40+ messages in thread
* [Patch 10/12] Sample HW breakpoint over kernel data address
[not found] <20090511114422.133566343@prasadkr_t60p.in.ibm.com>
` (8 preceding siblings ...)
2009-05-11 11:54 ` [Patch 09/12] Cleanup HW Breakpoint registers before kexec K.Prasad
@ 2009-05-11 11:54 ` K.Prasad
2009-05-11 11:55 ` [Patch 11/12] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v4 K.Prasad
2009-05-11 11:55 ` [Patch 12/12] Reset bits in dr6 after the corresponding exception is handled K.Prasad
11 siblings, 0 replies; 40+ messages in thread
From: K.Prasad @ 2009-05-11 11:54 UTC (permalink / raw)
To: Alan Stern, Steven Rostedt, Frederic Weisbecker
Cc: Ingo Molnar, Linux Kernel Mailing List, Andrew Morton,
Benjamin Herrenschmidt, maneesh, Roland McGrath, Masami Hiramatsu
[-- Attachment #1: samples_hw_breakpoint_10 --]
[-- Type: text/plain, Size: 4609 bytes --]
This patch introduces a sample kernel module to demonstrate the use of Hardware
Breakpoint feature. It places a breakpoint over the kernel variable 'pid_max'
to monitor all write operations and emits a function-backtrace when done.
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
---
samples/Kconfig | 6 ++
samples/Makefile | 3 -
samples/hw_breakpoint/Makefile | 1
samples/hw_breakpoint/data_breakpoint.c | 83 ++++++++++++++++++++++++++++++++
4 files changed, 92 insertions(+), 1 deletion(-)
Index: samples/Kconfig
===================================================================
--- samples/Kconfig.orig
+++ samples/Kconfig
@@ -46,5 +46,11 @@ config SAMPLE_KRETPROBES
default m
depends on SAMPLE_KPROBES && KRETPROBES
+config SAMPLE_HW_BREAKPOINT
+ tristate "Build kernel hardware breakpoint examples -- loadable modules only"
+ depends on HAVE_HW_BREAKPOINT && m
+ help
+ This builds kernel hardware breakpoint example modules.
+
endif # SAMPLES
Index: samples/Makefile
===================================================================
--- samples/Makefile.orig
+++ samples/Makefile
@@ -1,3 +1,4 @@
# Makefile for Linux samples code
-obj-$(CONFIG_SAMPLES) += markers/ kobject/ kprobes/ tracepoints/ trace_events/
+obj-$(CONFIG_SAMPLES) += markers/ kobject/ kprobes/ tracepoints/ \
+ trace_events/ hw_breakpoint/
Index: samples/hw_breakpoint/Makefile
===================================================================
--- /dev/null
+++ samples/hw_breakpoint/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_SAMPLE_HW_BREAKPOINT) += data_breakpoint.o
Index: samples/hw_breakpoint/data_breakpoint.c
===================================================================
--- /dev/null
+++ samples/hw_breakpoint/data_breakpoint.c
@@ -0,0 +1,83 @@
+/*
+ * data_breakpoint.c - Sample HW Breakpoint file to watch kernel data address
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * usage: insmod data_breakpoint.ko ksym=<ksym_name>
+ *
+ * This file is a kernel module that places a breakpoint over ksym_name kernel
+ * variable using Hardware Breakpoint register. The corresponding handler which
+ * prints a backtrace is invoked everytime a write operation is performed on
+ * that variable.
+ *
+ * Copyright (C) IBM Corporation, 2009
+ */
+#include <linux/module.h> /* Needed by all modules */
+#include <linux/kernel.h> /* Needed for KERN_INFO */
+#include <linux/init.h> /* Needed for the macros */
+
+#include <asm/hw_breakpoint.h>
+
+struct hw_breakpoint sample_hbp;
+
+static char ksym_name[KSYM_NAME_LEN] = "pid_max";
+module_param_string(ksym, ksym_name, KSYM_NAME_LEN, S_IRUGO);
+MODULE_PARM_DESC(ksym, "Kernel symbol to monitor; this module will report any"
+ " write operations on the kernel symbol");
+
+void sample_hbp_handler(struct hw_breakpoint *temp, struct pt_regs
+ *temp_regs)
+{
+ printk(KERN_INFO "%s value is changed\n", ksym_name);
+ dump_stack();
+ printk(KERN_INFO "Dump stack from sample_hbp_handler\n");
+}
+
+static int __init hw_break_module_init(void)
+{
+ int ret;
+
+#ifdef CONFIG_X86
+ sample_hbp.info.name = ksym_name;
+ sample_hbp.info.type = HW_BREAKPOINT_WRITE;
+ sample_hbp.info.len = HW_BREAKPOINT_LEN_4;
+#endif /* CONFIG_X86 */
+
+ sample_hbp.triggered = (void *)sample_hbp_handler;
+
+ ret = register_kernel_hw_breakpoint(&sample_hbp);
+
+ if (ret < 0) {
+ printk(KERN_INFO "Breakpoint registration failed\n");
+ return ret;
+ } else
+ printk(KERN_INFO "HW Breakpoint for %s write installed\n",
+ ksym_name);
+
+ return 0;
+}
+
+static void __exit hw_break_module_exit(void)
+{
+ unregister_kernel_hw_breakpoint(&sample_hbp);
+ printk(KERN_INFO "HW Breakpoint for %s write uninstalled\n", ksym_name);
+}
+
+module_init(hw_break_module_init);
+module_exit(hw_break_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("K.Prasad");
+MODULE_DESCRIPTION("ksym breakpoint");
^ permalink raw reply [flat|nested] 40+ messages in thread
* [Patch 11/12] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v4
[not found] <20090511114422.133566343@prasadkr_t60p.in.ibm.com>
` (9 preceding siblings ...)
2009-05-11 11:54 ` [Patch 10/12] Sample HW breakpoint over kernel data address K.Prasad
@ 2009-05-11 11:55 ` K.Prasad
2009-05-11 22:14 ` Frederic Weisbecker
2009-05-11 11:55 ` [Patch 12/12] Reset bits in dr6 after the corresponding exception is handled K.Prasad
11 siblings, 1 reply; 40+ messages in thread
From: K.Prasad @ 2009-05-11 11:55 UTC (permalink / raw)
To: Alan Stern, Steven Rostedt, Frederic Weisbecker
Cc: Ingo Molnar, Linux Kernel Mailing List, Andrew Morton,
Benjamin Herrenschmidt, maneesh, Roland McGrath, Masami Hiramatsu
[-- Attachment #1: ftrace_ksym_tracer_11 --]
[-- Type: text/plain, Size: 19200 bytes --]
This patch adds an ftrace plugin to detect and profile memory access over kernel
variables. It uses HW Breakpoint interfaces to 'watch memory addresses.
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
---
kernel/trace/Kconfig | 21 +
kernel/trace/Makefile | 1
kernel/trace/trace.h | 20 +
kernel/trace/trace_ksym.c | 557 ++++++++++++++++++++++++++++++++++++++++++
kernel/trace/trace_selftest.c | 37 ++
5 files changed, 636 insertions(+)
Index: kernel/trace/Kconfig
===================================================================
--- kernel/trace/Kconfig.orig
+++ kernel/trace/Kconfig
@@ -308,6 +308,27 @@ config POWER_TRACER
power management decisions, specifically the C-state and P-state
behavior.
+config KSYM_TRACER
+ bool "Trace read and write access on kernel memory locations"
+ depends on HAVE_HW_BREAKPOINT
+ select TRACING
+ help
+ This tracer helps find read and write operations on any given kernel
+ symbol i.e. /proc/kallsyms.
+
+config PROFILE_KSYM_TRACER
+ bool "Profile all kernel memory accesses on 'watched' variables"
+ depends on KSYM_TRACER
+ help
+ This tracer profiles kernel accesses on variables watched through the
+ ksym tracer ftrace plugin. Depending upon the hardware, all read
+ and write operations on kernel variables can be monitored for
+ accesses.
+
+ The results will be displayed in:
+ /debugfs/tracing/profile_ksym
+
+ Say N if unsure.
config STACK_TRACER
bool "Trace max stack"
Index: kernel/trace/Makefile
===================================================================
--- kernel/trace/Makefile.orig
+++ kernel/trace/Makefile
@@ -50,5 +50,6 @@ obj-$(CONFIG_EVENT_TRACING) += trace_exp
obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
+obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
libftrace-y := ftrace.o
Index: kernel/trace/trace.h
===================================================================
--- kernel/trace/trace.h.orig
+++ kernel/trace/trace.h
@@ -15,6 +15,10 @@
#include <linux/trace_seq.h>
#include <linux/ftrace_event.h>
+#ifdef CONFIG_KSYM_TRACER
+#include <asm/hw_breakpoint.h>
+#endif
+
enum trace_type {
__TRACE_FIRST_TYPE = 0,
@@ -40,6 +44,7 @@ enum trace_type {
TRACE_KMEM_FREE,
TRACE_POWER,
TRACE_BLK,
+ TRACE_KSYM,
__TRACE_LAST_TYPE,
};
@@ -207,6 +212,18 @@ struct syscall_trace_exit {
unsigned long ret;
};
+struct trace_ksym {
+ struct trace_entry ent;
+ struct hw_breakpoint *ksym_hbp;
+ unsigned long ksym_addr;
+ unsigned long ip;
+#ifdef CONFIG_PROFILE_KSYM_TRACER
+ unsigned long counter;
+#endif
+ struct hlist_node ksym_hlist;
+ char ksym_name[KSYM_NAME_LEN];
+ char p_name[TASK_COMM_LEN];
+};
/*
* trace_flag_type is an enumeration that holds different
@@ -323,6 +340,7 @@ extern void __ftrace_bad_type(void);
TRACE_SYSCALL_ENTER); \
IF_ASSIGN(var, ent, struct syscall_trace_exit, \
TRACE_SYSCALL_EXIT); \
+ IF_ASSIGN(var, ent, struct trace_ksym, TRACE_KSYM); \
__ftrace_bad_type(); \
} while (0)
@@ -538,6 +556,8 @@ extern int trace_selftest_startup_branch
struct trace_array *tr);
extern int trace_selftest_startup_hw_branches(struct tracer *trace,
struct trace_array *tr);
+extern int trace_selftest_startup_ksym(struct tracer *trace,
+ struct trace_array *tr);
#endif /* CONFIG_FTRACE_STARTUP_TEST */
extern void *head_page(struct trace_array_cpu *data);
Index: kernel/trace/trace_ksym.c
===================================================================
--- /dev/null
+++ kernel/trace/trace_ksym.c
@@ -0,0 +1,557 @@
+/*
+ * trace_ksym.c - Kernel Symbol Tracer
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2009
+ */
+
+#include <linux/kallsyms.h>
+#include <linux/uaccess.h>
+#include <linux/debugfs.h>
+#include <linux/ftrace.h>
+#include <linux/module.h>
+#include <linux/jhash.h>
+#include <linux/fs.h>
+
+#include "trace_output.h"
+#include "trace_stat.h"
+#include "trace.h"
+
+/* For now, let us restrict the no. of symbols traced simultaneously to number
+ * of available hardware breakpoint registers.
+ */
+#define KSYM_TRACER_MAX HB_NUM
+
+#define KSYM_TRACER_OP_LEN 3 /* rw- */
+#define KSYM_FILTER_ENTRY_LEN (KSYM_NAME_LEN + KSYM_TRACER_OP_LEN + 1)
+
+#ifdef CONFIG_FTRACE_SELFTEST
+
+static int ksym_selftest_dummy;
+#define KSYM_SELFTEST_ENTRY "ksym_selftest_dummy"
+
+#endif /* CONFIG_FTRACE_SELFTEST */
+
+static struct trace_array *ksym_trace_array;
+
+static unsigned int ksym_filter_entry_count;
+static unsigned int ksym_tracing_enabled;
+
+static HLIST_HEAD(ksym_filter_head);
+
+#ifdef CONFIG_PROFILE_KSYM_TRACER
+
+#define MAX_UL_INT 0xffffffff
+
+static DEFINE_MUTEX(ksym_tracer_mutex);
+
+void ksym_collect_stats(unsigned long hbp_hit_addr)
+{
+ struct hlist_node *node;
+ struct trace_ksym *entry;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
+ if ((entry->ksym_addr == hbp_hit_addr) &&
+ (entry->counter <= MAX_UL_INT)) {
+ entry->counter++;
+ break;
+ }
+ }
+ rcu_read_unlock();
+}
+#endif /* CONFIG_PROFILE_KSYM_TRACER */
+
+void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs)
+{
+ struct ring_buffer_event *event;
+ struct trace_array *tr;
+ struct trace_ksym *entry;
+ int pc;
+
+ if (!ksym_tracing_enabled)
+ return;
+
+ tr = ksym_trace_array;
+ pc = preempt_count();
+
+ event = trace_buffer_lock_reserve(tr, TRACE_KSYM,
+ sizeof(*entry), 0, pc);
+ if (!event)
+ return;
+
+ entry = ring_buffer_event_data(event);
+ strlcpy(entry->ksym_name, hbp->info.name, KSYM_SYMBOL_LEN);
+ entry->ksym_hbp = hbp;
+ entry->ip = instruction_pointer(regs);
+ strlcpy(entry->p_name, current->comm, TASK_COMM_LEN);
+#ifdef CONFIG_PROFILE_KSYM_TRACER
+ ksym_collect_stats(hbp->info.address);
+#endif /* CONFIG_PROFILE_KSYM_TRACER */
+
+ trace_buffer_unlock_commit(tr, event, 0, pc);
+}
+
+/* Valid access types are represented as
+ *
+ * rw- : Set Read/Write Access Breakpoint
+ * -w- : Set Write Access Breakpoint
+ * --- : Clear Breakpoints
+ * --x : Set Execution Break points (Not available yet)
+ *
+ */
+static int ksym_trace_get_access_type(char *access_str)
+{
+ int pos, access = 0;
+
+ for (pos = 0; pos < KSYM_TRACER_OP_LEN; pos++) {
+ switch (access_str[pos]) {
+ case 'r':
+ access += (pos == 0) ? 4 : -1;
+ break;
+ case 'w':
+ access += (pos == 1) ? 2 : -1;
+ break;
+ case '-':
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ switch (access) {
+ case 6:
+ access = HW_BREAKPOINT_RW;
+ break;
+ case 2:
+ access = HW_BREAKPOINT_WRITE;
+ break;
+ case 0:
+ access = 0;
+ }
+
+ return access;
+}
+
+/*
+ * There can be several possible malformed requests and we attempt to capture
+ * all of them. We enumerate some of the rules
+ * 1. We will not allow kernel symbols with ':' since it is used as a delimiter.
+ * i.e. multiple ':' symbols disallowed. Possible uses are of the form
+ * <module>:<ksym_name>:<op>.
+ * 2. No delimiter symbol ':' in the input string
+ * 3. Spurious operator symbols or symbols not in their respective positions
+ * 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file
+ * 5. Kernel symbol not a part of /proc/kallsyms
+ * 6. Duplicate requests
+ */
+static int parse_ksym_trace_str(char *input_string, char **ksymname,
+ unsigned long *addr)
+{
+ char *delimiter = ":";
+ int ret;
+
+ ret = -EINVAL;
+ *ksymname = strsep(&input_string, delimiter);
+ *addr = kallsyms_lookup_name(*ksymname);
+
+ /* Check for malformed request: (2), (1) and (5) */
+ if ((!input_string) ||
+ (strlen(input_string) != (KSYM_TRACER_OP_LEN + 1)) ||
+ (*addr == 0))
+ goto return_code;
+ ret = ksym_trace_get_access_type(input_string);
+
+return_code:
+ return ret;
+}
+
+static int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
+{
+ struct trace_ksym *entry;
+ int ret;
+
+ if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
+ printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No"
+ " new requests for tracing can be accepted now.\n",
+ KSYM_TRACER_MAX);
+ return -ENOSPC;
+ }
+
+ entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
+ if (!entry)
+ return -ENOMEM;
+
+ entry->ksym_hbp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL);
+ if (!entry->ksym_hbp) {
+ kfree(entry);
+ return -ENOMEM;
+ }
+
+ entry->ksym_hbp->info.name = ksymname;
+ entry->ksym_hbp->info.type = op;
+ entry->ksym_addr = entry->ksym_hbp->info.address = addr;
+#ifdef CONFIG_X86
+ entry->ksym_hbp->info.len = HW_BREAKPOINT_LEN_4;
+#endif
+ entry->ksym_hbp->triggered = (void *)ksym_hbp_handler;
+
+ ret = register_kernel_hw_breakpoint(entry->ksym_hbp);
+ if (ret < 0) {
+ printk(KERN_INFO "ksym_tracer request failed. Try again"
+ " later!!\n");
+ kfree(entry->ksym_hbp);
+ kfree(entry);
+ return -EAGAIN;
+ }
+ hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
+ ksym_filter_entry_count++;
+
+ return 0;
+}
+
+static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ struct trace_ksym *entry;
+ struct hlist_node *node;
+ char buf[KSYM_FILTER_ENTRY_LEN * KSYM_TRACER_MAX];
+ ssize_t ret, cnt = 0;
+
+ mutex_lock(&ksym_tracer_mutex);
+
+ hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
+ cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt, "%s:",
+ entry->ksym_hbp->info.name);
+ if (entry->ksym_hbp->info.type == HW_BREAKPOINT_WRITE)
+ cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt,
+ "-w-\n");
+ else if (entry->ksym_hbp->info.type == HW_BREAKPOINT_RW)
+ cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt,
+ "rw-\n");
+ }
+ ret = simple_read_from_buffer(ubuf, count, ppos, buf, strlen(buf));
+ mutex_unlock(&ksym_tracer_mutex);
+
+ return ret;
+}
+
+static ssize_t ksym_trace_filter_write(struct file *file,
+ const char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ struct trace_ksym *entry;
+ struct hlist_node *node;
+ char *input_string, *ksymname = NULL;
+ unsigned long ksym_addr = 0;
+ int ret, op, changed = 0;
+
+ /* Ignore echo "" > ksym_trace_filter */
+ if (count == 0)
+ return 0;
+
+ input_string = kzalloc(count, GFP_KERNEL);
+ if (!input_string)
+ return -ENOMEM;
+
+ if (copy_from_user(input_string, buffer, count)) {
+ kfree(input_string);
+ return -EFAULT;
+ }
+
+ ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
+ if (ret < 0) {
+ kfree(input_string);
+ return ret;
+ }
+
+ mutex_lock(&ksym_tracer_mutex);
+
+ ret = -EINVAL;
+ hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
+ if (entry->ksym_addr == ksym_addr) {
+ /* Check for malformed request: (6) */
+ if (entry->ksym_hbp->info.type != op)
+ changed = 1;
+ else
+ goto err_ret;
+ break;
+ }
+ }
+ if (changed) {
+ unregister_kernel_hw_breakpoint(entry->ksym_hbp);
+ entry->ksym_hbp->info.type = op;
+ if (op > 0) {
+ ret = register_kernel_hw_breakpoint(entry->ksym_hbp);
+ if (ret == 0) {
+ ret = count;
+ goto unlock_ret_path;
+ }
+ }
+ ksym_filter_entry_count--;
+ hlist_del_rcu(&(entry->ksym_hlist));
+ synchronize_rcu();
+ kfree(entry->ksym_hbp);
+ kfree(entry);
+ ret = count;
+ goto err_ret;
+ } else {
+ /* Check for malformed request: (4) */
+ if (op == 0)
+ goto err_ret;
+ ret = process_new_ksym_entry(ksymname, op, ksym_addr);
+ if (ret)
+ goto err_ret;
+ }
+ ret = count;
+ goto unlock_ret_path;
+
+err_ret:
+ kfree(input_string);
+
+unlock_ret_path:
+ mutex_unlock(&ksym_tracer_mutex);
+ return ret;
+}
+
+static const struct file_operations ksym_tracing_fops = {
+ .open = tracing_open_generic,
+ .read = ksym_trace_filter_read,
+ .write = ksym_trace_filter_write,
+};
+
+static void ksym_trace_reset(struct trace_array *tr)
+{
+ struct trace_ksym *entry;
+ struct hlist_node *node, *node1;
+
+ ksym_tracing_enabled = 0;
+
+ mutex_lock(&ksym_tracer_mutex);
+ hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
+ ksym_hlist) {
+ unregister_kernel_hw_breakpoint(entry->ksym_hbp);
+ ksym_filter_entry_count--;
+ hlist_del_rcu(&(entry->ksym_hlist));
+ synchronize_rcu();
+ /* Free the 'input_string' only if reset
+ * after startup self-test
+ */
+#ifdef CONFIG_FTRACE_SELFTEST
+ if (strncmp(entry->ksym_hbp->info.name, KSYM_SELFTEST_ENTRY,
+ strlen(KSYM_SELFTEST_ENTRY)) != 0)
+#endif /* CONFIG_FTRACE_SELFTEST*/
+ kfree(entry->ksym_hbp->info.name);
+ kfree(entry->ksym_hbp);
+ kfree(entry);
+ }
+ mutex_unlock(&ksym_tracer_mutex);
+}
+
+static int ksym_trace_init(struct trace_array *tr)
+{
+ int cpu, ret = 0;
+
+ for_each_online_cpu(cpu)
+ tracing_reset(tr, cpu);
+
+ ksym_tracing_enabled = 1;
+ ksym_trace_array = tr;
+
+#ifdef CONFIG_FTRACE_SELFTEST
+ /* Check if we are re-entering self-test code during initialisation */
+ if (ksym_selftest_dummy)
+ goto ret_path;
+
+ ksym_selftest_dummy = 0;
+
+ /* Register the read-write tracing request */
+ ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY, HW_BREAKPOINT_RW,
+ (unsigned long)(&ksym_selftest_dummy));
+
+ if (ret < 0) {
+ printk(KERN_CONT "ksym_trace read-write startup test failed\n");
+ goto ret_path;
+ }
+ /* Perform a read and a write operation over the dummy variable to
+ * trigger the tracer
+ */
+ if (ksym_selftest_dummy == 0)
+ ksym_selftest_dummy++;
+ret_path:
+#endif /* CONFIG_FTRACE_SELFTEST */
+
+ return ret;
+}
+
+static void ksym_trace_print_header(struct seq_file *m)
+{
+
+ seq_puts(m,
+ "# TASK-PID CPU# Symbol Type "
+ "Function \n");
+ seq_puts(m,
+ "# | | | | "
+ "| \n");
+}
+
+static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
+{
+ struct trace_entry *entry = iter->ent;
+ struct trace_seq *s = &iter->seq;
+ struct trace_ksym *field;
+ char str[KSYM_SYMBOL_LEN];
+ int ret;
+
+ if (entry->type != TRACE_KSYM)
+ return TRACE_TYPE_UNHANDLED;
+
+ trace_assign_type(field, entry);
+
+ ret = trace_seq_printf(s, "%-15s %-5d %-3d %-20s ", field->p_name,
+ entry->pid, iter->cpu, field->ksym_name);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ switch (field->ksym_hbp->info.type) {
+ case HW_BREAKPOINT_WRITE:
+ ret = trace_seq_printf(s, " W ");
+ break;
+ case HW_BREAKPOINT_RW:
+ ret = trace_seq_printf(s, " RW ");
+ break;
+ default:
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ sprint_symbol(str, field->ip);
+ ret = trace_seq_printf(s, "%-20s\n", str);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
+}
+
+struct tracer ksym_tracer __read_mostly =
+{
+ .name = "ksym_tracer",
+ .init = ksym_trace_init,
+ .reset = ksym_trace_reset,
+#ifdef CONFIG_FTRACE_SELFTEST
+ .selftest = trace_selftest_startup_ksym,
+#endif
+ .print_header = ksym_trace_print_header,
+ .print_line = ksym_trace_output
+};
+
+__init static int init_ksym_trace(void)
+{
+ struct dentry *d_tracer;
+ struct dentry *entry;
+
+ d_tracer = tracing_init_dentry();
+ ksym_filter_entry_count = 0;
+
+ entry = debugfs_create_file("ksym_trace_filter", 0666, d_tracer,
+ NULL, &ksym_tracing_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs "
+ "'ksym_trace_filter' file\n");
+
+ return register_tracer(&ksym_tracer);
+}
+device_initcall(init_ksym_trace);
+
+
+#ifdef CONFIG_PROFILE_KSYM_TRACER
+static int ksym_tracer_stat_headers(struct seq_file *m)
+{
+ seq_printf(m, " Access type ");
+ seq_printf(m, " Symbol Counter \n");
+ return 0;
+}
+
+static int ksym_tracer_stat_show(struct seq_file *m, void *v)
+{
+ struct hlist_node *stat = v;
+ struct trace_ksym *entry;
+ int access_type = 0;
+ char fn_name[KSYM_NAME_LEN];
+
+ entry = hlist_entry(stat, struct trace_ksym, ksym_hlist);
+
+ if (entry->ksym_hbp)
+ access_type = entry->ksym_hbp->info.type;
+
+ switch (access_type) {
+ case HW_BREAKPOINT_WRITE:
+ seq_printf(m, " W ");
+ break;
+ case HW_BREAKPOINT_RW:
+ seq_printf(m, " RW ");
+ break;
+ default:
+ seq_printf(m, " NA ");
+ }
+
+ if (lookup_symbol_name(entry->ksym_addr, fn_name) >= 0)
+ seq_printf(m, " %s ", fn_name);
+ else
+ seq_printf(m, " <NA> ");
+
+ seq_printf(m, "%15lu\n", entry->counter);
+ return 0;
+}
+
+static void *ksym_tracer_stat_start(struct tracer_stat *trace)
+{
+ return &(ksym_filter_head.first);
+}
+
+static void *
+ksym_tracer_stat_next(void *v, int idx)
+{
+ struct hlist_node *stat = v;
+
+ return stat->next;
+}
+
+static struct tracer_stat ksym_tracer_stats = {
+ .name = "ksym_tracer",
+ .stat_start = ksym_tracer_stat_start,
+ .stat_next = ksym_tracer_stat_next,
+ .stat_headers = ksym_tracer_stat_headers,
+ .stat_show = ksym_tracer_stat_show
+};
+
+__init static int ksym_tracer_stat_init(void)
+{
+ int ret;
+
+ ret = register_stat_tracer(&ksym_tracer_stats);
+ if (ret) {
+ printk(KERN_WARNING "Warning: could not register "
+ "ksym tracer stats\n");
+ return 1;
+ }
+
+ return 0;
+}
+fs_initcall(ksym_tracer_stat_init);
+#endif /* CONFIG_PROFILE_KSYM_TRACER */
Index: kernel/trace/trace_selftest.c
===================================================================
--- kernel/trace/trace_selftest.c.orig
+++ kernel/trace/trace_selftest.c
@@ -17,6 +17,7 @@ static inline int trace_valid_entry(stru
case TRACE_GRAPH_ENT:
case TRACE_GRAPH_RET:
case TRACE_HW_BRANCHES:
+ case TRACE_KSYM:
return 1;
}
return 0;
@@ -807,3 +808,39 @@ trace_selftest_startup_hw_branches(struc
return ret;
}
#endif /* CONFIG_HW_BRANCH_TRACER */
+
+#ifdef CONFIG_KSYM_TRACER
+int
+trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
+{
+ unsigned long count;
+ int ret;
+
+ /* start the tracing */
+ ret = tracer_init(trace, tr);
+ if (ret) {
+ warn_failed_init_tracer(trace, ret);
+ return ret;
+ }
+
+ /* Sleep for a 1/10 of a second */
+ msleep(100);
+ /* stop the tracing. */
+ tracing_stop();
+ /* check the trace buffer */
+ ret = trace_test_buffer(tr, &count);
+ trace->reset(tr);
+ tracing_start();
+
+ /* read & write operations - one each is performed on the dummy variable
+ * triggering two entries in the trace buffer
+ */
+ if (!ret && count != 2) {
+ printk(KERN_CONT "Ksym tracer startup test failed");
+ ret = -1;
+ }
+
+ return ret;
+}
+#endif /* CONFIG_KSYM_TRACER */
+
^ permalink raw reply [flat|nested] 40+ messages in thread
* [Patch 12/12] Reset bits in dr6 after the corresponding exception is handled
[not found] <20090511114422.133566343@prasadkr_t60p.in.ibm.com>
` (10 preceding siblings ...)
2009-05-11 11:55 ` [Patch 11/12] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v4 K.Prasad
@ 2009-05-11 11:55 ` K.Prasad
11 siblings, 0 replies; 40+ messages in thread
From: K.Prasad @ 2009-05-11 11:55 UTC (permalink / raw)
To: Alan Stern, Steven Rostedt, Frederic Weisbecker
Cc: Ingo Molnar, Linux Kernel Mailing List, Andrew Morton,
Benjamin Herrenschmidt, maneesh, Roland McGrath, Masami Hiramatsu
[-- Attachment #1: dr6_changes_12 --]
[-- Type: text/plain, Size: 4295 bytes --]
This patch resets the bit in dr6 after the corresponding exception is
handled in code.
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
---
arch/x86/kernel/hw_breakpoint.c | 13 +++++++++++--
arch/x86/kernel/kgdb.c | 6 ++++++
arch/x86/kernel/kprobes.c | 9 ++++++++-
arch/x86/kernel/traps.c | 4 ++--
arch/x86/mm/kmmio.c | 8 +++++++-
5 files changed, 34 insertions(+), 6 deletions(-)
Index: linux-2.6-tip.hbkpt/arch/x86/kernel/traps.c
===================================================================
--- linux-2.6-tip.hbkpt.orig/arch/x86/kernel/traps.c
+++ linux-2.6-tip.hbkpt/arch/x86/kernel/traps.c
@@ -550,8 +550,8 @@ dotraplinkage void __kprobes do_debug(st
/* Store the virtualized DR6 value */
tsk->thread.debugreg6 = dr6;
- if (notify_die(DIE_DEBUG, "debug", regs, dr6, error_code,
- SIGTRAP) == NOTIFY_STOP)
+ if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code,
+ SIGTRAP) == NOTIFY_STOP)
return;
/* It's safe to allow irq's after DR6 has been saved */
Index: linux-2.6-tip.hbkpt/arch/x86/kernel/kgdb.c
===================================================================
--- linux-2.6-tip.hbkpt.orig/arch/x86/kernel/kgdb.c
+++ linux-2.6-tip.hbkpt/arch/x86/kernel/kgdb.c
@@ -43,6 +43,7 @@
#include <linux/smp.h>
#include <linux/nmi.h>
+#include <asm/debugreg.h>
#include <asm/apicdef.h>
#include <asm/system.h>
@@ -433,6 +434,11 @@ single_step_cont(struct pt_regs *regs, s
"resuming...\n");
kgdb_arch_handle_exception(args->trapnr, args->signr,
args->err, "c", "", regs);
+ /*
+ * Reset the BS bit in dr6 (pointed by args->err) to
+ * denote completion of processing
+ */
+ (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
return NOTIFY_STOP;
}
Index: linux-2.6-tip.hbkpt/arch/x86/kernel/kprobes.c
===================================================================
--- linux-2.6-tip.hbkpt.orig/arch/x86/kernel/kprobes.c
+++ linux-2.6-tip.hbkpt/arch/x86/kernel/kprobes.c
@@ -54,6 +54,7 @@
#include <asm/pgtable.h>
#include <asm/uaccess.h>
#include <asm/alternative.h>
+#include <asm/debugreg.h>
void jprobe_return_end(void);
@@ -967,8 +968,14 @@ int __kprobes kprobe_exceptions_notify(s
ret = NOTIFY_STOP;
break;
case DIE_DEBUG:
- if (post_kprobe_handler(args->regs))
+ if (post_kprobe_handler(args->regs)) {
+ /*
+ * Reset the BS bit in dr6 (pointed by args->err) to
+ * denote completion of processing
+ */
+ (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
ret = NOTIFY_STOP;
+ }
break;
case DIE_GPF:
/*
Index: linux-2.6-tip.hbkpt/arch/x86/mm/kmmio.c
===================================================================
--- linux-2.6-tip.hbkpt.orig/arch/x86/mm/kmmio.c
+++ linux-2.6-tip.hbkpt/arch/x86/mm/kmmio.c
@@ -540,8 +540,14 @@ kmmio_die_notifier(struct notifier_block
struct die_args *arg = args;
if (val == DIE_DEBUG && (arg->err & DR_STEP))
- if (post_kmmio_handler(arg->err, arg->regs) == 1)
+ if (post_kmmio_handler(arg->err, arg->regs) == 1) {
+ /*
+ * Reset the BS bit in dr6 (pointed by args->err) to
+ * denote completion of processing
+ */
+ (*(unsigned long *)ERR_PTR(arg->err)) &= ~DR_STEP;
return NOTIFY_STOP;
+ }
return NOTIFY_DONE;
}
Index: linux-2.6-tip.hbkpt/arch/x86/kernel/hw_breakpoint.c
===================================================================
--- linux-2.6-tip.hbkpt.orig/arch/x86/kernel/hw_breakpoint.c
+++ linux-2.6-tip.hbkpt/arch/x86/kernel/hw_breakpoint.c
@@ -296,8 +296,12 @@ int __kprobes hw_breakpoint_handler(stru
{
int i, cpu, rc = NOTIFY_STOP;
struct hw_breakpoint *bp;
- /* The DR6 value is stored in args->err */
- unsigned long dr7, dr6 = args->err;
+ unsigned long dr7, dr6;
+ unsigned long *dr6_p;
+
+ /* The DR6 value is pointed by args->err */
+ dr6_p = (unsigned long *)ERR_PTR(args->err);
+ dr6 = *dr6_p;
/* Do an early return if no trap bits are set in DR6 */
if ((dr6 & DR_TRAP_BITS) == 0)
@@ -336,6 +340,11 @@ int __kprobes hw_breakpoint_handler(stru
rc = NOTIFY_DONE;
}
(bp->triggered)(bp, args->regs);
+ /*
+ * Reset the 'i'th TRAP bit in dr6 to denote completion of
+ * exception handling
+ */
+ (*dr6_p) &= ~(DR_TRAP0 << i);
}
if (dr6 & (~DR_TRAP_BITS))
rc = NOTIFY_DONE;
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [Patch 02/12] Introducing generic hardware breakpoint handler interfaces
2009-05-11 11:52 ` [Patch 02/12] Introducing generic hardware breakpoint handler interfaces K.Prasad
@ 2009-05-11 12:12 ` Bharata B Rao
2009-05-11 12:16 ` K.Prasad
2009-05-28 6:15 ` David Gibson
1 sibling, 1 reply; 40+ messages in thread
From: Bharata B Rao @ 2009-05-11 12:12 UTC (permalink / raw)
To: K.Prasad
Cc: Alan Stern, Steven Rostedt, Frederic Weisbecker, Ingo Molnar,
Linux Kernel Mailing List, Andrew Morton, Benjamin Herrenschmidt,
maneesh, Roland McGrath, Masami Hiramatsu
On Mon, May 11, 2009 at 5:22 PM, K.Prasad <prasad@linux.vnet.ibm.com> wrote:
> This patch introduces the generic Hardware Breakpoint interfaces for both user
> and kernel space requests.
>
> Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
> Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
> ---
> arch/Kconfig | 4
> kernel/Makefile | 1
> kernel/hw_breakpoint.c | 367 +++++++++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 372 insertions(+)
>
> Index: arch/Kconfig
> ===================================================================
> --- arch/Kconfig.orig
> +++ arch/Kconfig
> @@ -112,3 +112,7 @@ config HAVE_DMA_API_DEBUG
>
> config HAVE_DEFAULT_NO_SPIN_MUTEXES
> bool
> +
> +config HAVE_HW_BREAKPOINT
> + bool
> +
> Index: kernel/Makefile
> ===================================================================
> --- kernel/Makefile.orig
> +++ kernel/Makefile
> @@ -97,6 +97,7 @@ obj-$(CONFIG_X86_DS) += trace/
> obj-$(CONFIG_SMP) += sched_cpupri.o
> obj-$(CONFIG_SLOW_WORK) += slow-work.o
> obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o
> +obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
>
> ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
> # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
> Index: kernel/hw_breakpoint.c
> ===================================================================
> --- /dev/null
> +++ kernel/hw_breakpoint.c
> @@ -0,0 +1,367 @@
> +/*
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
> + *
> + * Copyright (C) 2007 Alan Stern
> + * Copyright (C) IBM Corporation, 2009
> + */
> +
> +/*
> + * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
> + * using the CPU's debug registers.
> + *
> + * This file contains the arch-independent routines. It is not meant
> + * to be compiled as a standalone source file; rather it should be
> + * #include'd by the arch-specific implementation.
That's no longer true, right ?
Regards,
Bharata.
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [Patch 02/12] Introducing generic hardware breakpoint handler interfaces
2009-05-11 12:12 ` Bharata B Rao
@ 2009-05-11 12:16 ` K.Prasad
0 siblings, 0 replies; 40+ messages in thread
From: K.Prasad @ 2009-05-11 12:16 UTC (permalink / raw)
To: Bharata B Rao
Cc: Alan Stern, Steven Rostedt, Frederic Weisbecker, Ingo Molnar,
Linux Kernel Mailing List, Andrew Morton, Benjamin Herrenschmidt,
maneesh, Roland McGrath, Masami Hiramatsu
On Mon, May 11, 2009 at 05:42:22PM +0530, Bharata B Rao wrote:
> On Mon, May 11, 2009 at 5:22 PM, K.Prasad <prasad@linux.vnet.ibm.com> wrote:
> > Index: kernel/hw_breakpoint.c
> > ===================================================================
> > --- /dev/null
> > +++ kernel/hw_breakpoint.c
> > @@ -0,0 +1,367 @@
> > +/*
> > + * This program is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License as published by
> > + * the Free Software Foundation; either version 2 of the License, or
> > + * (at your option) any later version.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> > + * GNU General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU General Public License
> > + * along with this program; if not, write to the Free Software
> > + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
> > + *
> > + * Copyright (C) 2007 Alan Stern
> > + * Copyright (C) IBM Corporation, 2009
> > + */
> > +
> > +/*
> > + * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
> > + * using the CPU's debug registers.
> > + *
> > + * This file contains the arch-independent routines. It is not meant
> > + * to be compiled as a standalone source file; rather it should be
> > + * #include'd by the arch-specific implementation.
>
> That's no longer true, right ?
>
> Regards,
> Bharata.
Correct. The comments are stale and should have been cleaned-up. I will
do the same during the next submission of the patchset.
Thanks,
K.Prasad
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [Patch 11/12] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v4
2009-05-11 11:55 ` [Patch 11/12] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v4 K.Prasad
@ 2009-05-11 22:14 ` Frederic Weisbecker
2009-05-12 14:19 ` [Patch 11/12] ftrace plugin for kernel symbol tracing using HWBreakpoint " K.Prasad
0 siblings, 1 reply; 40+ messages in thread
From: Frederic Weisbecker @ 2009-05-11 22:14 UTC (permalink / raw)
To: K.Prasad
Cc: Alan Stern, Steven Rostedt, Ingo Molnar,
Linux Kernel Mailing List, Andrew Morton, Benjamin Herrenschmidt,
maneesh, Roland McGrath, Masami Hiramatsu
On Mon, May 11, 2009 at 05:25:02PM +0530, K.Prasad wrote:
> This patch adds an ftrace plugin to detect and profile memory access over kernel
> variables. It uses HW Breakpoint interfaces to 'watch memory addresses.
>
> Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
> ---
> kernel/trace/Kconfig | 21 +
> kernel/trace/Makefile | 1
> kernel/trace/trace.h | 20 +
> kernel/trace/trace_ksym.c | 557 ++++++++++++++++++++++++++++++++++++++++++
> kernel/trace/trace_selftest.c | 37 ++
> 5 files changed, 636 insertions(+)
>
> Index: kernel/trace/Kconfig
> ===================================================================
> --- kernel/trace/Kconfig.orig
> +++ kernel/trace/Kconfig
> @@ -308,6 +308,27 @@ config POWER_TRACER
> power management decisions, specifically the C-state and P-state
> behavior.
>
> +config KSYM_TRACER
> + bool "Trace read and write access on kernel memory locations"
> + depends on HAVE_HW_BREAKPOINT
> + select TRACING
> + help
> + This tracer helps find read and write operations on any given kernel
> + symbol i.e. /proc/kallsyms.
> +
> +config PROFILE_KSYM_TRACER
> + bool "Profile all kernel memory accesses on 'watched' variables"
> + depends on KSYM_TRACER
> + help
> + This tracer profiles kernel accesses on variables watched through the
> + ksym tracer ftrace plugin. Depending upon the hardware, all read
> + and write operations on kernel variables can be monitored for
> + accesses.
> +
> + The results will be displayed in:
> + /debugfs/tracing/profile_ksym
> +
> + Say N if unsure.
>
> config STACK_TRACER
> bool "Trace max stack"
> Index: kernel/trace/Makefile
> ===================================================================
> --- kernel/trace/Makefile.orig
> +++ kernel/trace/Makefile
> @@ -50,5 +50,6 @@ obj-$(CONFIG_EVENT_TRACING) += trace_exp
> obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
> obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
> obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
> +obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
>
> libftrace-y := ftrace.o
> Index: kernel/trace/trace.h
> ===================================================================
> --- kernel/trace/trace.h.orig
> +++ kernel/trace/trace.h
> @@ -15,6 +15,10 @@
> #include <linux/trace_seq.h>
> #include <linux/ftrace_event.h>
>
> +#ifdef CONFIG_KSYM_TRACER
> +#include <asm/hw_breakpoint.h>
> +#endif
> +
> enum trace_type {
> __TRACE_FIRST_TYPE = 0,
>
> @@ -40,6 +44,7 @@ enum trace_type {
> TRACE_KMEM_FREE,
> TRACE_POWER,
> TRACE_BLK,
> + TRACE_KSYM,
>
> __TRACE_LAST_TYPE,
> };
> @@ -207,6 +212,18 @@ struct syscall_trace_exit {
> unsigned long ret;
> };
>
> +struct trace_ksym {
> + struct trace_entry ent;
> + struct hw_breakpoint *ksym_hbp;
> + unsigned long ksym_addr;
> + unsigned long ip;
> +#ifdef CONFIG_PROFILE_KSYM_TRACER
> + unsigned long counter;
> +#endif
> + struct hlist_node ksym_hlist;
> + char ksym_name[KSYM_NAME_LEN];
> + char p_name[TASK_COMM_LEN];
> +};
>
> /*
> * trace_flag_type is an enumeration that holds different
> @@ -323,6 +340,7 @@ extern void __ftrace_bad_type(void);
> TRACE_SYSCALL_ENTER); \
> IF_ASSIGN(var, ent, struct syscall_trace_exit, \
> TRACE_SYSCALL_EXIT); \
> + IF_ASSIGN(var, ent, struct trace_ksym, TRACE_KSYM); \
> __ftrace_bad_type(); \
> } while (0)
>
> @@ -538,6 +556,8 @@ extern int trace_selftest_startup_branch
> struct trace_array *tr);
> extern int trace_selftest_startup_hw_branches(struct tracer *trace,
> struct trace_array *tr);
> +extern int trace_selftest_startup_ksym(struct tracer *trace,
> + struct trace_array *tr);
> #endif /* CONFIG_FTRACE_STARTUP_TEST */
>
> extern void *head_page(struct trace_array_cpu *data);
> Index: kernel/trace/trace_ksym.c
> ===================================================================
> --- /dev/null
> +++ kernel/trace/trace_ksym.c
> @@ -0,0 +1,557 @@
> +/*
> + * trace_ksym.c - Kernel Symbol Tracer
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
> + *
> + * Copyright (C) IBM Corporation, 2009
> + */
> +
> +#include <linux/kallsyms.h>
> +#include <linux/uaccess.h>
> +#include <linux/debugfs.h>
> +#include <linux/ftrace.h>
> +#include <linux/module.h>
> +#include <linux/jhash.h>
> +#include <linux/fs.h>
> +
> +#include "trace_output.h"
> +#include "trace_stat.h"
> +#include "trace.h"
> +
> +/* For now, let us restrict the no. of symbols traced simultaneously to number
> + * of available hardware breakpoint registers.
> + */
> +#define KSYM_TRACER_MAX HB_NUM
> +
> +#define KSYM_TRACER_OP_LEN 3 /* rw- */
> +#define KSYM_FILTER_ENTRY_LEN (KSYM_NAME_LEN + KSYM_TRACER_OP_LEN + 1)
> +
> +#ifdef CONFIG_FTRACE_SELFTEST
> +
> +static int ksym_selftest_dummy;
> +#define KSYM_SELFTEST_ENTRY "ksym_selftest_dummy"
> +
> +#endif /* CONFIG_FTRACE_SELFTEST */
> +
> +static struct trace_array *ksym_trace_array;
> +
> +static unsigned int ksym_filter_entry_count;
> +static unsigned int ksym_tracing_enabled;
> +
> +static HLIST_HEAD(ksym_filter_head);
> +
> +#ifdef CONFIG_PROFILE_KSYM_TRACER
> +
> +#define MAX_UL_INT 0xffffffff
> +
> +static DEFINE_MUTEX(ksym_tracer_mutex);
> +
> +void ksym_collect_stats(unsigned long hbp_hit_addr)
> +{
> + struct hlist_node *node;
> + struct trace_ksym *entry;
> +
> + rcu_read_lock();
> + hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
> + if ((entry->ksym_addr == hbp_hit_addr) &&
> + (entry->counter <= MAX_UL_INT)) {
> + entry->counter++;
> + break;
> + }
> + }
> + rcu_read_unlock();
rcu looks a good idea to maintain your list.
> +}
> +#endif /* CONFIG_PROFILE_KSYM_TRACER */
> +
> +void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs)
> +{
> + struct ring_buffer_event *event;
> + struct trace_array *tr;
> + struct trace_ksym *entry;
> + int pc;
> +
> + if (!ksym_tracing_enabled)
> + return;
> +
> + tr = ksym_trace_array;
> + pc = preempt_count();
> +
> + event = trace_buffer_lock_reserve(tr, TRACE_KSYM,
> + sizeof(*entry), 0, pc);
> + if (!event)
> + return;
> +
> + entry = ring_buffer_event_data(event);
> + strlcpy(entry->ksym_name, hbp->info.name, KSYM_SYMBOL_LEN);
> + entry->ksym_hbp = hbp;
> + entry->ip = instruction_pointer(regs);
> + strlcpy(entry->p_name, current->comm, TASK_COMM_LEN);
> +#ifdef CONFIG_PROFILE_KSYM_TRACER
> + ksym_collect_stats(hbp->info.address);
> +#endif /* CONFIG_PROFILE_KSYM_TRACER */
> +
> + trace_buffer_unlock_commit(tr, event, 0, pc);
> +}
> +
> +/* Valid access types are represented as
> + *
> + * rw- : Set Read/Write Access Breakpoint
> + * -w- : Set Write Access Breakpoint
> + * --- : Clear Breakpoints
> + * --x : Set Execution Break points (Not available yet)
> + *
> + */
> +static int ksym_trace_get_access_type(char *access_str)
> +{
> + int pos, access = 0;
> +
> + for (pos = 0; pos < KSYM_TRACER_OP_LEN; pos++) {
> + switch (access_str[pos]) {
> + case 'r':
> + access += (pos == 0) ? 4 : -1;
> + break;
> + case 'w':
> + access += (pos == 1) ? 2 : -1;
> + break;
> + case '-':
> + break;
> + default:
> + return -EINVAL;
> + }
> + }
> +
> + switch (access) {
> + case 6:
> + access = HW_BREAKPOINT_RW;
> + break;
> + case 2:
> + access = HW_BREAKPOINT_WRITE;
> + break;
> + case 0:
> + access = 0;
> + }
> +
> + return access;
> +}
> +
> +/*
> + * There can be several possible malformed requests and we attempt to capture
> + * all of them. We enumerate some of the rules
> + * 1. We will not allow kernel symbols with ':' since it is used as a delimiter.
> + * i.e. multiple ':' symbols disallowed. Possible uses are of the form
> + * <module>:<ksym_name>:<op>.
> + * 2. No delimiter symbol ':' in the input string
> + * 3. Spurious operator symbols or symbols not in their respective positions
> + * 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file
> + * 5. Kernel symbol not a part of /proc/kallsyms
> + * 6. Duplicate requests
> + */
> +static int parse_ksym_trace_str(char *input_string, char **ksymname,
> + unsigned long *addr)
> +{
> + char *delimiter = ":";
> + int ret;
> +
> + ret = -EINVAL;
> + *ksymname = strsep(&input_string, delimiter);
> + *addr = kallsyms_lookup_name(*ksymname);
> +
> + /* Check for malformed request: (2), (1) and (5) */
> + if ((!input_string) ||
> + (strlen(input_string) != (KSYM_TRACER_OP_LEN + 1)) ||
> + (*addr == 0))
> + goto return_code;
> + ret = ksym_trace_get_access_type(input_string);
> +
> +return_code:
> + return ret;
> +}
> +
> +static int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
> +{
> + struct trace_ksym *entry;
> + int ret;
> +
> + if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
> + printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No"
> + " new requests for tracing can be accepted now.\n",
> + KSYM_TRACER_MAX);
> + return -ENOSPC;
> + }
> +
> + entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
> + if (!entry)
> + return -ENOMEM;
> +
> + entry->ksym_hbp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL);
> + if (!entry->ksym_hbp) {
> + kfree(entry);
> + return -ENOMEM;
> + }
> +
> + entry->ksym_hbp->info.name = ksymname;
> + entry->ksym_hbp->info.type = op;
> + entry->ksym_addr = entry->ksym_hbp->info.address = addr;
> +#ifdef CONFIG_X86
> + entry->ksym_hbp->info.len = HW_BREAKPOINT_LEN_4;
> +#endif
> + entry->ksym_hbp->triggered = (void *)ksym_hbp_handler;
> +
> + ret = register_kernel_hw_breakpoint(entry->ksym_hbp);
> + if (ret < 0) {
> + printk(KERN_INFO "ksym_tracer request failed. Try again"
> + " later!!\n");
> + kfree(entry->ksym_hbp);
> + kfree(entry);
> + return -EAGAIN;
> + }
> + hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
And then ksym_tracer_mutex protect concurrent writers.
> + ksym_filter_entry_count++;
> +
> + return 0;
> +}
> +
> +static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
> + size_t count, loff_t *ppos)
> +{
> + struct trace_ksym *entry;
> + struct hlist_node *node;
> + char buf[KSYM_FILTER_ENTRY_LEN * KSYM_TRACER_MAX];
> + ssize_t ret, cnt = 0;
> +
> + mutex_lock(&ksym_tracer_mutex);
> +
> + hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
And here you don't use the rcu version.
I guess it's fine since you're protected by the writer lock...
> + cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt, "%s:",
> + entry->ksym_hbp->info.name);
> + if (entry->ksym_hbp->info.type == HW_BREAKPOINT_WRITE)
> + cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt,
> + "-w-\n");
> + else if (entry->ksym_hbp->info.type == HW_BREAKPOINT_RW)
> + cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt,
> + "rw-\n");
> + }
> + ret = simple_read_from_buffer(ubuf, count, ppos, buf, strlen(buf));
> + mutex_unlock(&ksym_tracer_mutex);
> +
> + return ret;
> +}
> +
> +static ssize_t ksym_trace_filter_write(struct file *file,
> + const char __user *buffer,
> + size_t count, loff_t *ppos)
> +{
> + struct trace_ksym *entry;
> + struct hlist_node *node;
> + char *input_string, *ksymname = NULL;
> + unsigned long ksym_addr = 0;
> + int ret, op, changed = 0;
> +
> + /* Ignore echo "" > ksym_trace_filter */
> + if (count == 0)
> + return 0;
> +
> + input_string = kzalloc(count, GFP_KERNEL);
> + if (!input_string)
> + return -ENOMEM;
> +
> + if (copy_from_user(input_string, buffer, count)) {
> + kfree(input_string);
> + return -EFAULT;
> + }
> +
> + ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
> + if (ret < 0) {
> + kfree(input_string);
> + return ret;
> + }
> +
> + mutex_lock(&ksym_tracer_mutex);
> +
> + ret = -EINVAL;
> + hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
Same here, ok...
> + if (entry->ksym_addr == ksym_addr) {
> + /* Check for malformed request: (6) */
> + if (entry->ksym_hbp->info.type != op)
> + changed = 1;
> + else
> + goto err_ret;
> + break;
> + }
> + }
> + if (changed) {
> + unregister_kernel_hw_breakpoint(entry->ksym_hbp);
> + entry->ksym_hbp->info.type = op;
> + if (op > 0) {
> + ret = register_kernel_hw_breakpoint(entry->ksym_hbp);
> + if (ret == 0) {
> + ret = count;
> + goto unlock_ret_path;
> + }
> + }
> + ksym_filter_entry_count--;
> + hlist_del_rcu(&(entry->ksym_hlist));
> + synchronize_rcu();
> + kfree(entry->ksym_hbp);
> + kfree(entry);
> + ret = count;
> + goto err_ret;
> + } else {
> + /* Check for malformed request: (4) */
> + if (op == 0)
> + goto err_ret;
> + ret = process_new_ksym_entry(ksymname, op, ksym_addr);
> + if (ret)
> + goto err_ret;
> + }
> + ret = count;
> + goto unlock_ret_path;
> +
> +err_ret:
> + kfree(input_string);
> +
> +unlock_ret_path:
> + mutex_unlock(&ksym_tracer_mutex);
> + return ret;
> +}
> +
> +static const struct file_operations ksym_tracing_fops = {
> + .open = tracing_open_generic,
> + .read = ksym_trace_filter_read,
> + .write = ksym_trace_filter_write,
> +};
> +
> +static void ksym_trace_reset(struct trace_array *tr)
> +{
> + struct trace_ksym *entry;
> + struct hlist_node *node, *node1;
> +
> + ksym_tracing_enabled = 0;
> +
> + mutex_lock(&ksym_tracer_mutex);
> + hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
> + ksym_hlist) {
> + unregister_kernel_hw_breakpoint(entry->ksym_hbp);
> + ksym_filter_entry_count--;
> + hlist_del_rcu(&(entry->ksym_hlist));
> + synchronize_rcu();
> + /* Free the 'input_string' only if reset
> + * after startup self-test
> + */
> +#ifdef CONFIG_FTRACE_SELFTEST
> + if (strncmp(entry->ksym_hbp->info.name, KSYM_SELFTEST_ENTRY,
> + strlen(KSYM_SELFTEST_ENTRY)) != 0)
> +#endif /* CONFIG_FTRACE_SELFTEST*/
> + kfree(entry->ksym_hbp->info.name);
> + kfree(entry->ksym_hbp);
> + kfree(entry);
> + }
> + mutex_unlock(&ksym_tracer_mutex);
> +}
> +
> +static int ksym_trace_init(struct trace_array *tr)
> +{
> + int cpu, ret = 0;
> +
> + for_each_online_cpu(cpu)
> + tracing_reset(tr, cpu);
> +
> + ksym_tracing_enabled = 1;
> + ksym_trace_array = tr;
> +
> +#ifdef CONFIG_FTRACE_SELFTEST
> + /* Check if we are re-entering self-test code during initialisation */
> + if (ksym_selftest_dummy)
> + goto ret_path;
> +
> + ksym_selftest_dummy = 0;
> +
> + /* Register the read-write tracing request */
> + ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY, HW_BREAKPOINT_RW,
> + (unsigned long)(&ksym_selftest_dummy));
> +
> + if (ret < 0) {
> + printk(KERN_CONT "ksym_trace read-write startup test failed\n");
> + goto ret_path;
> + }
> + /* Perform a read and a write operation over the dummy variable to
> + * trigger the tracer
> + */
> + if (ksym_selftest_dummy == 0)
> + ksym_selftest_dummy++;
> +ret_path:
> +#endif /* CONFIG_FTRACE_SELFTEST */
It means that each time your tracer is selected, it will perform a selftest.
I think we only need this selftest once during the boot.
I would rather see that in the real selftest callback (trace_selftest_startup_kysm).
> +
> + return ret;
> +}
> +
> +static void ksym_trace_print_header(struct seq_file *m)
> +{
> +
> + seq_puts(m,
> + "# TASK-PID CPU# Symbol Type "
> + "Function \n");
> + seq_puts(m,
> + "# | | | | "
> + "| \n");
> +}
> +
> +static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
> +{
> + struct trace_entry *entry = iter->ent;
> + struct trace_seq *s = &iter->seq;
> + struct trace_ksym *field;
> + char str[KSYM_SYMBOL_LEN];
> + int ret;
> +
> + if (entry->type != TRACE_KSYM)
> + return TRACE_TYPE_UNHANDLED;
> +
> + trace_assign_type(field, entry);
> +
> + ret = trace_seq_printf(s, "%-15s %-5d %-3d %-20s ", field->p_name,
> + entry->pid, iter->cpu, field->ksym_name);
> + if (!ret)
> + return TRACE_TYPE_PARTIAL_LINE;
> +
> + switch (field->ksym_hbp->info.type) {
> + case HW_BREAKPOINT_WRITE:
> + ret = trace_seq_printf(s, " W ");
> + break;
> + case HW_BREAKPOINT_RW:
> + ret = trace_seq_printf(s, " RW ");
> + break;
> + default:
> + return TRACE_TYPE_PARTIAL_LINE;
> + }
> +
> + if (!ret)
> + return TRACE_TYPE_PARTIAL_LINE;
> +
> + sprint_symbol(str, field->ip);
> + ret = trace_seq_printf(s, "%-20s\n", str);
> + if (!ret)
> + return TRACE_TYPE_PARTIAL_LINE;
> +
> + return TRACE_TYPE_HANDLED;
> +}
> +
> +struct tracer ksym_tracer __read_mostly =
> +{
> + .name = "ksym_tracer",
> + .init = ksym_trace_init,
> + .reset = ksym_trace_reset,
> +#ifdef CONFIG_FTRACE_SELFTEST
> + .selftest = trace_selftest_startup_ksym,
> +#endif
> + .print_header = ksym_trace_print_header,
> + .print_line = ksym_trace_output
> +};
> +
> +__init static int init_ksym_trace(void)
> +{
> + struct dentry *d_tracer;
> + struct dentry *entry;
> +
> + d_tracer = tracing_init_dentry();
> + ksym_filter_entry_count = 0;
> +
> + entry = debugfs_create_file("ksym_trace_filter", 0666, d_tracer,
Still writeable for everyone?
Thanks,
Frederic.
> + NULL, &ksym_tracing_fops);
> + if (!entry)
> + pr_warning("Could not create debugfs "
> + "'ksym_trace_filter' file\n");
> +
> + return register_tracer(&ksym_tracer);
> +}
> +device_initcall(init_ksym_trace);
> +
> +
> +#ifdef CONFIG_PROFILE_KSYM_TRACER
> +static int ksym_tracer_stat_headers(struct seq_file *m)
> +{
> + seq_printf(m, " Access type ");
> + seq_printf(m, " Symbol Counter \n");
> + return 0;
> +}
> +
> +static int ksym_tracer_stat_show(struct seq_file *m, void *v)
> +{
> + struct hlist_node *stat = v;
> + struct trace_ksym *entry;
> + int access_type = 0;
> + char fn_name[KSYM_NAME_LEN];
> +
> + entry = hlist_entry(stat, struct trace_ksym, ksym_hlist);
> +
> + if (entry->ksym_hbp)
> + access_type = entry->ksym_hbp->info.type;
> +
> + switch (access_type) {
> + case HW_BREAKPOINT_WRITE:
> + seq_printf(m, " W ");
> + break;
> + case HW_BREAKPOINT_RW:
> + seq_printf(m, " RW ");
> + break;
> + default:
> + seq_printf(m, " NA ");
> + }
> +
> + if (lookup_symbol_name(entry->ksym_addr, fn_name) >= 0)
> + seq_printf(m, " %s ", fn_name);
> + else
> + seq_printf(m, " <NA> ");
> +
> + seq_printf(m, "%15lu\n", entry->counter);
> + return 0;
> +}
> +
> +static void *ksym_tracer_stat_start(struct tracer_stat *trace)
> +{
> + return &(ksym_filter_head.first);
> +}
> +
> +static void *
> +ksym_tracer_stat_next(void *v, int idx)
> +{
> + struct hlist_node *stat = v;
> +
> + return stat->next;
> +}
> +
> +static struct tracer_stat ksym_tracer_stats = {
> + .name = "ksym_tracer",
> + .stat_start = ksym_tracer_stat_start,
> + .stat_next = ksym_tracer_stat_next,
> + .stat_headers = ksym_tracer_stat_headers,
> + .stat_show = ksym_tracer_stat_show
> +};
> +
> +__init static int ksym_tracer_stat_init(void)
> +{
> + int ret;
> +
> + ret = register_stat_tracer(&ksym_tracer_stats);
> + if (ret) {
> + printk(KERN_WARNING "Warning: could not register "
> + "ksym tracer stats\n");
> + return 1;
> + }
> +
> + return 0;
> +}
> +fs_initcall(ksym_tracer_stat_init);
> +#endif /* CONFIG_PROFILE_KSYM_TRACER */
> Index: kernel/trace/trace_selftest.c
> ===================================================================
> --- kernel/trace/trace_selftest.c.orig
> +++ kernel/trace/trace_selftest.c
> @@ -17,6 +17,7 @@ static inline int trace_valid_entry(stru
> case TRACE_GRAPH_ENT:
> case TRACE_GRAPH_RET:
> case TRACE_HW_BRANCHES:
> + case TRACE_KSYM:
> return 1;
> }
> return 0;
> @@ -807,3 +808,39 @@ trace_selftest_startup_hw_branches(struc
> return ret;
> }
> #endif /* CONFIG_HW_BRANCH_TRACER */
> +
> +#ifdef CONFIG_KSYM_TRACER
> +int
> +trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
> +{
> + unsigned long count;
> + int ret;
> +
> + /* start the tracing */
> + ret = tracer_init(trace, tr);
> + if (ret) {
> + warn_failed_init_tracer(trace, ret);
> + return ret;
> + }
> +
> + /* Sleep for a 1/10 of a second */
> + msleep(100);
> + /* stop the tracing. */
> + tracing_stop();
> + /* check the trace buffer */
> + ret = trace_test_buffer(tr, &count);
> + trace->reset(tr);
> + tracing_start();
> +
> + /* read & write operations - one each is performed on the dummy variable
> + * triggering two entries in the trace buffer
> + */
> + if (!ret && count != 2) {
> + printk(KERN_CONT "Ksym tracer startup test failed");
> + ret = -1;
> + }
> +
> + return ret;
> +}
> +#endif /* CONFIG_KSYM_TRACER */
> +
>
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [Patch 11/12] ftrace plugin for kernel symbol tracing using HWBreakpoint interfaces - v4
2009-05-11 22:14 ` Frederic Weisbecker
@ 2009-05-12 14:19 ` K.Prasad
2009-05-12 15:15 ` Frederic Weisbecker
0 siblings, 1 reply; 40+ messages in thread
From: K.Prasad @ 2009-05-12 14:19 UTC (permalink / raw)
To: Frederic Weisbecker
Cc: Alan Stern, Steven Rostedt, Ingo Molnar,
Linux Kernel Mailing List, Andrew Morton, Benjamin Herrenschmidt,
maneesh, Roland McGrath, Masami Hiramatsu
On Tue, May 12, 2009 at 12:14:29AM +0200, Frederic Weisbecker wrote:
> On Mon, May 11, 2009 at 05:25:02PM +0530, K.Prasad wrote:
> > This patch adds an ftrace plugin to detect and profile memory access over kernel
> > variables. It uses HW Breakpoint interfaces to 'watch memory addresses.
> >
> > +void ksym_collect_stats(unsigned long hbp_hit_addr)
> > +{
> > + struct hlist_node *node;
> > + struct trace_ksym *entry;
> > +
> > + rcu_read_lock();
> > + hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
> > + if ((entry->ksym_addr == hbp_hit_addr) &&
> > + (entry->counter <= MAX_UL_INT)) {
> > + entry->counter++;
> > + break;
> > + }
> > + }
> > + rcu_read_unlock();
>
>
>
> rcu looks a good idea to maintain your list.
>
True, and there weren't many choices either. The earlier implementations
with mutex/spin_lock turned out to be incorrect in their own way (while
mutexes cannot be used inside exception handler context, spinlocks led to
potential circular dependancy).
> > +static int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
> > +{
> > + struct trace_ksym *entry;
> > + int ret;
> > +
> > + if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
> > + printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No"
> > + " new requests for tracing can be accepted now.\n",
> > + KSYM_TRACER_MAX);
> > + return -ENOSPC;
> > + }
> > +
> > + entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
> > + if (!entry)
> > + return -ENOMEM;
> > +
> > + entry->ksym_hbp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL);
> > + if (!entry->ksym_hbp) {
> > + kfree(entry);
> > + return -ENOMEM;
> > + }
> > +
> > + entry->ksym_hbp->info.name = ksymname;
> > + entry->ksym_hbp->info.type = op;
> > + entry->ksym_addr = entry->ksym_hbp->info.address = addr;
> > +#ifdef CONFIG_X86
> > + entry->ksym_hbp->info.len = HW_BREAKPOINT_LEN_4;
> > +#endif
> > + entry->ksym_hbp->triggered = (void *)ksym_hbp_handler;
> > +
> > + ret = register_kernel_hw_breakpoint(entry->ksym_hbp);
> > + if (ret < 0) {
> > + printk(KERN_INFO "ksym_tracer request failed. Try again"
> > + " later!!\n");
> > + kfree(entry->ksym_hbp);
> > + kfree(entry);
> > + return -EAGAIN;
> > + }
> > + hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
>
>
> And then ksym_tracer_mutex protect concurrent writers.
>
>
Yes, they synchronise read/write operations over the list pointed by
ksym_filter_head.
> > + ksym_filter_entry_count++;
> > +
> > + return 0;
> > +}
> > +
> > +static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
> > + size_t count, loff_t *ppos)
> > +{
> > + struct trace_ksym *entry;
> > + struct hlist_node *node;
> > + char buf[KSYM_FILTER_ENTRY_LEN * KSYM_TRACER_MAX];
> > + ssize_t ret, cnt = 0;
> > +
> > + mutex_lock(&ksym_tracer_mutex);
> > +
> > + hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
>
>
>
> And here you don't use the rcu version.
> I guess it's fine since you're protected by the writer lock...
>
I couldn't use RCU here because the simple_read_from_buffer() is
inatomic, and hence the mutex.
> > + cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt, "%s:",
> > + entry->ksym_hbp->info.name);
> > + if (entry->ksym_hbp->info.type == HW_BREAKPOINT_WRITE)
> > + cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt,
> > + "-w-\n");
> > + else if (entry->ksym_hbp->info.type == HW_BREAKPOINT_RW)
> > + cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt,
> > + "rw-\n");
> > + }
> > + ret = simple_read_from_buffer(ubuf, count, ppos, buf, strlen(buf));
> > + mutex_unlock(&ksym_tracer_mutex);
> > +
> > + return ret;
> > +}
> > +
> > +static ssize_t ksym_trace_filter_write(struct file *file,
> > + const char __user *buffer,
> > + size_t count, loff_t *ppos)
> > +{
> > + struct trace_ksym *entry;
> > + struct hlist_node *node;
> > + char *input_string, *ksymname = NULL;
> > + unsigned long ksym_addr = 0;
> > + int ret, op, changed = 0;
> > +
> > + /* Ignore echo "" > ksym_trace_filter */
> > + if (count == 0)
> > + return 0;
> > +
> > + input_string = kzalloc(count, GFP_KERNEL);
> > + if (!input_string)
> > + return -ENOMEM;
> > +
> > + if (copy_from_user(input_string, buffer, count)) {
> > + kfree(input_string);
> > + return -EFAULT;
> > + }
> > +
> > + ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
> > + if (ret < 0) {
> > + kfree(input_string);
> > + return ret;
> > + }
> > +
> > + mutex_lock(&ksym_tracer_mutex);
> > +
> > + ret = -EINVAL;
> > + hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
>
>
>
> Same here, ok...
>
>
> > +static int ksym_trace_init(struct trace_array *tr)
> > +{
> > + int cpu, ret = 0;
> > +
> > + for_each_online_cpu(cpu)
> > + tracing_reset(tr, cpu);
> > +
> > + ksym_tracing_enabled = 1;
> > + ksym_trace_array = tr;
> > +
> > +#ifdef CONFIG_FTRACE_SELFTEST
> > + /* Check if we are re-entering self-test code during initialisation */
> > + if (ksym_selftest_dummy)
> > + goto ret_path;
> > +
> > + ksym_selftest_dummy = 0;
> > +
> > + /* Register the read-write tracing request */
> > + ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY, HW_BREAKPOINT_RW,
> > + (unsigned long)(&ksym_selftest_dummy));
> > +
> > + if (ret < 0) {
> > + printk(KERN_CONT "ksym_trace read-write startup test failed\n");
> > + goto ret_path;
> > + }
> > + /* Perform a read and a write operation over the dummy variable to
> > + * trigger the tracer
> > + */
> > + if (ksym_selftest_dummy == 0)
> > + ksym_selftest_dummy++;
> > +ret_path:
> > +#endif /* CONFIG_FTRACE_SELFTEST */
>
>
> It means that each time your tracer is selected, it will perform a selftest.
> I think we only need this selftest once during the boot.
> I would rather see that in the real selftest callback (trace_selftest_startup_kysm).
>
> > + if (ksym_selftest_dummy)
> > + goto ret_path;
The above check will help prevent a re-run of the test everytime init is
executed.
A part of the selftest was kept in trace_ksym.c (and hence in
ksym_trace_init()) in order to use functions local to this file, such as
process_new_ksym_entry().
> > +__init static int init_ksym_trace(void)
> > +{
> > + struct dentry *d_tracer;
> > + struct dentry *entry;
> > +
> > + d_tracer = tracing_init_dentry();
> > + ksym_filter_entry_count = 0;
> > +
> > + entry = debugfs_create_file("ksym_trace_filter", 0666, d_tracer,
>
>
>
> Still writeable for everyone?
>
> Thanks,
> Frederic.
>
Looks like I missed the change! Please find the next patchset to contain
a '644' permission mode.
Thanks for reviewing the code.
-- K.Prasad
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [Patch 11/12] ftrace plugin for kernel symbol tracing using HWBreakpoint interfaces - v4
2009-05-12 14:19 ` [Patch 11/12] ftrace plugin for kernel symbol tracing using HWBreakpoint " K.Prasad
@ 2009-05-12 15:15 ` Frederic Weisbecker
2009-05-12 20:02 ` [Patch 11/12] ftrace plugin for kernel symbol tracing usingHWBreakpoint " K.Prasad
0 siblings, 1 reply; 40+ messages in thread
From: Frederic Weisbecker @ 2009-05-12 15:15 UTC (permalink / raw)
To: K.Prasad
Cc: Alan Stern, Steven Rostedt, Ingo Molnar,
Linux Kernel Mailing List, Andrew Morton, Benjamin Herrenschmidt,
maneesh, Roland McGrath, Masami Hiramatsu
On Tue, May 12, 2009 at 07:49:44PM +0530, K.Prasad wrote:
> On Tue, May 12, 2009 at 12:14:29AM +0200, Frederic Weisbecker wrote:
> > On Mon, May 11, 2009 at 05:25:02PM +0530, K.Prasad wrote:
> > > This patch adds an ftrace plugin to detect and profile memory access over kernel
> > > variables. It uses HW Breakpoint interfaces to 'watch memory addresses.
> > >
> > > +void ksym_collect_stats(unsigned long hbp_hit_addr)
> > > +{
> > > + struct hlist_node *node;
> > > + struct trace_ksym *entry;
> > > +
> > > + rcu_read_lock();
> > > + hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
> > > + if ((entry->ksym_addr == hbp_hit_addr) &&
> > > + (entry->counter <= MAX_UL_INT)) {
> > > + entry->counter++;
> > > + break;
> > > + }
> > > + }
> > > + rcu_read_unlock();
> >
> >
> >
> > rcu looks a good idea to maintain your list.
> >
>
> True, and there weren't many choices either. The earlier implementations
> with mutex/spin_lock turned out to be incorrect in their own way (while
> mutexes cannot be used inside exception handler context, spinlocks led to
> potential circular dependancy).
>
> > > +static int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
> > > +{
> > > + struct trace_ksym *entry;
> > > + int ret;
> > > +
> > > + if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
> > > + printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No"
> > > + " new requests for tracing can be accepted now.\n",
> > > + KSYM_TRACER_MAX);
> > > + return -ENOSPC;
> > > + }
> > > +
> > > + entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
> > > + if (!entry)
> > > + return -ENOMEM;
> > > +
> > > + entry->ksym_hbp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL);
> > > + if (!entry->ksym_hbp) {
> > > + kfree(entry);
> > > + return -ENOMEM;
> > > + }
> > > +
> > > + entry->ksym_hbp->info.name = ksymname;
> > > + entry->ksym_hbp->info.type = op;
> > > + entry->ksym_addr = entry->ksym_hbp->info.address = addr;
> > > +#ifdef CONFIG_X86
> > > + entry->ksym_hbp->info.len = HW_BREAKPOINT_LEN_4;
> > > +#endif
> > > + entry->ksym_hbp->triggered = (void *)ksym_hbp_handler;
> > > +
> > > + ret = register_kernel_hw_breakpoint(entry->ksym_hbp);
> > > + if (ret < 0) {
> > > + printk(KERN_INFO "ksym_tracer request failed. Try again"
> > > + " later!!\n");
> > > + kfree(entry->ksym_hbp);
> > > + kfree(entry);
> > > + return -EAGAIN;
> > > + }
> > > + hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
> >
> >
> > And then ksym_tracer_mutex protect concurrent writers.
> >
> >
>
> Yes, they synchronise read/write operations over the list pointed by
> ksym_filter_head.
>
> > > + ksym_filter_entry_count++;
> > > +
> > > + return 0;
> > > +}
> > > +
> > > +static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
> > > + size_t count, loff_t *ppos)
> > > +{
> > > + struct trace_ksym *entry;
> > > + struct hlist_node *node;
> > > + char buf[KSYM_FILTER_ENTRY_LEN * KSYM_TRACER_MAX];
> > > + ssize_t ret, cnt = 0;
> > > +
> > > + mutex_lock(&ksym_tracer_mutex);
> > > +
> > > + hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
> >
> >
> >
> > And here you don't use the rcu version.
> > I guess it's fine since you're protected by the writer lock...
> >
>
> I couldn't use RCU here because the simple_read_from_buffer() is
> inatomic, and hence the mutex.
Ah indeed, it might sleep, you're right.
> > > + cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt, "%s:",
> > > + entry->ksym_hbp->info.name);
> > > + if (entry->ksym_hbp->info.type == HW_BREAKPOINT_WRITE)
> > > + cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt,
> > > + "-w-\n");
> > > + else if (entry->ksym_hbp->info.type == HW_BREAKPOINT_RW)
> > > + cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt,
> > > + "rw-\n");
> > > + }
> > > + ret = simple_read_from_buffer(ubuf, count, ppos, buf, strlen(buf));
> > > + mutex_unlock(&ksym_tracer_mutex);
> > > +
> > > + return ret;
> > > +}
> > > +
> > > +static ssize_t ksym_trace_filter_write(struct file *file,
> > > + const char __user *buffer,
> > > + size_t count, loff_t *ppos)
> > > +{
> > > + struct trace_ksym *entry;
> > > + struct hlist_node *node;
> > > + char *input_string, *ksymname = NULL;
> > > + unsigned long ksym_addr = 0;
> > > + int ret, op, changed = 0;
> > > +
> > > + /* Ignore echo "" > ksym_trace_filter */
> > > + if (count == 0)
> > > + return 0;
> > > +
> > > + input_string = kzalloc(count, GFP_KERNEL);
> > > + if (!input_string)
> > > + return -ENOMEM;
> > > +
> > > + if (copy_from_user(input_string, buffer, count)) {
> > > + kfree(input_string);
> > > + return -EFAULT;
> > > + }
> > > +
> > > + ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
> > > + if (ret < 0) {
> > > + kfree(input_string);
> > > + return ret;
> > > + }
> > > +
> > > + mutex_lock(&ksym_tracer_mutex);
> > > +
> > > + ret = -EINVAL;
> > > + hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
> >
> >
> >
> > Same here, ok...
> >
> >
> > > +static int ksym_trace_init(struct trace_array *tr)
> > > +{
> > > + int cpu, ret = 0;
> > > +
> > > + for_each_online_cpu(cpu)
> > > + tracing_reset(tr, cpu);
> > > +
> > > + ksym_tracing_enabled = 1;
> > > + ksym_trace_array = tr;
> > > +
> > > +#ifdef CONFIG_FTRACE_SELFTEST
> > > + /* Check if we are re-entering self-test code during initialisation */
> > > + if (ksym_selftest_dummy)
> > > + goto ret_path;
> > > +
> > > + ksym_selftest_dummy = 0;
> > > +
> > > + /* Register the read-write tracing request */
> > > + ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY, HW_BREAKPOINT_RW,
> > > + (unsigned long)(&ksym_selftest_dummy));
> > > +
> > > + if (ret < 0) {
> > > + printk(KERN_CONT "ksym_trace read-write startup test failed\n");
> > > + goto ret_path;
> > > + }
> > > + /* Perform a read and a write operation over the dummy variable to
> > > + * trigger the tracer
> > > + */
> > > + if (ksym_selftest_dummy == 0)
> > > + ksym_selftest_dummy++;
> > > +ret_path:
> > > +#endif /* CONFIG_FTRACE_SELFTEST */
> >
> >
> > It means that each time your tracer is selected, it will perform a selftest.
> > I think we only need this selftest once during the boot.
> > I would rather see that in the real selftest callback (trace_selftest_startup_kysm).
> >
>
> > > + if (ksym_selftest_dummy)
> > > + goto ret_path;
>
> The above check will help prevent a re-run of the test everytime init is
> executed.
>
> A part of the selftest was kept in trace_ksym.c (and hence in
> ksym_trace_init()) in order to use functions local to this file, such as
> process_new_ksym_entry().
Ok.
May be you could just compute your selftests on the breakpoint themselves,
not on the entries in the ring buffer.
That's what do other tracers because it's usually convenient, but the main
focuse is the event itself, because the ring buffer is already tested.
The only thing you need to test is the hardware breakpoint triggering, then
you wouldn't need anymore the trace_selftest.c helpers and you could
implement your selftest directly in your tracer file.
I remember such thing has been discussed recently but I'm not sure
whether it concerned the kprobe tracer or yours.
> > > +__init static int init_ksym_trace(void)
> > > +{
> > > + struct dentry *d_tracer;
> > > + struct dentry *entry;
> > > +
> > > + d_tracer = tracing_init_dentry();
> > > + ksym_filter_entry_count = 0;
> > > +
> > > + entry = debugfs_create_file("ksym_trace_filter", 0666, d_tracer,
> >
> >
> >
> > Still writeable for everyone?
> >
> > Thanks,
> > Frederic.
> >
>
> Looks like I missed the change! Please find the next patchset to contain
> a '644' permission mode.
Thanks, I'm looking forward for the next version.
Frederic.
> Thanks for reviewing the code.
>
> -- K.Prasad
>
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [Patch 11/12] ftrace plugin for kernel symbol tracing usingHWBreakpoint interfaces - v4
2009-05-12 15:15 ` Frederic Weisbecker
@ 2009-05-12 20:02 ` K.Prasad
0 siblings, 0 replies; 40+ messages in thread
From: K.Prasad @ 2009-05-12 20:02 UTC (permalink / raw)
To: Frederic Weisbecker
Cc: Alan Stern, Steven Rostedt, Ingo Molnar,
Linux Kernel Mailing List, Andrew Morton, Benjamin Herrenschmidt,
maneesh, Roland McGrath, Masami Hiramatsu
On Tue, May 12, 2009 at 05:15:05PM +0200, Frederic Weisbecker wrote:
> On Tue, May 12, 2009 at 07:49:44PM +0530, K.Prasad wrote:
> > On Tue, May 12, 2009 at 12:14:29AM +0200, Frederic Weisbecker wrote:
> > > On Mon, May 11, 2009 at 05:25:02PM +0530, K.Prasad wrote:
> > >
> > > > +static int ksym_trace_init(struct trace_array *tr)
> > > > +{
> > > > + int cpu, ret = 0;
> > > > +
> > > > + for_each_online_cpu(cpu)
> > > > + tracing_reset(tr, cpu);
> > > > +
> > > > + ksym_tracing_enabled = 1;
> > > > + ksym_trace_array = tr;
> > > > +
> > > > +#ifdef CONFIG_FTRACE_SELFTEST
> > > > + /* Check if we are re-entering self-test code during initialisation */
> > > > + if (ksym_selftest_dummy)
> > > > + goto ret_path;
> > > > +
> > > > + ksym_selftest_dummy = 0;
> > > > +
> > > > + /* Register the read-write tracing request */
> > > > + ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY, HW_BREAKPOINT_RW,
> > > > + (unsigned long)(&ksym_selftest_dummy));
> > > > +
> > > > + if (ret < 0) {
> > > > + printk(KERN_CONT "ksym_trace read-write startup test failed\n");
> > > > + goto ret_path;
> > > > + }
> > > > + /* Perform a read and a write operation over the dummy variable to
> > > > + * trigger the tracer
> > > > + */
> > > > + if (ksym_selftest_dummy == 0)
> > > > + ksym_selftest_dummy++;
> > > > +ret_path:
> > > > +#endif /* CONFIG_FTRACE_SELFTEST */
> > >
> > >
> > > It means that each time your tracer is selected, it will perform a selftest.
> > > I think we only need this selftest once during the boot.
> > > I would rather see that in the real selftest callback (trace_selftest_startup_kysm).
> > >
> >
> > > > + if (ksym_selftest_dummy)
> > > > + goto ret_path;
> >
> > The above check will help prevent a re-run of the test everytime init is
> > executed.
> >
> > A part of the selftest was kept in trace_ksym.c (and hence in
> > ksym_trace_init()) in order to use functions local to this file, such as
> > process_new_ksym_entry().
>
>
>
> Ok.
> May be you could just compute your selftests on the breakpoint themselves,
> not on the entries in the ring buffer.
>
> That's what do other tracers because it's usually convenient, but the main
> focuse is the event itself, because the ring buffer is already tested.
> The only thing you need to test is the hardware breakpoint triggering, then
> you wouldn't need anymore the trace_selftest.c helpers and you could
> implement your selftest directly in your tracer file.
>
> I remember such thing has been discussed recently but I'm not sure
> whether it concerned the kprobe tracer or yours.
>
I agree that the ring buffer infrastructure need not be tested itself,
but to just check the trigger of HW Breakpoint without populating the
ring buffer will need a new breakpoint handler defined exclusively for
startup self-test, or additional checks in ksym_hbp_handler() that
executes everytime for normal exceptions too.
I will move all code from ksym_trace_init() to
trace_selftest_startup_kysm() though.
Thanks,
K.Prasad
^ permalink raw reply [flat|nested] 40+ messages in thread
* [Patch 06/12] Use the new wrapper routines to access debug registers in process/thread code
[not found] <20090513160546.592373797@prasadkr_t60p.in.ibm.com>
@ 2009-05-13 16:14 ` K.Prasad
0 siblings, 0 replies; 40+ messages in thread
From: K.Prasad @ 2009-05-13 16:14 UTC (permalink / raw)
To: Alan Stern
Cc: Steven Rostedt, Frederic Weisbecker, Ingo Molnar,
Linux Kernel Mailing List, Andrew Morton, Benjamin Herrenschmidt,
maneesh, Roland McGrath, Masami Hiramatsu, K.Prasad
[-- Attachment #1: modify_process_related_06 --]
[-- Type: text/plain, Size: 7451 bytes --]
From: Alan Stern <stern@rowland.harvard.edu>
This patch enables the use of abstract debug registers in
process-handling routines.
[K.Prasad: Split-out from the bigger patch and minor changes following
re-basing]
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
---
arch/x86/kernel/process.c | 23 ++++++-----------------
arch/x86/kernel/process_32.c | 31 +++++++++++++++++++++++++++++++
arch/x86/kernel/process_64.c | 33 +++++++++++++++++++++++++++++++++
3 files changed, 70 insertions(+), 17 deletions(-)
Index: arch/x86/kernel/process.c
===================================================================
--- arch/x86/kernel/process.c.orig
+++ arch/x86/kernel/process.c
@@ -16,6 +16,8 @@
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/ds.h>
+#include <asm/debugreg.h>
+#include <asm/hw_breakpoint.h>
unsigned long idle_halt;
EXPORT_SYMBOL(idle_halt);
@@ -47,6 +49,8 @@ void free_thread_xstate(struct task_stru
kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
tsk->thread.xstate = NULL;
}
+ if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
+ flush_thread_hw_breakpoint(tsk);
WARN(tsk->thread.ds_ctx, "leaking DS context\n");
}
@@ -105,14 +109,9 @@ void flush_thread(void)
}
#endif
- clear_tsk_thread_flag(tsk, TIF_DEBUG);
+ if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
+ flush_thread_hw_breakpoint(tsk);
- tsk->thread.debugreg0 = 0;
- tsk->thread.debugreg1 = 0;
- tsk->thread.debugreg2 = 0;
- tsk->thread.debugreg3 = 0;
- tsk->thread.debugreg6 = 0;
- tsk->thread.debugreg7 = 0;
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
/*
* Forget coprocessor state..
@@ -194,16 +193,6 @@ void __switch_to_xtra(struct task_struct
else if (next->debugctlmsr != prev->debugctlmsr)
update_debugctlmsr(next->debugctlmsr);
- if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
- set_debugreg(next->debugreg0, 0);
- set_debugreg(next->debugreg1, 1);
- set_debugreg(next->debugreg2, 2);
- set_debugreg(next->debugreg3, 3);
- /* no 4 and 5 */
- set_debugreg(next->debugreg6, 6);
- set_debugreg(next->debugreg7, 7);
- }
-
if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
test_tsk_thread_flag(next_p, TIF_NOTSC)) {
/* prev and next are different */
Index: arch/x86/kernel/process_32.c
===================================================================
--- arch/x86/kernel/process_32.c.orig
+++ arch/x86/kernel/process_32.c
@@ -61,6 +61,8 @@
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>
+#include <asm/debugreg.h>
+#include <asm/hw_breakpoint.h>
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
@@ -265,7 +267,14 @@ int copy_thread(unsigned long clone_flag
task_user_gs(p) = get_user_gs(regs);
+ p->thread.io_bitmap_ptr = NULL;
+
tsk = current;
+ err = -ENOMEM;
+ if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
+ if (copy_thread_hw_breakpoint(tsk, p, clone_flags))
+ goto out;
+
if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
IO_BITMAP_BYTES, GFP_KERNEL);
@@ -285,10 +294,13 @@ int copy_thread(unsigned long clone_flag
err = do_set_thread_area(p, -1,
(struct user_desc __user *)childregs->si, 0);
+out:
if (err && p->thread.io_bitmap_ptr) {
kfree(p->thread.io_bitmap_ptr);
p->thread.io_bitmap_max = 0;
}
+ if (err)
+ flush_thread_hw_breakpoint(p);
clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
p->thread.ds_ctx = NULL;
@@ -427,6 +439,25 @@ __switch_to(struct task_struct *prev_p,
lazy_load_gs(next->gs);
percpu_write(current_task, next_p);
+ /*
+ * There's a problem with moving the switch_to_thread_hw_breakpoint()
+ * call before current is updated. Suppose a kernel breakpoint is
+ * triggered in between the two. The hw-breakpoint handler will see
+ * that current is different from the task pointer stored in
+ * last_debugged_task, so it will think the task pointer is leftover
+ * from an old task (lazy switching) and will erase it. Then until the
+ * next context switch, no user-breakpoints will be installed.
+ *
+ * The real problem is that it's impossible to update both current and
+ * last_debugged_task at the same instant, so there will always be a
+ * window in which they disagree and a breakpoint might get triggered.
+ * Since we use lazy switching, we are forced to assume that a
+ * disagreement means that current is correct and last_debugged_task is
+ * old. But if you move the code above then you'll create a window in
+ * which current is old and last_debugged_task is correct.
+ */
+ if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
+ switch_to_thread_hw_breakpoint(next_p);
return prev_p;
}
Index: arch/x86/kernel/process_64.c
===================================================================
--- arch/x86/kernel/process_64.c.orig
+++ arch/x86/kernel/process_64.c
@@ -55,6 +55,8 @@
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>
+#include <asm/debugreg.h>
+#include <asm/hw_breakpoint.h>
asmlinkage extern void ret_from_fork(void);
@@ -248,6 +250,8 @@ void release_thread(struct task_struct *
BUG();
}
}
+ if (unlikely(dead_task->thread.debugreg7))
+ flush_thread_hw_breakpoint(dead_task);
}
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
@@ -303,12 +307,18 @@ int copy_thread(unsigned long clone_flag
p->thread.fs = me->thread.fs;
p->thread.gs = me->thread.gs;
+ p->thread.io_bitmap_ptr = NULL;
savesegment(gs, p->thread.gsindex);
savesegment(fs, p->thread.fsindex);
savesegment(es, p->thread.es);
savesegment(ds, p->thread.ds);
+ err = -ENOMEM;
+ if (unlikely(test_tsk_thread_flag(me, TIF_DEBUG)))
+ if (copy_thread_hw_breakpoint(me, p, clone_flags))
+ goto out;
+
if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
if (!p->thread.io_bitmap_ptr) {
@@ -347,6 +357,9 @@ out:
kfree(p->thread.io_bitmap_ptr);
p->thread.io_bitmap_max = 0;
}
+ if (err)
+ flush_thread_hw_breakpoint(p);
+
return err;
}
@@ -492,6 +505,26 @@ __switch_to(struct task_struct *prev_p,
*/
if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
math_state_restore();
+ /*
+ * There's a problem with moving the switch_to_thread_hw_breakpoint()
+ * call before current is updated. Suppose a kernel breakpoint is
+ * triggered in between the two. The hw-breakpoint handler will see
+ * that current is different from the task pointer stored in
+ * last_debugged_task, so it will think the task pointer is leftover
+ * from an old task (lazy switching) and will erase it. Then until the
+ * next context switch, no user-breakpoints will be installed.
+ *
+ * The real problem is that it's impossible to update both current and
+ * last_debugged_task at the same instant, so there will always be a
+ * window in which they disagree and a breakpoint might get triggered.
+ * Since we use lazy switching, we are forced to assume that a
+ * disagreement means that current is correct and last_debugged_task is
+ * old. But if you move the code above then you'll create a window in
+ * which current is old and last_debugged_task is correct.
+ */
+ if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
+ switch_to_thread_hw_breakpoint(next_p);
+
return prev_p;
}
^ permalink raw reply [flat|nested] 40+ messages in thread
* [Patch 06/12] Use the new wrapper routines to access debug registers in process/thread code
[not found] <20090515105133.629980476@prasadkr_t60p.in.ibm.com>
@ 2009-05-15 10:58 ` K.Prasad
2009-05-16 0:29 ` K.Prasad
1 sibling, 0 replies; 40+ messages in thread
From: K.Prasad @ 2009-05-15 10:58 UTC (permalink / raw)
To: Ingo Molnar, Linux Kernel Mailing List
Cc: Alan Stern, Frederic Weisbecker, Steven Rostedt, Andrew Morton,
Benjamin Herrenschmidt, maneesh, Roland McGrath, Masami Hiramatsu,
K.Prasad
[-- Attachment #1: modify_process_related_06 --]
[-- Type: text/plain, Size: 7463 bytes --]
From: Alan Stern <stern@rowland.harvard.edu>
This patch enables the use of abstract debug registers in
process-handling routines.
[K.Prasad: Split-out from the bigger patch and minor changes following
re-basing]
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
---
arch/x86/kernel/process.c | 23 ++++++-----------------
arch/x86/kernel/process_32.c | 31 +++++++++++++++++++++++++++++++
arch/x86/kernel/process_64.c | 33 +++++++++++++++++++++++++++++++++
3 files changed, 70 insertions(+), 17 deletions(-)
Index: arch/x86/kernel/process.c
===================================================================
--- arch/x86/kernel/process.c.orig
+++ arch/x86/kernel/process.c
@@ -16,6 +16,8 @@
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/ds.h>
+#include <asm/debugreg.h>
+#include <asm/hw_breakpoint.h>
unsigned long idle_halt;
EXPORT_SYMBOL(idle_halt);
@@ -47,6 +49,8 @@ void free_thread_xstate(struct task_stru
kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
tsk->thread.xstate = NULL;
}
+ if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
+ flush_thread_hw_breakpoint(tsk);
WARN(tsk->thread.ds_ctx, "leaking DS context\n");
}
@@ -105,14 +109,9 @@ void flush_thread(void)
}
#endif
- clear_tsk_thread_flag(tsk, TIF_DEBUG);
+ if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
+ flush_thread_hw_breakpoint(tsk);
- tsk->thread.debugreg0 = 0;
- tsk->thread.debugreg1 = 0;
- tsk->thread.debugreg2 = 0;
- tsk->thread.debugreg3 = 0;
- tsk->thread.debugreg6 = 0;
- tsk->thread.debugreg7 = 0;
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
/*
* Forget coprocessor state..
@@ -194,16 +193,6 @@ void __switch_to_xtra(struct task_struct
else if (next->debugctlmsr != prev->debugctlmsr)
update_debugctlmsr(next->debugctlmsr);
- if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
- set_debugreg(next->debugreg0, 0);
- set_debugreg(next->debugreg1, 1);
- set_debugreg(next->debugreg2, 2);
- set_debugreg(next->debugreg3, 3);
- /* no 4 and 5 */
- set_debugreg(next->debugreg6, 6);
- set_debugreg(next->debugreg7, 7);
- }
-
if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
test_tsk_thread_flag(next_p, TIF_NOTSC)) {
/* prev and next are different */
Index: arch/x86/kernel/process_32.c
===================================================================
--- arch/x86/kernel/process_32.c.orig
+++ arch/x86/kernel/process_32.c
@@ -61,6 +61,8 @@
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>
+#include <asm/debugreg.h>
+#include <asm/hw_breakpoint.h>
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
@@ -265,7 +267,14 @@ int copy_thread(unsigned long clone_flag
task_user_gs(p) = get_user_gs(regs);
+ p->thread.io_bitmap_ptr = NULL;
+
tsk = current;
+ err = -ENOMEM;
+ if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
+ if (copy_thread_hw_breakpoint(tsk, p, clone_flags))
+ goto out;
+
if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
IO_BITMAP_BYTES, GFP_KERNEL);
@@ -285,10 +294,13 @@ int copy_thread(unsigned long clone_flag
err = do_set_thread_area(p, -1,
(struct user_desc __user *)childregs->si, 0);
+out:
if (err && p->thread.io_bitmap_ptr) {
kfree(p->thread.io_bitmap_ptr);
p->thread.io_bitmap_max = 0;
}
+ if (err)
+ flush_thread_hw_breakpoint(p);
clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
p->thread.ds_ctx = NULL;
@@ -427,6 +439,25 @@ __switch_to(struct task_struct *prev_p,
lazy_load_gs(next->gs);
percpu_write(current_task, next_p);
+ /*
+ * There's a problem with moving the arch_install_thread_hw_breakpoint()
+ * call before current is updated. Suppose a kernel breakpoint is
+ * triggered in between the two. The hw-breakpoint handler will see
+ * that current is different from the task pointer stored in
+ * last_debugged_task, so it will think the task pointer is leftover
+ * from an old task (lazy switching) and will erase it. Then until the
+ * next context switch, no user-breakpoints will be installed.
+ *
+ * The real problem is that it's impossible to update both current and
+ * last_debugged_task at the same instant, so there will always be a
+ * window in which they disagree and a breakpoint might get triggered.
+ * Since we use lazy switching, we are forced to assume that a
+ * disagreement means that current is correct and last_debugged_task is
+ * old. But if you move the code above then you'll create a window in
+ * which current is old and last_debugged_task is correct.
+ */
+ if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
+ arch_install_thread_hw_breakpoint(next_p);
return prev_p;
}
Index: arch/x86/kernel/process_64.c
===================================================================
--- arch/x86/kernel/process_64.c.orig
+++ arch/x86/kernel/process_64.c
@@ -55,6 +55,8 @@
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>
+#include <asm/debugreg.h>
+#include <asm/hw_breakpoint.h>
asmlinkage extern void ret_from_fork(void);
@@ -248,6 +250,8 @@ void release_thread(struct task_struct *
BUG();
}
}
+ if (unlikely(dead_task->thread.debugreg7))
+ flush_thread_hw_breakpoint(dead_task);
}
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
@@ -303,12 +307,18 @@ int copy_thread(unsigned long clone_flag
p->thread.fs = me->thread.fs;
p->thread.gs = me->thread.gs;
+ p->thread.io_bitmap_ptr = NULL;
savesegment(gs, p->thread.gsindex);
savesegment(fs, p->thread.fsindex);
savesegment(es, p->thread.es);
savesegment(ds, p->thread.ds);
+ err = -ENOMEM;
+ if (unlikely(test_tsk_thread_flag(me, TIF_DEBUG)))
+ if (copy_thread_hw_breakpoint(me, p, clone_flags))
+ goto out;
+
if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
if (!p->thread.io_bitmap_ptr) {
@@ -347,6 +357,9 @@ out:
kfree(p->thread.io_bitmap_ptr);
p->thread.io_bitmap_max = 0;
}
+ if (err)
+ flush_thread_hw_breakpoint(p);
+
return err;
}
@@ -492,6 +505,26 @@ __switch_to(struct task_struct *prev_p,
*/
if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
math_state_restore();
+ /*
+ * There's a problem with moving the arch_install_thread_hw_breakpoint()
+ * call before current is updated. Suppose a kernel breakpoint is
+ * triggered in between the two. The hw-breakpoint handler will see
+ * that current is different from the task pointer stored in
+ * last_debugged_task, so it will think the task pointer is leftover
+ * from an old task (lazy switching) and will erase it. Then until the
+ * next context switch, no user-breakpoints will be installed.
+ *
+ * The real problem is that it's impossible to update both current and
+ * last_debugged_task at the same instant, so there will always be a
+ * window in which they disagree and a breakpoint might get triggered.
+ * Since we use lazy switching, we are forced to assume that a
+ * disagreement means that current is correct and last_debugged_task is
+ * old. But if you move the code above then you'll create a window in
+ * which current is old and last_debugged_task is correct.
+ */
+ if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
+ arch_install_thread_hw_breakpoint(next_p);
+
return prev_p;
}
^ permalink raw reply [flat|nested] 40+ messages in thread
* [Patch 06/12] Use the new wrapper routines to access debug registers in process/thread code
[not found] <20090515105133.629980476@prasadkr_t60p.in.ibm.com>
2009-05-15 10:58 ` K.Prasad
@ 2009-05-16 0:29 ` K.Prasad
1 sibling, 0 replies; 40+ messages in thread
From: K.Prasad @ 2009-05-16 0:29 UTC (permalink / raw)
To: Ingo Molnar, Linux Kernel Mailing List
Cc: Alan Stern, Frederic Weisbecker, Steven Rostedt, Andrew Morton,
Benjamin Herrenschmidt, maneesh, Roland McGrath, Masami Hiramatsu,
K.Prasad
[-- Attachment #1: modify_process_related_06 --]
[-- Type: text/plain, Size: 7379 bytes --]
This patch enables the use of abstract debug registers in
process-handling routines.
Original-patch-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
Reviewed-by: Alan Stern <stern@rowland.harvard.edu>
---
arch/x86/kernel/process.c | 23 ++++++-----------------
arch/x86/kernel/process_32.c | 31 +++++++++++++++++++++++++++++++
arch/x86/kernel/process_64.c | 33 +++++++++++++++++++++++++++++++++
3 files changed, 70 insertions(+), 17 deletions(-)
Index: arch/x86/kernel/process.c
===================================================================
--- arch/x86/kernel/process.c.orig
+++ arch/x86/kernel/process.c
@@ -16,6 +16,8 @@
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/ds.h>
+#include <asm/debugreg.h>
+#include <asm/hw_breakpoint.h>
unsigned long idle_halt;
EXPORT_SYMBOL(idle_halt);
@@ -47,6 +49,8 @@ void free_thread_xstate(struct task_stru
kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
tsk->thread.xstate = NULL;
}
+ if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
+ flush_thread_hw_breakpoint(tsk);
WARN(tsk->thread.ds_ctx, "leaking DS context\n");
}
@@ -105,14 +109,9 @@ void flush_thread(void)
}
#endif
- clear_tsk_thread_flag(tsk, TIF_DEBUG);
+ if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
+ flush_thread_hw_breakpoint(tsk);
- tsk->thread.debugreg0 = 0;
- tsk->thread.debugreg1 = 0;
- tsk->thread.debugreg2 = 0;
- tsk->thread.debugreg3 = 0;
- tsk->thread.debugreg6 = 0;
- tsk->thread.debugreg7 = 0;
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
/*
* Forget coprocessor state..
@@ -194,16 +193,6 @@ void __switch_to_xtra(struct task_struct
else if (next->debugctlmsr != prev->debugctlmsr)
update_debugctlmsr(next->debugctlmsr);
- if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
- set_debugreg(next->debugreg0, 0);
- set_debugreg(next->debugreg1, 1);
- set_debugreg(next->debugreg2, 2);
- set_debugreg(next->debugreg3, 3);
- /* no 4 and 5 */
- set_debugreg(next->debugreg6, 6);
- set_debugreg(next->debugreg7, 7);
- }
-
if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
test_tsk_thread_flag(next_p, TIF_NOTSC)) {
/* prev and next are different */
Index: arch/x86/kernel/process_32.c
===================================================================
--- arch/x86/kernel/process_32.c.orig
+++ arch/x86/kernel/process_32.c
@@ -61,6 +61,8 @@
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>
+#include <asm/debugreg.h>
+#include <asm/hw_breakpoint.h>
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
@@ -265,7 +267,14 @@ int copy_thread(unsigned long clone_flag
task_user_gs(p) = get_user_gs(regs);
+ p->thread.io_bitmap_ptr = NULL;
+
tsk = current;
+ err = -ENOMEM;
+ if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
+ if (copy_thread_hw_breakpoint(tsk, p, clone_flags))
+ goto out;
+
if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
IO_BITMAP_BYTES, GFP_KERNEL);
@@ -285,10 +294,13 @@ int copy_thread(unsigned long clone_flag
err = do_set_thread_area(p, -1,
(struct user_desc __user *)childregs->si, 0);
+out:
if (err && p->thread.io_bitmap_ptr) {
kfree(p->thread.io_bitmap_ptr);
p->thread.io_bitmap_max = 0;
}
+ if (err)
+ flush_thread_hw_breakpoint(p);
clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
p->thread.ds_ctx = NULL;
@@ -427,6 +439,25 @@ __switch_to(struct task_struct *prev_p,
lazy_load_gs(next->gs);
percpu_write(current_task, next_p);
+ /*
+ * There's a problem with moving the arch_install_thread_hw_breakpoint()
+ * call before current is updated. Suppose a kernel breakpoint is
+ * triggered in between the two. The hw-breakpoint handler will see
+ * that current is different from the task pointer stored in
+ * last_debugged_task, so it will think the task pointer is leftover
+ * from an old task (lazy switching) and will erase it. Then until the
+ * next context switch, no user-breakpoints will be installed.
+ *
+ * The real problem is that it's impossible to update both current and
+ * last_debugged_task at the same instant, so there will always be a
+ * window in which they disagree and a breakpoint might get triggered.
+ * Since we use lazy switching, we are forced to assume that a
+ * disagreement means that current is correct and last_debugged_task is
+ * old. But if you move the code above then you'll create a window in
+ * which current is old and last_debugged_task is correct.
+ */
+ if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
+ arch_install_thread_hw_breakpoint(next_p);
return prev_p;
}
Index: arch/x86/kernel/process_64.c
===================================================================
--- arch/x86/kernel/process_64.c.orig
+++ arch/x86/kernel/process_64.c
@@ -55,6 +55,8 @@
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>
+#include <asm/debugreg.h>
+#include <asm/hw_breakpoint.h>
asmlinkage extern void ret_from_fork(void);
@@ -248,6 +250,8 @@ void release_thread(struct task_struct *
BUG();
}
}
+ if (unlikely(dead_task->thread.debugreg7))
+ flush_thread_hw_breakpoint(dead_task);
}
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
@@ -303,12 +307,18 @@ int copy_thread(unsigned long clone_flag
p->thread.fs = me->thread.fs;
p->thread.gs = me->thread.gs;
+ p->thread.io_bitmap_ptr = NULL;
savesegment(gs, p->thread.gsindex);
savesegment(fs, p->thread.fsindex);
savesegment(es, p->thread.es);
savesegment(ds, p->thread.ds);
+ err = -ENOMEM;
+ if (unlikely(test_tsk_thread_flag(me, TIF_DEBUG)))
+ if (copy_thread_hw_breakpoint(me, p, clone_flags))
+ goto out;
+
if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
if (!p->thread.io_bitmap_ptr) {
@@ -347,6 +357,9 @@ out:
kfree(p->thread.io_bitmap_ptr);
p->thread.io_bitmap_max = 0;
}
+ if (err)
+ flush_thread_hw_breakpoint(p);
+
return err;
}
@@ -492,6 +505,26 @@ __switch_to(struct task_struct *prev_p,
*/
if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
math_state_restore();
+ /*
+ * There's a problem with moving the arch_install_thread_hw_breakpoint()
+ * call before current is updated. Suppose a kernel breakpoint is
+ * triggered in between the two. The hw-breakpoint handler will see
+ * that current is different from the task pointer stored in
+ * last_debugged_task, so it will think the task pointer is leftover
+ * from an old task (lazy switching) and will erase it. Then until the
+ * next context switch, no user-breakpoints will be installed.
+ *
+ * The real problem is that it's impossible to update both current and
+ * last_debugged_task at the same instant, so there will always be a
+ * window in which they disagree and a breakpoint might get triggered.
+ * Since we use lazy switching, we are forced to assume that a
+ * disagreement means that current is correct and last_debugged_task is
+ * old. But if you move the code above then you'll create a window in
+ * which current is old and last_debugged_task is correct.
+ */
+ if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
+ arch_install_thread_hw_breakpoint(next_p);
+
return prev_p;
}
^ permalink raw reply [flat|nested] 40+ messages in thread
* [Patch 06/12] Use the new wrapper routines to access debug registers in process/thread code
[not found] <20090521095613.834622717@prasadkr_t60p.in.ibm.com>
@ 2009-05-21 14:02 ` K.Prasad
0 siblings, 0 replies; 40+ messages in thread
From: K.Prasad @ 2009-05-21 14:02 UTC (permalink / raw)
To: Ingo Molnar, Frederic Weisbecker
Cc: Linux Kernel Mailing List, Alan Stern, K.Prasad
[-- Attachment #1: modify_process_related_06 --]
[-- Type: text/plain, Size: 7559 bytes --]
This patch enables the use of abstract debug registers in
process-handling routines.
Original-patch-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
Reviewed-by: Alan Stern <stern@rowland.harvard.edu>
---
arch/x86/kernel/process.c | 23 ++++++-----------------
arch/x86/kernel/process_32.c | 31 +++++++++++++++++++++++++++++++
arch/x86/kernel/process_64.c | 33 +++++++++++++++++++++++++++++++++
3 files changed, 70 insertions(+), 17 deletions(-)
Index: linux-2.6-tip.hbkpt/arch/x86/kernel/process.c
===================================================================
--- linux-2.6-tip.hbkpt.orig/arch/x86/kernel/process.c
+++ linux-2.6-tip.hbkpt/arch/x86/kernel/process.c
@@ -17,6 +17,8 @@
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/ds.h>
+#include <asm/debugreg.h>
+#include <asm/hw_breakpoint.h>
unsigned long idle_halt;
EXPORT_SYMBOL(idle_halt);
@@ -48,6 +50,8 @@ void free_thread_xstate(struct task_stru
kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
tsk->thread.xstate = NULL;
}
+ if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
+ flush_thread_hw_breakpoint(tsk);
WARN(tsk->thread.ds_ctx, "leaking DS context\n");
}
@@ -106,14 +110,9 @@ void flush_thread(void)
}
#endif
- clear_tsk_thread_flag(tsk, TIF_DEBUG);
+ if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
+ flush_thread_hw_breakpoint(tsk);
- tsk->thread.debugreg0 = 0;
- tsk->thread.debugreg1 = 0;
- tsk->thread.debugreg2 = 0;
- tsk->thread.debugreg3 = 0;
- tsk->thread.debugreg6 = 0;
- tsk->thread.debugreg7 = 0;
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
/*
* Forget coprocessor state..
@@ -195,16 +194,6 @@ void __switch_to_xtra(struct task_struct
else if (next->debugctlmsr != prev->debugctlmsr)
update_debugctlmsr(next->debugctlmsr);
- if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
- set_debugreg(next->debugreg0, 0);
- set_debugreg(next->debugreg1, 1);
- set_debugreg(next->debugreg2, 2);
- set_debugreg(next->debugreg3, 3);
- /* no 4 and 5 */
- set_debugreg(next->debugreg6, 6);
- set_debugreg(next->debugreg7, 7);
- }
-
if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
test_tsk_thread_flag(next_p, TIF_NOTSC)) {
/* prev and next are different */
Index: linux-2.6-tip.hbkpt/arch/x86/kernel/process_32.c
===================================================================
--- linux-2.6-tip.hbkpt.orig/arch/x86/kernel/process_32.c
+++ linux-2.6-tip.hbkpt/arch/x86/kernel/process_32.c
@@ -58,6 +58,8 @@
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>
+#include <asm/debugreg.h>
+#include <asm/hw_breakpoint.h>
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
@@ -262,7 +264,14 @@ int copy_thread(unsigned long clone_flag
task_user_gs(p) = get_user_gs(regs);
+ p->thread.io_bitmap_ptr = NULL;
+
tsk = current;
+ err = -ENOMEM;
+ if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
+ if (copy_thread_hw_breakpoint(tsk, p, clone_flags))
+ goto out;
+
if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
IO_BITMAP_BYTES, GFP_KERNEL);
@@ -282,10 +291,13 @@ int copy_thread(unsigned long clone_flag
err = do_set_thread_area(p, -1,
(struct user_desc __user *)childregs->si, 0);
+out:
if (err && p->thread.io_bitmap_ptr) {
kfree(p->thread.io_bitmap_ptr);
p->thread.io_bitmap_max = 0;
}
+ if (err)
+ flush_thread_hw_breakpoint(p);
clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
p->thread.ds_ctx = NULL;
@@ -424,6 +436,25 @@ __switch_to(struct task_struct *prev_p,
lazy_load_gs(next->gs);
percpu_write(current_task, next_p);
+ /*
+ * There's a problem with moving the arch_install_thread_hw_breakpoint()
+ * call before current is updated. Suppose a kernel breakpoint is
+ * triggered in between the two. The hw-breakpoint handler will see
+ * that current is different from the task pointer stored in
+ * last_debugged_task, so it will think the task pointer is leftover
+ * from an old task (lazy switching) and will erase it. Then until the
+ * next context switch, no user-breakpoints will be installed.
+ *
+ * The real problem is that it's impossible to update both current and
+ * last_debugged_task at the same instant, so there will always be a
+ * window in which they disagree and a breakpoint might get triggered.
+ * Since we use lazy switching, we are forced to assume that a
+ * disagreement means that current is correct and last_debugged_task is
+ * old. But if you move the code above then you'll create a window in
+ * which current is old and last_debugged_task is correct.
+ */
+ if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
+ arch_install_thread_hw_breakpoint(next_p);
return prev_p;
}
Index: linux-2.6-tip.hbkpt/arch/x86/kernel/process_64.c
===================================================================
--- linux-2.6-tip.hbkpt.orig/arch/x86/kernel/process_64.c
+++ linux-2.6-tip.hbkpt/arch/x86/kernel/process_64.c
@@ -52,6 +52,8 @@
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>
+#include <asm/debugreg.h>
+#include <asm/hw_breakpoint.h>
asmlinkage extern void ret_from_fork(void);
@@ -245,6 +247,8 @@ void release_thread(struct task_struct *
BUG();
}
}
+ if (unlikely(dead_task->thread.debugreg7))
+ flush_thread_hw_breakpoint(dead_task);
}
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
@@ -300,12 +304,18 @@ int copy_thread(unsigned long clone_flag
p->thread.fs = me->thread.fs;
p->thread.gs = me->thread.gs;
+ p->thread.io_bitmap_ptr = NULL;
savesegment(gs, p->thread.gsindex);
savesegment(fs, p->thread.fsindex);
savesegment(es, p->thread.es);
savesegment(ds, p->thread.ds);
+ err = -ENOMEM;
+ if (unlikely(test_tsk_thread_flag(me, TIF_DEBUG)))
+ if (copy_thread_hw_breakpoint(me, p, clone_flags))
+ goto out;
+
if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
if (!p->thread.io_bitmap_ptr) {
@@ -344,6 +354,9 @@ out:
kfree(p->thread.io_bitmap_ptr);
p->thread.io_bitmap_max = 0;
}
+ if (err)
+ flush_thread_hw_breakpoint(p);
+
return err;
}
@@ -489,6 +502,26 @@ __switch_to(struct task_struct *prev_p,
*/
if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
math_state_restore();
+ /*
+ * There's a problem with moving the arch_install_thread_hw_breakpoint()
+ * call before current is updated. Suppose a kernel breakpoint is
+ * triggered in between the two. The hw-breakpoint handler will see
+ * that current is different from the task pointer stored in
+ * last_debugged_task, so it will think the task pointer is leftover
+ * from an old task (lazy switching) and will erase it. Then until the
+ * next context switch, no user-breakpoints will be installed.
+ *
+ * The real problem is that it's impossible to update both current and
+ * last_debugged_task at the same instant, so there will always be a
+ * window in which they disagree and a breakpoint might get triggered.
+ * Since we use lazy switching, we are forced to assume that a
+ * disagreement means that current is correct and last_debugged_task is
+ * old. But if you move the code above then you'll create a window in
+ * which current is old and last_debugged_task is correct.
+ */
+ if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
+ arch_install_thread_hw_breakpoint(next_p);
+
return prev_p;
}
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [Patch 01/12] Prepare the code for Hardware Breakpoint interfaces
2009-05-11 11:52 ` [Patch 01/12] Prepare the code for Hardware Breakpoint interfaces K.Prasad
@ 2009-05-28 5:28 ` David Gibson
2009-05-28 11:10 ` K.Prasad
0 siblings, 1 reply; 40+ messages in thread
From: David Gibson @ 2009-05-28 5:28 UTC (permalink / raw)
To: K.Prasad
Cc: Alan Stern, Steven Rostedt, Frederic Weisbecker, Ingo Molnar,
Linux Kernel Mailing List, Andrew Morton, Benjamin Herrenschmidt,
maneesh, Roland McGrath, Masami Hiramatsu
On Mon, May 11, 2009 at 05:22:13PM +0530, K.Prasad wrote:
> This patch introduces header files containing constants, structure
> definitions and declaration of functions used by the hardware
> breakpoint interface code.
Hrm. I don't really like splitting prototypes from their
implementations as you have done. Patches in a series should be split
into logical, self-contained units, not just sliced up by file.
[snip]
> Index: include/asm-generic/hw_breakpoint.h
> ===================================================================
> --- /dev/null
> +++ include/asm-generic/hw_breakpoint.h
> @@ -0,0 +1,139 @@
> +#ifndef _ASM_GENERIC_HW_BREAKPOINT_H
> +#define _ASM_GENERIC_HW_BREAKPOINT_H
Is there a reason this is asm-generic, rather than
linux/hw_breakpoint.h?
> +#ifndef __ARCH_HW_BREAKPOINT_H
> +#error "Please don't include this file directly"
> +#endif
> +
> +#ifdef __KERNEL__
> +#include <linux/list.h>
> +#include <linux/types.h>
> +#include <linux/kallsyms.h>
> +
> +/**
> + * struct hw_breakpoint - unified kernel/user-space hardware breakpoint
> + * @triggered: callback invoked after target address access
"triggered" seems an odd name to me - it suggests a boolean flag, not
a function pointer.
> + * @info: arch-specific breakpoint info (address, length, and type)
> + *
> + * %hw_breakpoint structures are the kernel's way of representing
> + * hardware breakpoints. These are data breakpoints
> + * (also known as "watchpoints", triggered on data access), and the breakpoint's
> + * target address can be located in either kernel space or user space.
I thought this infrastructure could also handle instruction
breakpoints. The comment above seems to contradict that.
> + * The breakpoint's address, length, and type are highly
> + * architecture-specific. The values are encoded in the @info field; you
> + * specify them when registering the breakpoint. To examine the encoded
> + * values use hw_breakpoint_get_{kaddress,uaddress,len,type}(), declared
> + * below.
> + *
> + * The address is specified as a regular kernel pointer (for kernel-space
> + * breakponts) or as an %__user pointer (for user-space breakpoints).
> + * With register_user_hw_breakpoint(), the address must refer to a
> + * location in user space. The breakpoint will be active only while the
> + * requested task is running. Conversely with
> + * register_kernel_hw_breakpoint(), the address must refer to a location
> + * in kernel space, and the breakpoint will be active on all CPUs
> + * regardless of the current task.
> + *
> + * The length is the breakpoint's extent in bytes, which is subject to
> + * certain limitations. include/asm/hw_breakpoint.h contains macros
> + * defining the available lengths for a specific architecture. Note that
> + * the address's alignment must match the length. The breakpoint will
> + * catch accesses to any byte in the range from address to address +
> + * (length - 1).
> + *
> + * The breakpoint's type indicates the sort of access that will cause it
> + * to trigger. Possible values may include:
> + *
> + * %HW_BREAKPOINT_RW (triggered on read or write access),
> + * %HW_BREAKPOINT_WRITE (triggered on write access), and
> + * %HW_BREAKPOINT_READ (triggered on read access).
> + *
> + * Appropriate macros are defined in include/asm/hw_breakpoint.h; not all
> + * possibilities are available on all architectures. Execute breakpoints
> + * must have length equal to the special value %HW_BREAKPOINT_LEN_EXECUTE.
Why do instruction breakpoints have a special length, rather than a
special type? Or do they have both?
> + * When a breakpoint gets hit, the @triggered callback is
> + * invoked in_interrupt with a pointer to the %hw_breakpoint structure and the
> + * processor registers.
> + * Data breakpoints occur after the memory access has taken place.
> + * Breakpoints are disabled during execution @triggered, to avoid
> + * recursive traps and allow unhindered access to breakpointed memory.
> + *
> + * This sample code sets a breakpoint on pid_max and registers a callback
> + * function for writes to that variable. Note that it is not portable
> + * as written, because not all architectures support HW_BREAKPOINT_LEN_4.
> + *
> + * ----------------------------------------------------------------------
> + *
> + * #include <asm/hw_breakpoint.h>
> + *
> + * struct hw_breakpoint my_bp;
> + *
> + * static void my_triggered(struct hw_breakpoint *bp, struct pt_regs *regs)
> + * {
> + * printk(KERN_DEBUG "Inside triggered routine of breakpoint exception\n");
> + * dump_stack();
> + * .......<more debugging output>........
> + * }
> + *
> + * static struct hw_breakpoint my_bp;
> + *
> + * static int init_module(void)
> + * {
> + * ..........<do anything>............
> + * my_bp.info.type = HW_BREAKPOINT_WRITE;
> + * my_bp.info.len = HW_BREAKPOINT_LEN_4;
> + *
> + * my_bp.installed = (void *)my_bp_installed;
> + *
> + * rc = register_kernel_hw_breakpoint(&my_bp);
> + * ..........<do anything>............
> + * }
> + *
> + * static void cleanup_module(void)
> + * {
> + * ..........<do anything>............
> + * unregister_kernel_hw_breakpoint(&my_bp);
> + * ..........<do anything>............
> + * }
> + *
> + * ----------------------------------------------------------------------
> + */
> +struct hw_breakpoint {
> + void (*triggered)(struct hw_breakpoint *, struct pt_regs *);
> + struct arch_hw_breakpoint info;
> +};
> +
> +/*
> + * len and type values are defined in include/asm/hw_breakpoint.h.
> + * Available values vary according to the architecture. On i386 the
> + * possibilities are:
> + *
> + * HW_BREAKPOINT_LEN_1
> + * HW_BREAKPOINT_LEN_2
> + * HW_BREAKPOINT_LEN_4
> + * HW_BREAKPOINT_RW
> + * HW_BREAKPOINT_READ
> + *
> + * On other architectures HW_BREAKPOINT_LEN_8 may be available, and the
> + * 1-, 2-, and 4-byte lengths may be unavailable. There also may be
> + * HW_BREAKPOINT_WRITE. You can use #ifdef to check at compile time.
> + */
> +
> +int register_user_hw_breakpoint(struct task_struct *tsk,
> + struct hw_breakpoint *bp);
> +void unregister_user_hw_breakpoint(struct task_struct *tsk,
> + struct hw_breakpoint *bp);
> +/*
> + * Kernel breakpoints are not associated with any particular thread.
> + */
> +int register_kernel_hw_breakpoint(struct hw_breakpoint *bp);
> +void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp);
> +void switch_to_none_hw_breakpoint(void);
> +
> +extern unsigned int hbp_kernel_pos;
> +extern spinlock_t hw_breakpoint_lock;
> +
> +#endif /* __KERNEL__ */
> +#endif /* _ASM_GENERIC_HW_BREAKPOINT_H */
> Index: arch/x86/include/asm/hw_breakpoint.h
> ===================================================================
> --- /dev/null
> +++ arch/x86/include/asm/hw_breakpoint.h
> @@ -0,0 +1,66 @@
> +#ifndef _I386_HW_BREAKPOINT_H
> +#define _I386_HW_BREAKPOINT_H
> +
> +#ifdef __KERNEL__
> +#define __ARCH_HW_BREAKPOINT_H
> +
> +struct arch_hw_breakpoint {
> + char *name; /* Contains name of the symbol to set bkpt */
> + unsigned long address;
> + u8 len;
> + u8 type;
> +};
> +
> +#include <linux/kdebug.h>
> +#include <asm-generic/hw_breakpoint.h>
> +
> +/* Available HW breakpoint length encodings */
> +#define HW_BREAKPOINT_LEN_1 0x40
> +#define HW_BREAKPOINT_LEN_2 0x44
> +#define HW_BREAKPOINT_LEN_4 0x4c
> +#define HW_BREAKPOINT_LEN_EXECUTE 0x40
> +
> +#ifdef CONFIG_X86_64
> +#define HW_BREAKPOINT_LEN_8 0x48
> +#endif
> +
> +/* Available HW breakpoint type encodings */
> +
> +/* trigger on instruction execute */
> +#define HW_BREAKPOINT_EXECUTE 0x80
> +/* trigger on memory write */
> +#define HW_BREAKPOINT_WRITE 0x81
> +/* trigger on memory read or write */
> +#define HW_BREAKPOINT_RW 0x83
What is the reason for making the encoding of all these values
arch-specific? How would you set up the encoding for an arch where
different CPU variants have different debug facilities (as we do on
powerpc).
> +/* Total number of available HW breakpoint registers */
> +#define HB_NUM 4
> +
> +extern struct hw_breakpoint *hbp_kernel[HB_NUM];
> +DECLARE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HB_NUM]);
> +extern unsigned int hbp_user_refcount[HB_NUM];
> +
> +/*
> + * Ptrace support: breakpoint trigger routine.
> + */
> +int __register_user_hw_breakpoint(int pos, struct task_struct *tsk,
> + struct hw_breakpoint *bp);
> +int __modify_user_hw_breakpoint(int pos, struct task_struct *tsk,
> + struct hw_breakpoint *bp);
> +void __unregister_user_hw_breakpoint(int pos, struct task_struct *tsk);
> +
> +void arch_install_thread_hw_breakpoint(struct task_struct *tsk);
> +void arch_uninstall_thread_hw_breakpoint(void);
> +int arch_check_va_in_userspace(unsigned long va, u8 hbp_len);
> +int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len);
> +int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp,
> + struct task_struct *tsk);
> +void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk);
> +void arch_flush_thread_hw_breakpoint(struct task_struct *tsk);
> +void arch_update_kernel_hw_breakpoint(void *);
> +int hw_breakpoint_handler(struct die_args *args);
> +int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
> + unsigned long val, void *data);
It looks like most if not all of these functions should have the same
prototype (though a different implementation) on all archs. In which
case the prototypes should go in a generic header.
> +#endif /* __KERNEL__ */
> +#endif /* _I386_HW_BREAKPOINT_H */
> +
> Index: arch/x86/include/asm/debugreg.h
> ===================================================================
> --- arch/x86/include/asm/debugreg.h.orig
> +++ arch/x86/include/asm/debugreg.h
> @@ -18,6 +18,7 @@
> #define DR_TRAP1 (0x2) /* db1 */
> #define DR_TRAP2 (0x4) /* db2 */
> #define DR_TRAP3 (0x8) /* db3 */
> +#define DR_TRAP_BITS (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)
>
> #define DR_STEP (0x4000) /* single-step */
> #define DR_SWITCH (0x8000) /* task switch */
> @@ -49,6 +50,8 @@
>
> #define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */
> #define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */
> +#define DR_LOCAL_ENABLE (0x1) /* Local enable for reg 0 */
> +#define DR_GLOBAL_ENABLE (0x2) /* Global enable for reg 0 */
> #define DR_ENABLE_SIZE 2 /* 2 enable bits per register */
>
> #define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */
> @@ -67,4 +70,32 @@
> #define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */
> #define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */
>
> +/*
> + * HW breakpoint additions
> + */
> +#ifdef __KERNEL__
> +
> +/* For process management */
> +void flush_thread_hw_breakpoint(struct task_struct *tsk);
> +int copy_thread_hw_breakpoint(struct task_struct *tsk,
> + struct task_struct *child, unsigned long clone_flags);
> +void dump_thread_hw_breakpoint(struct task_struct *tsk, int u_debugreg[8]);
> +void switch_to_thread_hw_breakpoint(struct task_struct *tsk);
> +
> +/* For CPU management */
> +void load_debug_registers(void);
> +static inline void hw_breakpoint_disable(void)
> +{
> + /* Zero the control register for HW Breakpoint */
> + set_debugreg(0UL, 7);
> +
> + /* Zero-out the individual HW breakpoint address registers */
> + set_debugreg(0UL, 0);
> + set_debugreg(0UL, 1);
> + set_debugreg(0UL, 2);
> + set_debugreg(0UL, 3);
> +}
> +
> +#endif /* __KERNEL__ */
> +
> #endif /* _ASM_X86_DEBUGREG_H */
> Index: arch/x86/include/asm/processor.h
> ===================================================================
> --- arch/x86/include/asm/processor.h.orig
> +++ arch/x86/include/asm/processor.h
> @@ -29,6 +29,7 @@ struct mm_struct;
> #include <linux/threads.h>
> #include <linux/init.h>
>
> +#define HB_NUM 4
Uh.. this #define appears to be duplicated in asm-x86/hw_breakpoint.h
and here.
> /*
> * Default implementation of macro that returns current
> * instruction pointer ("program counter").
> @@ -435,8 +436,11 @@ struct thread_struct {
> unsigned long debugreg1;
> unsigned long debugreg2;
> unsigned long debugreg3;
> + unsigned long debugreg[HB_NUM];
> unsigned long debugreg6;
> unsigned long debugreg7;
> + /* Hardware breakpoint info */
> + struct hw_breakpoint *hbp[HB_NUM];
> /* Fault info: */
> unsigned long cr2;
> unsigned long trap_no;
>
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [Patch 02/12] Introducing generic hardware breakpoint handler interfaces
2009-05-11 11:52 ` [Patch 02/12] Introducing generic hardware breakpoint handler interfaces K.Prasad
2009-05-11 12:12 ` Bharata B Rao
@ 2009-05-28 6:15 ` David Gibson
2009-05-28 11:55 ` K.Prasad
1 sibling, 1 reply; 40+ messages in thread
From: David Gibson @ 2009-05-28 6:15 UTC (permalink / raw)
To: K.Prasad
Cc: Alan Stern, Steven Rostedt, Frederic Weisbecker, Ingo Molnar,
Linux Kernel Mailing List, Andrew Morton, Benjamin Herrenschmidt,
maneesh, Roland McGrath, Masami Hiramatsu
On Mon, May 11, 2009 at 05:22:40PM +0530, K.Prasad wrote:
> This patch introduces the generic Hardware Breakpoint interfaces for
> both user and kernel space requests.
[snip]
> Index: kernel/hw_breakpoint.c
> ===================================================================
> --- /dev/null
> +++ kernel/hw_breakpoint.c
> @@ -0,0 +1,367 @@
> +/*
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
> + *
> + * Copyright (C) 2007 Alan Stern
> + * Copyright (C) IBM Corporation, 2009
> + */
> +
> +/*
> + * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
> + * using the CPU's debug registers.
> + *
> + * This file contains the arch-independent routines. It is not meant
> + * to be compiled as a standalone source file; rather it should be
> + * #include'd by the arch-specific implementation.
> + */
> +
> +#include <linux/irqflags.h>
> +#include <linux/kallsyms.h>
> +#include <linux/notifier.h>
> +#include <linux/kprobes.h>
> +#include <linux/kdebug.h>
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/percpu.h>
> +#include <linux/mutex.h>
> +#include <linux/sched.h>
> +#include <linux/init.h>
> +#include <linux/smp.h>
> +
> +#include <asm/hw_breakpoint.h>
> +#include <asm/processor.h>
> +
> +#ifdef CONFIG_X86
That's a nasty #ifdef. Surely this include should instead go in the
x86 version of hw_breakpoint.h.
> +#include <asm/debugreg.h>
> +#endif
> +/*
> + * Spinlock that protects all (un)register operations over kernel/user-space
> + * breakpoint requests
> + */
> +DEFINE_SPINLOCK(hw_breakpoint_lock);
> +
> +/* Array of kernel-space breakpoint structures */
> +struct hw_breakpoint *hbp_kernel[HB_NUM];
> +
> +/*
> + * Per-processor copy of hbp_kernel[]. Used only when hbp_kernel is being
> + * modified but we need the older copy to handle any hbp exceptions. It will
> + * sync with hbp_kernel[] value after updation is done through IPIs.
> + */
> +DEFINE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HB_NUM]);
Ok.. this seems to assume that all breakpoints are equal as far as
allocation goes - i.e. that the total number of breakpoints is the
only allocation limit. That's apparently true for x86, but on
embedded powerpc (4xx), for example, we can have up to 2 data
breakpoints and up to 4 instruction breakpoints independent of each
other.
> +/*
> + * Kernel breakpoints grow downwards, starting from HB_NUM
> + * 'hbp_kernel_pos' denotes lowest numbered breakpoint register occupied for
> + * kernel-space request. We will initialise it here and not in an __init
> + * routine because load_debug_registers(), which uses this variable can be
> + * called very early during CPU initialisation.
> + */
> +unsigned int hbp_kernel_pos = HB_NUM;
Likewise here. For 44x we would need separate tables and separate
pointers for data and instruction breakpoints.
> +/*
> + * An array containing refcount of threads using a given bkpt register
> + * Accesses are synchronised by acquiring hw_breakpoint_lock
> + */
> +unsigned int hbp_user_refcount[HB_NUM];
> +
> +/*
> + * Install the debug register values for a new thread.
> + */
> +void switch_to_thread_hw_breakpoint(struct task_struct *tsk)
> +{
> + /* Set the debug register */
> + arch_install_thread_hw_breakpoint(tsk);
Um.. and the point of this wrapper which does nothing extra around the
arch-specific function would be...?
> +}
> +
> +/*
> + * Install the debug register values for just the kernel, no thread.
> + */
> +void switch_to_none_hw_breakpoint(void)
> +{
> + arch_uninstall_thread_hw_breakpoint();
And again.
> +}
> +
> +/*
> + * Load the debug registers during startup of a CPU.
> + */
> +void load_debug_registers(void)
This name does not suggest a boot and/or cpu hotplug only function.
> +{
> + unsigned long flags;
> + struct task_struct *tsk = current;
> +
> + spin_lock_bh(&hw_breakpoint_lock);
> +
> + /* Prevent IPIs for new kernel breakpoint updates */
> + local_irq_save(flags);
> + arch_update_kernel_hw_breakpoint(NULL);
> + local_irq_restore(flags);
> +
> + if (test_tsk_thread_flag(tsk, TIF_DEBUG))
> + switch_to_thread_hw_breakpoint(tsk);
> +
> + spin_unlock_bh(&hw_breakpoint_lock);
> +}
> +
> +/*
> + * Erase all the hardware breakpoint info associated with a thread.
> + *
> + * If tsk != current then tsk must not be usable (for example, a
> + * child being cleaned up from a failed fork).
> + */
> +void flush_thread_hw_breakpoint(struct task_struct *tsk)
> +{
> + int i;
> + struct thread_struct *thread = &(tsk->thread);
> +
> + spin_lock_bh(&hw_breakpoint_lock);
> +
> + /* The thread no longer has any breakpoints associated with it */
> + clear_tsk_thread_flag(tsk, TIF_DEBUG);
> + for (i = 0; i < HB_NUM; i++) {
> + if (thread->hbp[i]) {
> + hbp_user_refcount[i]--;
> + kfree(thread->hbp[i]);
> + thread->hbp[i] = NULL;
> + }
> + }
> +
> + arch_flush_thread_hw_breakpoint(tsk);
> +
> + /* Actually uninstall the breakpoints if necessary */
> + if (tsk == current)
> + switch_to_none_hw_breakpoint();
> + spin_unlock_bh(&hw_breakpoint_lock);
> +}
> +
> +/*
> + * Copy the hardware breakpoint info from a thread to its cloned
> child.
This comment contradicts what the code actually does.
> + */
> +int copy_thread_hw_breakpoint(struct task_struct *tsk,
> + struct task_struct *child, unsigned long clone_flags)
> +{
> + /*
> + * We will assume that breakpoint settings are not inherited
> + * and the child starts out with no debug registers set.
> + * But what about CLONE_PTRACE?
> + */
> + clear_tsk_thread_flag(child, TIF_DEBUG);
> +
> + /* We will call flush routine since the debugregs are not inherited */
> + arch_flush_thread_hw_breakpoint(child);
> +
> + return 0;
> +}
> +
> +int __register_user_hw_breakpoint(int pos, struct task_struct *tsk,
> + struct hw_breakpoint *bp)
> +{
> + struct thread_struct *thread = &(tsk->thread);
> + int rc;
> +
> + /* Do not overcommit. Fail if kernel has used the hbp registers */
> + if (pos >= hbp_kernel_pos)
> + return -ENOSPC;
Again, this is generic code making a non-portable assumption about how
breakpoints must be allocated.
> + rc = arch_validate_hwbkpt_settings(bp, tsk);
> + if (rc)
> + return rc;
> +
> + thread->hbp[pos] = bp;
> + hbp_user_refcount[pos]++;
> +
> + arch_update_user_hw_breakpoint(pos, tsk);
> + /*
> + * Does it need to be installed right now?
> + * Otherwise it will get installed the next time tsk runs
> + */
> + if (tsk == current)
> + switch_to_thread_hw_breakpoint(tsk);
> +
> + return rc;
> +}
> +
> +/*
> + * Modify the address of a hbp register already in use by the task
> + * Do not invoke this in-lieu of a __unregister_user_hw_breakpoint()
> + */
> +int __modify_user_hw_breakpoint(int pos, struct task_struct *tsk,
> + struct hw_breakpoint *bp)
> +{
> + struct thread_struct *thread = &(tsk->thread);
> +
> + if ((pos >= hbp_kernel_pos) || (arch_validate_hwbkpt_settings(bp, tsk)))
> + return -EINVAL;
> +
> + if (thread->hbp[pos] == NULL)
> + return -EINVAL;
> +
> + thread->hbp[pos] = bp;
> + /*
> + * 'pos' must be that of a hbp register already used by 'tsk'
> + * Otherwise arch_modify_user_hw_breakpoint() will fail
> + */
> + arch_update_user_hw_breakpoint(pos, tsk);
> +
> + if (tsk == current)
> + switch_to_thread_hw_breakpoint(tsk);
> +
> + return 0;
> +}
> +
> +void __unregister_user_hw_breakpoint(int pos, struct task_struct *tsk)
> +{
> + hbp_user_refcount[pos]--;
> + tsk->thread.hbp[pos] = NULL;
> +
> + arch_update_user_hw_breakpoint(pos, tsk);
> +
> + if (tsk == current)
> + switch_to_thread_hw_breakpoint(tsk);
> +}
> +
> +/**
> + * register_user_hw_breakpoint - register a hardware breakpoint for user space
> + * @tsk: pointer to 'task_struct' of the process to which the address belongs
> + * @bp: the breakpoint structure to register
> + *
> + * @bp.info->name or @bp.info->address, @bp.info->len, @bp.info->type and
> + * @bp->triggered must be set properly before invocation
> + *
> + */
> +int register_user_hw_breakpoint(struct task_struct *tsk,
> + struct hw_breakpoint *bp)
> +{
> + struct thread_struct *thread = &(tsk->thread);
> + int i, rc = -ENOSPC;
> +
> + spin_lock_bh(&hw_breakpoint_lock);
> +
> + for (i = 0; i < hbp_kernel_pos; i++) {
> + if (!thread->hbp[i]) {
> + rc = __register_user_hw_breakpoint(i, tsk, bp);
> + break;
> + }
> + }
> +
> + spin_unlock_bh(&hw_breakpoint_lock);
> + return rc;
> +}
> +EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
> +
> +/**
> + * unregister_user_hw_breakpoint - unregister a user-space hardware breakpoint
> + * @tsk: pointer to 'task_struct' of the process to which the address belongs
> + * @bp: the breakpoint structure to unregister
> + *
> + */
> +void unregister_user_hw_breakpoint(struct task_struct *tsk,
> + struct hw_breakpoint *bp)
> +{
> + struct thread_struct *thread = &(tsk->thread);
> + int i;
> +
> + spin_lock_bh(&hw_breakpoint_lock);
> + for (i = 0; i < hbp_kernel_pos; i++) {
> + if (bp == thread->hbp[i]) {
> + __unregister_user_hw_breakpoint(i, tsk);
> + break;
> + }
> + }
> + spin_unlock_bh(&hw_breakpoint_lock);
> +}
> +EXPORT_SYMBOL_GPL(unregister_user_hw_breakpoint);
> +
> +/**
> + * register_kernel_hw_breakpoint - register a hardware breakpoint for kernel space
> + * @bp: the breakpoint structure to register
> + *
> + * @bp.info->name or @bp.info->address, @bp.info->len, @bp.info->type and
> + * @bp->triggered must be set properly before invocation
> + *
> + */
> +int register_kernel_hw_breakpoint(struct hw_breakpoint *bp)
> +{
> + int rc;
> +
> + rc = arch_validate_hwbkpt_settings(bp, NULL);
> + if (rc)
> + return rc;
> +
> + spin_lock_bh(&hw_breakpoint_lock);
> +
> + rc = -EINVAL;
> + /* Check if we are over-committing */
> + if ((hbp_kernel_pos > 0) && (!hbp_user_refcount[hbp_kernel_pos-1])) {
> + hbp_kernel_pos--;
> + hbp_kernel[hbp_kernel_pos] = bp;
> + on_each_cpu(arch_update_kernel_hw_breakpoint, NULL, 1);
> + rc = 0;
> + }
> +
> + spin_unlock_bh(&hw_breakpoint_lock);
> + return rc;
> +}
> +EXPORT_SYMBOL_GPL(register_kernel_hw_breakpoint);
> +
> +/**
> + * unregister_kernel_hw_breakpoint - unregister a HW breakpoint for kernel space
> + * @bp: the breakpoint structure to unregister
> + *
> + * Uninstalls and unregisters @bp.
> + */
> +void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp)
> +{
> + int i, j;
> +
> + spin_lock_bh(&hw_breakpoint_lock);
> +
> + /* Find the 'bp' in our list of breakpoints for kernel */
> + for (i = hbp_kernel_pos; i < HB_NUM; i++)
> + if (bp == hbp_kernel[i])
> + break;
> +
> + /* Check if we did not find a match for 'bp'. If so return early */
> + if (i == HB_NUM) {
> + spin_unlock_bh(&hw_breakpoint_lock);
> + return;
> + }
> +
> + /*
> + * We'll shift the breakpoints one-level above to compact if
> + * unregistration creates a hole
> + */
> + for (j = i; j > hbp_kernel_pos; j--)
> + hbp_kernel[j] = hbp_kernel[j-1];
> +
> + hbp_kernel[hbp_kernel_pos] = NULL;
> + on_each_cpu(arch_update_kernel_hw_breakpoint, NULL, 1);
> + hbp_kernel_pos++;
> +
> + spin_unlock_bh(&hw_breakpoint_lock);
> +}
> +EXPORT_SYMBOL_GPL(unregister_kernel_hw_breakpoint);
> +
> +static struct notifier_block hw_breakpoint_exceptions_nb = {
> + .notifier_call = hw_breakpoint_exceptions_notify,
> + /* we need to be notified first */
> + .priority = 0x7fffffff
> +};
> +
> +static int __init init_hw_breakpoint(void)
> +{
> + return register_die_notifier(&hw_breakpoint_exceptions_nb);
> +}
> +
> +core_initcall(init_hw_breakpoint);
>
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [Patch 03/12] x86 architecture implementation of Hardware Breakpoint interfaces
2009-05-11 11:53 ` [Patch 03/12] x86 architecture implementation of Hardware Breakpoint interfaces K.Prasad
@ 2009-05-28 6:35 ` David Gibson
2009-05-28 13:41 ` K.Prasad
0 siblings, 1 reply; 40+ messages in thread
From: David Gibson @ 2009-05-28 6:35 UTC (permalink / raw)
To: K.Prasad
Cc: Alan Stern, Steven Rostedt, Frederic Weisbecker, Ingo Molnar,
Linux Kernel Mailing List, Andrew Morton, Benjamin Herrenschmidt,
maneesh, Roland McGrath, Masami Hiramatsu
On Mon, May 11, 2009 at 05:23:03PM +0530, K.Prasad wrote:
> This patch introduces the arch-specific implementation of
> hw_breakpoint.c inside x86 specific directories. They contain
> functions which help validate and serve requests for using Hardware
> Breakpoint registers on x86 processors.
[snip]
> Index: arch/x86/kernel/hw_breakpoint.c
> ===================================================================
> --- /dev/null
> +++ arch/x86/kernel/hw_breakpoint.c
> @@ -0,0 +1,358 @@
> +/*
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
> + *
> + * Copyright (C) 2007 Alan Stern
> + * Copyright (C) 2009 IBM Corporation
> + */
> +
> +/*
> + * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
> + * using the CPU's debug registers.
> + */
> +
> +#include <linux/irqflags.h>
> +#include <linux/notifier.h>
> +#include <linux/kallsyms.h>
> +#include <linux/kprobes.h>
> +#include <linux/percpu.h>
> +#include <linux/kdebug.h>
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/sched.h>
> +#include <linux/init.h>
> +#include <linux/smp.h>
> +
> +#include <asm/hw_breakpoint.h>
> +#include <asm/processor.h>
> +#include <asm/debugreg.h>
> +
> +/* Unmasked kernel DR7 value */
> +static unsigned long kdr7;
> +
> +/*
> + * Masks for the bits corresponding to registers DR0 - DR3 in DR7 register.
> + * Used to clear and verify the status of bits corresponding to DR0 - DR3
> + */
> +static const unsigned long dr7_masks[HB_NUM] = {
> + 0x000f0003, /* LEN0, R/W0, G0, L0 */
> + 0x00f0000c, /* LEN1, R/W1, G1, L1 */
> + 0x0f000030, /* LEN2, R/W2, G2, L2 */
> + 0xf00000c0 /* LEN3, R/W3, G3, L3 */
> +};
> +
> +
> +/*
> + * Encode the length, type, Exact, and Enable bits for a particular breakpoint
> + * as stored in debug register 7.
> + */
> +static unsigned long encode_dr7(int drnum, unsigned len, unsigned type)
> +{
> + unsigned long temp;
> +
> + temp = (len | type) & 0xf;
> + temp <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE);
> + temp |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE)) |
> + DR_GLOBAL_SLOWDOWN;
> + return temp;
> +}
> +
> +void arch_update_kernel_hw_breakpoints(void *unused)
> +{
> + struct hw_breakpoint *bp;
> + int i, cpu = get_cpu();
> +
> + /* Don't allow debug exceptions while we update the registers */
> + set_debugreg(0UL, 7);
> +
> + /* Clear all kernel-space breakpoints in kdr7 */
> + kdr7 = 0;
> + for (i = hbp_kernel_pos; i < HB_NUM; i++) {
> + per_cpu(this_hbp_kernel[i], cpu) = bp = hbp_kernel[i];
> + if (bp) {
> + kdr7 |= encode_dr7(i, bp->info.len, bp->info.type);
> + set_debugreg(hbp_kernel[i]->info.address, i);
> + }
> + }
> +
> + /* No need to set DR6. Update the debug registers with kernel-space
> + * breakpoint values from kdr7 and user-space requests from the
> + * current process
> + */
> + set_debugreg(kdr7 | current->thread.debugreg7, 7);
> + put_cpu_no_resched();
> +}
> +
> +/*
> + * Install the thread breakpoints in their debug registers.
> + */
> +void arch_install_thread_hw_breakpoint(struct task_struct *tsk)
> +{
> + struct thread_struct *thread = &(tsk->thread);
> +
> + switch (hbp_kernel_pos) {
> + case 4:
> + set_debugreg(thread->debugreg[3], 3);
> + case 3:
> + set_debugreg(thread->debugreg[2], 2);
> + case 2:
> + set_debugreg(thread->debugreg[1], 1);
> + case 1:
> + set_debugreg(thread->debugreg[0], 0);
> + default:
> + break;
> + }
> +
> + /* No need to set DR6 */
> + set_debugreg((kdr7 | thread->debugreg7), 7);
> +}
> +
> +/*
> + * Install the debug register values for just the kernel, no thread.
> + */
> +void arch_uninstall_thread_hw_breakpoint()
> +{
> + /* Clear the user-space portion of debugreg7 by setting only kdr7 */
> + set_debugreg(kdr7, 7);
> +
> +}
> +
> +static int get_hbp_len(u8 hbp_len)
> +{
> + unsigned int len_in_bytes = 0;
> +
> + switch (hbp_len) {
> + case HW_BREAKPOINT_LEN_1:
> + len_in_bytes = 1;
> + break;
> + case HW_BREAKPOINT_LEN_2:
> + len_in_bytes = 2;
> + break;
> + case HW_BREAKPOINT_LEN_4:
> + len_in_bytes = 4;
> + break;
> +#ifdef CONFIG_X86_64
> + case HW_BREAKPOINT_LEN_8:
> + len_in_bytes = 8;
> + break;
> +#endif
Hrm, the fact that you have to do this nasty back-conversion again
makes me wonder at the wisdom of having per-arch encodings for
breakpoint length.
> + }
> + return len_in_bytes;
> +}
> +
> +/*
> + * Check for virtual address in user space.
> + */
> +int arch_check_va_in_userspace(unsigned long va, u8 hbp_len)
> +{
> + unsigned int len;
> +
> + len = get_hbp_len(hbp_len);
> +
> + return (va <= TASK_SIZE - len);
> +}
> +
> +/*
> + * Check for virtual address in kernel space.
> + */
> +int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len)
> +{
> + unsigned int len;
> +
> + len = get_hbp_len(hbp_len);
> +
> + return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
> +}
> +
> +/*
> + * Store a breakpoint's encoded address, length, and type.
> + */
> +static int arch_store_info(struct hw_breakpoint *bp)
This function doesn't look very arch specific. Nor is the name very
meaningful.
> +{
> + /*
> + * User-space requests will always have the address field populated
> + * For kernel-addresses, either the address or symbol name can be
> + * specified.
> + */
> + if (bp->info.name)
> + bp->info.address = (unsigned long)
> + kallsyms_lookup_name(bp->info.name);
> + if (bp->info.address)
> + return 0;
> + return -EINVAL;
> +}
> +
> +/*
> + * Validate the arch-specific HW Breakpoint register settings
> + */
> +int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp,
> + struct task_struct *tsk)
> +{
> + unsigned int align;
> + int ret = -EINVAL;
> +
> + switch (bp->info.type) {
> + /*
> + * Ptrace-refactoring code
> + * For now, we'll allow instruction breakpoint only for user-space
> + * addresses
> + */
> + case HW_BREAKPOINT_EXECUTE:
> + if ((!arch_check_va_in_userspace(bp->info.address,
> + bp->info.len)) &&
> + bp->info.len != HW_BREAKPOINT_LEN_EXECUTE)
> + return ret;
> + break;
> + case HW_BREAKPOINT_WRITE:
> + break;
> + case HW_BREAKPOINT_RW:
> + break;
> + default:
> + return ret;
> + }
> +
> + switch (bp->info.len) {
> + case HW_BREAKPOINT_LEN_1:
> + align = 0;
> + break;
> + case HW_BREAKPOINT_LEN_2:
> + align = 1;
> + break;
> + case HW_BREAKPOINT_LEN_4:
> + align = 3;
> + break;
> +#ifdef CONFIG_X86_64
> + case HW_BREAKPOINT_LEN_8:
> + align = 7;
> + break;
> +#endif
> + default:
> + return ret;
> + }
> +
> + if (bp->triggered)
> + ret = arch_store_info(bp);
> +
> + if (ret < 0)
> + return ret;
> + /*
> + * Check that the low-order bits of the address are appropriate
> + * for the alignment implied by len.
> + */
> + if (bp->info.address & align)
> + return -EINVAL;
> +
> + /* Check that the virtual address is in the proper range */
> + if (tsk) {
> + if (!arch_check_va_in_userspace(bp->info.address, bp->info.len))
> + return -EFAULT;
> + } else {
> + if (!arch_check_va_in_kernelspace(bp->info.address,
> + bp->info.len))
> + return -EFAULT;
> + }
> + return 0;
> +}
> +
> +void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk)
> +{
> + struct thread_struct *thread = &(tsk->thread);
> + struct hw_breakpoint *bp = thread->hbp[pos];
> +
> + thread->debugreg7 &= ~dr7_masks[pos];
> + if (bp) {
> + thread->debugreg[pos] = bp->info.address;
> + thread->debugreg7 |= encode_dr7(pos, bp->info.len,
> + bp->info.type);
> + } else
> + thread->debugreg[pos] = 0;
> +}
> +
> +void arch_flush_thread_hw_breakpoint(struct task_struct *tsk)
> +{
> + int i;
> + struct thread_struct *thread = &(tsk->thread);
> +
> + thread->debugreg7 = 0;
> + for (i = 0; i < HB_NUM; i++)
> + thread->debugreg[i] = 0;
> +}
> +
> +/*
> + * Handle debug exception notifications.
> + */
> +int __kprobes hw_breakpoint_handler(struct die_args *args)
> +{
> + int i, cpu, rc = NOTIFY_STOP;
> + struct hw_breakpoint *bp;
> + /* The DR6 value is stored in args->err */
> + unsigned long dr7, dr6 = args->err;
> +
> + /* Do an early return if no trap bits are set in DR6 */
> + if ((dr6 & DR_TRAP_BITS) == 0)
> + return NOTIFY_DONE;
> +
> + /* Lazy debug register switching */
> + if (!test_tsk_thread_flag(current, TIF_DEBUG))
> + switch_to_none_hw_breakpoint();
Shouldn't you also drop out of the handler here, rather than having to
again check for a false alarm below.
It's also not immediately clear to me how lazy switching buys you
anything here, since it's only lazy switching off, not lazy switching
on.
> + get_debugreg(dr7, 7);
> + /* Disable breakpoints during exception handling */
> + set_debugreg(0UL, 7);
> + /*
> + * Assert that local interrupts are disabled
> + * Reset the DRn bits in the virtualized register value.
> + * The ptrace trigger routine will add in whatever is needed.
> + */
> + current->thread.debugreg6 &= ~DR_TRAP_BITS;
> + cpu = get_cpu();
> +
> + /* Handle all the breakpoints that were triggered */
> + for (i = 0; i < HB_NUM; ++i) {
> + if (likely(!(dr6 & (DR_TRAP0 << i))))
> + continue;
> + /*
> + * Find the corresponding hw_breakpoint structure and
> + * invoke its triggered callback.
> + */
> + if (i >= hbp_kernel_pos)
> + bp = per_cpu(this_hbp_kernel[i], cpu);
> + else {
> + bp = current->thread.hbp[i];
> + /* False alarm due to lazy DR switching */
> + if (!bp)
> + continue;
> + rc = NOTIFY_DONE;
> + }
> + (bp->triggered)(bp, args->regs);
> + }
> + if (dr6 & (~DR_TRAP_BITS))
> + rc = NOTIFY_DONE;
> +
> + set_debugreg(dr7, 7);
> + put_cpu_no_resched();
> + return rc;
> +}
> +
> +/*
> + * Handle debug exception notifications.
> + */
> +int __kprobes hw_breakpoint_exceptions_notify(
> + struct notifier_block *unused, unsigned long val, void *data)
> +{
> + if (val != DIE_DEBUG)
> + return NOTIFY_DONE;
> +
> + return hw_breakpoint_handler(data);
> +}
>
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [Patch 06/12] Use the new wrapper routines to access debug registers in process/thread code
2009-05-11 11:53 ` [Patch 06/12] Use the new wrapper routines to access debug registers in process/thread code K.Prasad
@ 2009-05-28 6:42 ` David Gibson
2009-05-29 9:01 ` K.Prasad
0 siblings, 1 reply; 40+ messages in thread
From: David Gibson @ 2009-05-28 6:42 UTC (permalink / raw)
To: K.Prasad
Cc: Alan Stern, Steven Rostedt, Frederic Weisbecker, Ingo Molnar,
Linux Kernel Mailing List, Andrew Morton, Benjamin Herrenschmidt,
maneesh, Roland McGrath, Masami Hiramatsu
On Mon, May 11, 2009 at 05:23:44PM +0530, K.Prasad wrote:
> From: Alan Stern <stern@rowland.harvard.edu>
>
> This patch enables the use of abstract debug registers in
> process-handling routines.
[snip]
>
> + p->thread.io_bitmap_ptr = NULL;
Why is manipulating the io_bitmap_ptr relevant to debug register
handling?
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [Patch 01/12] Prepare the code for Hardware Breakpoint interfaces
2009-05-28 5:28 ` David Gibson
@ 2009-05-28 11:10 ` K.Prasad
0 siblings, 0 replies; 40+ messages in thread
From: K.Prasad @ 2009-05-28 11:10 UTC (permalink / raw)
To: David Gibson
Cc: Alan Stern, Steven Rostedt, Frederic Weisbecker, Ingo Molnar,
Linux Kernel Mailing List, Andrew Morton, Benjamin Herrenschmidt,
maneesh, Roland McGrath, Masami Hiramatsu
On Thu, May 28, 2009 at 03:28:59PM +1000, David Gibson wrote:
> On Mon, May 11, 2009 at 05:22:13PM +0530, K.Prasad wrote:
> > This patch introduces header files containing constants, structure
> > definitions and declaration of functions used by the hardware
> > breakpoint interface code.
>
> Hrm. I don't really like splitting prototypes from their
> implementations as you have done. Patches in a series should be split
> into logical, self-contained units, not just sliced up by file.
>
> [snip]
The first patch is one that prepares the kernel tree for the other patches
on the stack and still allow compilation of the kernel. This patch could
have been dismantled entirely, while the prototypes and header files moved
along with the corresponding .c files but it would have made the resultant
patch much bigger.
> > Index: include/asm-generic/hw_breakpoint.h
> > ===================================================================
> > --- /dev/null
> > +++ include/asm-generic/hw_breakpoint.h
> > @@ -0,0 +1,139 @@
> > +#ifndef _ASM_GENERIC_HW_BREAKPOINT_H
> > +#define _ASM_GENERIC_HW_BREAKPOINT_H
>
> Is there a reason this is asm-generic, rather than
> linux/hw_breakpoint.h?
>
None that I know of! It's a 'legacy' that I carried from Alan Stern's
original patchset.
> > +#ifndef __ARCH_HW_BREAKPOINT_H
> > +#error "Please don't include this file directly"
> > +#endif
> > +
> > +#ifdef __KERNEL__
> > +#include <linux/list.h>
> > +#include <linux/types.h>
> > +#include <linux/kallsyms.h>
> > +
> > +/**
> > + * struct hw_breakpoint - unified kernel/user-space hardware breakpoint
> > + * @triggered: callback invoked after target address access
>
> "triggered" seems an odd name to me - it suggests a boolean flag, not
> a function pointer.
>
Re-reading the word 'triggered' and checking its usage in the existing
kernel I realise that it could have been named something better (handler?).
This certainly joins the list of enhancements I intend to make on the
patchset in the future (perhaps after it makes it to the mainline).
> > + * @info: arch-specific breakpoint info (address, length, and type)
> > + *
> > + * %hw_breakpoint structures are the kernel's way of representing
> > + * hardware breakpoints. These are data breakpoints
> > + * (also known as "watchpoints", triggered on data access), and the breakpoint's
> > + * target address can be located in either kernel space or user space.
>
> I thought this infrastructure could also handle instruction
> breakpoints. The comment above seems to contradict that.
>
Agreed, the comment reflects the old state of the patch when user-space
interfaces weren't exported, and HW_BREAKPOINT_EXECUTE was confined only
to ptrace. Now that they are exported, a kernel-module can request a
breakpoint register to watch for 'execute' operations on instructions. I
will update the comment.
> > + * The breakpoint's address, length, and type are highly
> > + * architecture-specific. The values are encoded in the @info field; you
> > + * specify them when registering the breakpoint. To examine the encoded
> > + * values use hw_breakpoint_get_{kaddress,uaddress,len,type}(), declared
> > + * below.
> > + *
> > + * The address is specified as a regular kernel pointer (for kernel-space
> > + * breakponts) or as an %__user pointer (for user-space breakpoints).
> > + * With register_user_hw_breakpoint(), the address must refer to a
> > + * location in user space. The breakpoint will be active only while the
> > + * requested task is running. Conversely with
> > + * register_kernel_hw_breakpoint(), the address must refer to a location
> > + * in kernel space, and the breakpoint will be active on all CPUs
> > + * regardless of the current task.
> > + *
> > + * The length is the breakpoint's extent in bytes, which is subject to
> > + * certain limitations. include/asm/hw_breakpoint.h contains macros
> > + * defining the available lengths for a specific architecture. Note that
> > + * the address's alignment must match the length. The breakpoint will
> > + * catch accesses to any byte in the range from address to address +
> > + * (length - 1).
> > + *
> > + * The breakpoint's type indicates the sort of access that will cause it
> > + * to trigger. Possible values may include:
> > + *
> > + * %HW_BREAKPOINT_RW (triggered on read or write access),
> > + * %HW_BREAKPOINT_WRITE (triggered on write access), and
> > + * %HW_BREAKPOINT_READ (triggered on read access).
> > + *
> > + * Appropriate macros are defined in include/asm/hw_breakpoint.h; not all
> > + * possibilities are available on all architectures. Execute breakpoints
> > + * must have length equal to the special value %HW_BREAKPOINT_LEN_EXECUTE.
>
> Why do instruction breakpoints have a special length, rather than a
> special type? Or do they have both?
>
Both. The len for a execute breakpoint is HW_BREAKPOINT_LEN_EXECUTE and
type HW_BREAKPOINT_EXECUTE.
> > + * When a breakpoint gets hit, the @triggered callback is
> > + * invoked in_interrupt with a pointer to the %hw_breakpoint structure and the
> > + * processor registers.
> > + * Data breakpoints occur after the memory access has taken place.
> > + * Breakpoints are disabled during execution @triggered, to avoid
> > + * recursive traps and allow unhindered access to breakpointed memory.
> > + *
> > + * This sample code sets a breakpoint on pid_max and registers a callback
> > + * function for writes to that variable. Note that it is not portable
> > + * as written, because not all architectures support HW_BREAKPOINT_LEN_4.
> > + *
> > + * ----------------------------------------------------------------------
> > + *
> > + * #include <asm/hw_breakpoint.h>
> > + *
> > + * struct hw_breakpoint my_bp;
> > + *
> > + * static void my_triggered(struct hw_breakpoint *bp, struct pt_regs *regs)
> > + * {
> > + * printk(KERN_DEBUG "Inside triggered routine of breakpoint exception\n");
> > + * dump_stack();
> > + * .......<more debugging output>........
> > + * }
> > + *
> > + * static struct hw_breakpoint my_bp;
> > + *
> > + * static int init_module(void)
> > + * {
> > + * ..........<do anything>............
> > + * my_bp.info.type = HW_BREAKPOINT_WRITE;
> > + * my_bp.info.len = HW_BREAKPOINT_LEN_4;
> > + *
> > + * my_bp.installed = (void *)my_bp_installed;
> > + *
> > + * rc = register_kernel_hw_breakpoint(&my_bp);
> > + * ..........<do anything>............
> > + * }
> > + *
> > + * static void cleanup_module(void)
> > + * {
> > + * ..........<do anything>............
> > + * unregister_kernel_hw_breakpoint(&my_bp);
> > + * ..........<do anything>............
> > + * }
> > + *
> > + * ----------------------------------------------------------------------
> > + */
> > +struct hw_breakpoint {
> > + void (*triggered)(struct hw_breakpoint *, struct pt_regs *);
> > + struct arch_hw_breakpoint info;
> > +};
> > +
> > +/*
> > + * len and type values are defined in include/asm/hw_breakpoint.h.
> > + * Available values vary according to the architecture. On i386 the
> > + * possibilities are:
> > + *
> > + * HW_BREAKPOINT_LEN_1
> > + * HW_BREAKPOINT_LEN_2
> > + * HW_BREAKPOINT_LEN_4
> > + * HW_BREAKPOINT_RW
> > + * HW_BREAKPOINT_READ
> > + *
> > + * On other architectures HW_BREAKPOINT_LEN_8 may be available, and the
> > + * 1-, 2-, and 4-byte lengths may be unavailable. There also may be
> > + * HW_BREAKPOINT_WRITE. You can use #ifdef to check at compile time.
> > + */
> > +
> > +int register_user_hw_breakpoint(struct task_struct *tsk,
> > + struct hw_breakpoint *bp);
> > +void unregister_user_hw_breakpoint(struct task_struct *tsk,
> > + struct hw_breakpoint *bp);
> > +/*
> > + * Kernel breakpoints are not associated with any particular thread.
> > + */
> > +int register_kernel_hw_breakpoint(struct hw_breakpoint *bp);
> > +void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp);
> > +void switch_to_none_hw_breakpoint(void);
> > +
> > +extern unsigned int hbp_kernel_pos;
> > +extern spinlock_t hw_breakpoint_lock;
> > +
> > +#endif /* __KERNEL__ */
> > +#endif /* _ASM_GENERIC_HW_BREAKPOINT_H */
> > Index: arch/x86/include/asm/hw_breakpoint.h
> > ===================================================================
> > --- /dev/null
> > +++ arch/x86/include/asm/hw_breakpoint.h
> > @@ -0,0 +1,66 @@
> > +#ifndef _I386_HW_BREAKPOINT_H
> > +#define _I386_HW_BREAKPOINT_H
> > +
> > +#ifdef __KERNEL__
> > +#define __ARCH_HW_BREAKPOINT_H
> > +
> > +struct arch_hw_breakpoint {
> > + char *name; /* Contains name of the symbol to set bkpt */
> > + unsigned long address;
> > + u8 len;
> > + u8 type;
> > +};
> > +
> > +#include <linux/kdebug.h>
> > +#include <asm-generic/hw_breakpoint.h>
> > +
> > +/* Available HW breakpoint length encodings */
> > +#define HW_BREAKPOINT_LEN_1 0x40
> > +#define HW_BREAKPOINT_LEN_2 0x44
> > +#define HW_BREAKPOINT_LEN_4 0x4c
> > +#define HW_BREAKPOINT_LEN_EXECUTE 0x40
> > +
> > +#ifdef CONFIG_X86_64
> > +#define HW_BREAKPOINT_LEN_8 0x48
> > +#endif
> > +
> > +/* Available HW breakpoint type encodings */
> > +
> > +/* trigger on instruction execute */
> > +#define HW_BREAKPOINT_EXECUTE 0x80
> > +/* trigger on memory write */
> > +#define HW_BREAKPOINT_WRITE 0x81
> > +/* trigger on memory read or write */
> > +#define HW_BREAKPOINT_RW 0x83
>
> What is the reason for making the encoding of all these values
> arch-specific? How would you set up the encoding for an arch where
> different CPU variants have different debug facilities (as we do on
> powerpc).
>
I'm responding to this comment in a reply to this related mail of yours:
http://lkml.org/lkml/2009/5/28/43.
> > +/* Total number of available HW breakpoint registers */
> > +#define HB_NUM 4
> > +
> > +extern struct hw_breakpoint *hbp_kernel[HB_NUM];
> > +DECLARE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HB_NUM]);
> > +extern unsigned int hbp_user_refcount[HB_NUM];
> > +
> > +/*
> > + * Ptrace support: breakpoint trigger routine.
> > + */
> > +int __register_user_hw_breakpoint(int pos, struct task_struct *tsk,
> > + struct hw_breakpoint *bp);
> > +int __modify_user_hw_breakpoint(int pos, struct task_struct *tsk,
> > + struct hw_breakpoint *bp);
> > +void __unregister_user_hw_breakpoint(int pos, struct task_struct *tsk);
> > +
> > +void arch_install_thread_hw_breakpoint(struct task_struct *tsk);
> > +void arch_uninstall_thread_hw_breakpoint(void);
> > +int arch_check_va_in_userspace(unsigned long va, u8 hbp_len);
> > +int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len);
> > +int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp,
> > + struct task_struct *tsk);
> > +void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk);
> > +void arch_flush_thread_hw_breakpoint(struct task_struct *tsk);
> > +void arch_update_kernel_hw_breakpoint(void *);
> > +int hw_breakpoint_handler(struct die_args *args);
> > +int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
> > + unsigned long val, void *data);
>
> It looks like most if not all of these functions should have the same
> prototype (though a different implementation) on all archs. In which
> case the prototypes should go in a generic header.
>
Some of the functions have different signatures across archs too (like
arch_check_va_in_userspace and arch_check_va_in_kernelspace). While your
comments are based on an older version of the patchset, the recent one
(http://lkml.org/lkml/2009/5/21/143) has a more concise prototype declaration.
> > +#endif /* __KERNEL__ */
> > +#endif /* _I386_HW_BREAKPOINT_H */
> > +
> > Index: arch/x86/include/asm/debugreg.h
> > ===================================================================
> > --- arch/x86/include/asm/debugreg.h.orig
> > +++ arch/x86/include/asm/debugreg.h
> > @@ -18,6 +18,7 @@
> > #define DR_TRAP1 (0x2) /* db1 */
> > #define DR_TRAP2 (0x4) /* db2 */
> > #define DR_TRAP3 (0x8) /* db3 */
> > +#define DR_TRAP_BITS (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)
> >
> > #define DR_STEP (0x4000) /* single-step */
> > #define DR_SWITCH (0x8000) /* task switch */
> > @@ -49,6 +50,8 @@
> >
> > #define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */
> > #define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */
> > +#define DR_LOCAL_ENABLE (0x1) /* Local enable for reg 0 */
> > +#define DR_GLOBAL_ENABLE (0x2) /* Global enable for reg 0 */
> > #define DR_ENABLE_SIZE 2 /* 2 enable bits per register */
> >
> > #define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */
> > @@ -67,4 +70,32 @@
> > #define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */
> > #define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */
> >
> > +/*
> > + * HW breakpoint additions
> > + */
> > +#ifdef __KERNEL__
> > +
> > +/* For process management */
> > +void flush_thread_hw_breakpoint(struct task_struct *tsk);
> > +int copy_thread_hw_breakpoint(struct task_struct *tsk,
> > + struct task_struct *child, unsigned long clone_flags);
> > +void dump_thread_hw_breakpoint(struct task_struct *tsk, int u_debugreg[8]);
> > +void switch_to_thread_hw_breakpoint(struct task_struct *tsk);
> > +
> > +/* For CPU management */
> > +void load_debug_registers(void);
> > +static inline void hw_breakpoint_disable(void)
> > +{
> > + /* Zero the control register for HW Breakpoint */
> > + set_debugreg(0UL, 7);
> > +
> > + /* Zero-out the individual HW breakpoint address registers */
> > + set_debugreg(0UL, 0);
> > + set_debugreg(0UL, 1);
> > + set_debugreg(0UL, 2);
> > + set_debugreg(0UL, 3);
> > +}
> > +
> > +#endif /* __KERNEL__ */
> > +
> > #endif /* _ASM_X86_DEBUGREG_H */
> > Index: arch/x86/include/asm/processor.h
> > ===================================================================
> > --- arch/x86/include/asm/processor.h.orig
> > +++ arch/x86/include/asm/processor.h
> > @@ -29,6 +29,7 @@ struct mm_struct;
> > #include <linux/threads.h>
> > #include <linux/init.h>
> >
> > +#define HB_NUM 4
>
> Uh.. this #define appears to be duplicated in asm-x86/hw_breakpoint.h
> and here.
>
While it has been the thinking that there's no suitable header file that
is common to processor.h and other users of HBP_NUM in which this
definition can be made that caused this duplicity, I realise that
debugreg.h could have been used.
I will move the definition of HBP_NUM to arch/x86/include/asm/debugreg.h
and include the same in this file.
> > /*
> > * Default implementation of macro that returns current
> > * instruction pointer ("program counter").
> > @@ -435,8 +436,11 @@ struct thread_struct {
> > unsigned long debugreg1;
> > unsigned long debugreg2;
> > unsigned long debugreg3;
> > + unsigned long debugreg[HB_NUM];
> > unsigned long debugreg6;
> > unsigned long debugreg7;
> > + /* Hardware breakpoint info */
> > + struct hw_breakpoint *hbp[HB_NUM];
> > /* Fault info: */
> > unsigned long cr2;
> > unsigned long trap_no;
> >
>
> --
> David Gibson | I'll have my music baroque, and my code
> david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
> | _way_ _around_!
> http://www.ozlabs.org/~dgibson
Thanks,
K.Prasad
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [Patch 02/12] Introducing generic hardware breakpoint handler interfaces
2009-05-28 6:15 ` David Gibson
@ 2009-05-28 11:55 ` K.Prasad
2009-05-29 2:59 ` David Gibson
0 siblings, 1 reply; 40+ messages in thread
From: K.Prasad @ 2009-05-28 11:55 UTC (permalink / raw)
To: David Gibson
Cc: Alan Stern, Steven Rostedt, Frederic Weisbecker, Ingo Molnar,
Linux Kernel Mailing List, Andrew Morton, Benjamin Herrenschmidt,
maneesh, Roland McGrath, Masami Hiramatsu
On Thu, May 28, 2009 at 04:15:18PM +1000, David Gibson wrote:
> On Mon, May 11, 2009 at 05:22:40PM +0530, K.Prasad wrote:
> > This patch introduces the generic Hardware Breakpoint interfaces for
> > both user and kernel space requests.
>
> [snip]
> > Index: kernel/hw_breakpoint.c
> > ===================================================================
> > --- /dev/null
> > +++ kernel/hw_breakpoint.c
> > @@ -0,0 +1,367 @@
> > +/*
> > + * This program is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License as published by
> > + * the Free Software Foundation; either version 2 of the License, or
> > + * (at your option) any later version.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> > + * GNU General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU General Public License
> > + * along with this program; if not, write to the Free Software
> > + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
> > + *
> > + * Copyright (C) 2007 Alan Stern
> > + * Copyright (C) IBM Corporation, 2009
> > + */
> > +
> > +/*
> > + * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
> > + * using the CPU's debug registers.
> > + *
> > + * This file contains the arch-independent routines. It is not meant
> > + * to be compiled as a standalone source file; rather it should be
> > + * #include'd by the arch-specific implementation.
> > + */
> > +
> > +#include <linux/irqflags.h>
> > +#include <linux/kallsyms.h>
> > +#include <linux/notifier.h>
> > +#include <linux/kprobes.h>
> > +#include <linux/kdebug.h>
> > +#include <linux/kernel.h>
> > +#include <linux/module.h>
> > +#include <linux/percpu.h>
> > +#include <linux/mutex.h>
> > +#include <linux/sched.h>
> > +#include <linux/init.h>
> > +#include <linux/smp.h>
> > +
> > +#include <asm/hw_breakpoint.h>
> > +#include <asm/processor.h>
> > +
> > +#ifdef CONFIG_X86
>
> That's a nasty #ifdef. Surely this include should instead go in the
> x86 version of hw_breakpoint.h.
>
Agreed. I will make the change.
> > +#include <asm/debugreg.h>
> > +#endif
> > +/*
> > + * Spinlock that protects all (un)register operations over kernel/user-space
> > + * breakpoint requests
> > + */
> > +DEFINE_SPINLOCK(hw_breakpoint_lock);
> > +
> > +/* Array of kernel-space breakpoint structures */
> > +struct hw_breakpoint *hbp_kernel[HB_NUM];
> > +
> > +/*
> > + * Per-processor copy of hbp_kernel[]. Used only when hbp_kernel is being
> > + * modified but we need the older copy to handle any hbp exceptions. It will
> > + * sync with hbp_kernel[] value after updation is done through IPIs.
> > + */
> > +DEFINE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HB_NUM]);
>
> Ok.. this seems to assume that all breakpoints are equal as far as
> allocation goes - i.e. that the total number of breakpoints is the
> only allocation limit. That's apparently true for x86, but on
> embedded powerpc (4xx), for example, we can have up to 2 data
> breakpoints and up to 4 instruction breakpoints independent of each
> other.
>
True. But I hope that you see that the infrastructure is extensible.
While the generic infrastructure has served the purposes of x86 and
PPC64 well, one may want to 'grow' it when implementing breakpoints for
4xx processors. Say, make hbp_kernel[] a 2-D array, one for each
HBP_TYPE (say DAC and IAC) and hbp_kernel_pos into an array (of size
HBP_TYPE).
> > +/*
> > + * Kernel breakpoints grow downwards, starting from HB_NUM
> > + * 'hbp_kernel_pos' denotes lowest numbered breakpoint register occupied for
> > + * kernel-space request. We will initialise it here and not in an __init
> > + * routine because load_debug_registers(), which uses this variable can be
> > + * called very early during CPU initialisation.
> > + */
> > +unsigned int hbp_kernel_pos = HB_NUM;
>
> Likewise here. For 44x we would need separate tables and separate
> pointers for data and instruction breakpoints.
>
> > +/*
> > + * An array containing refcount of threads using a given bkpt register
> > + * Accesses are synchronised by acquiring hw_breakpoint_lock
> > + */
> > +unsigned int hbp_user_refcount[HB_NUM];
> > +
> > +/*
> > + * Install the debug register values for a new thread.
> > + */
> > +void switch_to_thread_hw_breakpoint(struct task_struct *tsk)
> > +{
> > + /* Set the debug register */
> > + arch_install_thread_hw_breakpoint(tsk);
>
> Um.. and the point of this wrapper which does nothing extra around the
> arch-specific function would be...?
>
> > +}
> > +
> > +/*
> > + * Install the debug register values for just the kernel, no thread.
> > + */
> > +void switch_to_none_hw_breakpoint(void)
> > +{
> > + arch_uninstall_thread_hw_breakpoint();
>
> And again.
>
These functions have hence been removed. You can find the latest
patchset here: http://lkml.org/lkml/2009/5/21/143.
> > +}
> > +
> > +/*
> > + * Load the debug registers during startup of a CPU.
> > + */
> > +void load_debug_registers(void)
>
> This name does not suggest a boot and/or cpu hotplug only function.
>
I think the name aptly describes what the function achieves
(irrespective of where it is employed). Let us know if you have better
suggestions.
> > +{
> > + unsigned long flags;
> > + struct task_struct *tsk = current;
> > +
> > + spin_lock_bh(&hw_breakpoint_lock);
> > +
> > + /* Prevent IPIs for new kernel breakpoint updates */
> > + local_irq_save(flags);
> > + arch_update_kernel_hw_breakpoint(NULL);
> > + local_irq_restore(flags);
> > +
> > + if (test_tsk_thread_flag(tsk, TIF_DEBUG))
> > + switch_to_thread_hw_breakpoint(tsk);
> > +
> > + spin_unlock_bh(&hw_breakpoint_lock);
> > +}
> > +
> > +/*
> > + * Erase all the hardware breakpoint info associated with a thread.
> > + *
> > + * If tsk != current then tsk must not be usable (for example, a
> > + * child being cleaned up from a failed fork).
> > + */
> > +void flush_thread_hw_breakpoint(struct task_struct *tsk)
> > +{
> > + int i;
> > + struct thread_struct *thread = &(tsk->thread);
> > +
> > + spin_lock_bh(&hw_breakpoint_lock);
> > +
> > + /* The thread no longer has any breakpoints associated with it */
> > + clear_tsk_thread_flag(tsk, TIF_DEBUG);
> > + for (i = 0; i < HB_NUM; i++) {
> > + if (thread->hbp[i]) {
> > + hbp_user_refcount[i]--;
> > + kfree(thread->hbp[i]);
> > + thread->hbp[i] = NULL;
> > + }
> > + }
> > +
> > + arch_flush_thread_hw_breakpoint(tsk);
> > +
> > + /* Actually uninstall the breakpoints if necessary */
> > + if (tsk == current)
> > + switch_to_none_hw_breakpoint();
> > + spin_unlock_bh(&hw_breakpoint_lock);
> > +}
> > +
> > +/*
> > + * Copy the hardware breakpoint info from a thread to its cloned
> > child.
>
> This comment contradicts what the code actually does.
>
Agreed. Will change it.
> > + */
> > +int copy_thread_hw_breakpoint(struct task_struct *tsk,
> > + struct task_struct *child, unsigned long clone_flags)
> > +{
> > + /*
> > + * We will assume that breakpoint settings are not inherited
> > + * and the child starts out with no debug registers set.
> > + * But what about CLONE_PTRACE?
> > + */
> > + clear_tsk_thread_flag(child, TIF_DEBUG);
> > +
> > + /* We will call flush routine since the debugregs are not inherited */
> > + arch_flush_thread_hw_breakpoint(child);
>
> > +
> > + return 0;
> > +}
> > +
> > +int __register_user_hw_breakpoint(int pos, struct task_struct *tsk,
> > + struct hw_breakpoint *bp)
> > +{
> > + struct thread_struct *thread = &(tsk->thread);
> > + int rc;
> > +
> > + /* Do not overcommit. Fail if kernel has used the hbp registers */
> > + if (pos >= hbp_kernel_pos)
> > + return -ENOSPC;
>
> Again, this is generic code making a non-portable assumption about how
> breakpoints must be allocated.
>
As explained before, this function and its kernel counterparts can be
extended to accomodate newer processors which will be supported by this
infrastructure.
Thanks,
K.Prasad
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [Patch 03/12] x86 architecture implementation of Hardware Breakpoint interfaces
2009-05-28 6:35 ` David Gibson
@ 2009-05-28 13:41 ` K.Prasad
2009-05-29 3:15 ` David Gibson
0 siblings, 1 reply; 40+ messages in thread
From: K.Prasad @ 2009-05-28 13:41 UTC (permalink / raw)
To: David Gibson
Cc: Alan Stern, Steven Rostedt, Frederic Weisbecker, Ingo Molnar,
Linux Kernel Mailing List, Andrew Morton, Benjamin Herrenschmidt,
maneesh, Roland McGrath, Masami Hiramatsu
On Thu, May 28, 2009 at 04:35:15PM +1000, David Gibson wrote:
> On Mon, May 11, 2009 at 05:23:03PM +0530, K.Prasad wrote:
> > This patch introduces the arch-specific implementation of
> > hw_breakpoint.c inside x86 specific directories. They contain
> > functions which help validate and serve requests for using Hardware
> > Breakpoint registers on x86 processors.
>
> [snip]
> > +static int get_hbp_len(u8 hbp_len)
> > +{
> > + unsigned int len_in_bytes = 0;
> > +
> > + switch (hbp_len) {
> > + case HW_BREAKPOINT_LEN_1:
> > + len_in_bytes = 1;
> > + break;
> > + case HW_BREAKPOINT_LEN_2:
> > + len_in_bytes = 2;
> > + break;
> > + case HW_BREAKPOINT_LEN_4:
> > + len_in_bytes = 4;
> > + break;
> > +#ifdef CONFIG_X86_64
> > + case HW_BREAKPOINT_LEN_8:
> > + len_in_bytes = 8;
> > + break;
> > +#endif
>
> Hrm, the fact that you have to do this nasty back-conversion again
> makes me wonder at the wisdom of having per-arch encodings for
> breakpoint length.
>
If you're suggesting that users would specify the length of the requested
breakpoint directly as numerals (like 2, 4 or 8 bytes) and not
pre-defined constants such as HW_BREAKPOINT_LEN, it has two implications
to it. By defining the permissible values for breakpoint length, the
user is assured that the request for breakpoint registration is valid
(atleast in terms of length) and does not have to refer the
code/processor manual for supported address lengths on the host
processor.
On the other hand, it may be perceived as being less user-friendly as
you cited above.
Given the above reasoning, do you prefer to allow numeric values
specified by the user?
> > + }
> > + return len_in_bytes;
> > +}
> > +
> > +/*
> > + * Check for virtual address in user space.
> > + */
> > +int arch_check_va_in_userspace(unsigned long va, u8 hbp_len)
> > +{
> > + unsigned int len;
> > +
> > + len = get_hbp_len(hbp_len);
> > +
> > + return (va <= TASK_SIZE - len);
> > +}
> > +
> > +/*
> > + * Check for virtual address in kernel space.
> > + */
> > +int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len)
> > +{
> > + unsigned int len;
> > +
> > + len = get_hbp_len(hbp_len);
> > +
> > + return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
> > +}
> > +
> > +/*
> > + * Store a breakpoint's encoded address, length, and type.
> > + */
> > +static int arch_store_info(struct hw_breakpoint *bp)
>
> This function doesn't look very arch specific. Nor is the name very
> meaningful.
>
The function populates arch-specific fields and hence the arch_ prefix.
I am open to any suggestions that will be incorporated in future
enhancements.
> > +{
> > + /*
> > + * User-space requests will always have the address field populated
> > + * For kernel-addresses, either the address or symbol name can be
> > + * specified.
> > + */
> > + if (bp->info.name)
> > + bp->info.address = (unsigned long)
> > + kallsyms_lookup_name(bp->info.name);
> > + if (bp->info.address)
> > + return 0;
> > + return -EINVAL;
> > +}
> > +
> > +/*
> > + * Validate the arch-specific HW Breakpoint register settings
> > + */
> > +int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp,
> > + struct task_struct *tsk)
> > +{
> > + unsigned int align;
> > + int ret = -EINVAL;
> > +
> > + switch (bp->info.type) {
> > + /*
> > + * Ptrace-refactoring code
> > + * For now, we'll allow instruction breakpoint only for user-space
> > + * addresses
> > + */
> > + case HW_BREAKPOINT_EXECUTE:
> > + if ((!arch_check_va_in_userspace(bp->info.address,
> > + bp->info.len)) &&
> > + bp->info.len != HW_BREAKPOINT_LEN_EXECUTE)
> > + return ret;
> > + break;
> > + case HW_BREAKPOINT_WRITE:
> > + break;
> > + case HW_BREAKPOINT_RW:
> > + break;
> > + default:
> > + return ret;
> > + }
> > +
> > + switch (bp->info.len) {
> > + case HW_BREAKPOINT_LEN_1:
> > + align = 0;
> > + break;
> > + case HW_BREAKPOINT_LEN_2:
> > + align = 1;
> > + break;
> > + case HW_BREAKPOINT_LEN_4:
> > + align = 3;
> > + break;
> > +#ifdef CONFIG_X86_64
> > + case HW_BREAKPOINT_LEN_8:
> > + align = 7;
> > + break;
> > +#endif
> > + default:
> > + return ret;
> > + }
> > +
> > + if (bp->triggered)
> > + ret = arch_store_info(bp);
> > +
> > + if (ret < 0)
> > + return ret;
> > + /*
> > + * Check that the low-order bits of the address are appropriate
> > + * for the alignment implied by len.
> > + */
> > + if (bp->info.address & align)
> > + return -EINVAL;
> > +
> > + /* Check that the virtual address is in the proper range */
> > + if (tsk) {
> > + if (!arch_check_va_in_userspace(bp->info.address, bp->info.len))
> > + return -EFAULT;
> > + } else {
> > + if (!arch_check_va_in_kernelspace(bp->info.address,
> > + bp->info.len))
> > + return -EFAULT;
> > + }
> > + return 0;
> > +}
> > +
> > +void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk)
> > +{
> > + struct thread_struct *thread = &(tsk->thread);
> > + struct hw_breakpoint *bp = thread->hbp[pos];
> > +
> > + thread->debugreg7 &= ~dr7_masks[pos];
> > + if (bp) {
> > + thread->debugreg[pos] = bp->info.address;
> > + thread->debugreg7 |= encode_dr7(pos, bp->info.len,
> > + bp->info.type);
> > + } else
> > + thread->debugreg[pos] = 0;
> > +}
> > +
> > +void arch_flush_thread_hw_breakpoint(struct task_struct *tsk)
> > +{
> > + int i;
> > + struct thread_struct *thread = &(tsk->thread);
> > +
> > + thread->debugreg7 = 0;
> > + for (i = 0; i < HB_NUM; i++)
> > + thread->debugreg[i] = 0;
> > +}
> > +
> > +/*
> > + * Handle debug exception notifications.
> > + */
> > +int __kprobes hw_breakpoint_handler(struct die_args *args)
> > +{
> > + int i, cpu, rc = NOTIFY_STOP;
> > + struct hw_breakpoint *bp;
> > + /* The DR6 value is stored in args->err */
> > + unsigned long dr7, dr6 = args->err;
> > +
> > + /* Do an early return if no trap bits are set in DR6 */
> > + if ((dr6 & DR_TRAP_BITS) == 0)
> > + return NOTIFY_DONE;
> > +
> > + /* Lazy debug register switching */
> > + if (!test_tsk_thread_flag(current, TIF_DEBUG))
> > + switch_to_none_hw_breakpoint();
>
> Shouldn't you also drop out of the handler here, rather than having to
> again check for a false alarm below.
>
No. We could be inside the handler because of a kernel-space breakpoint
and still have the above condition true.
> It's also not immediately clear to me how lazy switching buys you
> anything here, since it's only lazy switching off, not lazy switching
> on.
>
In this code-snippet from __switch_to() in arch/x86/kernel/process_32.c,
if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
arch_install_thread_hw_breakpoint(next_p);
we only check if 'next_p' has TIF_DEBUG flag set in order to enable
breakpoints, but don't clear the physical debug registers if 'prev'
process had used them (can be detected by testing for TIF_DEBUG).
This saves us one additional check in the hot-path but we might
encounter stray exceptions due to lazy debug register switching.
Such exceptions are detected in the above code, resulting in the
physical debug registers being cleared-off their stale values and the
exception being ignored.
Thanks,
K.Prasad
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [Patch 02/12] Introducing generic hardware breakpoint handler interfaces
2009-05-28 11:55 ` K.Prasad
@ 2009-05-29 2:59 ` David Gibson
0 siblings, 0 replies; 40+ messages in thread
From: David Gibson @ 2009-05-29 2:59 UTC (permalink / raw)
To: K.Prasad
Cc: Alan Stern, Steven Rostedt, Frederic Weisbecker, Ingo Molnar,
Linux Kernel Mailing List, Andrew Morton, Benjamin Herrenschmidt,
maneesh, Roland McGrath, Masami Hiramatsu
On Thu, May 28, 2009 at 05:25:46PM +0530, K.Prasad wrote:
> On Thu, May 28, 2009 at 04:15:18PM +1000, David Gibson wrote:
> > On Mon, May 11, 2009 at 05:22:40PM +0530, K.Prasad wrote:
[snip]
> > > +/*
> > > + * Per-processor copy of hbp_kernel[]. Used only when hbp_kernel is being
> > > + * modified but we need the older copy to handle any hbp exceptions. It will
> > > + * sync with hbp_kernel[] value after updation is done through IPIs.
> > > + */
> > > +DEFINE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HB_NUM]);
> >
> > Ok.. this seems to assume that all breakpoints are equal as far as
> > allocation goes - i.e. that the total number of breakpoints is the
> > only allocation limit. That's apparently true for x86, but on
> > embedded powerpc (4xx), for example, we can have up to 2 data
> > breakpoints and up to 4 instruction breakpoints independent of each
> > other.
>
> True. But I hope that you see that the infrastructure is extensible.
> While the generic infrastructure has served the purposes of x86 and
> PPC64 well, one may want to 'grow' it when implementing breakpoints for
> 4xx processors. Say, make hbp_kernel[] a 2-D array, one for each
> HBP_TYPE (say DAC and IAC) and hbp_kernel_pos into an array (of size
> HBP_TYPE).
Hrm, I guess. I suppose since there's no userspace exposed interface
to this new infrastructure (other than the backwards compatible ptrace
one), then this is indeed a deficiency which could be fixed up later.
[snip]
> > > +void switch_to_thread_hw_breakpoint(struct task_struct *tsk)
> > > +{
> > > + /* Set the debug register */
> > > + arch_install_thread_hw_breakpoint(tsk);
> >
> > Um.. and the point of this wrapper which does nothing extra around the
> > arch-specific function would be...?
> >
> > > +}
> > > +
> > > +/*
> > > + * Install the debug register values for just the kernel, no thread.
> > > + */
> > > +void switch_to_none_hw_breakpoint(void)
> > > +{
> > > + arch_uninstall_thread_hw_breakpoint();
> >
> > And again.
> >
>
> These functions have hence been removed. You can find the latest
> patchset here: http://lkml.org/lkml/2009/5/21/143.
Oh, sorry. I thought I'd grabbed the latest patchset, but apparently
not.
[snip]
> > > +/*
> > > + * Load the debug registers during startup of a CPU.
> > > + */
> > > +void load_debug_registers(void)
> >
> > This name does not suggest a boot and/or cpu hotplug only function.
>
> I think the name aptly describes what the function achieves
> (irrespective of where it is employed). Let us know if you have better
> suggestions.
Hrm, not really. To me, "load_debug_registers" suggests the operation
that happens in during context switch as we load the new threads debug
setup into the hardware registers. AFAICT that's only part of what
this function does, and it's *not* typically called during context
switch.
How about "init_debug_registers()"?
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [Patch 03/12] x86 architecture implementation of Hardware Breakpoint interfaces
2009-05-28 13:41 ` K.Prasad
@ 2009-05-29 3:15 ` David Gibson
0 siblings, 0 replies; 40+ messages in thread
From: David Gibson @ 2009-05-29 3:15 UTC (permalink / raw)
To: K.Prasad
Cc: Alan Stern, Steven Rostedt, Frederic Weisbecker, Ingo Molnar,
Linux Kernel Mailing List, Andrew Morton, Benjamin Herrenschmidt,
maneesh, Roland McGrath, Masami Hiramatsu
On Thu, May 28, 2009 at 07:11:51PM +0530, K.Prasad wrote:
> On Thu, May 28, 2009 at 04:35:15PM +1000, David Gibson wrote:
> > On Mon, May 11, 2009 at 05:23:03PM +0530, K.Prasad wrote:
> > > This patch introduces the arch-specific implementation of
> > > hw_breakpoint.c inside x86 specific directories. They contain
> > > functions which help validate and serve requests for using Hardware
> > > Breakpoint registers on x86 processors.
> >
> > [snip]
> > > +static int get_hbp_len(u8 hbp_len)
> > > +{
> > > + unsigned int len_in_bytes = 0;
> > > +
> > > + switch (hbp_len) {
> > > + case HW_BREAKPOINT_LEN_1:
> > > + len_in_bytes = 1;
> > > + break;
> > > + case HW_BREAKPOINT_LEN_2:
> > > + len_in_bytes = 2;
> > > + break;
> > > + case HW_BREAKPOINT_LEN_4:
> > > + len_in_bytes = 4;
> > > + break;
> > > +#ifdef CONFIG_X86_64
> > > + case HW_BREAKPOINT_LEN_8:
> > > + len_in_bytes = 8;
> > > + break;
> > > +#endif
> >
> > Hrm, the fact that you have to do this nasty back-conversion again
> > makes me wonder at the wisdom of having per-arch encodings for
> > breakpoint length.
>
> If you're suggesting that users would specify the length of the requested
> breakpoint directly as numerals (like 2, 4 or 8 bytes) and not
> pre-defined constants such as HW_BREAKPOINT_LEN, it has two implications
> to it. By defining the permissible values for breakpoint length, the
> user is assured that the request for breakpoint registration is valid
> (atleast in terms of length) and does not have to refer the
> code/processor manual for supported address lengths on the host
> processor.
I don't see why looking up permitted breakpoint lengths in the
processor manual (or a summary thereof) is any harder than looking up
the permitted breakpoint length constants for this arch in the headers
or breakpoint interface documentation.
Changing the length to be specified as integers would move enforcement
of this check from compile time to runtime. It's not yet clear to me
which is better. Compile time checking is often good. But I think
we'll need runtime checking as well for architectures where different
cpu generations have different debug facilities with different
capabilities.
> On the other hand, it may be perceived as being less user-friendly
> as you cited above.
>
> Given the above reasoning, do you prefer to allow numeric values
> specified by the user?
I'm not sure yet.
> > > + }
> > > + return len_in_bytes;
> > > +}
> > > +
> > > +/*
> > > + * Check for virtual address in user space.
> > > + */
> > > +int arch_check_va_in_userspace(unsigned long va, u8 hbp_len)
> > > +{
> > > + unsigned int len;
> > > +
> > > + len = get_hbp_len(hbp_len);
> > > +
> > > + return (va <= TASK_SIZE - len);
> > > +}
> > > +
> > > +/*
> > > + * Check for virtual address in kernel space.
> > > + */
> > > +int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len)
> > > +{
> > > + unsigned int len;
> > > +
> > > + len = get_hbp_len(hbp_len);
> > > +
> > > + return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
> > > +}
> > > +
> > > +/*
> > > + * Store a breakpoint's encoded address, length, and type.
> > > + */
> > > +static int arch_store_info(struct hw_breakpoint *bp)
> >
> > This function doesn't look very arch specific. Nor is the name very
> > meaningful.
> >
>
> The function populates arch-specific fields and hence the arch_ prefix.
> I am open to any suggestions that will be incorporated in future
> enhancements.
Hrm, I suppose. I guess the name lookup looks as if it ought to be
possible to make generic.
[snip]
> > > +/*
> > > + * Handle debug exception notifications.
> > > + */
> > > +int __kprobes hw_breakpoint_handler(struct die_args *args)
> > > +{
> > > + int i, cpu, rc = NOTIFY_STOP;
> > > + struct hw_breakpoint *bp;
> > > + /* The DR6 value is stored in args->err */
> > > + unsigned long dr7, dr6 = args->err;
> > > +
> > > + /* Do an early return if no trap bits are set in DR6 */
> > > + if ((dr6 & DR_TRAP_BITS) == 0)
> > > + return NOTIFY_DONE;
> > > +
> > > + /* Lazy debug register switching */
> > > + if (!test_tsk_thread_flag(current, TIF_DEBUG))
> > > + switch_to_none_hw_breakpoint();
> >
> > Shouldn't you also drop out of the handler here, rather than having to
> > again check for a false alarm below.
>
> No. We could be inside the handler because of a kernel-space breakpoint
> and still have the above condition true.
Ah, yes, missed that.
But.. if it's a kernel breakpoint, surely the debug state of the
current thread shouldn't affect its actions at all. So shouldn't you
be checking whether it's a kernel or user breakpoint first?
> > It's also not immediately clear to me how lazy switching buys you
> > anything here, since it's only lazy switching off, not lazy switching
> > on.
>
> In this code-snippet from __switch_to() in arch/x86/kernel/process_32.c,
>
> if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
> arch_install_thread_hw_breakpoint(next_p);
>
> we only check if 'next_p' has TIF_DEBUG flag set in order to enable
> breakpoints, but don't clear the physical debug registers if 'prev'
> process had used them (can be detected by testing for TIF_DEBUG).
> This saves us one additional check in the hot-path but we might
> encounter stray exceptions due to lazy debug register switching.
>
> Such exceptions are detected in the above code, resulting in the
> physical debug registers being cleared-off their stale values and the
> exception being ignored.
Right, I understand what it does, just not whether it really buys
anything. It smells of premature optimization, since I don't think
the context switch path is actually hot enough to be bothered by one
flag check. But whatever.
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [Patch 06/12] Use the new wrapper routines to access debug registers in process/thread code
2009-05-28 6:42 ` David Gibson
@ 2009-05-29 9:01 ` K.Prasad
2009-05-29 10:49 ` Frederic Weisbecker
0 siblings, 1 reply; 40+ messages in thread
From: K.Prasad @ 2009-05-29 9:01 UTC (permalink / raw)
To: David Gibson, Alan Stern, Frederic Weisbecker
Cc: Steven Rostedt, Ingo Molnar, Linux Kernel Mailing List,
Andrew Morton, Benjamin Herrenschmidt, maneesh, Roland McGrath,
Masami Hiramatsu
On Thu, May 28, 2009 at 04:42:38PM +1000, David Gibson wrote:
> On Mon, May 11, 2009 at 05:23:44PM +0530, K.Prasad wrote:
> > From: Alan Stern <stern@rowland.harvard.edu>
> >
> > This patch enables the use of abstract debug registers in
> > process-handling routines.
>
> [snip]
> >
> > + p->thread.io_bitmap_ptr = NULL;
>
> Why is manipulating the io_bitmap_ptr relevant to debug register
> handling?
I *re-read* the patch but was unable to find how this change had sneaked
in. It shouldn't be there although it is harmless.
Hi Frederic,
I am attaching a new version of this patch 06/12 that:
- removes the line that assigns NULL to "p->thread.io_bitmap_ptr"
- Updates the comment in __switch_to() function which was stale (was
relevant when 'last_debugged_task' was used to detect lazy debug
register switching).
Kindly integrate this version in lieu of the older version sent here:
http://lkml.org/lkml/2009/5/21/149.
Hi Alan,
I'm retaining the
"Reviewed-by: Alan Stern <stern@rowland.harvard.edu>" tag on the patch
even after the above changes and guess it is acceptable to you. Please
let me know if you have any concerns about that.
Thanks,
K.Prasad
Use the new wrapper routines to access debug registers in process/thread code
This patch enables the use of abstract debug registers in
process-handling routines.
Original-patch-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
Reviewed-by: Alan Stern <stern@rowland.harvard.edu>
---
arch/x86/kernel/process.c | 23 ++++++-----------------
arch/x86/kernel/process_32.c | 27 +++++++++++++++++++++++++++
arch/x86/kernel/process_64.c | 30 ++++++++++++++++++++++++++++++
3 files changed, 63 insertions(+), 17 deletions(-)
Index: linux-2.6-tip.hbkpt/arch/x86/kernel/process.c
===================================================================
--- linux-2.6-tip.hbkpt.orig/arch/x86/kernel/process.c
+++ linux-2.6-tip.hbkpt/arch/x86/kernel/process.c
@@ -17,6 +17,8 @@
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/ds.h>
+#include <asm/debugreg.h>
+#include <asm/hw_breakpoint.h>
unsigned long idle_halt;
EXPORT_SYMBOL(idle_halt);
@@ -48,6 +50,8 @@ void free_thread_xstate(struct task_stru
kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
tsk->thread.xstate = NULL;
}
+ if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
+ flush_thread_hw_breakpoint(tsk);
WARN(tsk->thread.ds_ctx, "leaking DS context\n");
}
@@ -106,14 +110,9 @@ void flush_thread(void)
}
#endif
- clear_tsk_thread_flag(tsk, TIF_DEBUG);
+ if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
+ flush_thread_hw_breakpoint(tsk);
- tsk->thread.debugreg0 = 0;
- tsk->thread.debugreg1 = 0;
- tsk->thread.debugreg2 = 0;
- tsk->thread.debugreg3 = 0;
- tsk->thread.debugreg6 = 0;
- tsk->thread.debugreg7 = 0;
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
/*
* Forget coprocessor state..
@@ -195,16 +194,6 @@ void __switch_to_xtra(struct task_struct
else if (next->debugctlmsr != prev->debugctlmsr)
update_debugctlmsr(next->debugctlmsr);
- if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
- set_debugreg(next->debugreg0, 0);
- set_debugreg(next->debugreg1, 1);
- set_debugreg(next->debugreg2, 2);
- set_debugreg(next->debugreg3, 3);
- /* no 4 and 5 */
- set_debugreg(next->debugreg6, 6);
- set_debugreg(next->debugreg7, 7);
- }
-
if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
test_tsk_thread_flag(next_p, TIF_NOTSC)) {
/* prev and next are different */
Index: linux-2.6-tip.hbkpt/arch/x86/kernel/process_32.c
===================================================================
--- linux-2.6-tip.hbkpt.orig/arch/x86/kernel/process_32.c
+++ linux-2.6-tip.hbkpt/arch/x86/kernel/process_32.c
@@ -58,6 +58,8 @@
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>
+#include <asm/debugreg.h>
+#include <asm/hw_breakpoint.h>
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
@@ -263,6 +265,11 @@ int copy_thread(unsigned long clone_flag
task_user_gs(p) = get_user_gs(regs);
tsk = current;
+ err = -ENOMEM;
+ if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
+ if (copy_thread_hw_breakpoint(tsk, p, clone_flags))
+ goto out;
+
if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
IO_BITMAP_BYTES, GFP_KERNEL);
@@ -282,10 +289,13 @@ int copy_thread(unsigned long clone_flag
err = do_set_thread_area(p, -1,
(struct user_desc __user *)childregs->si, 0);
+out:
if (err && p->thread.io_bitmap_ptr) {
kfree(p->thread.io_bitmap_ptr);
p->thread.io_bitmap_max = 0;
}
+ if (err)
+ flush_thread_hw_breakpoint(p);
clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
p->thread.ds_ctx = NULL;
@@ -424,6 +434,23 @@ __switch_to(struct task_struct *prev_p,
lazy_load_gs(next->gs);
percpu_write(current_task, next_p);
+ /*
+ * There's a problem with moving the arch_install_thread_hw_breakpoint()
+ * call before current is updated. Suppose a kernel breakpoint is
+ * triggered in between the two, the hw-breakpoint handler will see that
+ * the 'current' task does not have TIF_DEBUG flag set and will think it
+ * is leftover from an old task (lazy switching) and will erase it. Then
+ * until the next context switch, no user-breakpoints will be installed.
+ *
+ * The real problem is that it's impossible to update both current and
+ * physical debug registers at the same instant, so there will always be
+ * a window in which they disagree and a breakpoint might get triggered.
+ * Since we use lazy switching, we are forced to assume that a
+ * disagreement means that current is correct and the exception is due
+ * to lazy debug register switching.
+ */
+ if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
+ arch_install_thread_hw_breakpoint(next_p);
return prev_p;
}
Index: linux-2.6-tip.hbkpt/arch/x86/kernel/process_64.c
===================================================================
--- linux-2.6-tip.hbkpt.orig/arch/x86/kernel/process_64.c
+++ linux-2.6-tip.hbkpt/arch/x86/kernel/process_64.c
@@ -52,6 +52,8 @@
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>
+#include <asm/debugreg.h>
+#include <asm/hw_breakpoint.h>
asmlinkage extern void ret_from_fork(void);
@@ -245,6 +247,8 @@ void release_thread(struct task_struct *
BUG();
}
}
+ if (unlikely(dead_task->thread.debugreg7))
+ flush_thread_hw_breakpoint(dead_task);
}
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
@@ -306,6 +310,11 @@ int copy_thread(unsigned long clone_flag
savesegment(es, p->thread.es);
savesegment(ds, p->thread.ds);
+ err = -ENOMEM;
+ if (unlikely(test_tsk_thread_flag(me, TIF_DEBUG)))
+ if (copy_thread_hw_breakpoint(me, p, clone_flags))
+ goto out;
+
if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
if (!p->thread.io_bitmap_ptr) {
@@ -344,6 +353,9 @@ out:
kfree(p->thread.io_bitmap_ptr);
p->thread.io_bitmap_max = 0;
}
+ if (err)
+ flush_thread_hw_breakpoint(p);
+
return err;
}
@@ -489,6 +501,24 @@ __switch_to(struct task_struct *prev_p,
*/
if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
math_state_restore();
+ /*
+ * There's a problem with moving the arch_install_thread_hw_breakpoint()
+ * call before current is updated. Suppose a kernel breakpoint is
+ * triggered in between the two, the hw-breakpoint handler will see that
+ * the 'current' task does not have TIF_DEBUG flag set and will think it
+ * is leftover from an old task (lazy switching) and will erase it. Then
+ * until the next context switch, no user-breakpoints will be installed.
+ *
+ * The real problem is that it's impossible to update both current and
+ * physical debug registers at the same instant, so there will always be
+ * a window in which they disagree and a breakpoint might get triggered.
+ * Since we use lazy switching, we are forced to assume that a
+ * disagreement means that current is correct and the exception is due
+ * to lazy debug register switching.
+ */
+ if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
+ arch_install_thread_hw_breakpoint(next_p);
+
return prev_p;
}
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [Patch 06/12] Use the new wrapper routines to access debug registers in process/thread code
2009-05-29 9:01 ` K.Prasad
@ 2009-05-29 10:49 ` Frederic Weisbecker
2009-05-29 13:52 ` K.Prasad
2009-05-29 13:54 ` Alan Stern
0 siblings, 2 replies; 40+ messages in thread
From: Frederic Weisbecker @ 2009-05-29 10:49 UTC (permalink / raw)
To: K.Prasad
Cc: David Gibson, Alan Stern, Steven Rostedt, Ingo Molnar,
Linux Kernel Mailing List, Andrew Morton, Benjamin Herrenschmidt,
maneesh, Roland McGrath, Masami Hiramatsu
On Fri, May 29, 2009 at 02:31:46PM +0530, K.Prasad wrote:
> On Thu, May 28, 2009 at 04:42:38PM +1000, David Gibson wrote:
> > On Mon, May 11, 2009 at 05:23:44PM +0530, K.Prasad wrote:
> > > From: Alan Stern <stern@rowland.harvard.edu>
> > >
> > > This patch enables the use of abstract debug registers in
> > > process-handling routines.
> >
> > [snip]
> > >
> > > + p->thread.io_bitmap_ptr = NULL;
> >
> > Why is manipulating the io_bitmap_ptr relevant to debug register
> > handling?
>
> I *re-read* the patch but was unable to find how this change had sneaked
> in. It shouldn't be there although it is harmless.
When I reviewed this patch, I also ended stucked on it.
But actually I guess I found the sense, this is only for
convenience.
Look at the current copy_thread() in arch/x86/kernel/process32.c
If p->thread.io_bitmap_ptr fails to be duplicated, we set
p->thread.io_bitmap_max = 0 and return -ENOMEM
Now look at the patch.
If we fail to copy the hardware thread virtual registers we
want to exit with io_bitmap_ptr = NULL
If we fail to copy the io_bitmap, we want to free the breakpoint
and exit.
If we fail further, we want to free breakpoints and io_bitmap_ptr
The out section then tries to:
-free the breakpoints
-free p->thread.io_bitmap_ptr
So it's important to set io_bitmap_ptr to NULL so that
we know whether we have to release it or not.
> Hi Frederic,
> I am attaching a new version of this patch 06/12 that:
>
> - removes the line that assigns NULL to "p->thread.io_bitmap_ptr"
Dangerous. Unless p->thread.io_bitmap_ptr is already zeroed out
at this stage?
> - Updates the comment in __switch_to() function which was stale (was
> relevant when 'last_debugged_task' was used to detect lazy debug
> register switching).
>
> Kindly integrate this version in lieu of the older version sent here:
> http://lkml.org/lkml/2009/5/21/149.
Ok. Well it would be nice if you resend the whole series actually :)
Do you have another fix pending?
Thanks!
> Hi Alan,
> I'm retaining the
> "Reviewed-by: Alan Stern <stern@rowland.harvard.edu>" tag on the patch
> even after the above changes and guess it is acceptable to you. Please
> let me know if you have any concerns about that.
>
> Thanks,
> K.Prasad
>
>
> Use the new wrapper routines to access debug registers in process/thread code
>
> This patch enables the use of abstract debug registers in
> process-handling routines.
>
> Original-patch-by: Alan Stern <stern@rowland.harvard.edu>
> Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
> Reviewed-by: Alan Stern <stern@rowland.harvard.edu>
> ---
> arch/x86/kernel/process.c | 23 ++++++-----------------
> arch/x86/kernel/process_32.c | 27 +++++++++++++++++++++++++++
> arch/x86/kernel/process_64.c | 30 ++++++++++++++++++++++++++++++
> 3 files changed, 63 insertions(+), 17 deletions(-)
>
> Index: linux-2.6-tip.hbkpt/arch/x86/kernel/process.c
> ===================================================================
> --- linux-2.6-tip.hbkpt.orig/arch/x86/kernel/process.c
> +++ linux-2.6-tip.hbkpt/arch/x86/kernel/process.c
> @@ -17,6 +17,8 @@
> #include <asm/uaccess.h>
> #include <asm/i387.h>
> #include <asm/ds.h>
> +#include <asm/debugreg.h>
> +#include <asm/hw_breakpoint.h>
>
> unsigned long idle_halt;
> EXPORT_SYMBOL(idle_halt);
> @@ -48,6 +50,8 @@ void free_thread_xstate(struct task_stru
> kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
> tsk->thread.xstate = NULL;
> }
> + if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
> + flush_thread_hw_breakpoint(tsk);
>
> WARN(tsk->thread.ds_ctx, "leaking DS context\n");
> }
> @@ -106,14 +110,9 @@ void flush_thread(void)
> }
> #endif
>
> - clear_tsk_thread_flag(tsk, TIF_DEBUG);
> + if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
> + flush_thread_hw_breakpoint(tsk);
>
> - tsk->thread.debugreg0 = 0;
> - tsk->thread.debugreg1 = 0;
> - tsk->thread.debugreg2 = 0;
> - tsk->thread.debugreg3 = 0;
> - tsk->thread.debugreg6 = 0;
> - tsk->thread.debugreg7 = 0;
> memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
> /*
> * Forget coprocessor state..
> @@ -195,16 +194,6 @@ void __switch_to_xtra(struct task_struct
> else if (next->debugctlmsr != prev->debugctlmsr)
> update_debugctlmsr(next->debugctlmsr);
>
> - if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
> - set_debugreg(next->debugreg0, 0);
> - set_debugreg(next->debugreg1, 1);
> - set_debugreg(next->debugreg2, 2);
> - set_debugreg(next->debugreg3, 3);
> - /* no 4 and 5 */
> - set_debugreg(next->debugreg6, 6);
> - set_debugreg(next->debugreg7, 7);
> - }
> -
> if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
> test_tsk_thread_flag(next_p, TIF_NOTSC)) {
> /* prev and next are different */
> Index: linux-2.6-tip.hbkpt/arch/x86/kernel/process_32.c
> ===================================================================
> --- linux-2.6-tip.hbkpt.orig/arch/x86/kernel/process_32.c
> +++ linux-2.6-tip.hbkpt/arch/x86/kernel/process_32.c
> @@ -58,6 +58,8 @@
> #include <asm/idle.h>
> #include <asm/syscalls.h>
> #include <asm/ds.h>
> +#include <asm/debugreg.h>
> +#include <asm/hw_breakpoint.h>
>
> asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
>
> @@ -263,6 +265,11 @@ int copy_thread(unsigned long clone_flag
> task_user_gs(p) = get_user_gs(regs);
>
> tsk = current;
> + err = -ENOMEM;
> + if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
> + if (copy_thread_hw_breakpoint(tsk, p, clone_flags))
> + goto out;
> +
> if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
> p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
> IO_BITMAP_BYTES, GFP_KERNEL);
> @@ -282,10 +289,13 @@ int copy_thread(unsigned long clone_flag
> err = do_set_thread_area(p, -1,
> (struct user_desc __user *)childregs->si, 0);
>
> +out:
> if (err && p->thread.io_bitmap_ptr) {
> kfree(p->thread.io_bitmap_ptr);
> p->thread.io_bitmap_max = 0;
> }
> + if (err)
> + flush_thread_hw_breakpoint(p);
>
> clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
> p->thread.ds_ctx = NULL;
> @@ -424,6 +434,23 @@ __switch_to(struct task_struct *prev_p,
> lazy_load_gs(next->gs);
>
> percpu_write(current_task, next_p);
> + /*
> + * There's a problem with moving the arch_install_thread_hw_breakpoint()
> + * call before current is updated. Suppose a kernel breakpoint is
> + * triggered in between the two, the hw-breakpoint handler will see that
> + * the 'current' task does not have TIF_DEBUG flag set and will think it
> + * is leftover from an old task (lazy switching) and will erase it. Then
> + * until the next context switch, no user-breakpoints will be installed.
> + *
> + * The real problem is that it's impossible to update both current and
> + * physical debug registers at the same instant, so there will always be
> + * a window in which they disagree and a breakpoint might get triggered.
> + * Since we use lazy switching, we are forced to assume that a
> + * disagreement means that current is correct and the exception is due
> + * to lazy debug register switching.
> + */
> + if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
> + arch_install_thread_hw_breakpoint(next_p);
>
> return prev_p;
> }
> Index: linux-2.6-tip.hbkpt/arch/x86/kernel/process_64.c
> ===================================================================
> --- linux-2.6-tip.hbkpt.orig/arch/x86/kernel/process_64.c
> +++ linux-2.6-tip.hbkpt/arch/x86/kernel/process_64.c
> @@ -52,6 +52,8 @@
> #include <asm/idle.h>
> #include <asm/syscalls.h>
> #include <asm/ds.h>
> +#include <asm/debugreg.h>
> +#include <asm/hw_breakpoint.h>
>
> asmlinkage extern void ret_from_fork(void);
>
> @@ -245,6 +247,8 @@ void release_thread(struct task_struct *
> BUG();
> }
> }
> + if (unlikely(dead_task->thread.debugreg7))
> + flush_thread_hw_breakpoint(dead_task);
> }
>
> static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
> @@ -306,6 +310,11 @@ int copy_thread(unsigned long clone_flag
> savesegment(es, p->thread.es);
> savesegment(ds, p->thread.ds);
>
> + err = -ENOMEM;
> + if (unlikely(test_tsk_thread_flag(me, TIF_DEBUG)))
> + if (copy_thread_hw_breakpoint(me, p, clone_flags))
> + goto out;
> +
> if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
> p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
> if (!p->thread.io_bitmap_ptr) {
> @@ -344,6 +353,9 @@ out:
> kfree(p->thread.io_bitmap_ptr);
> p->thread.io_bitmap_max = 0;
> }
> + if (err)
> + flush_thread_hw_breakpoint(p);
> +
> return err;
> }
>
> @@ -489,6 +501,24 @@ __switch_to(struct task_struct *prev_p,
> */
> if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
> math_state_restore();
> + /*
> + * There's a problem with moving the arch_install_thread_hw_breakpoint()
> + * call before current is updated. Suppose a kernel breakpoint is
> + * triggered in between the two, the hw-breakpoint handler will see that
> + * the 'current' task does not have TIF_DEBUG flag set and will think it
> + * is leftover from an old task (lazy switching) and will erase it. Then
> + * until the next context switch, no user-breakpoints will be installed.
> + *
> + * The real problem is that it's impossible to update both current and
> + * physical debug registers at the same instant, so there will always be
> + * a window in which they disagree and a breakpoint might get triggered.
> + * Since we use lazy switching, we are forced to assume that a
> + * disagreement means that current is correct and the exception is due
> + * to lazy debug register switching.
> + */
> + if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
> + arch_install_thread_hw_breakpoint(next_p);
> +
> return prev_p;
> }
>
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [Patch 06/12] Use the new wrapper routines to access debug registers in process/thread code
2009-05-29 10:49 ` Frederic Weisbecker
@ 2009-05-29 13:52 ` K.Prasad
2009-05-29 14:07 ` Frédéric Weisbecker
2009-05-29 13:54 ` Alan Stern
1 sibling, 1 reply; 40+ messages in thread
From: K.Prasad @ 2009-05-29 13:52 UTC (permalink / raw)
To: Frederic Weisbecker
Cc: David Gibson, Alan Stern, Steven Rostedt, Ingo Molnar,
Linux Kernel Mailing List, Andrew Morton, Benjamin Herrenschmidt,
maneesh, Roland McGrath, Masami Hiramatsu
On Fri, May 29, 2009 at 12:49:03PM +0200, Frederic Weisbecker wrote:
> On Fri, May 29, 2009 at 02:31:46PM +0530, K.Prasad wrote:
> > On Thu, May 28, 2009 at 04:42:38PM +1000, David Gibson wrote:
> > > On Mon, May 11, 2009 at 05:23:44PM +0530, K.Prasad wrote:
> > > > From: Alan Stern <stern@rowland.harvard.edu>
> > > >
> > > > This patch enables the use of abstract debug registers in
> > > > process-handling routines.
> > >
> > > [snip]
> > > >
> > > > + p->thread.io_bitmap_ptr = NULL;
> > >
> > > Why is manipulating the io_bitmap_ptr relevant to debug register
> > > handling?
> >
> > I *re-read* the patch but was unable to find how this change had sneaked
> > in. It shouldn't be there although it is harmless.
>
>
> When I reviewed this patch, I also ended stucked on it.
> But actually I guess I found the sense, this is only for
> convenience.
>
> Look at the current copy_thread() in arch/x86/kernel/process32.c
>
> If p->thread.io_bitmap_ptr fails to be duplicated, we set
> p->thread.io_bitmap_max = 0 and return -ENOMEM
>
> Now look at the patch.
> If we fail to copy the hardware thread virtual registers we
> want to exit with io_bitmap_ptr = NULL
> If we fail to copy the io_bitmap, we want to free the breakpoint
> and exit.
> If we fail further, we want to free breakpoints and io_bitmap_ptr
>
> The out section then tries to:
>
> -free the breakpoints
> -free p->thread.io_bitmap_ptr
>
>
> So it's important to set io_bitmap_ptr to NULL so that
> we know whether we have to release it or not.
>
>
aah...yes. It tricked me! It is needed to bring the desired
error-return behaviour of copy_thread(). Please ignore this patch (the
updation of the comments can be brought in through a separate
enhancement patch...see below).
> > Hi Frederic,
> > I am attaching a new version of this patch 06/12 that:
> >
> > - removes the line that assigns NULL to "p->thread.io_bitmap_ptr"
>
>
> Dangerous. Unless p->thread.io_bitmap_ptr is already zeroed out
> at this stage?
>
>
> > - Updates the comment in __switch_to() function which was stale (was
> > relevant when 'last_debugged_task' was used to detect lazy debug
> > register switching).
> >
> > Kindly integrate this version in lieu of the older version sent here:
> > http://lkml.org/lkml/2009/5/21/149.
>
>
> Ok. Well it would be nice if you resend the whole series actually :)
> Do you have another fix pending?
>
> Thanks!
>
In the process of responding to David Gibson's comments I agreed to
a few minor/cosmetic changes - say like updation of comments, renaming
functions or variables, etc.
Given that the patchset is on the verge of integration into -tip tree, I
would prefer them to be done through a separate patch (on -tip) for
enhancement. Kindly let me know what you think about proceeding this
way.
Thanks,
K.Prasad
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [Patch 06/12] Use the new wrapper routines to access debug registers in process/thread code
2009-05-29 10:49 ` Frederic Weisbecker
2009-05-29 13:52 ` K.Prasad
@ 2009-05-29 13:54 ` Alan Stern
1 sibling, 0 replies; 40+ messages in thread
From: Alan Stern @ 2009-05-29 13:54 UTC (permalink / raw)
To: Frederic Weisbecker
Cc: K.Prasad, David Gibson, Steven Rostedt, Ingo Molnar,
Linux Kernel Mailing List, Andrew Morton, Benjamin Herrenschmidt,
maneesh, Roland McGrath, Masami Hiramatsu
On Fri, 29 May 2009, Frederic Weisbecker wrote:
> On Fri, May 29, 2009 at 02:31:46PM +0530, K.Prasad wrote:
> > On Thu, May 28, 2009 at 04:42:38PM +1000, David Gibson wrote:
> > > On Mon, May 11, 2009 at 05:23:44PM +0530, K.Prasad wrote:
> > > > From: Alan Stern <stern@rowland.harvard.edu>
> > > >
> > > > This patch enables the use of abstract debug registers in
> > > > process-handling routines.
> > >
> > > [snip]
> > > >
> > > > + p->thread.io_bitmap_ptr = NULL;
> > >
> > > Why is manipulating the io_bitmap_ptr relevant to debug register
> > > handling?
> >
> > I *re-read* the patch but was unable to find how this change had sneaked
> > in. It shouldn't be there although it is harmless.
>
>
> When I reviewed this patch, I also ended stucked on it.
> But actually I guess I found the sense, this is only for
> convenience.
>
> Look at the current copy_thread() in arch/x86/kernel/process32.c
>
> If p->thread.io_bitmap_ptr fails to be duplicated, we set
> p->thread.io_bitmap_max = 0 and return -ENOMEM
>
> Now look at the patch.
> If we fail to copy the hardware thread virtual registers we
> want to exit with io_bitmap_ptr = NULL
> If we fail to copy the io_bitmap, we want to free the breakpoint
> and exit.
> If we fail further, we want to free breakpoints and io_bitmap_ptr
>
> The out section then tries to:
>
> -free the breakpoints
> -free p->thread.io_bitmap_ptr
>
>
> So it's important to set io_bitmap_ptr to NULL so that
> we know whether we have to release it or not.
That's exactly why I wrote it.
Now, if we could guarantee that io_bitmap_ptr was always NULL upon
entry to copy_thread(), the statement could of course be removed. In
fact it probably is NULL, since otherwise the current code wouldn't
work properly.
Alan Stern
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [Patch 06/12] Use the new wrapper routines to access debug registers in process/thread code
2009-05-29 13:52 ` K.Prasad
@ 2009-05-29 14:07 ` Frédéric Weisbecker
2009-05-30 11:00 ` K.Prasad
0 siblings, 1 reply; 40+ messages in thread
From: Frédéric Weisbecker @ 2009-05-29 14:07 UTC (permalink / raw)
To: prasad
Cc: David Gibson, Alan Stern, Steven Rostedt, Ingo Molnar,
Linux Kernel Mailing List, Andrew Morton, Benjamin Herrenschmidt,
maneesh, Roland McGrath, Masami Hiramatsu
2009/5/29 K.Prasad <prasad@linux.vnet.ibm.com>:
> On Fri, May 29, 2009 at 12:49:03PM +0200, Frederic Weisbecker wrote:
>> On Fri, May 29, 2009 at 02:31:46PM +0530, K.Prasad wrote:
>> > On Thu, May 28, 2009 at 04:42:38PM +1000, David Gibson wrote:
>> > > On Mon, May 11, 2009 at 05:23:44PM +0530, K.Prasad wrote:
>> > > > From: Alan Stern <stern@rowland.harvard.edu>
>> > > >
>> > > > This patch enables the use of abstract debug registers in
>> > > > process-handling routines.
>> > >
>> > > [snip]
>> > > >
>> > > > + p->thread.io_bitmap_ptr = NULL;
>> > >
>> > > Why is manipulating the io_bitmap_ptr relevant to debug register
>> > > handling?
>> >
>> > I *re-read* the patch but was unable to find how this change had sneaked
>> > in. It shouldn't be there although it is harmless.
>>
>>
>> When I reviewed this patch, I also ended stucked on it.
>> But actually I guess I found the sense, this is only for
>> convenience.
>>
>> Look at the current copy_thread() in arch/x86/kernel/process32.c
>>
>> If p->thread.io_bitmap_ptr fails to be duplicated, we set
>> p->thread.io_bitmap_max = 0 and return -ENOMEM
>>
>> Now look at the patch.
>> If we fail to copy the hardware thread virtual registers we
>> want to exit with io_bitmap_ptr = NULL
>> If we fail to copy the io_bitmap, we want to free the breakpoint
>> and exit.
>> If we fail further, we want to free breakpoints and io_bitmap_ptr
>>
>> The out section then tries to:
>>
>> -free the breakpoints
>> -free p->thread.io_bitmap_ptr
>>
>>
>> So it's important to set io_bitmap_ptr to NULL so that
>> we know whether we have to release it or not.
>>
>>
>
> aah...yes. It tricked me! It is needed to bring the desired
> error-return behaviour of copy_thread(). Please ignore this patch (the
> updation of the comments can be brought in through a separate
> enhancement patch...see below).
Ok.
>> > Hi Frederic,
>> > I am attaching a new version of this patch 06/12 that:
>> >
>> > - removes the line that assigns NULL to "p->thread.io_bitmap_ptr"
>>
>>
>> Dangerous. Unless p->thread.io_bitmap_ptr is already zeroed out
>> at this stage?
>>
>>
>> > - Updates the comment in __switch_to() function which was stale (was
>> > relevant when 'last_debugged_task' was used to detect lazy debug
>> > register switching).
>> >
>> > Kindly integrate this version in lieu of the older version sent here:
>> > http://lkml.org/lkml/2009/5/21/149.
>>
>>
>> Ok. Well it would be nice if you resend the whole series actually :)
>> Do you have another fix pending?
>>
>> Thanks!
>>
>
> In the process of responding to David Gibson's comments I agreed to
> a few minor/cosmetic changes - say like updation of comments, renaming
> functions or variables, etc.
>
> Given that the patchset is on the verge of integration into -tip tree, I
> would prefer them to be done through a separate patch (on -tip) for
> enhancement. Kindly let me know what you think about proceeding this
> way.
Well, indeed it would be easier for you to start iterating with a merged base.
But I would prefer to pull-request the patchset to Ingo once the pending fixes
are sent.
So to start the integration of this, I can apply the current patches in my tree,
based on tracing/core. And once you have the minor fixes addressing David's
comments, I also apply them and send the whole to Ingo.
Ok?
But still could you resend me the whole patchset you have, including
the fixes already posted, so that I don't mess up through several versions.
And I guess I could apply them very soon.
Thanks.
> Thanks,
> K.Prasad
>
>
>
>
>
^ permalink raw reply [flat|nested] 40+ messages in thread
* [Patch 06/12] Use the new wrapper routines to access debug registers in process/thread code
[not found] <20090530103857.715014561@prasadkr_t60p.in.ibm.com>
@ 2009-05-30 10:52 ` K.Prasad
0 siblings, 0 replies; 40+ messages in thread
From: K.Prasad @ 2009-05-30 10:52 UTC (permalink / raw)
To: Frederic Weisbecker
Cc: Linux Kernel Mailing List, Ingo Molnar, Alan Stern, K.Prasad
[-- Attachment #1: modify_process_related_06 --]
[-- Type: text/plain, Size: 7286 bytes --]
This patch enables the use of abstract debug registers in
process-handling routines.
Original-patch-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
Reviewed-by: Alan Stern <stern@rowland.harvard.edu>
---
arch/x86/kernel/process.c | 23 ++++++-----------------
arch/x86/kernel/process_32.c | 28 ++++++++++++++++++++++++++++
arch/x86/kernel/process_64.c | 31 +++++++++++++++++++++++++++++++
3 files changed, 65 insertions(+), 17 deletions(-)
Index: linux-2.6-tip.hbkpt/arch/x86/kernel/process.c
===================================================================
--- linux-2.6-tip.hbkpt.orig/arch/x86/kernel/process.c
+++ linux-2.6-tip.hbkpt/arch/x86/kernel/process.c
@@ -17,6 +17,8 @@
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/ds.h>
+#include <asm/debugreg.h>
+#include <asm/hw_breakpoint.h>
unsigned long idle_halt;
EXPORT_SYMBOL(idle_halt);
@@ -48,6 +50,8 @@ void free_thread_xstate(struct task_stru
kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
tsk->thread.xstate = NULL;
}
+ if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
+ flush_thread_hw_breakpoint(tsk);
WARN(tsk->thread.ds_ctx, "leaking DS context\n");
}
@@ -106,14 +110,9 @@ void flush_thread(void)
}
#endif
- clear_tsk_thread_flag(tsk, TIF_DEBUG);
+ if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
+ flush_thread_hw_breakpoint(tsk);
- tsk->thread.debugreg0 = 0;
- tsk->thread.debugreg1 = 0;
- tsk->thread.debugreg2 = 0;
- tsk->thread.debugreg3 = 0;
- tsk->thread.debugreg6 = 0;
- tsk->thread.debugreg7 = 0;
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
/*
* Forget coprocessor state..
@@ -195,16 +194,6 @@ void __switch_to_xtra(struct task_struct
else if (next->debugctlmsr != prev->debugctlmsr)
update_debugctlmsr(next->debugctlmsr);
- if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
- set_debugreg(next->debugreg0, 0);
- set_debugreg(next->debugreg1, 1);
- set_debugreg(next->debugreg2, 2);
- set_debugreg(next->debugreg3, 3);
- /* no 4 and 5 */
- set_debugreg(next->debugreg6, 6);
- set_debugreg(next->debugreg7, 7);
- }
-
if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
test_tsk_thread_flag(next_p, TIF_NOTSC)) {
/* prev and next are different */
Index: linux-2.6-tip.hbkpt/arch/x86/kernel/process_32.c
===================================================================
--- linux-2.6-tip.hbkpt.orig/arch/x86/kernel/process_32.c
+++ linux-2.6-tip.hbkpt/arch/x86/kernel/process_32.c
@@ -58,6 +58,8 @@
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>
+#include <asm/debugreg.h>
+#include <asm/hw_breakpoint.h>
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
@@ -262,7 +264,13 @@ int copy_thread(unsigned long clone_flag
task_user_gs(p) = get_user_gs(regs);
+ p->thread.io_bitmap_ptr = NULL;
tsk = current;
+ err = -ENOMEM;
+ if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
+ if (copy_thread_hw_breakpoint(tsk, p, clone_flags))
+ goto out;
+
if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
IO_BITMAP_BYTES, GFP_KERNEL);
@@ -282,10 +290,13 @@ int copy_thread(unsigned long clone_flag
err = do_set_thread_area(p, -1,
(struct user_desc __user *)childregs->si, 0);
+out:
if (err && p->thread.io_bitmap_ptr) {
kfree(p->thread.io_bitmap_ptr);
p->thread.io_bitmap_max = 0;
}
+ if (err)
+ flush_thread_hw_breakpoint(p);
clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
p->thread.ds_ctx = NULL;
@@ -424,6 +435,23 @@ __switch_to(struct task_struct *prev_p,
lazy_load_gs(next->gs);
percpu_write(current_task, next_p);
+ /*
+ * There's a problem with moving the arch_install_thread_hw_breakpoint()
+ * call before current is updated. Suppose a kernel breakpoint is
+ * triggered in between the two, the hw-breakpoint handler will see that
+ * the 'current' task does not have TIF_DEBUG flag set and will think it
+ * is leftover from an old task (lazy switching) and will erase it. Then
+ * until the next context switch, no user-breakpoints will be installed.
+ *
+ * The real problem is that it's impossible to update both current and
+ * physical debug registers at the same instant, so there will always be
+ * a window in which they disagree and a breakpoint might get triggered.
+ * Since we use lazy switching, we are forced to assume that a
+ * disagreement means that current is correct and the exception is due
+ * to lazy debug register switching.
+ */
+ if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
+ arch_install_thread_hw_breakpoint(next_p);
return prev_p;
}
Index: linux-2.6-tip.hbkpt/arch/x86/kernel/process_64.c
===================================================================
--- linux-2.6-tip.hbkpt.orig/arch/x86/kernel/process_64.c
+++ linux-2.6-tip.hbkpt/arch/x86/kernel/process_64.c
@@ -52,6 +52,8 @@
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>
+#include <asm/debugreg.h>
+#include <asm/hw_breakpoint.h>
asmlinkage extern void ret_from_fork(void);
@@ -245,6 +247,8 @@ void release_thread(struct task_struct *
BUG();
}
}
+ if (unlikely(dead_task->thread.debugreg7))
+ flush_thread_hw_breakpoint(dead_task);
}
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
@@ -300,12 +304,18 @@ int copy_thread(unsigned long clone_flag
p->thread.fs = me->thread.fs;
p->thread.gs = me->thread.gs;
+ p->thread.io_bitmap_ptr = NULL;
savesegment(gs, p->thread.gsindex);
savesegment(fs, p->thread.fsindex);
savesegment(es, p->thread.es);
savesegment(ds, p->thread.ds);
+ err = -ENOMEM;
+ if (unlikely(test_tsk_thread_flag(me, TIF_DEBUG)))
+ if (copy_thread_hw_breakpoint(me, p, clone_flags))
+ goto out;
+
if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
if (!p->thread.io_bitmap_ptr) {
@@ -344,6 +354,9 @@ out:
kfree(p->thread.io_bitmap_ptr);
p->thread.io_bitmap_max = 0;
}
+ if (err)
+ flush_thread_hw_breakpoint(p);
+
return err;
}
@@ -489,6 +502,24 @@ __switch_to(struct task_struct *prev_p,
*/
if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
math_state_restore();
+ /*
+ * There's a problem with moving the arch_install_thread_hw_breakpoint()
+ * call before current is updated. Suppose a kernel breakpoint is
+ * triggered in between the two, the hw-breakpoint handler will see that
+ * the 'current' task does not have TIF_DEBUG flag set and will think it
+ * is leftover from an old task (lazy switching) and will erase it. Then
+ * until the next context switch, no user-breakpoints will be installed.
+ *
+ * The real problem is that it's impossible to update both current and
+ * physical debug registers at the same instant, so there will always be
+ * a window in which they disagree and a breakpoint might get triggered.
+ * Since we use lazy switching, we are forced to assume that a
+ * disagreement means that current is correct and the exception is due
+ * to lazy debug register switching.
+ */
+ if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
+ arch_install_thread_hw_breakpoint(next_p);
+
return prev_p;
}
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [Patch 06/12] Use the new wrapper routines to access debug registers in process/thread code
2009-05-29 14:07 ` Frédéric Weisbecker
@ 2009-05-30 11:00 ` K.Prasad
0 siblings, 0 replies; 40+ messages in thread
From: K.Prasad @ 2009-05-30 11:00 UTC (permalink / raw)
To: Frédéric Weisbecker
Cc: David Gibson, Alan Stern, Steven Rostedt, Ingo Molnar,
Linux Kernel Mailing List, Andrew Morton, Benjamin Herrenschmidt,
maneesh, Roland McGrath, Masami Hiramatsu
On Fri, May 29, 2009 at 04:07:07PM +0200, Frédéric Weisbecker wrote:
> 2009/5/29 K.Prasad <prasad@linux.vnet.ibm.com>:
> > On Fri, May 29, 2009 at 12:49:03PM +0200, Frederic Weisbecker wrote:
> >> On Fri, May 29, 2009 at 02:31:46PM +0530, K.Prasad wrote:
> >> > On Thu, May 28, 2009 at 04:42:38PM +1000, David Gibson wrote:
> >> > > On Mon, May 11, 2009 at 05:23:44PM +0530, K.Prasad wrote:
> >> > > > From: Alan Stern <stern@rowland.harvard.edu>
> >> > > >
> >> > > > This patch enables the use of abstract debug registers in
> >> > > > process-handling routines.
> >> > >
> >> > > [snip]
> >> > > >
> >> > > > + p->thread.io_bitmap_ptr = NULL;
> >> > >
> >> > > Why is manipulating the io_bitmap_ptr relevant to debug register
> >> > > handling?
> >> >
> >> > I *re-read* the patch but was unable to find how this change had sneaked
> >> > in. It shouldn't be there although it is harmless.
> >>
> >>
> >> When I reviewed this patch, I also ended stucked on it.
> >> But actually I guess I found the sense, this is only for
> >> convenience.
> >>
> >> Look at the current copy_thread() in arch/x86/kernel/process32.c
> >>
> >> If p->thread.io_bitmap_ptr fails to be duplicated, we set
> >> p->thread.io_bitmap_max = 0 and return -ENOMEM
> >>
> >> Now look at the patch.
> >> If we fail to copy the hardware thread virtual registers we
> >> want to exit with io_bitmap_ptr = NULL
> >> If we fail to copy the io_bitmap, we want to free the breakpoint
> >> and exit.
> >> If we fail further, we want to free breakpoints and io_bitmap_ptr
> >>
> >> The out section then tries to:
> >>
> >> -free the breakpoints
> >> -free p->thread.io_bitmap_ptr
> >>
> >>
> >> So it's important to set io_bitmap_ptr to NULL so that
> >> we know whether we have to release it or not.
> >>
> >>
> >
> > aah...yes. It tricked me! It is needed to bring the desired
> > error-return behaviour of copy_thread(). Please ignore this patch (the
> > updation of the comments can be brought in through a separate
> > enhancement patch...see below).
>
>
> Ok.
>
>
> >> > Hi Frederic,
> >> > I am attaching a new version of this patch 06/12 that:
> >> >
> >> > - removes the line that assigns NULL to "p->thread.io_bitmap_ptr"
> >>
> >>
> >> Dangerous. Unless p->thread.io_bitmap_ptr is already zeroed out
> >> at this stage?
> >>
> >>
> >> > - Updates the comment in __switch_to() function which was stale (was
> >> > relevant when 'last_debugged_task' was used to detect lazy debug
> >> > register switching).
> >> >
> >> > Kindly integrate this version in lieu of the older version sent here:
> >> > http://lkml.org/lkml/2009/5/21/149.
> >>
> >>
> >> Ok. Well it would be nice if you resend the whole series actually :)
> >> Do you have another fix pending?
> >>
> >> Thanks!
> >>
> >
> > In the process of responding to David Gibson's comments I agreed to
> > a few minor/cosmetic changes - say like updation of comments, renaming
> > functions or variables, etc.
> >
> > Given that the patchset is on the verge of integration into -tip tree, I
> > would prefer them to be done through a separate patch (on -tip) for
> > enhancement. Kindly let me know what you think about proceeding this
> > way.
>
>
> Well, indeed it would be easier for you to start iterating with a merged base.
> But I would prefer to pull-request the patchset to Ingo once the pending fixes
> are sent.
>
> So to start the integration of this, I can apply the current patches in my tree,
> based on tracing/core. And once you have the minor fixes addressing David's
> comments, I also apply them and send the whole to Ingo.
>
> Ok?
> But still could you resend me the whole patchset you have, including
> the fixes already posted, so that I don't mess up through several versions.
>
> And I guess I could apply them very soon.
>
> Thanks.
>
Hi Frederic,
I have re-sent the patchset which includes some of the changes
that were made on them since their previous posting.
Please find the new patchset addressed to you starting here:
http://lkml.org/lkml/2009/5/30/67.
I would like to bring in some of the enhancements I agreed to David,
through a separate patchset after its integration into -tip tree. Just
as you mentioned above, it should be much easier to test once there's
a merged base.
Thanks,
K.Prasad
^ permalink raw reply [flat|nested] 40+ messages in thread
* [Patch 06/12] Use the new wrapper routines to access debug registers in process/thread code
[not found] <20090601180605.799735829@prasadkr_t60p.in.ibm.com>
@ 2009-06-01 18:14 ` K.Prasad
0 siblings, 0 replies; 40+ messages in thread
From: K.Prasad @ 2009-06-01 18:14 UTC (permalink / raw)
To: Frederic Weisbecker
Cc: Linux Kernel Mailing List, Ingo Molnar, Alan Stern, K.Prasad
[-- Attachment #1: modify_process_related_06 --]
[-- Type: text/plain, Size: 7289 bytes --]
This patch enables the use of abstract debug registers in
process-handling routines.
Original-patch-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
Reviewed-by: Alan Stern <stern@rowland.harvard.edu>
---
arch/x86/kernel/process.c | 22 ++++++----------------
arch/x86/kernel/process_32.c | 28 ++++++++++++++++++++++++++++
arch/x86/kernel/process_64.c | 31 +++++++++++++++++++++++++++++++
3 files changed, 65 insertions(+), 16 deletions(-)
Index: linux-2.6-tip.hbkpt/arch/x86/kernel/process.c
===================================================================
--- linux-2.6-tip.hbkpt.orig/arch/x86/kernel/process.c
+++ linux-2.6-tip.hbkpt/arch/x86/kernel/process.c
@@ -17,6 +17,8 @@
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/ds.h>
+#include <asm/debugreg.h>
+#include <asm/hw_breakpoint.h>
unsigned long idle_halt;
EXPORT_SYMBOL(idle_halt);
@@ -48,6 +50,8 @@ void free_thread_xstate(struct task_stru
kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
tsk->thread.xstate = NULL;
}
+ if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
+ flush_thread_hw_breakpoint(tsk);
WARN(tsk->thread.ds_ctx, "leaking DS context\n");
}
@@ -108,12 +112,8 @@ void flush_thread(void)
clear_tsk_thread_flag(tsk, TIF_DEBUG);
- tsk->thread.debugreg[0] = 0;
- tsk->thread.debugreg[1] = 0;
- tsk->thread.debugreg[2] = 0;
- tsk->thread.debugreg[3] = 0;
- tsk->thread.debugreg6 = 0;
- tsk->thread.debugreg7 = 0;
+ if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
+ flush_thread_hw_breakpoint(tsk);
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
/*
* Forget coprocessor state..
@@ -195,16 +195,6 @@ void __switch_to_xtra(struct task_struct
else if (next->debugctlmsr != prev->debugctlmsr)
update_debugctlmsr(next->debugctlmsr);
- if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
- set_debugreg(next->debugreg[0], 0);
- set_debugreg(next->debugreg[1], 1);
- set_debugreg(next->debugreg[2], 2);
- set_debugreg(next->debugreg[3], 3);
- /* no 4 and 5 */
- set_debugreg(next->debugreg6, 6);
- set_debugreg(next->debugreg7, 7);
- }
-
if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
test_tsk_thread_flag(next_p, TIF_NOTSC)) {
/* prev and next are different */
Index: linux-2.6-tip.hbkpt/arch/x86/kernel/process_32.c
===================================================================
--- linux-2.6-tip.hbkpt.orig/arch/x86/kernel/process_32.c
+++ linux-2.6-tip.hbkpt/arch/x86/kernel/process_32.c
@@ -58,6 +58,8 @@
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>
+#include <asm/debugreg.h>
+#include <asm/hw_breakpoint.h>
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
@@ -262,7 +264,13 @@ int copy_thread(unsigned long clone_flag
task_user_gs(p) = get_user_gs(regs);
+ p->thread.io_bitmap_ptr = NULL;
tsk = current;
+ err = -ENOMEM;
+ if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
+ if (copy_thread_hw_breakpoint(tsk, p, clone_flags))
+ goto out;
+
if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
IO_BITMAP_BYTES, GFP_KERNEL);
@@ -282,10 +290,13 @@ int copy_thread(unsigned long clone_flag
err = do_set_thread_area(p, -1,
(struct user_desc __user *)childregs->si, 0);
+out:
if (err && p->thread.io_bitmap_ptr) {
kfree(p->thread.io_bitmap_ptr);
p->thread.io_bitmap_max = 0;
}
+ if (err)
+ flush_thread_hw_breakpoint(p);
clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
p->thread.ds_ctx = NULL;
@@ -424,6 +435,23 @@ __switch_to(struct task_struct *prev_p,
lazy_load_gs(next->gs);
percpu_write(current_task, next_p);
+ /*
+ * There's a problem with moving the arch_install_thread_hw_breakpoint()
+ * call before current is updated. Suppose a kernel breakpoint is
+ * triggered in between the two, the hw-breakpoint handler will see that
+ * the 'current' task does not have TIF_DEBUG flag set and will think it
+ * is leftover from an old task (lazy switching) and will erase it. Then
+ * until the next context switch, no user-breakpoints will be installed.
+ *
+ * The real problem is that it's impossible to update both current and
+ * physical debug registers at the same instant, so there will always be
+ * a window in which they disagree and a breakpoint might get triggered.
+ * Since we use lazy switching, we are forced to assume that a
+ * disagreement means that current is correct and the exception is due
+ * to lazy debug register switching.
+ */
+ if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
+ arch_install_thread_hw_breakpoint(next_p);
return prev_p;
}
Index: linux-2.6-tip.hbkpt/arch/x86/kernel/process_64.c
===================================================================
--- linux-2.6-tip.hbkpt.orig/arch/x86/kernel/process_64.c
+++ linux-2.6-tip.hbkpt/arch/x86/kernel/process_64.c
@@ -52,6 +52,8 @@
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>
+#include <asm/debugreg.h>
+#include <asm/hw_breakpoint.h>
asmlinkage extern void ret_from_fork(void);
@@ -245,6 +247,8 @@ void release_thread(struct task_struct *
BUG();
}
}
+ if (unlikely(dead_task->thread.debugreg7))
+ flush_thread_hw_breakpoint(dead_task);
}
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
@@ -300,12 +304,18 @@ int copy_thread(unsigned long clone_flag
p->thread.fs = me->thread.fs;
p->thread.gs = me->thread.gs;
+ p->thread.io_bitmap_ptr = NULL;
savesegment(gs, p->thread.gsindex);
savesegment(fs, p->thread.fsindex);
savesegment(es, p->thread.es);
savesegment(ds, p->thread.ds);
+ err = -ENOMEM;
+ if (unlikely(test_tsk_thread_flag(me, TIF_DEBUG)))
+ if (copy_thread_hw_breakpoint(me, p, clone_flags))
+ goto out;
+
if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
if (!p->thread.io_bitmap_ptr) {
@@ -344,6 +354,9 @@ out:
kfree(p->thread.io_bitmap_ptr);
p->thread.io_bitmap_max = 0;
}
+ if (err)
+ flush_thread_hw_breakpoint(p);
+
return err;
}
@@ -489,6 +502,24 @@ __switch_to(struct task_struct *prev_p,
*/
if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
math_state_restore();
+ /*
+ * There's a problem with moving the arch_install_thread_hw_breakpoint()
+ * call before current is updated. Suppose a kernel breakpoint is
+ * triggered in between the two, the hw-breakpoint handler will see that
+ * the 'current' task does not have TIF_DEBUG flag set and will think it
+ * is leftover from an old task (lazy switching) and will erase it. Then
+ * until the next context switch, no user-breakpoints will be installed.
+ *
+ * The real problem is that it's impossible to update both current and
+ * physical debug registers at the same instant, so there will always be
+ * a window in which they disagree and a breakpoint might get triggered.
+ * Since we use lazy switching, we are forced to assume that a
+ * disagreement means that current is correct and the exception is due
+ * to lazy debug register switching.
+ */
+ if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
+ arch_install_thread_hw_breakpoint(next_p);
+
return prev_p;
}
^ permalink raw reply [flat|nested] 40+ messages in thread
end of thread, other threads:[~2009-06-01 18:15 UTC | newest]
Thread overview: 40+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <20090511114422.133566343@prasadkr_t60p.in.ibm.com>
2009-05-11 11:52 ` [Patch 01/12] Prepare the code for Hardware Breakpoint interfaces K.Prasad
2009-05-28 5:28 ` David Gibson
2009-05-28 11:10 ` K.Prasad
2009-05-11 11:52 ` [Patch 02/12] Introducing generic hardware breakpoint handler interfaces K.Prasad
2009-05-11 12:12 ` Bharata B Rao
2009-05-11 12:16 ` K.Prasad
2009-05-28 6:15 ` David Gibson
2009-05-28 11:55 ` K.Prasad
2009-05-29 2:59 ` David Gibson
2009-05-11 11:53 ` [Patch 03/12] x86 architecture implementation of Hardware Breakpoint interfaces K.Prasad
2009-05-28 6:35 ` David Gibson
2009-05-28 13:41 ` K.Prasad
2009-05-29 3:15 ` David Gibson
2009-05-11 11:53 ` [Patch 04/12] Modifying generic debug exception to use thread-specific debug registers K.Prasad
2009-05-11 11:53 ` [Patch 05/12] Use wrapper routines around debug registers in processor related functions K.Prasad
2009-05-11 11:53 ` [Patch 06/12] Use the new wrapper routines to access debug registers in process/thread code K.Prasad
2009-05-28 6:42 ` David Gibson
2009-05-29 9:01 ` K.Prasad
2009-05-29 10:49 ` Frederic Weisbecker
2009-05-29 13:52 ` K.Prasad
2009-05-29 14:07 ` Frédéric Weisbecker
2009-05-30 11:00 ` K.Prasad
2009-05-29 13:54 ` Alan Stern
2009-05-11 11:53 ` [Patch 07/12] Modify signal handling code to refrain from re-enabling HW Breakpoints K.Prasad
2009-05-11 11:54 ` [Patch 08/12] Modify Ptrace routines to access breakpoint registers K.Prasad
2009-05-11 11:54 ` [Patch 09/12] Cleanup HW Breakpoint registers before kexec K.Prasad
2009-05-11 11:54 ` [Patch 10/12] Sample HW breakpoint over kernel data address K.Prasad
2009-05-11 11:55 ` [Patch 11/12] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v4 K.Prasad
2009-05-11 22:14 ` Frederic Weisbecker
2009-05-12 14:19 ` [Patch 11/12] ftrace plugin for kernel symbol tracing using HWBreakpoint " K.Prasad
2009-05-12 15:15 ` Frederic Weisbecker
2009-05-12 20:02 ` [Patch 11/12] ftrace plugin for kernel symbol tracing usingHWBreakpoint " K.Prasad
2009-05-11 11:55 ` [Patch 12/12] Reset bits in dr6 after the corresponding exception is handled K.Prasad
[not found] <20090601180605.799735829@prasadkr_t60p.in.ibm.com>
2009-06-01 18:14 ` [Patch 06/12] Use the new wrapper routines to access debug registers in process/thread code K.Prasad
[not found] <20090530103857.715014561@prasadkr_t60p.in.ibm.com>
2009-05-30 10:52 ` K.Prasad
[not found] <20090521095613.834622717@prasadkr_t60p.in.ibm.com>
2009-05-21 14:02 ` K.Prasad
[not found] <20090515105133.629980476@prasadkr_t60p.in.ibm.com>
2009-05-15 10:58 ` K.Prasad
2009-05-16 0:29 ` K.Prasad
[not found] <20090513160546.592373797@prasadkr_t60p.in.ibm.com>
2009-05-13 16:14 ` K.Prasad
[not found] <20090424055710.764502564@prasadkr_t60p.in.ibm.com>
2009-04-24 6:17 ` K.Prasad
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox