public inbox for linux-ia64@vger.kernel.org
* [PATCH 5/5] IA64 dynamic ftrace support
@ 2008-12-23  6:08 Shaohua Li
  2008-12-23 14:35 ` Steven Rostedt
                   ` (16 more replies)
  0 siblings, 17 replies; 18+ messages in thread
From: Shaohua Li @ 2008-12-23  6:08 UTC (permalink / raw)
  To: linux-ia64

IA64 dynamic ftrace support. The main tricky thing here is supporting modules.
In a module, each routine's mcount call goes through a PLT stub, which in
turn calls the kernel's mcount. We can't simply make the mcount call branch
into the kernel's mcount directly, because the kernel and the module have
different gp values and the branch instruction only supports a 25-bit offset.
So I introduced a new PLT stub, which calls into the kernel's ftrace_caller.
At module load time, all mcount calls are converted to nops. When a nop is
converted back to a call, we make it call the new PLT stub instead of the old
mcount PLT stub.
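
To make the 25-bit limit concrete, here is a small illustrative helper (not
part of the patch; the name is made up). br.call encodes a 21-bit immediate
(imm20 plus a sign bit) that is shifted left by 4, i.e. a signed 25-bit byte
displacement, so the target must lie within roughly +/-16MB of the call site;
module text normally sits far outside that range of the kernel text, hence
the PLT stub:

	/* illustrative only: can 'addr' be reached by an IP-relative br.call
	 * placed at 'ip'?  The displacement is a signed 21-bit bundle count,
	 * i.e. a signed 25-bit byte offset. */
	static inline int brcall_target_in_range(unsigned long ip, unsigned long addr)
	{
		long delta = (long)addr - (long)ip;

		return delta >= -(1L << 24) && delta < (1L << 24);
	}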

Signed-off-by: Shaohua Li <shaohua.li@intel.com>
---
 arch/ia64/Kconfig              |    2 
 arch/ia64/include/asm/ftrace.h |   17 ++
 arch/ia64/include/asm/module.h |    4 
 arch/ia64/kernel/Makefile      |    5 
 arch/ia64/kernel/entry.S       |   37 ++++++
 arch/ia64/kernel/ftrace.c      |  234 +++++++++++++++++++++++++++++++++++++++++
 arch/ia64/kernel/module.c      |   15 ++
 scripts/recordmcount.pl        |    7 +
 8 files changed, 321 insertions(+)

Index: linux/arch/ia64/Kconfig
===================================================================
--- linux.orig/arch/ia64/Kconfig	2008-12-23 13:13:17.000000000 +0800
+++ linux/arch/ia64/Kconfig	2008-12-23 13:30:09.000000000 +0800
@@ -21,6 +21,8 @@ config IA64
 	select HAVE_OPROFILE
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
+	select HAVE_FTRACE_MCOUNT_RECORD
+	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FUNCTION_TRACER
 	select HAVE_DMA_ATTRS
 	select HAVE_KVM
Index: linux/arch/ia64/kernel/Makefile
===================================================================
--- linux.orig/arch/ia64/kernel/Makefile	2008-12-23 13:11:27.000000000 +0800
+++ linux/arch/ia64/kernel/Makefile	2008-12-23 13:30:09.000000000 +0800
@@ -2,6 +2,10 @@
 # Makefile for the linux kernel.
 #
 
+ifdef CONFIG_DYNAMIC_FTRACE
+CFLAGS_REMOVE_ftrace.o = -pg
+endif
+
 extra-y	:= head.o init_task.o vmlinux.lds
 
 obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o	\
@@ -28,6 +32,7 @@ obj-$(CONFIG_IA64_CYCLONE)	+= cyclone.o
 obj-$(CONFIG_CPU_FREQ)		+= cpufreq/
 obj-$(CONFIG_IA64_MCA_RECOVERY)	+= mca_recovery.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o jprobes.o
+obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
 obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o crash.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
 obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR)	+= uncached.o
Index: linux/arch/ia64/kernel/ftrace.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux/arch/ia64/kernel/ftrace.c	2008-12-23 13:30:09.000000000 +0800
@@ -0,0 +1,234 @@
+/*
+ * Dynamic function tracing support.
+ *
+ * Copyright (C) 2008 Shaohua Li <shaohua.li@intel.com>
+ *
+ * For licencing details, see COPYING.
+ *
+ * Defines low-level handling of mcount calls when the kernel
+ * is compiled with the -pg flag. When using dynamic ftrace, the
+ * mcount call-sites get patched lazily with NOP till they are
+ * enabled. All code mutation routines here take effect atomically.
+ */
+
+#include <linux/uaccess.h>
+#include <linux/ftrace.h>
+
+#include <asm/cacheflush.h>
+#include <asm/ftrace.h>
+
+static unsigned char ftrace_nop_code[MCOUNT_INSN_SIZE] = {
+	0x00, 0x00, 0x00, 0x00, 0x01, 0x00,	/* nop.m 0x0 */
+	0x00, 0x00, 0x00, 0x02, 0x00, 0x00,	/* nop.i 0x0 */
+	0x00, 0x00, 0x04, 0x00,			/* nop.i 0x0 */
+	0x01, 0x00, 0x00, 0x00, 0x01, 0x00,	/* nop.m 0x0 */
+	0x00, 0x00, 0x00, 0x02, 0x00, 0x00,	/* nop.i 0x0 */
+	0x00, 0x00, 0x04, 0x00			/* nop.i 0x0;; */
+};
+
+static unsigned char *ftrace_nop_replace(void)
+{
+	return ftrace_nop_code;
+}
+
+/* On IA64, the -pg option adds the two bundles below at the start of each function */
+static unsigned char __attribute__((aligned(8)))
+ftrace_call_code[MCOUNT_INSN_SIZE] = {
+	0x02, 0x40, 0x31, 0x10, 0x80, 0x05, /* alloc r40=ar.pfs,12,8,0 */
+	0xb0, 0x02, 0x00, 0x00, 0x42, 0x40, /* mov r43=r0;; */
+	0x05, 0x00, 0xc4, 0x00,             /* mov r42=b0 */
+	0x11, 0x48, 0x01, 0x02, 0x00, 0x21, /* mov r41=r1 */
+	0x00, 0x00, 0x00, 0x02, 0x00, 0x00, /* nop.i 0x0 */
+	0x08, 0x00, 0x00, 0x50              /* br.call.sptk.many b0 = _mcount;; */
+};
+
+struct ftrace_call_insn {
+	u64 dummy1, dummy2, dummy3;
+	u64 dummy4:64-41+13;
+	u64 imm20:20;
+	u64 dummy5:3;
+	u64 sign:1;
+	u64 dummy6:4;
+};
+
+static void calculate_offset(unsigned long ip, unsigned long addr,
+	unsigned long *offset, int *sign)
+{
+	long of = addr - (ip + 0x10);
+	if (of < 0)
+		*sign = 1;
+	else
+		*sign = 0;
+	*offset = of >> 4;
+}
+
+static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
+{
+	struct ftrace_call_insn *code = (void *)ftrace_call_code;
+	unsigned long offset;
+	int sign;
+
+	calculate_offset(ip, addr, &offset, &sign);
+	code->sign = sign;
+	code->imm20 = offset;
+
+	return ftrace_call_code;
+}
+
+static int
+ftrace_modify_code(unsigned long ip, unsigned char *old_code,
+		   unsigned char *new_code, int do_check)
+{
+	unsigned char replaced[MCOUNT_INSN_SIZE];
+
+	/*
+	 * Note: Due to modules and __init, code can
+	 *  disappear and change, we need to protect against faulting
+	 *  as well as code changing. We do this by using the
+	 *  probe_kernel_* functions.
+	 *
+	 * No real locking needed, this code is run through
+	 * kstop_machine, or before SMP starts.
+	 */
+
+	if (!do_check)
+		goto skip_check;
+
+	/* read the text we want to modify */
+	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+		return -EFAULT;
+
+	/* Make sure it is what we expect it to be */
+	if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
+		return -EINVAL;
+
+skip_check:
+	/* replace the text with the new text */
+	if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE))
+		return -EPERM;
+
+	flush_icache_range(ip, ip + MCOUNT_INSN_SIZE);
+
+	return 0;
+}
+
+int ftrace_make_nop(struct module *mod,
+		    struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned char __attribute__((aligned(8))) replaced[MCOUNT_INSN_SIZE];
+	struct ftrace_call_insn *call_insn = (void *)ftrace_call_code, *tmp_call;
+	unsigned long ip = rec->ip;
+	int is_kernel_address = core_kernel_text(ip);
+	unsigned char *old, *new;
+	int do_check = 0;
+
+	if (is_kernel_address) {
+		do_check = 1;
+		old = ftrace_call_replace(ip, addr);
+		goto modify;
+	}
+
+	if (!rec->arch.mod) {
+		if (!mod) {
+			printk(KERN_ERR "No module loaded addr=%lx\n",
+			       addr);
+			return -EFAULT;
+		}
+		rec->arch.mod = mod;
+	} else if (mod) {
+		if (mod != rec->arch.mod) {
+			printk(KERN_ERR
+			       "Record mod %p not equal to passed in mod %p\n",
+			       rec->arch.mod, mod);
+			return -EINVAL;
+		}
+		/* nothing to do if mod = rec->arch.mod */
+	} else
+		mod = rec->arch.mod;
+
+	/* the mcount call is a call to a PLT entry */
+	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+		return -EFAULT;
+	tmp_call = (void *)replaced;
+	call_insn->sign = tmp_call->sign;
+	call_insn->imm20 = tmp_call->imm20;
+	if (memcmp(replaced, ftrace_call_code, MCOUNT_INSN_SIZE) != 0)
+		return -EINVAL;
+	old = ftrace_call_code;
+
+modify:
+	new = ftrace_nop_replace();
+	return ftrace_modify_code(ip, old, new, do_check);
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long ip = rec->ip;
+	int is_kernel_address = core_kernel_text(ip);
+	struct module *mod;
+	unsigned char *old, *new;
+
+	if (is_kernel_address)
+		goto modify;
+
+	if (!rec->arch.mod) {
+		printk(KERN_ERR "No module loaded\n");
+		return -EINVAL;
+	}
+	mod = rec->arch.mod;
+	if (!mod->arch.ftrace_caller_plt) {
+		printk(KERN_ERR "No mcount PLT entry for module %s\n",
+			mod->name);
+		return -EINVAL;
+	}
+	addr = mod->arch.ftrace_caller_plt;
+modify:
+	old=  ftrace_nop_replace();
+	new = ftrace_call_replace(ip, addr);
+	return ftrace_modify_code(ip, old, new, 1);
+}
+
+struct ftrace_caller_code {
+	u64 dummy1:46;
+	u64 imm41_18:64-46;
+	u64 imm41_23:41-(64-46);
+	u64 dummy2:13;
+	u64 imm7b:7;
+	u64 dummy3:1;
+	u64 ic:1;
+	u64 imm5c:5;
+	u64 imm9d:9;
+	u64 i:1;
+	u64 dummy4:4;
+};
+
+/* On IA64, _mcount can't directly call ftrace_stub; only a jump is OK */
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+	unsigned long ip;
+	unsigned long addr = ((struct fnptr *)ftrace_call)->ip;
+	struct ftrace_caller_code *code =  (void *)addr;
+
+	if (func == ftrace_stub)
+		return 0;
+	ip = ((struct fnptr *)func)->ip;
+
+	code->i = ip >> 63;
+	code->imm9d = ip >> 7;
+	code->imm5c = ip >> 16;
+	code->ic = ip >> 21;
+	code->imm7b = ip & 0x7f;
+	code->imm41_18 = ip >> 22;
+	code->imm41_23 = ip >> 40;
+
+	flush_icache_range(addr, addr + 16);
+	return 0;
+}
+
+/* run from kstop_machine */
+int __init ftrace_dyn_arch_init(void *data)
+{
+	*(unsigned long *)data = 0;
+
+	return 0;
+}
Index: linux/scripts/recordmcount.pl
===================================================================
--- linux.orig/scripts/recordmcount.pl	2008-12-23 13:24:59.000000000 +0800
+++ linux/scripts/recordmcount.pl	2008-12-23 13:30:09.000000000 +0800
@@ -206,6 +206,13 @@ if ($arch eq "x86_64") {
     $alignment = 2;
     $section_type = '%progbits';
 
+} elsif ($arch eq "ia64") {
+    $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s_mcount\$";
+    $type = "data8";
+
+    if ($is_module eq "0") {
+        $cc .= " -mconstant-gp";
+    }
 } else {
     die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD";
 }
Index: linux/arch/ia64/include/asm/ftrace.h
===================================================================
--- linux.orig/arch/ia64/include/asm/ftrace.h	2008-12-23 13:13:17.000000000 +0800
+++ linux/arch/ia64/include/asm/ftrace.h	2008-12-23 13:30:09.000000000 +0800
@@ -7,6 +7,23 @@
 #ifndef __ASSEMBLY__
 extern void _mcount(unsigned long pfs, unsigned long r1, unsigned long b0, unsigned long r0);
 #define mcount _mcount
+#define ftrace_call ftrace_caller
+
+#include <asm/kprobes.h>
+/* On IA64, MCOUNT_ADDR is set at link time, so it's not a compile-time constant */
+#define MCOUNT_ADDR (((struct fnptr *)mcount)->ip)
+#define FTRACE_ADDR (((struct fnptr *)ftrace_caller)->ip)
+
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+	/* second bundle, insn 2 */
+	return addr - 0x12;
+}
+
+
+struct dyn_arch_ftrace {
+	struct module *mod;
+};
 
 #endif
 
Index: linux/arch/ia64/include/asm/module.h
===================================================================
--- linux.orig/arch/ia64/include/asm/module.h	2008-12-23 13:11:27.000000000 +0800
+++ linux/arch/ia64/include/asm/module.h	2008-12-23 13:30:09.000000000 +0800
@@ -21,6 +21,10 @@ struct mod_arch_specific {
 	void *core_unw_table;		/* core unwind-table cookie returned by unwinder */
 	void *init_unw_table;		/* init unwind-table cookie returned by unwinder */
 	unsigned int next_got_entry;	/* index of next available got entry */
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+	uint64_t ftrace_caller_plt;
+#endif
 };
 
 #define Elf_Shdr	Elf64_Shdr
Index: linux/arch/ia64/kernel/module.c
===================================================================
--- linux.orig/arch/ia64/kernel/module.c	2008-12-23 13:11:27.000000000 +0800
+++ linux/arch/ia64/kernel/module.c	2008-12-23 13:30:09.000000000 +0800
@@ -32,6 +32,7 @@
 #include <linux/moduleloader.h>
 #include <linux/string.h>
 #include <linux/vmalloc.h>
+#include <linux/ftrace.h>
 
 #include <asm/patch.h>
 #include <asm/unaligned.h>
@@ -468,6 +469,9 @@ module_frob_arch_sections (Elf_Ehdr *ehd
 			core_plts += count_plts(rels, numrels);
 	}
 
+#ifdef CONFIG_DYNAMIC_FTRACE
+	core_plts++;
+#endif
 	mod->arch.core_plt->sh_type = SHT_NOBITS;
 	mod->arch.core_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
 	mod->arch.core_plt->sh_addralign = 16;
@@ -839,6 +843,17 @@ apply_relocate_add (Elf64_Shdr *sechdrs,
 		if (ret < 0)
 			return ret;
 	}
+#ifdef CONFIG_DYNAMIC_FTRACE
+	if (!mod->arch.ftrace_caller_plt) {
+		int ok = 1;
+		/* fake a 'struct insn' for get_plt, which is in module core */
+		uint64_t fake_insn = (uint64_t)mod->module_init + mod->init_size;
+		mod->arch.ftrace_caller_plt = get_plt(mod,
+			(struct insn *)fake_insn, (uint64_t)ftrace_caller, &ok);
+		if (!ok)
+			mod->arch.ftrace_caller_plt = 0;
+	}
+#endif
 	return 0;
 }
 
Index: linux/arch/ia64/kernel/entry.S
===================================================================
--- linux.orig/arch/ia64/kernel/entry.S	2008-12-23 13:13:17.000000000 +0800
+++ linux/arch/ia64/kernel/entry.S	2008-12-23 13:30:09.000000000 +0800
@@ -1406,6 +1406,42 @@ GLOBAL_ENTRY(unw_init_running)
 END(unw_init_running)
 
 #ifdef CONFIG_FUNCTION_TRACER
+#ifdef CONFIG_DYNAMIC_FTRACE
+GLOBAL_ENTRY(_mcount)
+	br ftrace_stub
+END(_mcount)
+
+.here:
+	br.ret.sptk.many b0
+
+GLOBAL_ENTRY(ftrace_caller)
+{
+	.mlx
+	nop.m 0x0
+	movl r3 = .here;;
+}
+	alloc loc0 = ar.pfs, 4, 4, 2, 0
+	;;
+	mov loc1 = b0
+	mov out0 = b0
+	mov loc2 = r8
+	mov loc3 = r15
+	;;
+	adds out0 = -MCOUNT_INSN_SIZE, out0
+	mov out1 = in2
+	mov b6 = r3
+
+	br.call.sptk.many b0 = b6
+	;;
+	mov ar.pfs = loc0
+	mov b0 = loc1
+	mov r8 = loc2
+	mov r15 = loc3
+	br ftrace_stub
+	;;
+END(ftrace_caller)
+
+#else
 GLOBAL_ENTRY(_mcount)
 	movl r2 = ftrace_stub
 	movl r3 = ftrace_trace_function;;
@@ -1435,6 +1471,7 @@ GLOBAL_ENTRY(_mcount)
 	br ftrace_stub
 	;;
 END(_mcount)
+#endif
 
 GLOBAL_ENTRY(ftrace_stub)
 	mov r3 = b0



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 5/5] IA64 dynamic ftrace support
  2008-12-23  6:08 [PATCH 5/5] IA64 dynamic ftrace support Shaohua Li
@ 2008-12-23 14:35 ` Steven Rostedt
  2008-12-24  0:54 ` Shaohua Li
                   ` (15 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Steven Rostedt @ 2008-12-23 14:35 UTC (permalink / raw)
  To: linux-ia64


On Tue, 2008-12-23 at 14:08 +0800, Shaohua Li wrote:
> IA64 dynamic ftrace support. The main tricky thing here is to support module.
> In a module, each routine's mcount call will call a PLT stub, which
> will call kernel mcount. We can't simply make the mcount call call into
> kernel mcount, as kernel and mocule have different gp and the
> instruction just supports 25bit offset. So I introduced a new PLT stub,
> which will call into kernel ftrace_caller. When module loading, all
> mcount call will be converted to nop. When the nop is converted to call,
> we make the call to the new PLT stub instead of old mcount PLT stub.
> 
> Signed-off-by: Shaohua Li <shaohua.li@intel.com>
> ---
>  arch/ia64/Kconfig              |    2 
>  arch/ia64/include/asm/ftrace.h |   17 ++
>  arch/ia64/include/asm/module.h |    4 
>  arch/ia64/kernel/Makefile      |    5 
>  arch/ia64/kernel/entry.S       |   37 ++++++
>  arch/ia64/kernel/ftrace.c      |  234 +++++++++++++++++++++++++++++++++++++++++
>  arch/ia64/kernel/module.c      |   15 ++
>  scripts/recordmcount.pl        |    7 +
>  8 files changed, 321 insertions(+)
> 
> Index: linux/arch/ia64/Kconfig
> ===================================================================
> --- linux.orig/arch/ia64/Kconfig	2008-12-23 13:13:17.000000000 +0800
> +++ linux/arch/ia64/Kconfig	2008-12-23 13:30:09.000000000 +0800
> @@ -21,6 +21,8 @@ config IA64
>  	select HAVE_OPROFILE
>  	select HAVE_KPROBES
>  	select HAVE_KRETPROBES
> +	select HAVE_FTRACE_MCOUNT_RECORD
> +	select HAVE_DYNAMIC_FTRACE
>  	select HAVE_FUNCTION_TRACER
>  	select HAVE_DMA_ATTRS
>  	select HAVE_KVM
> Index: linux/arch/ia64/kernel/Makefile
> ===================================================================
> --- linux.orig/arch/ia64/kernel/Makefile	2008-12-23 13:11:27.000000000 +0800
> +++ linux/arch/ia64/kernel/Makefile	2008-12-23 13:30:09.000000000 +0800
> @@ -2,6 +2,10 @@
>  # Makefile for the linux kernel.
>  #
>  
> +ifdef CONFIG_DYNAMIC_FTRACE
> +CFLAGS_REMOVE_ftrace.o = -pg
> +endif
> +
>  extra-y	:= head.o init_task.o vmlinux.lds
>  
>  obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o	\
> @@ -28,6 +32,7 @@ obj-$(CONFIG_IA64_CYCLONE)	+= cyclone.o
>  obj-$(CONFIG_CPU_FREQ)		+= cpufreq/
>  obj-$(CONFIG_IA64_MCA_RECOVERY)	+= mca_recovery.o
>  obj-$(CONFIG_KPROBES)		+= kprobes.o jprobes.o
> +obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
>  obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o crash.o
>  obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
>  obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR)	+= uncached.o
> Index: linux/arch/ia64/kernel/ftrace.c
> ===================================================================
> --- /dev/null	1970-01-01 00:00:00.000000000 +0000
> +++ linux/arch/ia64/kernel/ftrace.c	2008-12-23 13:30:09.000000000 +0800
> @@ -0,0 +1,234 @@
> +/*
> + * Dynamic function tracing support.
> + *
> + * Copyright (C) 2008 Shaohua Li <shaohua.li@intel.com>
> + *
> + * For licencing details, see COPYING.
> + *
> + * Defines low-level handling of mcount calls when the kernel
> + * is compiled with the -pg flag. When using dynamic ftrace, the
> + * mcount call-sites get patched lazily with NOP till they are
> + * enabled. All code mutation routines here take effect atomically.
> + */
> +
> +#include <linux/uaccess.h>
> +#include <linux/ftrace.h>
> +
> +#include <asm/cacheflush.h>
> +#include <asm/ftrace.h>
> +
> +static unsigned char ftrace_nop_code[MCOUNT_INSN_SIZE] = {
> +	0x00, 0x00, 0x00, 0x00, 0x01, 0x00,	/* nop.m 0x0 */
> +	0x00, 0x00, 0x00, 0x02, 0x00, 0x00,	/* nop.i 0x0 */
> +	0x00, 0x00, 0x04, 0x00,			/* nop.i 0x0 */
> +	0x01, 0x00, 0x00, 0x00, 0x01, 0x00,	/* nop.m 0x0 */
> +	0x00, 0x00, 0x00, 0x02, 0x00, 0x00,	/* nop.i 0x0 */
> +	0x00, 0x00, 0x04, 0x00			/* nop.i 0x0;; */
> +};

As I stated before, you can not have a multi-line nop. The best you can
do is add a jump over the entire call, and only if that jump is a single
instruction.

Think about it, let's do a simple scenario.

Some process is running inside the kernel and starts to execute one of
these 'nop' sequences. After executing the third nop, it is preempted
(before finishing the other nops). During this preemption, you happen to
start the tracer. Kstop_machine is called and all processes are now
stopped.  The kstop_machine changes the nop to the call of a function
tracer and resumes.  Now that original process gets scheduled back in,
but the code is no longer nops, it has actual code and a call. But we
missed the first 3 commands. Not to mention, it looks like the op codes
are not even 4 or 8 byte aligned. So we could be executing something
that is not even a command. BAM! Crash! kernel oops! ;-)
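
In code terms the problem is in the patch's ftrace_modify_code() above
(quoted here only to restate the race, not as a change):

	/* the 32-byte (two-bundle) write below is not one atomic operation,
	 * so a task preempted after executing bundle 0 of the old sequence
	 * can resume at bundle 1 of the new sequence and execute a mix of
	 * the two that was never a valid instruction stream */
	probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE);
	flush_icache_range(ip, ip + MCOUNT_INSN_SIZE);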


> +
> +static unsigned char *ftrace_nop_replace(void)
> +{
> +	return ftrace_nop_code;
> +}
> +
> +/* In IA64, each function will be added below two bundles with -pg option */
> +static unsigned char __attribute__((aligned(8)))
> +ftrace_call_code[MCOUNT_INSN_SIZE] = {
> +	0x02, 0x40, 0x31, 0x10, 0x80, 0x05, /* alloc r40=ar.pfs,12,8,0 */
> +	0xb0, 0x02, 0x00, 0x00, 0x42, 0x40, /* mov r43=r0;; */
> +	0x05, 0x00, 0xc4, 0x00,             /* mov r42° */
> +	0x11, 0x48, 0x01, 0x02, 0x00, 0x21, /* mov r41=r1 */
> +	0x00, 0x00, 0x00, 0x02, 0x00, 0x00, /* nop.i 0x0 */

If you made your own PLT stub, could you just change the one line to
jump to that stub?


> +	0x08, 0x00, 0x00, 0x50              /* br.call.sptk.many b0 = _mcount;; */
> +};
> +
> +struct ftrace_call_insn {
> +	u64 dummy1, dummy2, dummy3;
> +	u64 dummy4:64-41+13;
> +	u64 imm20:20;
> +	u64 dummy5:3;
> +	u64 sign:1;
> +	u64 dummy6:4;
> +};
> +
> +static void calculate_offset(unsigned long ip, unsigned long addr,
> +	unsigned long *offset, int *sign)
> +{
> +	long of = addr - (ip + 0x10);
> +	if (of < 0)
> +		*sign = 1;
> +	else
> +		*sign = 0;
> +	*offset = of >> 4;
> +}
> +
> +static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
> +{
> +	struct ftrace_call_insn *code = (void *)ftrace_call_code;
> +	unsigned long offset;
> +	int sign;
> +
> +	calculate_offset(ip, addr, &offset, &sign);
> +	code->sign = sign;
> +	code->imm20 = offset;
> +
> +	return ftrace_call_code;
> +}
> +
> +static int
> +ftrace_modify_code(unsigned long ip, unsigned char *old_code,
> +		   unsigned char *new_code, int do_check)
> +{
> +	unsigned char replaced[MCOUNT_INSN_SIZE];
> +
> +	/*
> +	 * Note: Due to modules and __init, code can
> +	 *  disappear and change, we need to protect against faulting
> +	 *  as well as code changing. We do this by using the
> +	 *  probe_kernel_* functions.
> +	 *
> +	 * No real locking needed, this code is run through
> +	 * kstop_machine, or before SMP starts.
> +	 */
> +
> +	if (!do_check)
> +		goto skip_check;
> +
> +	/* read the text we want to modify */
> +	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
> +		return -EFAULT;
> +
> +	/* Make sure it is what we expect it to be */
> +	if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
> +		return -EINVAL;
> +
> +skip_check:
> +	/* replace the text with the new text */
> +	if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE))
> +		return -EPERM;
> +
> +	flush_icache_range(ip, ip + MCOUNT_INSN_SIZE);
> +
> +	return 0;
> +}
> +
> +int ftrace_make_nop(struct module *mod,
> +		    struct dyn_ftrace *rec, unsigned long addr)
> +{
> +	unsigned char __attribute__((aligned(8))) replaced[MCOUNT_INSN_SIZE];
> +	struct ftrace_call_insn *call_insn = (void *)ftrace_call_code, *tmp_call;
> +	unsigned long ip = rec->ip;
> +	int is_kernel_address = core_kernel_text(ip);
> +	unsigned char *old, *new;
> +	int do_check = 0;
> +
> +	if (is_kernel_address) {
> +		do_check = 1;
> +		old = ftrace_call_replace(ip, addr);
> +		goto modify;
> +	}
> +
> +	if (!rec->arch.mod) {
> +		if (!mod) {
> +			printk(KERN_ERR "No module loaded addr=%lx\n",
> +			       addr);
> +			return -EFAULT;
> +		}
> +		rec->arch.mod = mod;
> +	} else if (mod) {
> +		if (mod != rec->arch.mod) {
> +			printk(KERN_ERR
> +			       "Record mod %p not equal to passed in mod %p\n",
> +			       rec->arch.mod, mod);
> +			return -EINVAL;
> +		}
> +		/* nothing to do if mod = rec->arch.mod */
> +	} else
> +		mod = rec->arch.mod;
> +
> +	/* the mcount call is a call to a PLT entry */
> +	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
> +		return -EFAULT;
> +	tmp_call = (void *)replaced;
> +	call_insn->sign = tmp_call->sign;
> +	call_insn->imm20 = tmp_call->imm20;
> +	if (memcmp(replaced, ftrace_call_code, MCOUNT_INSN_SIZE) != 0)
> +		return -EINVAL;
> +	old = ftrace_call_code;
> +
> +modify:
> +	new = ftrace_nop_replace();
> +	return ftrace_modify_code(ip, old, new, do_check);
> +}
> +
> +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
> +{
> +	unsigned long ip = rec->ip;
> +	int is_kernel_address = core_kernel_text(ip);
> +	struct module *mod;
> +	unsigned char *old, *new;
> +
> +	if (is_kernel_address)
> +		goto modify;
> +
> +	if (!rec->arch.mod) {
> +		printk(KERN_ERR "No module loaded\n");
> +		return -EINVAL;
> +	}
> +	mod = rec->arch.mod;
> +	if (!mod->arch.ftrace_caller_plt) {
> +		printk(KERN_ERR "No mcount PLT entry for module %s\n",
> +			mod->name);
> +		return -EINVAL;
> +	}
> +	addr = mod->arch.ftrace_caller_plt;
> +modify:
> +	old=  ftrace_nop_replace();
> +	new = ftrace_call_replace(ip, addr);
> +	return ftrace_modify_code(ip, old, new, 1);
> +}
> +
> +struct ftrace_caller_code {
> +	u64 dummy1:46;
> +	u64 imm41_18:64-46;
> +	u64 imm41_23:41-(64-46);
> +	u64 dummy2:13;
> +	u64 imm7b:7;
> +	u64 dummy3:1;
> +	u64 ic:1;
> +	u64 imm5c:5;
> +	u64 imm9d:9;
> +	u64 i:1;
> +	u64 dummy4:4;
> +};
> +
> +/* in IA64, _mcount can't directly call ftrace_stub. Only jump is ok */
> +int ftrace_update_ftrace_func(ftrace_func_t func)
> +{
> +	unsigned long ip;
> +	unsigned long addr = ((struct fnptr *)ftrace_call)->ip;
> +	struct ftrace_caller_code *code =  (void *)addr;
> +
> +	if (func = ftrace_stub)
> +		return 0;
> +	ip = ((struct fnptr *)func)->ip;
> +
> +	code->i = ip >> 63;
> +	code->imm9d = ip >> 7;
> +	code->imm5c = ip >> 16;
> +	code->ic = ip >> 21;
> +	code->imm7b = ip & 0x7f;
> +	code->imm41_18 = ip >> 22;
> +	code->imm41_23 = ip >> 40;
> +
> +	flush_icache_range(addr, addr + 16);
> +	return 0;
> +}
> +
> +/* run from kstop_machine */
> +int __init ftrace_dyn_arch_init(void *data)
> +{
> +	*(unsigned long *)data = 0;
> +
> +	return 0;
> +}
> Index: linux/scripts/recordmcount.pl
> ===================================================================
> --- linux.orig/scripts/recordmcount.pl	2008-12-23 13:24:59.000000000 +0800
> +++ linux/scripts/recordmcount.pl	2008-12-23 13:30:09.000000000 +0800
> @@ -206,6 +206,13 @@ if ($arch eq "x86_64") {
>      $alignment = 2;
>      $section_type = '%progbits';
>  
> +} elsif ($arch eq "ia64") {
> +    $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s_mcount\$";
> +    $type = "data8";
> +
> +    if ($is_module eq "0") {
> +        $cc .= " -mconstant-gp";
> +    }

I wonder if it would be better to pass in CFLAGS and then be able to
parse that instead. Then we can find out a lot more about what we are
working on.

-- Steve



>  } else {
>      die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD";
>  }
> Index: linux/arch/ia64/include/asm/ftrace.h
> ===================================================================
> --- linux.orig/arch/ia64/include/asm/ftrace.h	2008-12-23 13:13:17.000000000 +0800
> +++ linux/arch/ia64/include/asm/ftrace.h	2008-12-23 13:30:09.000000000 +0800
> @@ -7,6 +7,23 @@
>  #ifndef __ASSEMBLY__
>  extern void _mcount(unsigned long pfs, unsigned long r1, unsigned long b0, unsigned long r0);
>  #define mcount _mcount
> +#define ftrace_call ftrace_caller
> +
> +#include <asm/kprobes.h>
> +/* In IA64, MCOUNT_ADDR is set in link time, so it's not a constant at compile time */
> +#define MCOUNT_ADDR (((struct fnptr *)mcount)->ip)
> +#define FTRACE_ADDR (((struct fnptr *)ftrace_caller)->ip)
> +
> +static inline unsigned long ftrace_call_adjust(unsigned long addr)
> +{
> +	/* second bundle, insn 2 */
> +	return addr - 0x12;
> +}
> +
> +
> +struct dyn_arch_ftrace {
> +	struct module *mod;
> +};
>  
>  #endif
>  
> Index: linux/arch/ia64/include/asm/module.h
> ===================================================================
> --- linux.orig/arch/ia64/include/asm/module.h	2008-12-23 13:11:27.000000000 +0800
> +++ linux/arch/ia64/include/asm/module.h	2008-12-23 13:30:09.000000000 +0800
> @@ -21,6 +21,10 @@ struct mod_arch_specific {
>  	void *core_unw_table;		/* core unwind-table cookie returned by unwinder */
>  	void *init_unw_table;		/* init unwind-table cookie returned by unwinder */
>  	unsigned int next_got_entry;	/* index of next available got entry */
> +
> +#ifdef CONFIG_DYNAMIC_FTRACE
> +	uint64_t ftrace_caller_plt;
> +#endif
>  };
>  
>  #define Elf_Shdr	Elf64_Shdr
> Index: linux/arch/ia64/kernel/module.c
> ===================================================================
> --- linux.orig/arch/ia64/kernel/module.c	2008-12-23 13:11:27.000000000 +0800
> +++ linux/arch/ia64/kernel/module.c	2008-12-23 13:30:09.000000000 +0800
> @@ -32,6 +32,7 @@
>  #include <linux/moduleloader.h>
>  #include <linux/string.h>
>  #include <linux/vmalloc.h>
> +#include <linux/ftrace.h>
>  
>  #include <asm/patch.h>
>  #include <asm/unaligned.h>
> @@ -468,6 +469,9 @@ module_frob_arch_sections (Elf_Ehdr *ehd
>  			core_plts += count_plts(rels, numrels);
>  	}
>  
> +#ifdef CONFIG_DYNAMIC_FTRACE
> +	core_plts++;
> +#endif
>  	mod->arch.core_plt->sh_type = SHT_NOBITS;
>  	mod->arch.core_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
>  	mod->arch.core_plt->sh_addralign = 16;
> @@ -839,6 +843,17 @@ apply_relocate_add (Elf64_Shdr *sechdrs,
>  		if (ret < 0)
>  			return ret;
>  	}
> +#ifdef CONFIG_DYNAMIC_FTRACE
> +	if (!mod->arch.ftrace_caller_plt) {
> +		int ok = 1;
> +		/* fake a 'struct insn' for get_plt, which is in module core */
> +		uint64_t fake_insn = (uint64_t)mod->module_init + mod->init_size;
> +		mod->arch.ftrace_caller_plt = get_plt(mod,
> +			(struct insn *)fake_insn, (uint64_t)ftrace_caller, &ok);
> +		if (!ok)
> +			mod->arch.ftrace_caller_plt = 0;
> +	}
> +#endif
>  	return 0;
>  }
>  
> Index: linux/arch/ia64/kernel/entry.S
> ===================================================================
> --- linux.orig/arch/ia64/kernel/entry.S	2008-12-23 13:13:17.000000000 +0800
> +++ linux/arch/ia64/kernel/entry.S	2008-12-23 13:30:09.000000000 +0800
> @@ -1406,6 +1406,42 @@ GLOBAL_ENTRY(unw_init_running)
>  END(unw_init_running)
>  
>  #ifdef CONFIG_FUNCTION_TRACER
> +#ifdef CONFIG_DYNAMIC_FTRACE
> +GLOBAL_ENTRY(_mcount)
> +	br ftrace_stub
> +END(_mcount)
> +
> +.here:
> +	br.ret.sptk.many b0
> +
> +GLOBAL_ENTRY(ftrace_caller)
> +{
> +	.mlx
> +	nop.m 0x0
> +	movl r3 = .here;;
> +}
> +	alloc loc0 = ar.pfs, 4, 4, 2, 0
> +	;;
> +	mov loc1 = b0
> +	mov out0 = b0
> +	mov loc2 = r8
> +	mov loc3 = r15
> +	;;
> +	adds out0 = -MCOUNT_INSN_SIZE, out0
> +	mov out1 = in2
> +	mov b6 = r3
> +
> +	br.call.sptk.many b0 = b6
> +	;;
> +	mov ar.pfs = loc0
> +	mov b0 = loc1
> +	mov r8 = loc2
> +	mov r15 = loc3
> +	br ftrace_stub
> +	;;
> +END(ftrace_caller)
> +
> +#else
>  GLOBAL_ENTRY(_mcount)
>  	movl r2 = ftrace_stub
>  	movl r3 = ftrace_trace_function;;
> @@ -1435,6 +1471,7 @@ GLOBAL_ENTRY(_mcount)
>  	br ftrace_stub
>  	;;
>  END(_mcount)
> +#endif
>  
>  GLOBAL_ENTRY(ftrace_stub)
>  	mov r3 = b0
> 
> 


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 5/5] IA64 dynamic ftrace support
  2008-12-23  6:08 [PATCH 5/5] IA64 dynamic ftrace support Shaohua Li
  2008-12-23 14:35 ` Steven Rostedt
@ 2008-12-24  0:54 ` Shaohua Li
  2008-12-24  1:00 ` Steven Rostedt
                   ` (14 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Shaohua Li @ 2008-12-24  0:54 UTC (permalink / raw)
  To: linux-ia64

On Tue, Dec 23, 2008 at 10:35:49PM +0800, Steven Rostedt wrote:
> 
> On Tue, 2008-12-23 at 14:08 +0800, Shaohua Li wrote:
> > IA64 dynamic ftrace support. The main tricky thing here is to support module.
> > In a module, each routine's mcount call will call a PLT stub, which
> > will call kernel mcount. We can't simply make the mcount call call into
> > kernel mcount, as kernel and mocule have different gp and the
> > instruction just supports 25bit offset. So I introduced a new PLT stub,
> > which will call into kernel ftrace_caller. When module loading, all
> > mcount call will be converted to nop. When the nop is converted to call,
> > we make the call to the new PLT stub instead of old mcount PLT stub.
> >
> > Signed-off-by: Shaohua Li <shaohua.li@intel.com>
> > ---
> >  arch/ia64/Kconfig              |    2
> >  arch/ia64/include/asm/ftrace.h |   17 ++
> >  arch/ia64/include/asm/module.h |    4
> >  arch/ia64/kernel/Makefile      |    5
> >  arch/ia64/kernel/entry.S       |   37 ++++++
> >  arch/ia64/kernel/ftrace.c      |  234 +++++++++++++++++++++++++++++++++++++++++
> >  arch/ia64/kernel/module.c      |   15 ++
> >  scripts/recordmcount.pl        |    7 +
> >  8 files changed, 321 insertions(+)
> >
> > Index: linux/arch/ia64/Kconfig
> > ===================================================================
> > --- linux.orig/arch/ia64/Kconfig      2008-12-23 13:13:17.000000000 +0800
> > +++ linux/arch/ia64/Kconfig   2008-12-23 13:30:09.000000000 +0800
> > @@ -21,6 +21,8 @@ config IA64
> >       select HAVE_OPROFILE
> >       select HAVE_KPROBES
> >       select HAVE_KRETPROBES
> > +     select HAVE_FTRACE_MCOUNT_RECORD
> > +     select HAVE_DYNAMIC_FTRACE
> >       select HAVE_FUNCTION_TRACER
> >       select HAVE_DMA_ATTRS
> >       select HAVE_KVM
> > Index: linux/arch/ia64/kernel/Makefile
> > ===================================================================
> > --- linux.orig/arch/ia64/kernel/Makefile      2008-12-23 13:11:27.000000000 +0800
> > +++ linux/arch/ia64/kernel/Makefile   2008-12-23 13:30:09.000000000 +0800
> > @@ -2,6 +2,10 @@
> >  # Makefile for the linux kernel.
> >  #
> >
> > +ifdef CONFIG_DYNAMIC_FTRACE
> > +CFLAGS_REMOVE_ftrace.o = -pg
> > +endif
> > +
> >  extra-y      := head.o init_task.o vmlinux.lds
> >
> >  obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o    \
> > @@ -28,6 +32,7 @@ obj-$(CONFIG_IA64_CYCLONE)  += cyclone.o
> >  obj-$(CONFIG_CPU_FREQ)               += cpufreq/
> >  obj-$(CONFIG_IA64_MCA_RECOVERY)      += mca_recovery.o
> >  obj-$(CONFIG_KPROBES)                += kprobes.o jprobes.o
> > +obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
> >  obj-$(CONFIG_KEXEC)          += machine_kexec.o relocate_kernel.o crash.o
> >  obj-$(CONFIG_CRASH_DUMP)     += crash_dump.o
> >  obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR)        += uncached.o
> > Index: linux/arch/ia64/kernel/ftrace.c
> > ===================================================================
> > --- /dev/null 1970-01-01 00:00:00.000000000 +0000
> > +++ linux/arch/ia64/kernel/ftrace.c   2008-12-23 13:30:09.000000000 +0800
> > @@ -0,0 +1,234 @@
> > +/*
> > + * Dynamic function tracing support.
> > + *
> > + * Copyright (C) 2008 Shaohua Li <shaohua.li@intel.com>
> > + *
> > + * For licencing details, see COPYING.
> > + *
> > + * Defines low-level handling of mcount calls when the kernel
> > + * is compiled with the -pg flag. When using dynamic ftrace, the
> > + * mcount call-sites get patched lazily with NOP till they are
> > + * enabled. All code mutation routines here take effect atomically.
> > + */
> > +
> > +#include <linux/uaccess.h>
> > +#include <linux/ftrace.h>
> > +
> > +#include <asm/cacheflush.h>
> > +#include <asm/ftrace.h>
> > +
> > +static unsigned char ftrace_nop_code[MCOUNT_INSN_SIZE] = {
> > +     0x00, 0x00, 0x00, 0x00, 0x01, 0x00,     /* nop.m 0x0 */
> > +     0x00, 0x00, 0x00, 0x02, 0x00, 0x00,     /* nop.i 0x0 */
> > +     0x00, 0x00, 0x04, 0x00,                 /* nop.i 0x0 */
> > +     0x01, 0x00, 0x00, 0x00, 0x01, 0x00,     /* nop.m 0x0 */
> > +     0x00, 0x00, 0x00, 0x02, 0x00, 0x00,     /* nop.i 0x0 */
> > +     0x00, 0x00, 0x04, 0x00                  /* nop.i 0x0;; */
> > +};
> 
> As I stated before, you can not have a multi-line nop. The best you can
> do is add a jump over the entire call, and only if that jump is a single
> instruction.
> 
> Think about it, lets do a simple scenario.
> 
> Some process is running inside the kernel and starts to execute one of
> these 'nop' sequences. After executing the third nop, it is preempted
> (before finishing the other nops). During this preemption, you happen to
> start the tracer. Kstop_machine is called and all processes are now
> stopped.  The kstop_machine changes the nop to the call of a function
> tracer and resumes.  Now that original process gets scheduled back in,
> but the code is no longer nops, it has actual code and a call. But we
> missed the first 3 commands. Not to mention, it looks like the op codes
> are not even 4 or 8 byte aligned. So we could be executing something
> that is not even a command. BAM! Crash! kernel oops! ;-)
Makes sense, I'll try a jump approach.

> > +static unsigned char *ftrace_nop_replace(void)
> > +{
> > +     return ftrace_nop_code;
> > +}
> > +
> > +/* In IA64, each function will be added below two bundles with -pg option */
> > +static unsigned char __attribute__((aligned(8)))
> > +ftrace_call_code[MCOUNT_INSN_SIZE] = {
> > +     0x02, 0x40, 0x31, 0x10, 0x80, 0x05, /* alloc r40=ar.pfs,12,8,0 */
> > +     0xb0, 0x02, 0x00, 0x00, 0x42, 0x40, /* mov r43=r0;; */
> > +     0x05, 0x00, 0xc4, 0x00,             /* mov r42° */
> > +     0x11, 0x48, 0x01, 0x02, 0x00, 0x21, /* mov r41=r1 */
> > +     0x00, 0x00, 0x00, 0x02, 0x00, 0x00, /* nop.i 0x0 */
> 
> If you made your own PLT stub, could you just change the one line to
> jump to that stub?
A simple jump to the PLT stub doesn't work on IA64, as a lot of registers
need to be saved first. I'll do more investigation.

> > Index: linux/scripts/recordmcount.pl
> > ===================================================================
> > --- linux.orig/scripts/recordmcount.pl        2008-12-23 13:24:59.000000000 +0800
> > +++ linux/scripts/recordmcount.pl     2008-12-23 13:30:09.000000000 +0800
> > @@ -206,6 +206,13 @@ if ($arch eq "x86_64") {
> >      $alignment = 2;
> >      $section_type = '%progbits';
> >
> > +} elsif ($arch eq "ia64") {
> > +    $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s_mcount\$";
> > +    $type = "data8";
> > +
> > +    if ($is_module eq "0") {
> > +        $cc .= " -mconstant-gp";
> > +    }
> 
> I wonder if it would be better to pass in CFLAGS and then be able to
> parse that instead. Then we can find out a lot more about what we are
> working on.
CFLAGS seems to have a lot of useless flags here.

Thanks,
Shaohua

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 5/5] IA64 dynamic ftrace support
  2008-12-23  6:08 [PATCH 5/5] IA64 dynamic ftrace support Shaohua Li
  2008-12-23 14:35 ` Steven Rostedt
  2008-12-24  0:54 ` Shaohua Li
@ 2008-12-24  1:00 ` Steven Rostedt
  2008-12-24  8:08 ` Shaohua Li
                   ` (13 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Steven Rostedt @ 2008-12-24  1:00 UTC (permalink / raw)
  To: linux-ia64


On Wed, 2008-12-24 at 08:54 +0800, Shaohua Li wrote:

> 
> > > +static unsigned char *ftrace_nop_replace(void)
> > > +{
> > > +     return ftrace_nop_code;
> > > +}
> > > +
> > > +/* In IA64, each function will be added below two bundles with -pg option */
> > > +static unsigned char __attribute__((aligned(8)))
> > > +ftrace_call_code[MCOUNT_INSN_SIZE] = {
> > > +     0x02, 0x40, 0x31, 0x10, 0x80, 0x05, /* alloc r40=ar.pfs,12,8,0 */
> > > +     0xb0, 0x02, 0x00, 0x00, 0x42, 0x40, /* mov r43=r0;; */
> > > +     0x05, 0x00, 0xc4, 0x00,             /* mov r42° */
> > > +     0x11, 0x48, 0x01, 0x02, 0x00, 0x21, /* mov r41=r1 */
> > > +     0x00, 0x00, 0x00, 0x02, 0x00, 0x00, /* nop.i 0x0 */
> > 
> > If you made your own PLT stub, could you just change the one line to
> > jump to that stub?
> A simple jump to PLT stub doesn't work in IA64, as a lot of registers
> should be saved. I'll do more investigation.

So the registers are different depending on what function is called?
That is, you would have to change more than one of these lines if you
were to change it from calling _mcount to calling ftrace_stub?


> 
> > > Index: linux/scripts/recordmcount.pl
> > > ===================================================================
> > > --- linux.orig/scripts/recordmcount.pl        2008-12-23 13:24:59.000000000 +0800
> > > +++ linux/scripts/recordmcount.pl     2008-12-23 13:30:09.000000000 +0800
> > > @@ -206,6 +206,13 @@ if ($arch eq "x86_64") {
> > >      $alignment = 2;
> > >      $section_type = '%progbits';
> > >
> > > +} elsif ($arch eq "ia64") {
> > > +    $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s_mcount\$";
> > > +    $type = "data8";
> > > +
> > > +    if ($is_module eq "0") {
> > > +        $cc .= " -mconstant-gp";
> > > +    }
> > 
> > I wonder if it would be better to pass in CFLAGS and then be able to
> > parse that instead. Then we can find out a lot more about what we are
> > working on.
> CFLAGS seems have a lot of useless flags here.

OK, I was hoping to start making the parameters a bit smaller. But this
is fine.

-- Steve



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 5/5] IA64 dynamic ftrace support
  2008-12-23  6:08 [PATCH 5/5] IA64 dynamic ftrace support Shaohua Li
                   ` (2 preceding siblings ...)
  2008-12-24  1:00 ` Steven Rostedt
@ 2008-12-24  8:08 ` Shaohua Li
  2008-12-24 13:29 ` Steven Rostedt
                   ` (12 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Shaohua Li @ 2008-12-24  8:08 UTC (permalink / raw)
  To: linux-ia64

On Wed, Dec 24, 2008 at 09:00:24AM +0800, Steven Rostedt wrote:
> 
> On Wed, 2008-12-24 at 08:54 +0800, Shaohua Li wrote:
> 
> > 
> > > > +static unsigned char *ftrace_nop_replace(void)
> > > > +{
> > > > +     return ftrace_nop_code;
> > > > +}
> > > > +
> > > > +/* In IA64, each function will be added below two bundles with -pg option */
> > > > +static unsigned char __attribute__((aligned(8)))
> > > > +ftrace_call_code[MCOUNT_INSN_SIZE] = {
> > > > +     0x02, 0x40, 0x31, 0x10, 0x80, 0x05, /* alloc r40=ar.pfs,12,8,0 */
> > > > +     0xb0, 0x02, 0x00, 0x00, 0x42, 0x40, /* mov r43=r0;; */
> > > > +     0x05, 0x00, 0xc4, 0x00,             /* mov r42° */
> > > > +     0x11, 0x48, 0x01, 0x02, 0x00, 0x21, /* mov r41=r1 */
> > > > +     0x00, 0x00, 0x00, 0x02, 0x00, 0x00, /* nop.i 0x0 */
> > > 
> > > If you made your own PLT stub, could you just change the one line to
> > > jump to that stub?
> > A simple jump to PLT stub doesn't work in IA64, as a lot of registers
> > should be saved. I'll do more investigation.
> 
> So the registers are different depending on what function is called?
> That is, you would have to change more than one of these lines if you
> were to change it from calling _mcount to calling ftrace_stub?
Yes, the module has a different gp register from the kernel. Before jumping
into the kernel, the register must be saved.
We need to insert trampoline code into each module. The code should save the
register and jump to ftrace_caller. Adding a weak function in module.h seems
OK for the trampoline code, but I don't know how to find the function's
address.

Thanks,
Shaohua

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 5/5] IA64 dynamic ftrace support
  2008-12-23  6:08 [PATCH 5/5] IA64 dynamic ftrace support Shaohua Li
                   ` (3 preceding siblings ...)
  2008-12-24  8:08 ` Shaohua Li
@ 2008-12-24 13:29 ` Steven Rostedt
  2008-12-24 21:50 ` Keith Owens
                   ` (11 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Steven Rostedt @ 2008-12-24 13:29 UTC (permalink / raw)
  To: linux-ia64


On Wed, 2008-12-24 at 16:08 +0800, Shaohua Li wrote:
> On Wed, Dec 24, 2008 at 09:00:24AM +0800, Steven Rostedt wrote:
> > 
> > On Wed, 2008-12-24 at 08:54 +0800, Shaohua Li wrote:
> > 
> > > 
> > > > > +static unsigned char *ftrace_nop_replace(void)
> > > > > +{
> > > > > +     return ftrace_nop_code;
> > > > > +}
> > > > > +
> > > > > +/* In IA64, each function will be added below two bundles with -pg option */
> > > > > +static unsigned char __attribute__((aligned(8)))
> > > > > +ftrace_call_code[MCOUNT_INSN_SIZE] = {
> > > > > +     0x02, 0x40, 0x31, 0x10, 0x80, 0x05, /* alloc r40=ar.pfs,12,8,0 */
> > > > > +     0xb0, 0x02, 0x00, 0x00, 0x42, 0x40, /* mov r43=r0;; */
> > > > > +     0x05, 0x00, 0xc4, 0x00,             /* mov r42° */
> > > > > +     0x11, 0x48, 0x01, 0x02, 0x00, 0x21, /* mov r41=r1 */
> > > > > +     0x00, 0x00, 0x00, 0x02, 0x00, 0x00, /* nop.i 0x0 */
> > > > 
> > > > If you made your own PLT stub, could you just change the one line to
> > > > jump to that stub?
> > > A simple jump to PLT stub doesn't work in IA64, as a lot of registers
> > > should be saved. I'll do more investigation.
> > 
> > So the registers are different depending on what function is called?
> > That is, you would have to change more than one of these lines if you
> > were to change it from calling _mcount to calling ftrace_stub?
> Yes, module has different gp register against kernel. Before jump to
> kernel, the register must be saved.
> We need insert trampoline code in each module. The code should do the
> register save and jump to ftrace_caller. Add a weak function in module.h
> seems ok for the trampoline code, but I don't know how to find the
> function address.

Yes, I understand that the module and kernel code are set up differently;
PPC is pretty much the same in this respect. I'm asking whether it is easy to
change a call that goes from the module into the kernel core so that it
targets another function in the kernel core. I must apologize that I'm pretty
ignorant of how ia64 works, so there may very well be something that I do not
understand that will prevent this.

Question: if I have a call from the module to _mcount, how much has to
change in the set up of the registers to make it call ftrace_call
instead?  Perhaps we could link in a call to ftrace_call via the tricks
in recordmcount.pl to get the info needed to make that change?

-- Steve



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 5/5] IA64 dynamic ftrace support
  2008-12-23  6:08 [PATCH 5/5] IA64 dynamic ftrace support Shaohua Li
                   ` (4 preceding siblings ...)
  2008-12-24 13:29 ` Steven Rostedt
@ 2008-12-24 21:50 ` Keith Owens
  2008-12-25  1:08 ` Shaohua Li
                   ` (10 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Keith Owens @ 2008-12-24 21:50 UTC (permalink / raw)
  To: linux-ia64

On Wed, 24 Dec 2008 08:29:05 -0500, 
Steven Rostedt <srostedt@redhat.com> wrote:
>Yes I understand that the module and kernel code is set up differently,
>PPC is pretty much the same in this aspect. I'm asking if it is easy to
>change a call from the module to kernel core to another function in
>kernel core?
>
>Question: if I have a call from the module to _mcount, how much has to
>change in the set up of the registers to make it call ftrace_call
>instead?  Perhaps we could link in a call to ftrace_call via the tricks
>in recordmcount.pl to get the info needed to make that change?

The IA64 kernel uses the same gp register throughout; it is compiled
with -mconstant-gp.  So changing the target address from one kernel
function to another only requires changing the destination address in
the PLT stub, no other registers are affected.


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 5/5] IA64 dynamic ftrace support
  2008-12-23  6:08 [PATCH 5/5] IA64 dynamic ftrace support Shaohua Li
                   ` (5 preceding siblings ...)
  2008-12-24 21:50 ` Keith Owens
@ 2008-12-25  1:08 ` Shaohua Li
  2008-12-25  3:54 ` Steven Rostedt
                   ` (9 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Shaohua Li @ 2008-12-25  1:08 UTC (permalink / raw)
  To: linux-ia64

On Thu, Dec 25, 2008 at 05:50:50AM +0800, Keith Owens wrote:
> On Wed, 24 Dec 2008 08:29:05 -0500, 
> Steven Rostedt <srostedt@redhat.com> wrote:
> >Yes I understand that the module and kernel code is set up differently,
> >PPC is pretty much the same in this aspect. I'm asking if it is easy to
> >change a call from the module to kernel core to another function in
> >kernel core?
> >
> >Question: if I have a call from the module to _mcount, how much has to
> >change in the set up of the registers to make it call ftrace_call
> >instead?  Perhaps we could link in a call to ftrace_call via the tricks
> >in recordmcount.pl to get the info needed to make that change?
> 
> The IA64 kernel uses the same gp register throughout, it is compiled
> with -mconstant-gp.  So changing the target address from one kernel
> function to another only requires changing the destination address in
> the PLT stub, no other registers are affected.
Yes, for the kernel this is simple. Just changing the target address is OK,
and the change is atomic, as it's a 64-bit write. For a module it's not
simple. The module has a different gp register from the kernel. In a module,
_mcount must save its gp first and then jump into the kernel. That's why we
can't directly use a jump.

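A minimal sketch of that atomicity argument (illustrative only -- the real
PLT entry encodes its target in instruction immediates rather than in a plain
data word, and the names here are hypothetical): if a stub loads its
destination from a naturally aligned 64-bit word, retargeting it is a single
store, which IA64 performs atomically, and no icache flush is needed because
only data changes.

	struct stub_target {
		unsigned long ip;	/* destination address, 8-byte aligned */
	};

	static void retarget_stub(struct stub_target *t, unsigned long new_ip)
	{
		/* single aligned 64-bit store: a reader sees either the old
		 * or the new target, never a mix */
		*(volatile unsigned long *)&t->ip = new_ip;
	}
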
I'm considering linking some code to ftrace_call in recordmcount.pl, but
recordmcount.pl is called for each file, so if a module has multiple files
there will be some duplicate code. Another issue is how we can find the
code's address when ftrace converts the call to a nop.

Thanks,
Shaohua

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 5/5] IA64 dynamic ftrace support
  2008-12-23  6:08 [PATCH 5/5] IA64 dynamic ftrace support Shaohua Li
                   ` (6 preceding siblings ...)
  2008-12-25  1:08 ` Shaohua Li
@ 2008-12-25  3:54 ` Steven Rostedt
  2008-12-25  4:01 ` Shaohua Li
                   ` (8 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Steven Rostedt @ 2008-12-25  3:54 UTC (permalink / raw)
  To: linux-ia64


On Thu, 2008-12-25 at 09:08 +0800, Shaohua Li wrote:
> On Thu, Dec 25, 2008 at 05:50:50AM +0800, Keith Owens wrote:
> > On Wed, 24 Dec 2008 08:29:05 -0500, 
> > Steven Rostedt <srostedt@redhat.com> wrote:
> > >Yes I understand that the module and kernel code is set up differently,
> > >PPC is pretty much the same in this aspect. I'm asking if it is easy to
> > >change a call from the module to kernel core to another function in
> > >kernel core?
> > >
> > >Question: if I have a call from the module to _mcount, how much has to
> > >change in the set up of the registers to make it call ftrace_call
> > >instead?  Perhaps we could link in a call to ftrace_call via the tricks
> > >in recordmcount.pl to get the info needed to make that change?
> > 
> > The IA64 kernel uses the same gp register throughout, it is compiled
> > with -mconstant-gp.  So changing the target address from one kernel
> > function to another only requires changing the destination address in
> > the PLT stub, no other registers are affected.
> yes, for kernel, this is simple. Just changing the target address is ok,
> and the change is atomic, as it's a 64-bit write. For module, it's not
> simple. Module has different gp register against kernel. In a module,
> _mcount must save its gp first and then jump to kernel. That's why we
> can't directly use a jump.
> 
> I'm considering link some code to ftrace_call in recordmcount.pl, but
> recordmocunt.pl is called for each file. If a module has multiple files,
> there will be some duplicate code. Another issue how can we find the
> code's address when ftrace to convert code to nop.

Since this still sounds like PPC actions, I'll try to show a pseudo code
style example.

I'm assuming that a call to mcount from a module looks something like
this:

	save module gp
	load kernel gp
	jump to mcount (or to a mcount trampoline)

Since mcount and ftrace_caller share the same gp, could we not just
change that jmp to ftrace_caller instead? (or to a trampoline to
ftrace_caller as we do in PPC).

-- Steve



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 5/5] IA64 dynamic ftrace support
  2008-12-23  6:08 [PATCH 5/5] IA64 dynamic ftrace support Shaohua Li
                   ` (7 preceding siblings ...)
  2008-12-25  3:54 ` Steven Rostedt
@ 2008-12-25  4:01 ` Shaohua Li
  2008-12-26  2:42 ` Shaohua Li
                   ` (7 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Shaohua Li @ 2008-12-25  4:01 UTC (permalink / raw)
  To: linux-ia64

On Thu, Dec 25, 2008 at 11:54:33AM +0800, Steven Rostedt wrote:
> 
> On Thu, 2008-12-25 at 09:08 +0800, Shaohua Li wrote:
> > On Thu, Dec 25, 2008 at 05:50:50AM +0800, Keith Owens wrote:
> > > On Wed, 24 Dec 2008 08:29:05 -0500, 
> > > Steven Rostedt <srostedt@redhat.com> wrote:
> > > >Yes I understand that the module and kernel code is set up differently,
> > > >PPC is pretty much the same in this aspect. I'm asking if it is easy to
> > > >change a call from the module to kernel core to another function in
> > > >kernel core?
> > > >
> > > >Question: if I have a call from the module to _mcount, how much has to
> > > >change in the set up of the registers to make it call ftrace_call
> > > >instead?  Perhaps we could link in a call to ftrace_call via the tricks
> > > >in recordmcount.pl to get the info needed to make that change?
> > > 
> > > The IA64 kernel uses the same gp register throughout, it is compiled
> > > with -mconstant-gp.  So changing the target address from one kernel
> > > function to another only requires changing the destination address in
> > > the PLT stub, no other registers are affected.
> > yes, for kernel, this is simple. Just changing the target address is ok,
> > and the change is atomic, as it's a 64-bit write. For module, it's not
> > simple. Module has different gp register against kernel. In a module,
> > _mcount must save its gp first and then jump to kernel. That's why we
> > can't directly use a jump.
> > 
> > I'm considering link some code to ftrace_call in recordmcount.pl, but
> > recordmocunt.pl is called for each file. If a module has multiple files,
> > there will be some duplicate code. Another issue how can we find the
> > code's address when ftrace to convert code to nop.
> 
> Since this still sounds like PPC actions, I'll try to show a pseudo code
> style example.
> 
> I'm assuming that a call to mcount from a module looks something like
> this:
> 
> 	save module gp
> 	load kernel gp
> 	jump to mcount (or to a mcount trampoline)
> 
> Since mcount and ftrace_caller share the same gp, could we not just
> change that jmp to ftrace_caller instead? (or to a trampoline to
> ftrace_caller as we do in PPC).
Because of the 25-bit limit, we must use an mcount trampoline. On IA64, the
PLT stub does:
load kernel gp
jump to mcount
The PLT stub doesn't save gp, so it isn't suitable as the trampoline. This is
what I meant when I said we need to add separate trampoline code to the
module. Loading a module on IA64 can only add PLT stubs, so we need another
approach for the trampoline code.

Thanks,
Shaohua

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 5/5] IA64 dynamic ftrace support
  2008-12-23  6:08 [PATCH 5/5] IA64 dynamic ftrace support Shaohua Li
                   ` (8 preceding siblings ...)
  2008-12-25  4:01 ` Shaohua Li
@ 2008-12-26  2:42 ` Shaohua Li
  2008-12-31  9:11 ` Shaohua Li
                   ` (6 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Shaohua Li @ 2008-12-26  2:42 UTC (permalink / raw)
  To: linux-ia64

On Thu, Dec 25, 2008 at 12:01:11PM +0800, Shaohua Li wrote:
> On Thu, Dec 25, 2008 at 11:54:33AM +0800, Steven Rostedt wrote:
> > 
> > On Thu, 2008-12-25 at 09:08 +0800, Shaohua Li wrote:
> > > On Thu, Dec 25, 2008 at 05:50:50AM +0800, Keith Owens wrote:
> > > > On Wed, 24 Dec 2008 08:29:05 -0500, 
> > > > Steven Rostedt <srostedt@redhat.com> wrote:
> > > > >Yes I understand that the module and kernel code is set up differently,
> > > > >PPC is pretty much the same in this aspect. I'm asking if it is easy to
> > > > >change a call from the module to kernel core to another function in
> > > > >kernel core?
> > > > >
> > > > >Question: if I have a call from the module to _mcount, how much has to
> > > > >change in the set up of the registers to make it call ftrace_call
> > > > >instead?  Perhaps we could link in a call to ftrace_call via the tricks
> > > > >in recordmcount.pl to get the info needed to make that change?
> > > > 
> > > > The IA64 kernel uses the same gp register throughout, it is compiled
> > > > with -mconstant-gp.  So changing the target address from one kernel
> > > > function to another only requires changing the destination address in
> > > > the PLT stub, no other registers are affected.
> > > yes, for kernel, this is simple. Just changing the target address is ok,
> > > and the change is atomic, as it's a 64-bit write. For module, it's not
> > > simple. Module has different gp register against kernel. In a module,
> > > _mcount must save its gp first and then jump to kernel. That's why we
> > > can't directly use a jump.
> > > 
> > > I'm considering link some code to ftrace_call in recordmcount.pl, but
> > > recordmocunt.pl is called for each file. If a module has multiple files,
> > > there will be some duplicate code. Another issue how can we find the
> > > code's address when ftrace to convert code to nop.
> > 
> > Since this still sounds like PPC actions, I'll try to show a pseudo code
> > style example.
> > 
> > I'm assuming that a call to mcount from a module looks something like
> > this:
> > 
> > 	save module gp
> > 	load kernel gp
> > 	jump to mcount (or to a mcount trampoline)
> > 
> > Since mcount and ftrace_caller share the same gp, could we not just
> > change that jmp to ftrace_caller instead? (or to a trampoline to
> > ftrace_caller as we do in PPC).
> As the 25bit limit, we must use a mcount trampoline. In IA64, PLT stub
> will do:
> load kernel gp
> jump to mcount
> the PLT stub doesn't save gp, so it's not ok for the trampoline. This is
> what I said We need add another trampoline code to module. Loading
> module in IA64 only can add PLT stub, we need other approach for the
> trampoline code.
Tony:
The mcount call code is:
	alloc r40=ar.pfs,12,8,0
	mov r43=r0;;
	mov r42=b0
	mov r41=r1
	nop.i 0x0
	br.call.sptk.many b0 = _mcount;;

To convert it to nop, we can change it to:
	alloc r40=ar.pfs,12,8,0
	mov r43=r0;;
	mov r42=b0
	mov r41=r1
	nop.i 0x0
	nop.b 0x0

This code has no impact on later instructions. Could we treat this code as a
nop? It still executes some instructions; I'm not sure whether that is too
heavy.

Another, lighter approach is to make the nop code:
	nop.m 0x0
	mov r3 = ip
	nop.b 0x0
	nop.m 0x0
	nop.i 0x0
	nop.i 0x0

We can change it back to:
	nop.m 0x0
	mov r3 = ip
	br.sptk.many trampoline
	nop.m 0x0
	nop.i 0x0
	nop.i 0x0

In the trampoline code, we then call _mcount. This approach still needs one
extra instruction executed (the second instruction) even for the nop case.
It should be lighter, but more complex (it adds trampoline code to the module).

Both methods should be OK for dyn-ftrace, as we only change one instruction
at a time and the instruction lies within one aligned long.

Thanks,
Shaohua

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 5/5] IA64 dynamic ftrace support
  2008-12-23  6:08 [PATCH 5/5] IA64 dynamic ftrace support Shaohua Li
                   ` (9 preceding siblings ...)
  2008-12-26  2:42 ` Shaohua Li
@ 2008-12-31  9:11 ` Shaohua Li
  2009-01-06  0:42 ` Luck, Tony
                   ` (5 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Shaohua Li @ 2008-12-31  9:11 UTC (permalink / raw)
  To: linux-ia64

On Tue, 2008-12-23 at 14:08 +0800, Shaohua Li wrote:
> IA64 dynamic ftrace support. The main tricky thing here is to support module.
> In a module, each routine's mcount call will call a PLT stub, which
> will call kernel mcount. We can't simply make the mcount call call into
> kernel mcount, as kernel and mocule have different gp and the
> instruction just supports 25bit offset. So I introduced a new PLT stub,
> which will call into kernel ftrace_caller. When module loading, all
> mcount call will be converted to nop. When the nop is converted to call,
> we make the call to the new PLT stub instead of old mcount PLT stub.
This is the new implementation for dyn-ftrace. The previous 4 patches are
still required and can be applied to the latest kernel too. Just replace
patch 5 with this one. This new patch should address Steven's concern.

IA64 dynamic ftrace support.
The original _mcount stub for each function is like:
	alloc r40=ar.pfs,12,8,0
	mov r43=r0;;
	mov r42=b0
	mov r41=r1
	nop.i 0x0
	br.call.sptk.many b0 = _mcount;;

The patch converts it to the following for a nop:
	[MII] nop.m 0x0
	mov r3=ip
	nop.i 0x0
	[MLX] nop.m 0x0
	nop.x 0x0;;
This isn't a complete nop, as there is one instruction 'mov r3=ip', but
it should be light and harmless for the code that follows it.

And below is for a call:
	[MII] nop.m 0x0
	mov r3=ip
	nop.i 0x0
	[MLX] nop.m 0x0
	brl.many .;;
In this way, only one instruction is changed to convert the code between nop
and call. This should meet dyn-ftrace's requirement.
But this requires the CPU to support the brl instruction, so dyn-ftrace isn't
supported on old Itanium systems. I assume there are quite few such old
systems still running.
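
For reference, the brl displacement encoding that makes this single-instruction
switch possible can be written out as plain arithmetic. The sketch below simply
mirrors what ftrace_call_replace() in the patch does with bitfields, assuming
the usual long-branch layout (an i bit, a 39-bit imm39 field and a 20-bit
imm20b field holding a bundle-granular, IP-relative displacement); the helper
name is illustrative only:

	/* Split the IP-relative displacement to "addr" into the brl fields.
	 * "ip + 0x10" is the bundle holding the brl, following the
	 * arithmetic used by ftrace_call_replace() in the patch.
	 */
	static void brl_split(unsigned long ip, unsigned long addr,
			      unsigned long *i, unsigned long *imm39,
			      unsigned long *imm20b)
	{
		unsigned long offset = addr - (ip + 0x10);

		*imm20b = (offset >> 4) & ((1UL << 20) - 1);
		*imm39  = (offset >> 24) & ((1UL << 39) - 1);
		*i      = offset >> 63;
	}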

Signed-off-by: Shaohua Li <shaohua.li@intel.com>
---
 arch/ia64/Kconfig              |    2 
 arch/ia64/include/asm/ftrace.h |   13 ++
 arch/ia64/kernel/Makefile      |    5 
 arch/ia64/kernel/entry.S       |   52 +++++++++
 arch/ia64/kernel/ftrace.c      |  235 +++++++++++++++++++++++++++++++++++++++++
 scripts/recordmcount.pl        |    7 +
 6 files changed, 314 insertions(+)

Index: linux/arch/ia64/Kconfig
===================================================================
--- linux.orig/arch/ia64/Kconfig	2008-12-31 10:30:26.000000000 +0800
+++ linux/arch/ia64/Kconfig	2008-12-31 16:55:05.000000000 +0800
@@ -21,6 +21,8 @@ config IA64
 	select HAVE_OPROFILE
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
+	select HAVE_FTRACE_MCOUNT_RECORD
+	select HAVE_DYNAMIC_FTRACE if (!ITANIUM)
 	select HAVE_FUNCTION_TRACER
 	select HAVE_DMA_ATTRS
 	select HAVE_KVM
Index: linux/arch/ia64/kernel/Makefile
===================================================================
--- linux.orig/arch/ia64/kernel/Makefile	2008-12-31 10:30:26.000000000 +0800
+++ linux/arch/ia64/kernel/Makefile	2008-12-31 10:30:41.000000000 +0800
@@ -2,6 +2,10 @@
 # Makefile for the linux kernel.
 #
 
+ifdef CONFIG_DYNAMIC_FTRACE
+CFLAGS_REMOVE_ftrace.o = -pg
+endif
+
 extra-y	:= head.o init_task.o vmlinux.lds
 
 obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o	\
@@ -28,6 +32,7 @@ obj-$(CONFIG_IA64_CYCLONE)	+= cyclone.o
 obj-$(CONFIG_CPU_FREQ)		+= cpufreq/
 obj-$(CONFIG_IA64_MCA_RECOVERY)	+= mca_recovery.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o jprobes.o
+obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
 obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o crash.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
 obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR)	+= uncached.o
Index: linux/arch/ia64/kernel/ftrace.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux/arch/ia64/kernel/ftrace.c	2008-12-31 16:58:56.000000000 +0800
@@ -0,0 +1,235 @@
+/*
+ * Dynamic function tracing support.
+ *
+ * Copyright (C) 2008 Shaohua Li <shaohua.li@intel.com>
+ *
+ * For licencing details, see COPYING.
+ *
+ * Defines low-level handling of mcount calls when the kernel
+ * is compiled with the -pg flag. When using dynamic ftrace, the
+ * mcount call-sites get patched lazily with NOP till they are
+ * enabled. All code mutation routines here take effect atomically.
+ */
+
+#include <linux/uaccess.h>
+#include <linux/ftrace.h>
+
+#include <asm/cacheflush.h>
+#include <asm/patch.h>
+
+/* With the -pg option, the following two bundles are added to each function */
+static unsigned char __attribute__((aligned(8)))
+ftrace_orig_code[MCOUNT_INSN_SIZE] = {
+	0x02, 0x40, 0x31, 0x10, 0x80, 0x05, /* alloc r40=ar.pfs,12,8,0 */
+	0xb0, 0x02, 0x00, 0x00, 0x42, 0x40, /* mov r43=r0;; */
+	0x05, 0x00, 0xc4, 0x00,             /* mov r42=b0 */
+	0x11, 0x48, 0x01, 0x02, 0x00, 0x21, /* mov r41=r1 */
+	0x00, 0x00, 0x00, 0x02, 0x00, 0x00, /* nop.i 0x0 */
+	0x08, 0x00, 0x00, 0x50              /* br.call.sptk.many b0 = _mcount;; */
+};
+
+struct ftrace_orig_insn {
+	u64 dummy1, dummy2, dummy3;
+	u64 dummy4:64-41+13;
+	u64 imm20:20;
+	u64 dummy5:3;
+	u64 sign:1;
+	u64 dummy6:4;
+};
+
+/* mcount stub will be converted below for nop */
+static unsigned char ftrace_nop_code[MCOUNT_INSN_SIZE] = {
+	0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MII] nop.m 0x0 */
+	0x30, 0x00, 0x00, 0x60, 0x00, 0x00, /* mov r3=ip */
+	0x00, 0x00, 0x04, 0x00,             /* nop.i 0x0 */
+	0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0x0 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* nop.x 0x0;; */
+	0x00, 0x00, 0x04, 0x00
+};
+
+static unsigned char *ftrace_nop_replace(void)
+{
+	return ftrace_nop_code;
+}
+
+/*
+ * mcount stub will be converted below for call
+ * Note: only the last instruction differs from the nop version
+ */
+static unsigned char __attribute__((aligned(8)))
+ftrace_call_code[MCOUNT_INSN_SIZE] = {
+	0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MII] nop.m 0x0 */
+	0x30, 0x00, 0x00, 0x60, 0x00, 0x00, /* mov r3=ip */
+	0x00, 0x00, 0x04, 0x00,             /* nop.i 0x0 */
+	0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0x0 */
+	0xff, 0xff, 0xff, 0xff, 0x7f, 0x00, /* brl.many .;;*/
+	0xf8, 0xff, 0xff, 0xc8
+};
+
+struct ftrace_call_insn {
+	u64 dummy1, dummy2;
+	u64 dummy3:48;
+	u64 imm39_l:16;
+	u64 imm39_h:23;
+	u64 dummy4:13;
+	u64 imm20:20;
+	u64 dummy5:3;
+	u64 i:1;
+	u64 dummy6:4;
+};
+
+static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
+{
+	struct ftrace_call_insn *code = (void *)ftrace_call_code;
+	unsigned long offset = addr - (ip + 0x10);
+
+	code->imm39_l = offset >> 24;
+	code->imm39_h = offset >> 40;
+	code->imm20 = offset >> 4;
+	code->i = offset >> 63;
+	return ftrace_call_code;
+}
+
+static int
+ftrace_modify_code(unsigned long ip, unsigned char *old_code,
+		   unsigned char *new_code, int do_check)
+{
+	unsigned char replaced[MCOUNT_INSN_SIZE];
+
+	/*
+	 * Note: Due to modules and __init, code can
+	 *  disappear and change, we need to protect against faulting
+	 *  as well as code changing. We do this by using the
+	 *  probe_kernel_* functions.
+	 *
+	 * No real locking needed, this code is run through
+	 * kstop_machine, or before SMP starts.
+	 */
+
+	if (!do_check)
+		goto skip_check;
+
+	/* read the text we want to modify */
+	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+		return -EFAULT;
+
+	/* Make sure it is what we expect it to be */
+	if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
+		return -EINVAL;
+
+skip_check:
+	/* replace the text with the new text */
+	if (probe_kernel_write(((void *)ip), new_code, MCOUNT_INSN_SIZE))
+		return -EPERM;
+	flush_icache_range(ip, ip + MCOUNT_INSN_SIZE);
+
+	return 0;
+}
+
+static int ftrace_make_nop_check(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned char __attribute__((aligned(8))) replaced[MCOUNT_INSN_SIZE];
+	unsigned long ip = rec->ip;
+
+	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+		return -EFAULT;
+	if (rec->flags & FTRACE_FL_CONVERTED) {
+		struct ftrace_call_insn *call_insn, *tmp_call;
+
+		call_insn = (void *)ftrace_call_code;
+		tmp_call = (void *)replaced;
+		call_insn->imm39_l = tmp_call->imm39_l;
+		call_insn->imm39_h = tmp_call->imm39_h;
+		call_insn->imm20 = tmp_call->imm20;
+		call_insn->i = tmp_call->i;
+		if (memcmp(replaced, ftrace_call_code, MCOUNT_INSN_SIZE) != 0)
+			return -EINVAL;
+		return 0;
+	} else {
+		struct ftrace_orig_insn *call_insn, *tmp_call;
+
+		call_insn = (void *)ftrace_orig_code;
+		tmp_call = (void *)replaced;
+		call_insn->sign = tmp_call->sign;
+		call_insn->imm20 = tmp_call->imm20;
+		if (memcmp(replaced, ftrace_orig_code, MCOUNT_INSN_SIZE) != 0)
+			return -EINVAL;
+		return 0;
+	}
+}
+
+int ftrace_make_nop(struct module *mod,
+		    struct dyn_ftrace *rec, unsigned long addr)
+{
+	int ret;
+	char *new;
+
+	ret = ftrace_make_nop_check(rec, addr);
+	if (ret)
+		return ret;
+	new = ftrace_nop_replace();
+	return ftrace_modify_code(rec->ip, NULL, new, 0);
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long ip = rec->ip;
+	unsigned char *old, *new;
+
+	old = ftrace_nop_replace();
+	new = ftrace_call_replace(ip, addr);
+	return ftrace_modify_code(ip, old, new, 1);
+}
+
+struct ftrace_caller_code {
+	u64 dummy1:46;
+	u64 imm41_18:64-46;
+	u64 imm41_23:41-(64-46);
+	u64 dummy2:13;
+	u64 imm7b:7;
+	u64 dummy3:1;
+	u64 ic:1;
+	u64 imm5c:5;
+	u64 imm9d:9;
+	u64 i:1;
+	u64 dummy4:4;
+};
+
+/* in IA64, _mcount can't directly call ftrace_stub. Only jump is ok */
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+	unsigned long ip;
+	unsigned long addr = ((struct fnptr *)ftrace_call)->ip;
+	struct ftrace_caller_code *code =  (void *)addr;
+
+	if (func == ftrace_stub)
+		return 0;
+	ip = ((struct fnptr *)func)->ip;
+
+	code->i = ip >> 63;
+	code->imm9d = ip >> 7;
+	code->imm5c = ip >> 16;
+	code->ic = ip >> 21;
+	code->imm7b = ip & 0x7f;
+	code->imm41_18 = ip >> 22;
+	code->imm41_23 = ip >> 40;
+
+	flush_icache_range(addr, addr + 16);
+	return 0;
+}
+
+extern void ftrace_patch_gp(void);
+
+/* run from kstop_machine */
+int __init ftrace_dyn_arch_init(void *data)
+{
+	unsigned long ip = ((struct fnptr *)ftrace_patch_gp)->ip;
+	unsigned long gp = ((struct fnptr *)ftrace_dyn_arch_init)->gp;
+
+	ia64_patch_imm64(ip + 2, gp);
+	flush_icache_range(ip, ip + 16);
+
+	*(unsigned long *)data = 0;
+
+	return 0;
+}
Index: linux/scripts/recordmcount.pl
===================================================================
--- linux.orig/scripts/recordmcount.pl	2008-12-31 10:30:26.000000000 +0800
+++ linux/scripts/recordmcount.pl	2008-12-31 10:30:41.000000000 +0800
@@ -206,6 +206,13 @@ if ($arch eq "x86_64") {
     $alignment = 2;
     $section_type = '%progbits';
 
+} elsif ($arch eq "ia64") {
+    $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s_mcount\$";
+    $type = "data8";
+
+    if ($is_module eq "0") {
+        $cc .= " -mconstant-gp";
+    }
 } else {
     die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD";
 }
Index: linux/arch/ia64/include/asm/ftrace.h
===================================================================
--- linux.orig/arch/ia64/include/asm/ftrace.h	2008-12-31 10:30:26.000000000 +0800
+++ linux/arch/ia64/include/asm/ftrace.h	2008-12-31 10:30:41.000000000 +0800
@@ -8,6 +8,19 @@
 extern void _mcount(unsigned long pfs, unsigned long r1, unsigned long b0, unsigned long r0);
 #define mcount _mcount
 
+#include <asm/kprobes.h>
+/* In IA64, MCOUNT_ADDR is set at link time, so it's not a constant at compile time */
+#define MCOUNT_ADDR (((struct fnptr *)mcount)->ip)
+#define FTRACE_ADDR (((struct fnptr *)ftrace_caller)->ip)
+
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+	/* second bundle, insn 2 */
+	return addr - 0x12;
+}
+
+struct dyn_arch_ftrace {
+};
 #endif
 
 #endif /* CONFIG_FUNCTION_TRACER */
Index: linux/arch/ia64/kernel/entry.S
===================================================================
--- linux.orig/arch/ia64/kernel/entry.S	2008-12-31 10:30:26.000000000 +0800
+++ linux/arch/ia64/kernel/entry.S	2008-12-31 15:26:21.000000000 +0800
@@ -1406,6 +1406,57 @@ GLOBAL_ENTRY(unw_init_running)
 END(unw_init_running)
 
 #ifdef CONFIG_FUNCTION_TRACER
+#ifdef CONFIG_DYNAMIC_FTRACE
+GLOBAL_ENTRY(_mcount)
+	br ftrace_stub
+END(_mcount)
+
+.here:
+	br.ret.sptk.many b0
+
+GLOBAL_ENTRY(ftrace_caller)
+	alloc out0 = ar.pfs, 8, 0, 4, 0
+	mov out3 = r0
+	;;
+	mov out2 = b0
+	add r3 = 0x20, r3
+	mov out1 = r1;
+	br.call.sptk.many b0 = ftrace_patch_gp
+	//this might be called from module, so we must patch gp
+.global ftrace_patch_gp;
+ftrace_patch_gp:
+	movl gp=0
+	mov b0 = r3
+	;;
+.global ftrace_call;
+ftrace_call:
+{
+	.mlx
+	nop.m 0x0
+	movl r3 = .here;;
+}
+	alloc loc0 = ar.pfs, 4, 4, 2, 0
+	;;
+	mov loc1 = b0
+	mov out0 = b0
+	mov loc2 = r8
+	mov loc3 = r15
+	;;
+	adds out0 = -MCOUNT_INSN_SIZE, out0
+	mov out1 = in2
+	mov b6 = r3
+
+	br.call.sptk.many b0 = b6
+	;;
+	mov ar.pfs = loc0
+	mov b0 = loc1
+	mov r8 = loc2
+	mov r15 = loc3
+	br ftrace_stub
+	;;
+END(ftrace_caller)
+
+#else
 GLOBAL_ENTRY(_mcount)
 	movl r2 = ftrace_stub
 	movl r3 = ftrace_trace_function;;
@@ -1435,6 +1486,7 @@ GLOBAL_ENTRY(_mcount)
 	br ftrace_stub
 	;;
 END(_mcount)
+#endif
 
 GLOBAL_ENTRY(ftrace_stub)
 	mov r3 = b0



^ permalink raw reply	[flat|nested] 18+ messages in thread

* RE: [PATCH 5/5] IA64 dynamic ftrace support
  2008-12-23  6:08 [PATCH 5/5] IA64 dynamic ftrace support Shaohua Li
                   ` (10 preceding siblings ...)
  2008-12-31  9:11 ` Shaohua Li
@ 2009-01-06  0:42 ` Luck, Tony
  2009-01-08  8:05 ` Shaohua Li
                   ` (4 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Luck, Tony @ 2009-01-06  0:42 UTC (permalink / raw)
  To: linux-ia64

> In this way, only one instruction is changed to convert code between nop
> and call. This should meet dyn-ftrace's requirement.
> But this requires CPU support brl instruction, so dyn-ftrace isn't
> supported for old Itanium system. Assume there are quite few such old
> system running.

Perhaps very few old Merced systems are used for development
and performance tuning (the kind of work where ftrace is useful).
So not having ftrace support on them might not be too big a loss.

-Tony

^ permalink raw reply	[flat|nested] 18+ messages in thread

* RE: [PATCH 5/5] IA64 dynamic ftrace support
  2008-12-23  6:08 [PATCH 5/5] IA64 dynamic ftrace support Shaohua Li
                   ` (11 preceding siblings ...)
  2009-01-06  0:42 ` Luck, Tony
@ 2009-01-08  8:05 ` Shaohua Li
  2009-01-08 17:08 ` Steven Rostedt
                   ` (3 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Shaohua Li @ 2009-01-08  8:05 UTC (permalink / raw)
  To: linux-ia64

On Tue, 2009-01-06 at 08:42 +0800, Luck, Tony wrote:
> > In this way, only one instruction is changed to convert code between nop
> > and call. This should meet dyn-ftrace's requirement.
> > But this requires CPU support brl instruction, so dyn-ftrace isn't
> > supported for old Itanium system. Assume there are quite few such old
> > system running.
> 
> Perhaps very few old Merced systems are used for development
> and performance tuning (the kind of work where ftrace is useful).
> So not having ftrace support on them might not be too big a loss.
Tony,
I simplified the patch to make it cleaner; there is no functional change from
the one I sent last week. Please give it a try and consider merging it if
possible.

Thanks,
Shaohua



IA64 dynamic ftrace support.
The original _mcount stub for each function is like:
	alloc r40=ar.pfs,12,8,0
	mov r43=r0;;
	mov r42=b0
	mov r41=r1
	nop.i 0x0
	br.call.sptk.many b0 = _mcount;;

The patch converts it to the following for a nop:
	[MII] nop.m 0x0
	mov r3=ip
	nop.i 0x0
	[MLX] nop.m 0x0
	nop.x 0x0;;
This isn't a complete nop, as there is one instruction 'mov r3=ip', but
it should be light and harmless for the code that follows it.

And below is for a call:
	[MII] nop.m 0x0
	mov r3=ip
	nop.i 0x0
	[MLX] nop.m 0x0
	brl.many .;;
In this way, only one instruction is changed to convert the code between nop
and call. This should meet dyn-ftrace's requirement.
But this requires the CPU to support the brl instruction, so dyn-ftrace isn't
supported on old Itanium systems. I assume there are quite few such old
systems still running.

Signed-off-by: Shaohua Li <shaohua.li@intel.com>
---
 arch/ia64/Kconfig              |    2 
 arch/ia64/include/asm/ftrace.h |   13 ++
 arch/ia64/kernel/Makefile      |    5 
 arch/ia64/kernel/entry.S       |   51 ++++++++++
 arch/ia64/kernel/ftrace.c      |  206 +++++++++++++++++++++++++++++++++++++++++
 scripts/recordmcount.pl        |    7 +
 6 files changed, 284 insertions(+)

Index: linux/arch/ia64/Kconfig
===================================================================
--- linux.orig/arch/ia64/Kconfig	2009-01-08 14:50:50.000000000 +0800
+++ linux/arch/ia64/Kconfig	2009-01-08 14:50:58.000000000 +0800
@@ -21,6 +21,8 @@ config IA64
 	select HAVE_OPROFILE
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
+	select HAVE_FTRACE_MCOUNT_RECORD
+	select HAVE_DYNAMIC_FTRACE if (!ITANIUM)
 	select HAVE_FUNCTION_TRACER
 	select HAVE_DMA_ATTRS
 	select HAVE_KVM
Index: linux/arch/ia64/kernel/Makefile
===================================================================
--- linux.orig/arch/ia64/kernel/Makefile	2009-01-06 15:43:39.000000000 +0800
+++ linux/arch/ia64/kernel/Makefile	2009-01-08 14:50:58.000000000 +0800
@@ -2,6 +2,10 @@
 # Makefile for the linux kernel.
 #
 
+ifdef CONFIG_DYNAMIC_FTRACE
+CFLAGS_REMOVE_ftrace.o = -pg
+endif
+
 extra-y	:= head.o init_task.o vmlinux.lds
 
 obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o	\
@@ -28,6 +32,7 @@ obj-$(CONFIG_IA64_CYCLONE)	+= cyclone.o
 obj-$(CONFIG_CPU_FREQ)		+= cpufreq/
 obj-$(CONFIG_IA64_MCA_RECOVERY)	+= mca_recovery.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o jprobes.o
+obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
 obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o crash.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
 obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR)	+= uncached.o
Index: linux/arch/ia64/kernel/ftrace.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux/arch/ia64/kernel/ftrace.c	2009-01-08 15:52:09.000000000 +0800
@@ -0,0 +1,206 @@
+/*
+ * Dynamic function tracing support.
+ *
+ * Copyright (C) 2008 Shaohua Li <shaohua.li@intel.com>
+ *
+ * For licencing details, see COPYING.
+ *
+ * Defines low-level handling of mcount calls when the kernel
+ * is compiled with the -pg flag. When using dynamic ftrace, the
+ * mcount call-sites get patched lazily with NOP till they are
+ * enabled. All code mutation routines here take effect atomically.
+ */
+
+#include <linux/uaccess.h>
+#include <linux/ftrace.h>
+
+#include <asm/cacheflush.h>
+#include <asm/patch.h>
+
+/* With the -pg option, the following two bundles are added to each function */
+static unsigned char __attribute__((aligned(8)))
+ftrace_orig_code[MCOUNT_INSN_SIZE] = {
+	0x02, 0x40, 0x31, 0x10, 0x80, 0x05, /* alloc r40=ar.pfs,12,8,0 */
+	0xb0, 0x02, 0x00, 0x00, 0x42, 0x40, /* mov r43=r0;; */
+	0x05, 0x00, 0xc4, 0x00,             /* mov r42=b0 */
+	0x11, 0x48, 0x01, 0x02, 0x00, 0x21, /* mov r41=r1 */
+	0x00, 0x00, 0x00, 0x02, 0x00, 0x00, /* nop.i 0x0 */
+	0x08, 0x00, 0x00, 0x50              /* br.call.sptk.many b0 = _mcount;; */
+};
+
+struct ftrace_orig_insn {
+	u64 dummy1, dummy2, dummy3;
+	u64 dummy4:64-41+13;
+	u64 imm20:20;
+	u64 dummy5:3;
+	u64 sign:1;
+	u64 dummy6:4;
+};
+
+/* mcount stub will be converted below for nop */
+static unsigned char ftrace_nop_code[MCOUNT_INSN_SIZE] = {
+	0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MII] nop.m 0x0 */
+	0x30, 0x00, 0x00, 0x60, 0x00, 0x00, /* mov r3=ip */
+	0x00, 0x00, 0x04, 0x00,             /* nop.i 0x0 */
+	0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0x0 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* nop.x 0x0;; */
+	0x00, 0x00, 0x04, 0x00
+};
+
+static unsigned char *ftrace_nop_replace(void)
+{
+	return ftrace_nop_code;
+}
+
+/*
+ * mcount stub will be converted below for call
+ * Note: only the last instruction differs from the nop version
+ */
+static unsigned char __attribute__((aligned(8)))
+ftrace_call_code[MCOUNT_INSN_SIZE] = {
+	0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MII] nop.m 0x0 */
+	0x30, 0x00, 0x00, 0x60, 0x00, 0x00, /* mov r3=ip */
+	0x00, 0x00, 0x04, 0x00,             /* nop.i 0x0 */
+	0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0x0 */
+	0xff, 0xff, 0xff, 0xff, 0x7f, 0x00, /* brl.many .;;*/
+	0xf8, 0xff, 0xff, 0xc8
+};
+
+struct ftrace_call_insn {
+	u64 dummy1, dummy2;
+	u64 dummy3:48;
+	u64 imm39_l:16;
+	u64 imm39_h:23;
+	u64 dummy4:13;
+	u64 imm20:20;
+	u64 dummy5:3;
+	u64 i:1;
+	u64 dummy6:4;
+};
+
+static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
+{
+	struct ftrace_call_insn *code = (void *)ftrace_call_code;
+	unsigned long offset = addr - (ip + 0x10);
+
+	code->imm39_l = offset >> 24;
+	code->imm39_h = offset >> 40;
+	code->imm20 = offset >> 4;
+	code->i = offset >> 63;
+	return ftrace_call_code;
+}
+
+static int
+ftrace_modify_code(unsigned long ip, unsigned char *old_code,
+		   unsigned char *new_code, int do_check)
+{
+	unsigned char replaced[MCOUNT_INSN_SIZE];
+
+	/*
+	 * Note: Due to modules and __init, code can
+	 *  disappear and change, we need to protect against faulting
+	 *  as well as code changing. We do this by using the
+	 *  probe_kernel_* functions.
+	 *
+	 * No real locking needed, this code is run through
+	 * kstop_machine, or before SMP starts.
+	 */
+
+	if (!do_check)
+		goto skip_check;
+
+	/* read the text we want to modify */
+	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+		return -EFAULT;
+
+	/* Make sure it is what we expect it to be */
+	if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
+		return -EINVAL;
+
+skip_check:
+	/* replace the text with the new text */
+	if (probe_kernel_write(((void *)ip), new_code, MCOUNT_INSN_SIZE))
+		return -EPERM;
+	flush_icache_range(ip, ip + MCOUNT_INSN_SIZE);
+
+	return 0;
+}
+
+static int ftrace_make_nop_check(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned char __attribute__((aligned(8))) replaced[MCOUNT_INSN_SIZE];
+	unsigned long ip = rec->ip;
+
+	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+		return -EFAULT;
+	if (rec->flags & FTRACE_FL_CONVERTED) {
+		struct ftrace_call_insn *call_insn, *tmp_call;
+
+		call_insn = (void *)ftrace_call_code;
+		tmp_call = (void *)replaced;
+		call_insn->imm39_l = tmp_call->imm39_l;
+		call_insn->imm39_h = tmp_call->imm39_h;
+		call_insn->imm20 = tmp_call->imm20;
+		call_insn->i = tmp_call->i;
+		if (memcmp(replaced, ftrace_call_code, MCOUNT_INSN_SIZE) != 0)
+			return -EINVAL;
+		return 0;
+	} else {
+		struct ftrace_orig_insn *call_insn, *tmp_call;
+
+		call_insn = (void *)ftrace_orig_code;
+		tmp_call = (void *)replaced;
+		call_insn->sign = tmp_call->sign;
+		call_insn->imm20 = tmp_call->imm20;
+		if (memcmp(replaced, ftrace_orig_code, MCOUNT_INSN_SIZE) != 0)
+			return -EINVAL;
+		return 0;
+	}
+}
+
+int ftrace_make_nop(struct module *mod,
+		    struct dyn_ftrace *rec, unsigned long addr)
+{
+	int ret;
+	char *new;
+
+	ret = ftrace_make_nop_check(rec, addr);
+	if (ret)
+		return ret;
+	new = ftrace_nop_replace();
+	return ftrace_modify_code(rec->ip, NULL, new, 0);
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long ip = rec->ip;
+	unsigned char *old, *new;
+
+	old = ftrace_nop_replace();
+	new = ftrace_call_replace(ip, addr);
+	return ftrace_modify_code(ip, old, new, 1);
+}
+
+/* in IA64, _mcount can't directly call ftrace_stub. Only jump is ok */
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+	unsigned long ip;
+	unsigned long addr = ((struct fnptr *)ftrace_call)->ip;
+
+	if (func == ftrace_stub)
+		return 0;
+	ip = ((struct fnptr *)func)->ip;
+
+	ia64_patch_imm64(addr + 2, ip);
+
+	flush_icache_range(addr, addr + 16);
+	return 0;
+}
+
+/* run from kstop_machine */
+int __init ftrace_dyn_arch_init(void *data)
+{
+	*(unsigned long *)data = 0;
+
+	return 0;
+}
Index: linux/scripts/recordmcount.pl
===================================================================
--- linux.orig/scripts/recordmcount.pl	2009-01-08 14:50:54.000000000 +0800
+++ linux/scripts/recordmcount.pl	2009-01-08 14:50:58.000000000 +0800
@@ -206,6 +206,13 @@ if ($arch eq "x86_64") {
     $alignment = 2;
     $section_type = '%progbits';
 
+} elsif ($arch eq "ia64") {
+    $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s_mcount\$";
+    $type = "data8";
+
+    if ($is_module eq "0") {
+        $cc .= " -mconstant-gp";
+    }
 } else {
     die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD";
 }
Index: linux/arch/ia64/include/asm/ftrace.h
===================================================================
--- linux.orig/arch/ia64/include/asm/ftrace.h	2009-01-08 14:50:50.000000000 +0800
+++ linux/arch/ia64/include/asm/ftrace.h	2009-01-08 14:50:58.000000000 +0800
@@ -8,6 +8,19 @@
 extern void _mcount(unsigned long pfs, unsigned long r1, unsigned long b0, unsigned long r0);
 #define mcount _mcount
 
+#include <asm/kprobes.h>
+/* In IA64, MCOUNT_ADDR is set at link time, so it's not a constant at compile time */
+#define MCOUNT_ADDR (((struct fnptr *)mcount)->ip)
+#define FTRACE_ADDR (((struct fnptr *)ftrace_caller)->ip)
+
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+	/* second bundle, insn 2 */
+	return addr - 0x12;
+}
+
+struct dyn_arch_ftrace {
+};
 #endif
 
 #endif /* CONFIG_FUNCTION_TRACER */
Index: linux/arch/ia64/kernel/entry.S
===================================================================
--- linux.orig/arch/ia64/kernel/entry.S	2009-01-08 14:50:50.000000000 +0800
+++ linux/arch/ia64/kernel/entry.S	2009-01-08 15:52:09.000000000 +0800
@@ -1406,6 +1406,56 @@ GLOBAL_ENTRY(unw_init_running)
 END(unw_init_running)
 
 #ifdef CONFIG_FUNCTION_TRACER
+#ifdef CONFIG_DYNAMIC_FTRACE
+GLOBAL_ENTRY(_mcount)
+	br ftrace_stub
+END(_mcount)
+
+.here:
+	br.ret.sptk.many b0
+
+GLOBAL_ENTRY(ftrace_caller)
+	alloc out0 = ar.pfs, 8, 0, 4, 0
+	mov out3 = r0
+	;;
+	mov out2 = b0
+	add r3 = 0x20, r3
+	mov out1 = r1;
+	br.call.sptk.many b0 = ftrace_patch_gp
+	//this might be called from module, so we must patch gp
+ftrace_patch_gp:
+	movl gp=__gp
+	mov b0 = r3
+	;;
+.global ftrace_call;
+ftrace_call:
+{
+	.mlx
+	nop.m 0x0
+	movl r3 = .here;;
+}
+	alloc loc0 = ar.pfs, 4, 4, 2, 0
+	;;
+	mov loc1 = b0
+	mov out0 = b0
+	mov loc2 = r8
+	mov loc3 = r15
+	;;
+	adds out0 = -MCOUNT_INSN_SIZE, out0
+	mov out1 = in2
+	mov b6 = r3
+
+	br.call.sptk.many b0 = b6
+	;;
+	mov ar.pfs = loc0
+	mov b0 = loc1
+	mov r8 = loc2
+	mov r15 = loc3
+	br ftrace_stub
+	;;
+END(ftrace_caller)
+
+#else
 GLOBAL_ENTRY(_mcount)
 	movl r2 = ftrace_stub
 	movl r3 = ftrace_trace_function;;
@@ -1435,6 +1485,7 @@ GLOBAL_ENTRY(_mcount)
 	br ftrace_stub
 	;;
 END(_mcount)
+#endif
 
 GLOBAL_ENTRY(ftrace_stub)
 	mov r3 = b0



^ permalink raw reply	[flat|nested] 18+ messages in thread

* RE: [PATCH 5/5] IA64 dynamic ftrace support
  2008-12-23  6:08 [PATCH 5/5] IA64 dynamic ftrace support Shaohua Li
                   ` (12 preceding siblings ...)
  2009-01-08  8:05 ` Shaohua Li
@ 2009-01-08 17:08 ` Steven Rostedt
  2009-01-08 20:25 ` Luck, Tony
                   ` (2 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Steven Rostedt @ 2009-01-08 17:08 UTC (permalink / raw)
  To: linux-ia64


On Thu, 2009-01-08 at 16:05 +0800, Shaohua Li wrote:
> On Tue, 2009-01-06 at 08:42 +0800, Luck, Tony wrote:
> > > In this way, only one instruction is changed to convert code between nop
> > > and call. This should meet dyn-ftrace's requirement.
> > > But this requires CPU support brl instruction, so dyn-ftrace isn't
> > > supported for old Itanium system. Assume there are quite few such old
> > > system running.
> > 
> > Perhaps very few old Merced systems are used for development
> > and performance tuning (the kind of work where ftrace is useful).
> > So not having ftrace support on them might not be too big a loss.
> Tony,
> I simplified the patch to make it more clean, no functional change with
> the one I sent last week. please give it a try and consider merge if
> possible.

> Signed-off-by: Shaohua Li <shaohua.li@intel.com>
> ---
>  arch/ia64/Kconfig              |    2 
>  arch/ia64/include/asm/ftrace.h |   13 ++
>  arch/ia64/kernel/Makefile      |    5 
>  arch/ia64/kernel/entry.S       |   51 ++++++++++
>  arch/ia64/kernel/ftrace.c      |  206 +++++++++++++++++++++++++++++++++++++++++
>  scripts/recordmcount.pl        |    7 +

Shaohua,

Are your changes now confined to ia64 (besides the recordmcount.pl
change)?  Any changes to the ftrace infrastructure should go
through tip. This will give them proper testing against all archs before
pushing them to mainline.

Please send me any patches outside of arch/ia64 that we can pull in.
This includes the recordmcount.pl patch.

Also, send it to rostedt@goodmis.org; that account has my scripts to pull
into git.

Thanks,

-- Steve





^ permalink raw reply	[flat|nested] 18+ messages in thread

* RE: [PATCH 5/5] IA64 dynamic ftrace support
  2008-12-23  6:08 [PATCH 5/5] IA64 dynamic ftrace support Shaohua Li
                   ` (13 preceding siblings ...)
  2009-01-08 17:08 ` Steven Rostedt
@ 2009-01-08 20:25 ` Luck, Tony
  2009-01-08 22:24 ` Luck, Tony
  2009-01-09  2:42 ` Shaohua Li
  16 siblings, 0 replies; 18+ messages in thread
From: Luck, Tony @ 2009-01-08 20:25 UTC (permalink / raw)
  To: linux-ia64

> The patch convert it to below for nop:
>       [MII] nop.m 0x0
>       mov r3=ip
>       nop.i 0x0
>       [MLX] nop.m 0x0
>       nop.x 0x0;;
> This isn't completely nop, as there is one instuction 'mov r3=ip', but
> it should be light and harmless for code follow it.

Did you consider using predicate registers to enable/disable?  E.g.
something like this (using your current calling convention):

        MMI     cmp.ne p6,p0=r0,r0;;
                nop.m 0
        (p06)   mov r3=ip
        MLX     nop.m 0
        (p06)   brl.many <target_address>

Then just patch the "cmp" instruction to something that makes p6 true to enable.

Like this it may not be any better though ... although it avoids
doing the "mov r3=ip" when the tracepoint is disabled ... it may still
mess with the branch prediction logic and update entries in the branch
target cache before the processor realizes that p6 is false and so the
branch should be squashed.

But it does give you the flexibility to pick almost any 5 instructions
for your stub (so long as they can fit within the available templates)
while still allowing ftrace to enable/disable them by patching just one
instruction.  So you might think of some smarter way to do this.
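
To make the enable/disable side of this concrete, here is a rough C sketch of
how the patching could look. Note that cmp_set_p6 and cmp_clear_p6 stand in
for the real slot encodings of "cmp.eq p6,p0=r0,r0" and "cmp.ne p6,p0=r0,r0";
they and the helper name are assumptions for illustration, not actual bit
patterns:

	#include <linux/uaccess.h>
	#include <asm/cacheflush.h>

	/* Enable or disable one tracepoint by rewriting only the aligned
	 * word that holds the cmp instruction, then flushing the icache.
	 */
	static int set_tracepoint(unsigned long ip, int enable,
				  unsigned long cmp_set_p6,
				  unsigned long cmp_clear_p6)
	{
		unsigned long insn = enable ? cmp_set_p6 : cmp_clear_p6;

		if (probe_kernel_write((void *)ip, &insn, sizeof(insn)))
			return -EPERM;
		flush_icache_range(ip, ip + sizeof(insn));
		return 0;
	}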

-Tony

^ permalink raw reply	[flat|nested] 18+ messages in thread

* RE: [PATCH 5/5] IA64 dynamic ftrace support
  2008-12-23  6:08 [PATCH 5/5] IA64 dynamic ftrace support Shaohua Li
                   ` (14 preceding siblings ...)
  2009-01-08 20:25 ` Luck, Tony
@ 2009-01-08 22:24 ` Luck, Tony
  2009-01-09  2:42 ` Shaohua Li
  16 siblings, 0 replies; 18+ messages in thread
From: Luck, Tony @ 2009-01-08 22:24 UTC (permalink / raw)
  To: linux-ia64

> Are you changes now confined to ia64 (besides the recordmcount.pl)
> change?  Any changes to the ftrace infrastructure we would like to go
> through tip. This will give it proper testing against all archs before
> pushing it to mainline.
>
> Please send me any patches outside of arch/ia64 that we can pull in.
> This includes the recordmcount.pl patch.

ia64 bits look ok to me.  We may be able to tune some bits to
make them better, but I'd be fine with the current version
going in.

Should I be trying to squeak the ia64 bits into the last few days
of the current merge window ... or are you going to save the
generic bits like the recordmcount.pl changes for 2.6.30?

-Tony


^ permalink raw reply	[flat|nested] 18+ messages in thread

* RE: [PATCH 5/5] IA64 dynamic ftrace support
  2008-12-23  6:08 [PATCH 5/5] IA64 dynamic ftrace support Shaohua Li
                   ` (15 preceding siblings ...)
  2009-01-08 22:24 ` Luck, Tony
@ 2009-01-09  2:42 ` Shaohua Li
  16 siblings, 0 replies; 18+ messages in thread
From: Shaohua Li @ 2009-01-09  2:42 UTC (permalink / raw)
  To: linux-ia64

On Thu, 2009-01-08 at 13:25 -0700, Luck, Tony wrote:
> > The patch convert it to below for nop:
> >       [MII] nop.m 0x0
> >       mov r3=ip
> >       nop.i 0x0
> >       [MLX] nop.m 0x0
> >       nop.x 0x0;;
> > This isn't completely nop, as there is one instuction 'mov r3=ip', but
> > it should be light and harmless for code follow it.
> 
> Did you consider using predicate registers to enable/disable.  E.g.
> using something like this (using your currrent calling convention):
> 
>         MMI     cmp.ne p6,p0=r0,r0;;
>                 nop.m 0
>         (p06)   mov r3=ip
>         MLX     nop.m 0
>         (p06)   brl.many <target_address>
> 
> Then just patch the "cmp" instruction to something that makes p6 true to enable.
> 
> Like this it may not be any better though ... although it avoids
> doing the "mov r3=ip" when the tracepoint is disabled ... it may still
> mess with the branch prediction logic and update entries in the branch
> target cache before the processor realizes that p6 is false and so the
> branch should be squashed.
> 
> But it does give you the flexibility to pick almost any 5 instructions
> for your stub (so long as they can fit within the available templates)
> while still allowing ftrace to enable/disable them by patching just one
> instruction.  So you might think of some smarter way to do this.
Yes, this should work too. Is an instruction a nop (or as light as a nop) if
its predicate is false? The 'mov r3=ip' adds overhead, but we need it in the
current implementation; it would be better if we could remove that overhead.
Instruction space doesn't matter now; instruction overhead is the concern.

Thanks,
Shaohua


^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2009-01-09  2:42 UTC | newest]

Thread overview: 18+ messages
2008-12-23  6:08 [PATCH 5/5] IA64 dynamic ftrace support Shaohua Li
2008-12-23 14:35 ` Steven Rostedt
2008-12-24  0:54 ` Shaohua Li
2008-12-24  1:00 ` Steven Rostedt
2008-12-24  8:08 ` Shaohua Li
2008-12-24 13:29 ` Steven Rostedt
2008-12-24 21:50 ` Keith Owens
2008-12-25  1:08 ` Shaohua Li
2008-12-25  3:54 ` Steven Rostedt
2008-12-25  4:01 ` Shaohua Li
2008-12-26  2:42 ` Shaohua Li
2008-12-31  9:11 ` Shaohua Li
2009-01-06  0:42 ` Luck, Tony
2009-01-08  8:05 ` Shaohua Li
2009-01-08 17:08 ` Steven Rostedt
2009-01-08 20:25 ` Luck, Tony
2009-01-08 22:24 ` Luck, Tony
2009-01-09  2:42 ` Shaohua Li
