From: Steven Rostedt <srostedt@redhat.com>
To: linux-ia64@vger.kernel.org
Subject: Re: [PATCH 5/5] IA64 dynamic ftrace support
Date: Tue, 23 Dec 2008 14:35:49 +0000 [thread overview]
Message-ID: <1230042949.30177.103.camel@localhost.localdomain> (raw)
In-Reply-To: <1230012500.10933.102.camel@sli10-desk.sh.intel.com>
On Tue, 2008-12-23 at 14:08 +0800, Shaohua Li wrote:
> IA64 dynamic ftrace support. The main tricky thing here is to support module.
> In a module, each routine's mcount call will call a PLT stub, which
> will call kernel mcount. We can't simply make the mcount call call into
> kernel mcount, as kernel and module have different gp and the
> instruction just supports 25bit offset. So I introduced a new PLT stub,
> which will call into kernel ftrace_caller. When module loading, all
> mcount call will be converted to nop. When the nop is converted to call,
> we make the call to the new PLT stub instead of old mcount PLT stub.
>
> Signed-off-by: Shaohua Li <shaohua.li@intel.com>
> ---
> arch/ia64/Kconfig | 2
> arch/ia64/include/asm/ftrace.h | 17 ++
> arch/ia64/include/asm/module.h | 4
> arch/ia64/kernel/Makefile | 5
> arch/ia64/kernel/entry.S | 37 ++++++
> arch/ia64/kernel/ftrace.c | 234 +++++++++++++++++++++++++++++++++++++++++
> arch/ia64/kernel/module.c | 15 ++
> scripts/recordmcount.pl | 7 +
> 8 files changed, 321 insertions(+)
>
> Index: linux/arch/ia64/Kconfig
> ===================================================================
> --- linux.orig/arch/ia64/Kconfig 2008-12-23 13:13:17.000000000 +0800
> +++ linux/arch/ia64/Kconfig 2008-12-23 13:30:09.000000000 +0800
> @@ -21,6 +21,8 @@ config IA64
> select HAVE_OPROFILE
> select HAVE_KPROBES
> select HAVE_KRETPROBES
> + select HAVE_FTRACE_MCOUNT_RECORD
> + select HAVE_DYNAMIC_FTRACE
> select HAVE_FUNCTION_TRACER
> select HAVE_DMA_ATTRS
> select HAVE_KVM
> Index: linux/arch/ia64/kernel/Makefile
> ===================================================================
> --- linux.orig/arch/ia64/kernel/Makefile 2008-12-23 13:11:27.000000000 +0800
> +++ linux/arch/ia64/kernel/Makefile 2008-12-23 13:30:09.000000000 +0800
> @@ -2,6 +2,10 @@
> # Makefile for the linux kernel.
> #
>
> +ifdef CONFIG_DYNAMIC_FTRACE
> +CFLAGS_REMOVE_ftrace.o = -pg
> +endif
> +
> extra-y := head.o init_task.o vmlinux.lds
>
> obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \
> @@ -28,6 +32,7 @@ obj-$(CONFIG_IA64_CYCLONE) += cyclone.o
> obj-$(CONFIG_CPU_FREQ) += cpufreq/
> obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o
> obj-$(CONFIG_KPROBES) += kprobes.o jprobes.o
> +obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
> obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
> obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
> obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o
> Index: linux/arch/ia64/kernel/ftrace.c
> ===================================================================
> --- /dev/null 1970-01-01 00:00:00.000000000 +0000
> +++ linux/arch/ia64/kernel/ftrace.c 2008-12-23 13:30:09.000000000 +0800
> @@ -0,0 +1,234 @@
> +/*
> + * Dynamic function tracing support.
> + *
> + * Copyright (C) 2008 Shaohua Li <shaohua.li@intel.com>
> + *
> + * For licencing details, see COPYING.
> + *
> + * Defines low-level handling of mcount calls when the kernel
> + * is compiled with the -pg flag. When using dynamic ftrace, the
> + * mcount call-sites get patched lazily with NOP till they are
> + * enabled. All code mutation routines here take effect atomically.
> + */
> +
> +#include <linux/uaccess.h>
> +#include <linux/ftrace.h>
> +
> +#include <asm/cacheflush.h>
> +#include <asm/ftrace.h>
> +
> +static unsigned char ftrace_nop_code[MCOUNT_INSN_SIZE] = {
> + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* nop.m 0x0 */
> + 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, /* nop.i 0x0 */
> + 0x00, 0x00, 0x04, 0x00, /* nop.i 0x0 */
> + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, /* nop.m 0x0 */
> + 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, /* nop.i 0x0 */
> + 0x00, 0x00, 0x04, 0x00 /* nop.i 0x0;; */
> +};
As I stated before, you can not have a multi-line nop. The best you can
do is add a jump over the entire call, and only if that jump is a single
instruction.
Think about it, let's do a simple scenario.
Some process is running inside the kernel and starts to execute one of
these 'nop' sequences. After executing the third nop, it is preempted
(before finishing the other nops). During this preemption, you happen to
start the tracer. Kstop_machine is called and all processes are now
stopped. The kstop_machine changes the nop to the call of a function
tracer and resumes. Now that original process gets scheduled back in,
but the code is no longer nops, it has actual code and a call. But we
missed the first 3 commands. Not to mention, it looks like the op codes
are not even 4 or 8 byte aligned. So we could be executing something
that is not even a command. BAM! Crash! kernel oops! ;-)
> +
> +static unsigned char *ftrace_nop_replace(void)
> +{
> + return ftrace_nop_code;
> +}
> +
> +/* In IA64, each function will be added below two bundles with -pg option */
> +static unsigned char __attribute__((aligned(8)))
> +ftrace_call_code[MCOUNT_INSN_SIZE] = {
> + 0x02, 0x40, 0x31, 0x10, 0x80, 0x05, /* alloc r40=ar.pfs,12,8,0 */
> + 0xb0, 0x02, 0x00, 0x00, 0x42, 0x40, /* mov r43=r0;; */
> + 0x05, 0x00, 0xc4, 0x00, /* mov r42=b0 */
> + 0x11, 0x48, 0x01, 0x02, 0x00, 0x21, /* mov r41=r1 */
> + 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, /* nop.i 0x0 */
If you made your own PLT stub, could you just change the one line to
jump to that stub?
> + 0x08, 0x00, 0x00, 0x50 /* br.call.sptk.many b0 = _mcount;; */
> +};
> +
> +struct ftrace_call_insn {
> + u64 dummy1, dummy2, dummy3;
> + u64 dummy4:64-41+13;
> + u64 imm20:20;
> + u64 dummy5:3;
> + u64 sign:1;
> + u64 dummy6:4;
> +};
> +
> +static void calculate_offset(unsigned long ip, unsigned long addr,
> + unsigned long *offset, int *sign)
> +{
> + long of = addr - (ip + 0x10);
> + if (of < 0)
> + *sign = 1;
> + else
> + *sign = 0;
> + *offset = of >> 4;
> +}
> +
> +static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
> +{
> + struct ftrace_call_insn *code = (void *)ftrace_call_code;
> + unsigned long offset;
> + int sign;
> +
> + calculate_offset(ip, addr, &offset, &sign);
> + code->sign = sign;
> + code->imm20 = offset;
> +
> + return ftrace_call_code;
> +}
> +
> +static int
> +ftrace_modify_code(unsigned long ip, unsigned char *old_code,
> + unsigned char *new_code, int do_check)
> +{
> + unsigned char replaced[MCOUNT_INSN_SIZE];
> +
> + /*
> + * Note: Due to modules and __init, code can
> + * disappear and change, we need to protect against faulting
> + * as well as code changing. We do this by using the
> + * probe_kernel_* functions.
> + *
> + * No real locking needed, this code is run through
> + * kstop_machine, or before SMP starts.
> + */
> +
> + if (!do_check)
> + goto skip_check;
> +
> + /* read the text we want to modify */
> + if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
> + return -EFAULT;
> +
> + /* Make sure it is what we expect it to be */
> + if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
> + return -EINVAL;
> +
> +skip_check:
> + /* replace the text with the new text */
> + if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE))
> + return -EPERM;
> +
> + flush_icache_range(ip, ip + MCOUNT_INSN_SIZE);
> +
> + return 0;
> +}
> +
> +int ftrace_make_nop(struct module *mod,
> + struct dyn_ftrace *rec, unsigned long addr)
> +{
> + unsigned char __attribute__((aligned(8))) replaced[MCOUNT_INSN_SIZE];
> + struct ftrace_call_insn *call_insn = (void *)ftrace_call_code, *tmp_call;
> + unsigned long ip = rec->ip;
> + int is_kernel_address = core_kernel_text(ip);
> + unsigned char *old, *new;
> + int do_check = 0;
> +
> + if (is_kernel_address) {
> + do_check = 1;
> + old = ftrace_call_replace(ip, addr);
> + goto modify;
> + }
> +
> + if (!rec->arch.mod) {
> + if (!mod) {
> + printk(KERN_ERR "No module loaded addr=%lx\n",
> + addr);
> + return -EFAULT;
> + }
> + rec->arch.mod = mod;
> + } else if (mod) {
> + if (mod != rec->arch.mod) {
> + printk(KERN_ERR
> + "Record mod %p not equal to passed in mod %p\n",
> + rec->arch.mod, mod);
> + return -EINVAL;
> + }
> + /* nothing to do if mod = rec->arch.mod */
> + } else
> + mod = rec->arch.mod;
> +
> + /* the mcount call is a call to a PLT entry */
> + if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
> + return -EFAULT;
> + tmp_call = (void *)replaced;
> + call_insn->sign = tmp_call->sign;
> + call_insn->imm20 = tmp_call->imm20;
> + if (memcmp(replaced, ftrace_call_code, MCOUNT_INSN_SIZE) != 0)
> + return -EINVAL;
> + old = ftrace_call_code;
> +
> +modify:
> + new = ftrace_nop_replace();
> + return ftrace_modify_code(ip, old, new, do_check);
> +}
> +
> +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
> +{
> + unsigned long ip = rec->ip;
> + int is_kernel_address = core_kernel_text(ip);
> + struct module *mod;
> + unsigned char *old, *new;
> +
> + if (is_kernel_address)
> + goto modify;
> +
> + if (!rec->arch.mod) {
> + printk(KERN_ERR "No module loaded\n");
> + return -EINVAL;
> + }
> + mod = rec->arch.mod;
> + if (!mod->arch.ftrace_caller_plt) {
> + printk(KERN_ERR "No mcount PLT entry for module %s\n",
> + mod->name);
> + return -EINVAL;
> + }
> + addr = mod->arch.ftrace_caller_plt;
> +modify:
> + old= ftrace_nop_replace();
> + new = ftrace_call_replace(ip, addr);
> + return ftrace_modify_code(ip, old, new, 1);
> +}
> +
> +struct ftrace_caller_code {
> + u64 dummy1:46;
> + u64 imm41_18:64-46;
> + u64 imm41_23:41-(64-46);
> + u64 dummy2:13;
> + u64 imm7b:7;
> + u64 dummy3:1;
> + u64 ic:1;
> + u64 imm5c:5;
> + u64 imm9d:9;
> + u64 i:1;
> + u64 dummy4:4;
> +};
> +
> +/* in IA64, _mcount can't directly call ftrace_stub. Only jump is ok */
> +int ftrace_update_ftrace_func(ftrace_func_t func)
> +{
> + unsigned long ip;
> + unsigned long addr = ((struct fnptr *)ftrace_call)->ip;
> + struct ftrace_caller_code *code = (void *)addr;
> +
> + if (func = ftrace_stub)
> + return 0;
> + ip = ((struct fnptr *)func)->ip;
> +
> + code->i = ip >> 63;
> + code->imm9d = ip >> 7;
> + code->imm5c = ip >> 16;
> + code->ic = ip >> 21;
> + code->imm7b = ip & 0x7f;
> + code->imm41_18 = ip >> 22;
> + code->imm41_23 = ip >> 40;
> +
> + flush_icache_range(addr, addr + 16);
> + return 0;
> +}
> +
> +/* run from kstop_machine */
> +int __init ftrace_dyn_arch_init(void *data)
> +{
> + *(unsigned long *)data = 0;
> +
> + return 0;
> +}
> Index: linux/scripts/recordmcount.pl
> ===================================================================
> --- linux.orig/scripts/recordmcount.pl 2008-12-23 13:24:59.000000000 +0800
> +++ linux/scripts/recordmcount.pl 2008-12-23 13:30:09.000000000 +0800
> @@ -206,6 +206,13 @@ if ($arch eq "x86_64") {
> $alignment = 2;
> $section_type = '%progbits';
>
> +} elsif ($arch eq "ia64") {
> + $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s_mcount\$";
> + $type = "data8";
> +
> + if ($is_module eq "0") {
> + $cc .= " -mconstant-gp";
> + }
I wonder if it would be better to pass in CFLAGS and then be able to
parse that instead. Then we can find out a lot more about what we are
working on.
-- Steve
> } else {
> die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD";
> }
> Index: linux/arch/ia64/include/asm/ftrace.h
> ===================================================================
> --- linux.orig/arch/ia64/include/asm/ftrace.h 2008-12-23 13:13:17.000000000 +0800
> +++ linux/arch/ia64/include/asm/ftrace.h 2008-12-23 13:30:09.000000000 +0800
> @@ -7,6 +7,23 @@
> #ifndef __ASSEMBLY__
> extern void _mcount(unsigned long pfs, unsigned long r1, unsigned long b0, unsigned long r0);
> #define mcount _mcount
> +#define ftrace_call ftrace_caller
> +
> +#include <asm/kprobes.h>
> +/* In IA64, MCOUNT_ADDR is set in link time, so it's not a constant at compile time */
> +#define MCOUNT_ADDR (((struct fnptr *)mcount)->ip)
> +#define FTRACE_ADDR (((struct fnptr *)ftrace_caller)->ip)
> +
> +static inline unsigned long ftrace_call_adjust(unsigned long addr)
> +{
> + /* second bundle, insn 2 */
> + return addr - 0x12;
> +}
> +
> +
> +struct dyn_arch_ftrace {
> + struct module *mod;
> +};
>
> #endif
>
> Index: linux/arch/ia64/include/asm/module.h
> ===================================================================
> --- linux.orig/arch/ia64/include/asm/module.h 2008-12-23 13:11:27.000000000 +0800
> +++ linux/arch/ia64/include/asm/module.h 2008-12-23 13:30:09.000000000 +0800
> @@ -21,6 +21,10 @@ struct mod_arch_specific {
> void *core_unw_table; /* core unwind-table cookie returned by unwinder */
> void *init_unw_table; /* init unwind-table cookie returned by unwinder */
> unsigned int next_got_entry; /* index of next available got entry */
> +
> +#ifdef CONFIG_DYNAMIC_FTRACE
> + uint64_t ftrace_caller_plt;
> +#endif
> };
>
> #define Elf_Shdr Elf64_Shdr
> Index: linux/arch/ia64/kernel/module.c
> ===================================================================
> --- linux.orig/arch/ia64/kernel/module.c 2008-12-23 13:11:27.000000000 +0800
> +++ linux/arch/ia64/kernel/module.c 2008-12-23 13:30:09.000000000 +0800
> @@ -32,6 +32,7 @@
> #include <linux/moduleloader.h>
> #include <linux/string.h>
> #include <linux/vmalloc.h>
> +#include <linux/ftrace.h>
>
> #include <asm/patch.h>
> #include <asm/unaligned.h>
> @@ -468,6 +469,9 @@ module_frob_arch_sections (Elf_Ehdr *ehd
> core_plts += count_plts(rels, numrels);
> }
>
> +#ifdef CONFIG_DYNAMIC_FTRACE
> + core_plts++;
> +#endif
> mod->arch.core_plt->sh_type = SHT_NOBITS;
> mod->arch.core_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
> mod->arch.core_plt->sh_addralign = 16;
> @@ -839,6 +843,17 @@ apply_relocate_add (Elf64_Shdr *sechdrs,
> if (ret < 0)
> return ret;
> }
> +#ifdef CONFIG_DYNAMIC_FTRACE
> + if (!mod->arch.ftrace_caller_plt) {
> + int ok = 1;
> + /* fake a 'struct insn' for get_plt, which is in module core */
> + uint64_t fake_insn = (uint64_t)mod->module_init + mod->init_size;
> + mod->arch.ftrace_caller_plt = get_plt(mod,
> + (struct insn *)fake_insn, (uint64_t)ftrace_caller, &ok);
> + if (!ok)
> + mod->arch.ftrace_caller_plt = 0;
> + }
> +#endif
> return 0;
> }
>
> Index: linux/arch/ia64/kernel/entry.S
> ===================================================================
> --- linux.orig/arch/ia64/kernel/entry.S 2008-12-23 13:13:17.000000000 +0800
> +++ linux/arch/ia64/kernel/entry.S 2008-12-23 13:30:09.000000000 +0800
> @@ -1406,6 +1406,42 @@ GLOBAL_ENTRY(unw_init_running)
> END(unw_init_running)
>
> #ifdef CONFIG_FUNCTION_TRACER
> +#ifdef CONFIG_DYNAMIC_FTRACE
> +GLOBAL_ENTRY(_mcount)
> + br ftrace_stub
> +END(_mcount)
> +
> +.here:
> + br.ret.sptk.many b0
> +
> +GLOBAL_ENTRY(ftrace_caller)
> +{
> + .mlx
> + nop.m 0x0
> + movl r3 = .here;;
> +}
> + alloc loc0 = ar.pfs, 4, 4, 2, 0
> + ;;
> + mov loc1 = b0
> + mov out0 = b0
> + mov loc2 = r8
> + mov loc3 = r15
> + ;;
> + adds out0 = -MCOUNT_INSN_SIZE, out0
> + mov out1 = in2
> + mov b6 = r3
> +
> + br.call.sptk.many b0 = b6
> + ;;
> + mov ar.pfs = loc0
> + mov b0 = loc1
> + mov r8 = loc2
> + mov r15 = loc3
> + br ftrace_stub
> + ;;
> +END(ftrace_caller)
> +
> +#else
> GLOBAL_ENTRY(_mcount)
> movl r2 = ftrace_stub
> movl r3 = ftrace_trace_function;;
> @@ -1435,6 +1471,7 @@ GLOBAL_ENTRY(_mcount)
> br ftrace_stub
> ;;
> END(_mcount)
> +#endif
>
> GLOBAL_ENTRY(ftrace_stub)
> mov r3 = b0
>
>
next prev parent reply other threads:[~2008-12-23 14:35 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-12-23 6:08 [PATCH 5/5] IA64 dynamic ftrace support Shaohua Li
2008-12-23 14:35 ` Steven Rostedt [this message]
2008-12-24 0:54 ` Shaohua Li
2008-12-24 1:00 ` Steven Rostedt
2008-12-24 8:08 ` Shaohua Li
2008-12-24 13:29 ` Steven Rostedt
2008-12-24 21:50 ` Keith Owens
2008-12-25 1:08 ` Shaohua Li
2008-12-25 3:54 ` Steven Rostedt
2008-12-25 4:01 ` Shaohua Li
2008-12-26 2:42 ` Shaohua Li
2008-12-31 9:11 ` Shaohua Li
2009-01-06 0:42 ` Luck, Tony
2009-01-08 8:05 ` Shaohua Li
2009-01-08 17:08 ` Steven Rostedt
2009-01-08 20:25 ` Luck, Tony
2009-01-08 22:24 ` Luck, Tony
2009-01-09 2:42 ` Shaohua Li
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1230042949.30177.103.camel@localhost.localdomain \
--to=srostedt@redhat.com \
--cc=linux-ia64@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox