* [RFC PATCH v3 1/3] arch/powerpc : Add detour buffer support for optprobes
  [not found] <1464692191-1167-1-git-send-email-anju@linux.vnet.ibm.com>
@ 2016-05-31 10:56 ` Anju T
  2016-05-31 10:56 ` [RFC PATCH v3 2/3] arch/powerpc : optprobes for powerpc core Anju T
  ` (2 subsequent siblings)
  3 siblings, 0 replies; 4+ messages in thread

From: Anju T @ 2016-05-31 10:56 UTC
To: linux-kernel, linuxppc-dev
Cc: anju, ananth, naveen.n.rao, paulus, srikar, benh, mpe, hemant,
	mahesh, mhiramat, anjutsudhakar

The detour buffer contains instructions to create an in-memory pt_regs.
After execution of the pre-handler, a call is made to emulate the probed
instruction. The NIP is known only after the probed instruction has been
executed, so a branch instruction is created to the NIP returned by
emulate_step().

Instruction slots for the detour buffer are allocated from a reserved
area. For the time being, 64KB of memory is reserved for this purpose.

Signed-off-by: Anju T <anju@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/kprobes.h   |  27 +++
 arch/powerpc/kernel/optprobes_head.S | 136 +++++++++++++++++++++++++++++++++++
 2 files changed, 163 insertions(+)
 create mode 100644 arch/powerpc/kernel/optprobes_head.S

diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h
index 039b583..1cb2527 100644
--- a/arch/powerpc/include/asm/kprobes.h
+++ b/arch/powerpc/include/asm/kprobes.h
@@ -38,7 +38,27 @@ struct pt_regs;
 struct kprobe;
 
 typedef ppc_opcode_t kprobe_opcode_t;
+
+extern kprobe_opcode_t optinsn_slot;
+/* Optinsn template address */
+extern kprobe_opcode_t optprobe_template_entry[];
+extern kprobe_opcode_t optprobe_template_call_handler[];
+extern kprobe_opcode_t optprobe_template_call_emulate[];
+extern kprobe_opcode_t optprobe_template_ret_branch[];
+extern kprobe_opcode_t optprobe_template_ret[];
+extern kprobe_opcode_t optprobe_template_insn[];
+extern kprobe_opcode_t optprobe_template_kp_addr[];
+extern kprobe_opcode_t optprobe_template_op_address1[];
+extern kprobe_opcode_t optprobe_template_op_address2[];
+extern kprobe_opcode_t optprobe_template_end[];
+
 #define MAX_INSN_SIZE 1
+#define MAX_OPTIMIZED_LENGTH 4
+#define MAX_OPTINSN_SIZE \
+	(((unsigned long)&optprobe_template_end - \
+	  (unsigned long)&optprobe_template_entry) / \
+	 sizeof(kprobe_opcode_t))
+#define RELATIVEJUMP_SIZE 4
 
 #ifdef CONFIG_PPC64
 #if defined(_CALL_ELF) && _CALL_ELF == 2
@@ -129,5 +149,12 @@ struct kprobe_ctlblk {
 extern int kprobe_exceptions_notify(struct notifier_block *self,
 					unsigned long val, void *data);
 extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
+
+struct arch_optimized_insn {
+	kprobe_opcode_t copied_insn[1];
+	/* detour buffer */
+	kprobe_opcode_t *insn;
+};
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_KPROBES_H */
diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S
new file mode 100644
index 0000000..b2536bc
--- /dev/null
+++ b/arch/powerpc/kernel/optprobes_head.S
@@ -0,0 +1,136 @@
+/*
+ * Code to prepare detour buffer for optprobes in Kernel.
+ *
+ * Copyright 2016, Anju T, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/ptrace.h>
+#include <asm/asm-offsets.h>
+
+#define OPT_SLOT_SIZE 65536
+
+.align 2
+.global optinsn_slot
+optinsn_slot:
+	/* Reserve an area to allocate slots for detour buffer */
+	.space OPT_SLOT_SIZE
+
+/* Create an in-memory pt_regs */
+.global optprobe_template_entry
+optprobe_template_entry:
+	stdu	r1,-INT_FRAME_SIZE(r1)
+	SAVE_GPR(0,r1)
+	/* Save the previous SP into stack */
+	addi	r0,r1,INT_FRAME_SIZE
+	std	0,GPR1(r1)
+	SAVE_2GPRS(2,r1)
+	SAVE_8GPRS(4,r1)
+	SAVE_10GPRS(12,r1)
+	SAVE_10GPRS(22,r1)
+	/* Save SPRS */
+	mfmsr	r5
+	std	r5,_MSR(r1)
+	li	r5,0
+	std	r5,ORIG_GPR3(r1)
+	mfctr	r5
+	std	r5,_CTR(r1)
+	mflr	r5
+	std	r5,_LINK(r1)
+	mfspr	r5,SPRN_XER
+	std	r5,_XER(r1)
+	mfcr	r5
+	std	r5,_CCR(r1)
+	lbz	r5,PACASOFTIRQEN(r13)
+	std	r5,SOFTE(r1)
+	li	r5,0
+	std	r5,_TRAP(r1)
+	mfdar	r5
+	std	r5,_DAR(r1)
+	mfdsisr	r5
+	std	r5,_DSISR(r1)
+	li	r5,0
+	std	r5,RESULT(r1)
+
+/* Save p->addr into stack */
+.global optprobe_template_kp_addr
+optprobe_template_kp_addr:
+	nop
+	nop
+	nop
+	nop
+	nop
+	std	r3,_NIP(r1)
+
+/* Pass parameters for optimized_callback */
+.global optprobe_template_op_address1
+optprobe_template_op_address1:
+	nop
+	nop
+	nop
+	nop
+	nop
+	addi	r4,r1,STACK_FRAME_OVERHEAD
+
+/* Branch to the prehandler */
+.global optprobe_template_call_handler
+optprobe_template_call_handler:
+	nop
+	/* Pass parameters for instruction emulation */
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+.global optprobe_template_insn
+optprobe_template_insn:
+	nop
+	nop
+
+/* Branch to instruction emulation */
+.global optprobe_template_call_emulate
+optprobe_template_call_emulate:
+	nop
+.global optprobe_template_op_address2
+optprobe_template_op_address2:
+	nop
+	nop
+	nop
+	nop
+	nop
+	addi	r4,r1,STACK_FRAME_OVERHEAD
+
+/* Branch to create_return_branch() function */
+.global optprobe_template_ret_branch
+optprobe_template_ret_branch:
+	nop
+	/* Restore the registers */
+	ld	r5,_MSR(r1)
+	mtmsr	r5
+	ld	r5,_CTR(r1)
+	mtctr	r5
+	ld	r5,_LINK(r1)
+	mtlr	r5
+	ld	r5,_XER(r1)
+	mtxer	r5
+	ld	r5,_CCR(r1)
+	mtcr	r5
+	ld	r5,_DAR(r1)
+	mtdar	r5
+	ld	r5,_DSISR(r1)
+	mtdsisr	r5
+	REST_GPR(0,r1)
+	REST_2GPRS(2,r1)
+	REST_8GPRS(4,r1)
+	REST_10GPRS(12,r1)
+	REST_10GPRS(22,r1)
+	/* Restore the previous SP */
+	addi	r1,r1,INT_FRAME_SIZE
+
+/* Jump back to the normal workflow from trampoline */
+.global optprobe_template_ret
+optprobe_template_ret:
+	nop
+.global optprobe_template_end
+optprobe_template_end:
-- 
2.1.0
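The probe flow this template serves is driven entirely by the generic kprobes
core: any ordinary kprobe whose probed instruction emulate_step() can handle
becomes eligible for the detour buffer above once the rest of the series is
applied. As a rough sketch of how such a probe is registered (the probed
symbol is purely illustrative; nothing in this patch defines it):

	#include <linux/module.h>
	#include <linux/kprobes.h>

	static int handler_pre(struct kprobe *p, struct pt_regs *regs)
	{
		pr_info("pre-handler hit at %pS\n", (void *)regs->nip);
		return 0;
	}

	static struct kprobe kp = {
		.symbol_name = "_do_fork",	/* illustrative target */
		.pre_handler = handler_pre,
	};

	static int __init optkp_demo_init(void)
	{
		return register_kprobe(&kp);
	}

	static void __exit optkp_demo_exit(void)
	{
		unregister_kprobe(&kp);
	}

	module_init(optkp_demo_init);
	module_exit(optkp_demo_exit);
	MODULE_LICENSE("GPL");

Once such a probe is registered, the kprobes optimizer asynchronously calls
the arch hooks added in patch 2/3 and, if preparation succeeds, replaces the
probed trap instruction with a branch into the template above.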
* [RFC PATCH v3 2/3] arch/powerpc : optprobes for powerpc core
  [not found] <1464692191-1167-1-git-send-email-anju@linux.vnet.ibm.com>
  2016-05-31 10:56 ` [RFC PATCH v3 1/3] arch/powerpc : Add detour buffer support for optprobes Anju T
@ 2016-05-31 10:56 ` Anju T
  2016-05-31 10:56 ` [RFC PATCH v4 3/3] arch/powerpc : Enable optprobes support in powerpc Anju T
  [not found] ` <201605311058.u4VAsdah009164@mx0a-001b2d01.pphosted.com>
  3 siblings, 0 replies; 4+ messages in thread

From: Anju T @ 2016-05-31 10:56 UTC
To: linux-kernel, linuxppc-dev
Cc: anju, ananth, naveen.n.rao, paulus, srikar, benh, mpe, hemant,
	mahesh, mhiramat, anjutsudhakar

Instructions that can be emulated are candidates for optimization.
Before optimizing, ensure that the gap between the allocated detour
buffer and the probed instruction is within +/- 32MB.

Signed-off-by: Anju T <anju@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/optprobes.c | 351 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 351 insertions(+)
 create mode 100644 arch/powerpc/kernel/optprobes.c

diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
new file mode 100644
index 0000000..c4253b6
--- /dev/null
+++ b/arch/powerpc/kernel/optprobes.c
@@ -0,0 +1,351 @@
+/*
+ * Code for Kernel probes Jump optimization.
+ *
+ * Copyright 2016, Anju T, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kprobes.h>
+#include <linux/jump_label.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <asm/kprobes.h>
+#include <asm/ptrace.h>
+#include <asm/cacheflush.h>
+#include <asm/code-patching.h>
+#include <asm/sstep.h>
+
+DEFINE_INSN_CACHE_OPS(ppc_optinsn)
+
+#define TMPL_CALL_HDLR_IDX	\
+	(optprobe_template_call_handler - optprobe_template_entry)
+#define TMPL_EMULATE_IDX	\
+	(optprobe_template_call_emulate - optprobe_template_entry)
+#define TMPL_RET_BRANCH_IDX	\
+	(optprobe_template_ret_branch - optprobe_template_entry)
+#define TMPL_RET_IDX	\
+	(optprobe_template_ret - optprobe_template_entry)
+#define TMPL_KP_IDX	\
+	(optprobe_template_kp_addr - optprobe_template_entry)
+#define TMPL_OP1_IDX	\
+	(optprobe_template_op_address1 - optprobe_template_entry)
+#define TMPL_OP2_IDX	\
+	(optprobe_template_op_address2 - optprobe_template_entry)
+#define TMPL_INSN_IDX	\
+	(optprobe_template_insn - optprobe_template_entry)
+#define TMPL_END_IDX	\
+	(optprobe_template_end - optprobe_template_entry)
+
+static unsigned long val_nip;
+
+static void *__ppc_alloc_insn_page(void)
+{
+	return &optinsn_slot;
+}
+
+static void *__ppc_free_insn_page(void *page __maybe_unused)
+{
+	return;
+}
+
+struct kprobe_insn_cache kprobe_ppc_optinsn_slots = {
+	.mutex = __MUTEX_INITIALIZER(kprobe_ppc_optinsn_slots.mutex),
+	.pages = LIST_HEAD_INIT(kprobe_ppc_optinsn_slots.pages),
+	/* insn_size initialized later */
+	.alloc = __ppc_alloc_insn_page,
+	.free = __ppc_free_insn_page,
+	.nr_garbage = 0,
+};
+
+kprobe_opcode_t *ppc_get_optinsn_slot(struct optimized_kprobe *op)
+{
+	/*
+	 * The insn slot is allocated from the reserved
+	 * area (ie. &optinsn_slot). We are not optimizing probes
+	 * at module_addr now.
+	 */
+	kprobe_opcode_t *slot = NULL;
+
+	if (is_kernel_addr(op->kp.addr))
+		slot = get_ppc_optinsn_slot();
+	return slot;
+}
+
+static void ppc_free_optinsn_slot(struct optimized_kprobe *op)
+{
+	if (!op->optinsn.insn)
+		return;
+	if (is_kernel_addr((unsigned long)op->kp.addr))
+		free_ppc_optinsn_slot(op->optinsn.insn, 0);
+}
+
+static void
+__arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
+{
+	ppc_free_optinsn_slot(op);
+	op->optinsn.insn = NULL;
+}
+
+static int can_optimize(struct kprobe *p)
+{
+	struct pt_regs *regs;
+	unsigned int instr;
+	int r;
+
+	/*
+	 * Not optimizing the kprobe placed by
+	 * kretprobe during boot time
+	 */
+	if ((kprobe_opcode_t)p->addr == (kprobe_opcode_t)&kretprobe_trampoline)
+		return 0;
+
+	regs = kmalloc(sizeof(*regs), GFP_KERNEL);
+	if (!regs)
+		return -ENOMEM;
+	memset(regs, 0, sizeof(struct pt_regs));
+	memcpy(regs, current_pt_regs(), sizeof(struct pt_regs));
+	regs->nip = p->addr;
+	instr = *(p->ainsn.insn);
+
+	/* Ensure the instruction can be emulated */
+	r = emulate_step(regs, instr);
+	val_nip = regs->nip;
+	if (r != 1)
+		return 0;
+
+	return 1;
+}
+
+static void
+create_return_branch(struct optimized_kprobe *op, struct pt_regs *regs)
+{
+	/*
+	 * Create a branch back to the return address
+	 * after the probed instruction is emulated
+	 */
+
+	kprobe_opcode_t branch, *buff;
+	unsigned long ret;
+
+	ret = regs->nip;
+	buff = op->optinsn.insn;
+	/*
+	 * TODO: For conditional branch instructions, the return
+	 * address may differ in SMP systems. This has to be addressed.
+	 */
+
+	branch = create_branch((unsigned int *)buff + TMPL_RET_IDX,
+			       (unsigned long)ret, 0);
+	buff[TMPL_RET_IDX] = branch;
+	isync();
+}
+
+static void
+optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
+{
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	if (kprobe_running())
+		kprobes_inc_nmissed_count(&op->kp);
+	else {
+		__this_cpu_write(current_kprobe, &op->kp);
+		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+		opt_pre_handler(&op->kp, regs);
+		__this_cpu_write(current_kprobe, NULL);
+	}
+	local_irq_restore(flags);
+}
+NOKPROBE_SYMBOL(optimized_callback);
+
+void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
+{
+	__arch_remove_optimized_kprobe(op, 1);
+}
+
+void create_insn(unsigned int insn, kprobe_opcode_t *addr)
+{
+	u32 instr, instr2;
+
+	/*
+	 * emulate_step() requires insn to be emulated as
+	 * second parameter. Hence r4 should be loaded
+	 * with 'insn'.
+	 * synthesize addis r4,0,(insn)@h
+	 */
+	instr = 0x3c000000 | 0x800000 | ((insn >> 16) & 0xffff);
+	*addr++ = instr;
+
+	/* ori r4,r4,(insn)@l */
+	instr2 = 0x60000000 | 0x40000 | 0x800000;
+	instr2 = instr2 | (insn & 0xffff);
+	*addr = instr2;
+}
+
+void create_load_address_insn(unsigned long val, kprobe_opcode_t *addr)
+{
+	u32 instr1, instr2, instr3, instr4, instr5;
+	/*
+	 * Optimized_kprobe structure is required as a parameter
+	 * for invoking optimized_callback() and create_return_branch()
+	 * from detour buffer. Hence need to have a 64bit immediate
+	 * load into r3.
+	 *
+	 * lis r3,(op)@highest
+	 */
+	instr1 = 0x3c000000 | 0x600000 | ((val >> 48) & 0xffff);
+	*addr++ = instr1;
+
+	/* ori r3,r3,(op)@higher */
+	instr2 = 0x60000000 | 0x30000 | 0x600000 | ((val >> 32) & 0xffff);
+	*addr++ = instr2;
+
+	/* rldicr r3,r3,32,31 */
+	instr3 = 0x78000004 | 0x30000 | 0x600000 | ((32 & 0x1f) << 11);
+	instr3 = instr3 | ((31 & 0x1f) << 6) | ((32 & 0x20) >> 4);
+	*addr++ = instr3;
+
+	/* oris r3,r3,(op)@h */
+	instr4 = 0x64000000 | 0x30000 | 0x600000 | ((val >> 16) & 0xffff);
+	*addr++ = instr4;
+
+	/* ori r3,r3,(op)@l */
+	instr5 = 0x60000000 | 0x30000 | 0x600000 | (val & 0xffff);
+	*addr = instr5;
+}
+
+int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
+{
+	kprobe_opcode_t *buff, branch, branch2, branch3;
+	long rel_chk, ret_chk;
+
+	kprobe_ppc_optinsn_slots.insn_size = MAX_OPTINSN_SIZE;
+	op->optinsn.insn = NULL;
+
+	if (!can_optimize(p))
+		return -EILSEQ;
+
+	/* Allocate instruction slot for detour buffer */
+	buff = ppc_get_optinsn_slot(op);
+	if (!buff)
+		return -ENOMEM;
+
+	/*
+	 * OPTPROBE uses a 'b' instruction to branch to optinsn.insn.
+	 *
+	 * The target address has to be relatively nearby, to permit use
+	 * of a branch instruction in powerpc, because the address is
+	 * specified in an immediate field in the instruction opcode
+	 * itself, ie 24 bits in the opcode specify the address.
+	 * Therefore the address gap should be within 32MB on either
+	 * side of the current instruction.
+	 */
+	rel_chk = (long)buff - (unsigned long)p->addr;
+	if (rel_chk < -0x2000000 || rel_chk > 0x1fffffc || rel_chk & 0x3) {
+		ppc_free_optinsn_slot(op);
+		return -ERANGE;
+	}
+	/* Check the return address is also within 32MB range */
+	ret_chk = (long)(buff + TMPL_RET_IDX) - (unsigned long)val_nip;
+	if (ret_chk < -0x2000000 || ret_chk > 0x1fffffc || ret_chk & 0x3) {
+		ppc_free_optinsn_slot(op);
+		return -ERANGE;
+	}
+
+	/* Copy the arch-specific instance from the template */
+	memcpy(buff, optprobe_template_entry,
+	       TMPL_END_IDX * sizeof(kprobe_opcode_t));
+	create_load_address_insn((unsigned long)p->addr, buff + TMPL_KP_IDX);
+	create_load_address_insn((unsigned long)op, buff + TMPL_OP1_IDX);
+	create_load_address_insn((unsigned long)op, buff + TMPL_OP2_IDX);
+
+	/* Create a branch to the optimized_callback function */
+	branch = create_branch((unsigned int *)buff + TMPL_CALL_HDLR_IDX,
+			       (unsigned long)optimized_callback + 8,
+			       BRANCH_SET_LINK);
+
+	/* Place the branch instr into the trampoline */
+	buff[TMPL_CALL_HDLR_IDX] = branch;
+	create_insn(*(p->ainsn.insn), buff + TMPL_INSN_IDX);
+
+	/* Create a branch instruction into emulate_step */
+	branch3 = create_branch((unsigned int *)buff + TMPL_EMULATE_IDX,
+				(unsigned long)emulate_step + 8,
+				BRANCH_SET_LINK);
+	buff[TMPL_EMULATE_IDX] = branch3;
+
+	/* Create a branch for jumping back */
+	branch2 = create_branch((unsigned int *)buff + TMPL_RET_BRANCH_IDX,
+				(unsigned long)create_return_branch + 8,
+				BRANCH_SET_LINK);
+	buff[TMPL_RET_BRANCH_IDX] = branch2;
+
+	op->optinsn.insn = buff;
+	smp_mb();
+	return 0;
+}
+
+int arch_prepared_optinsn(struct arch_optimized_insn *optinsn)
+{
+	return optinsn->insn;
+}
+
+/*
+ * Here, kprobe opt always replaces one instruction (4 bytes
+ * aligned and 4 bytes long). It is impossible to encounter another
+ * kprobe in the address range. So always return 0.
+ */
+int arch_check_optimized_kprobe(struct optimized_kprobe *op)
+{
+	return 0;
+}
+
+void arch_optimize_kprobes(struct list_head *oplist)
+{
+	struct optimized_kprobe *op;
+	struct optimized_kprobe *tmp;
+
+	unsigned int branch;
+
+	list_for_each_entry_safe(op, tmp, oplist, list) {
+		/*
+		 * Backup instructions which will be replaced
+		 * by jump address
+		 */
+		memcpy(op->optinsn.copied_insn, op->kp.addr,
+		       RELATIVEJUMP_SIZE);
+		branch = create_branch((unsigned int *)op->kp.addr,
+				       (unsigned long)op->optinsn.insn, 0);
+		*op->kp.addr = branch;
+		list_del_init(&op->list);
+	}
+}
+
+void arch_unoptimize_kprobe(struct optimized_kprobe *op)
+{
+	arch_arm_kprobe(&op->kp);
+}
+
+void arch_unoptimize_kprobes(struct list_head *oplist,
+			     struct list_head *done_list)
+{
+	struct optimized_kprobe *op;
+	struct optimized_kprobe *tmp;
+
+	list_for_each_entry_safe(op, tmp, oplist, list) {
+		arch_unoptimize_kprobe(op);
+		list_move(&op->list, done_list);
+	}
+}
+
+int arch_within_optimized_kprobe(struct optimized_kprobe *op,
+				 unsigned long addr)
+{
+	return 0;
+}
-- 
2.1.0
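The +/- 32MB windows checked in arch_prepare_optimized_kprobe() fall out of
the I-form branch encoding: 24 bits of immediate, shifted left by 2, give a
signed, word-aligned 26-bit byte displacement. A small helper makes the two
open-coded checks above read more directly; this is only a sketch with an
illustrative name, using exactly the bounds the patch tests (kernel context
assumed for the bool type):

	static bool is_in_branch_range(long offset)
	{
		/*
		 * 'b' encodes a 24-bit LI field concatenated with 0b00,
		 * i.e. a word-aligned signed 26-bit displacement:
		 * -0x2000000 <= offset <= 0x1fffffc.
		 */
		return offset >= -0x2000000 && offset <= 0x1fffffc &&
		       !(offset & 0x3);
	}

With that, the first check becomes, for example,
"if (!is_in_branch_range((long)buff - (unsigned long)p->addr))".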
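For a concrete view of what create_load_address_insn() emits, here is the
five-instruction sequence for a made-up pointer value 0xc000000001a2b3c4
(the value is purely illustrative), tracking r3 after each step:

	lis	r3,0xc000	# r3 = 0xffffffffc0000000 (addis sign-extends)
	ori	r3,r3,0x0000	# OR in bits 47:32 of the value (none here)
	rldicr	r3,r3,32,31	# rotate left 32, clear the low word:
				# r3 = 0xc000000000000000
	oris	r3,r3,0x01a2	# r3 = 0xc000000001a20000
	ori	r3,r3,0xb3c4	# r3 = 0xc000000001a2b3c4

This is the standard PPC64 64-bit immediate-load idiom; the rldicr in the
middle also disposes of the sign-extension junk the initial lis leaves in
the upper word.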
* [RFC PATCH v4 3/3] arch/powerpc : Enable optprobes support in powerpc
  [not found] <1464692191-1167-1-git-send-email-anju@linux.vnet.ibm.com>
  2016-05-31 10:56 ` [RFC PATCH v3 1/3] arch/powerpc : Add detour buffer support for optprobes Anju T
  2016-05-31 10:56 ` [RFC PATCH v3 2/3] arch/powerpc : optprobes for powerpc core Anju T
@ 2016-05-31 10:56 ` Anju T
  [not found] ` <201605311058.u4VAsdah009164@mx0a-001b2d01.pphosted.com>
  3 siblings, 0 replies; 4+ messages in thread

From: Anju T @ 2016-05-31 10:56 UTC
To: linux-kernel, linuxppc-dev
Cc: anju, ananth, naveen.n.rao, paulus, srikar, benh, mpe, hemant,
	mahesh, mhiramat, anjutsudhakar

Wire up the optprobes infrastructure on powerpc: select HAVE_OPTPROBES,
build the new files, and mark the feature as supported in the
arch-support matrix.

Signed-off-by: Anju T <anju@linux.vnet.ibm.com>
---
 Documentation/features/debug/optprobes/arch-support.txt | 2 +-
 arch/powerpc/Kconfig                                    | 1 +
 arch/powerpc/kernel/Makefile                            | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/Documentation/features/debug/optprobes/arch-support.txt b/Documentation/features/debug/optprobes/arch-support.txt
index b8999d8..45bc99d 100644
--- a/Documentation/features/debug/optprobes/arch-support.txt
+++ b/Documentation/features/debug/optprobes/arch-support.txt
@@ -27,7 +27,7 @@
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
-    |     powerpc: | TODO |
+    |     powerpc: |  ok  |
     |        s390: | TODO |
     |       score: | TODO |
     |          sh: | TODO |
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 7cd32c0..a87c9b1 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -104,6 +104,7 @@ config PPC
 	select HAVE_IOREMAP_PROT
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS if !CPU_LITTLE_ENDIAN
 	select HAVE_KPROBES
+	select HAVE_OPTPROBES
 	select HAVE_ARCH_KGDB
 	select HAVE_KRETPROBES
 	select HAVE_ARCH_TRACEHOOK
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 2da380f..7994e22 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -99,6 +99,7 @@ endif
 obj-$(CONFIG_BOOTX_TEXT)	+= btext.o
 obj-$(CONFIG_SMP)		+= smp.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o
+obj-$(CONFIG_OPTPROBES)		+= optprobes.o optprobes_head.o
 obj-$(CONFIG_UPROBES)		+= uprobes.o
 obj-$(CONFIG_PPC_UDBG_16550)	+= legacy_serial.o udbg_16550.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
-- 
2.1.0
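With all three patches applied, the result is observable through the generic
kprobes knobs rather than anything powerpc-specific: probes that were
successfully converted carry the [OPTIMIZED] tag in
/sys/kernel/debug/kprobes/list, and optimization can be disabled system-wide
by writing 0 to /proc/sys/debug/kprobes-optimization. Both interfaces belong
to the common kprobes core and are described in Documentation/kprobes.txt.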
* Re: [RFC PATCH v3 2/3] arch/powerpc : optprobes for powerpc core
  [not found] ` <201605311058.u4VAsdah009164@mx0a-001b2d01.pphosted.com>
@ 2016-05-31 14:55 ` Masami Hiramatsu
  0 siblings, 0 replies; 4+ messages in thread

From: Masami Hiramatsu @ 2016-05-31 14:55 UTC
To: Anju T
Cc: linux-kernel, linuxppc-dev, ananth, naveen.n.rao, paulus, srikar,
	benh, mpe, hemant, mahesh, mhiramat, anjutsudhakar

On Tue, 31 May 2016 16:26:30 +0530
Anju T <anju@linux.vnet.ibm.com> wrote:

> Instructions that can be emulated are candidates for optimization.
> Before optimizing, ensure that the gap between the allocated detour
> buffer and the probed instruction is within +/- 32MB.
>
> Signed-off-by: Anju T <anju@linux.vnet.ibm.com>
> ---
>  arch/powerpc/kernel/optprobes.c | 351 ++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 351 insertions(+)
>  create mode 100644 arch/powerpc/kernel/optprobes.c

[...]

> +static void *__ppc_alloc_insn_page(void)
> +{
> +	return &optinsn_slot;
> +}
> +
> +static void *__ppc_free_insn_page(void *page __maybe_unused)
> +{
> +	return;
> +}

Hmm, you should not return optinsn_slot twice or more, because it
actually doesn't allocate memory but just returns the reserved memory
area.

So, it should be something like this;

static bool insn_page_in_use;

static void *__ppc_alloc_insn_page(void)
{
	if (insn_page_in_use)
		return NULL;
	insn_page_in_use = true;
	return &optinsn_slot;
}

static void *__ppc_free_insn_page(void *page __maybe_unused)
{
	insn_page_in_use = false;
}

[...]

> +	/* Check the return address is also within 32MB range */
> +	ret_chk = (long)(buff + TMPL_RET_IDX) - (unsigned long)val_nip;

No, please don't pass the "regs->nip" via a hidden channel like this
val_nip static variable. Instead, you should run emulate_step() again
here, or, at least, you must keep the address and compare it.
I recommend the former.

Thank you,

-- 
Masami Hiramatsu <mhiramat@kernel.org>
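One way to act on the emulate_step() comment above is to drop the val_nip
static entirely and let can_optimize() hand the post-emulation NIP back
through an out-parameter. A minimal sketch under that assumption follows;
the signature change is illustrative, not part of the posted series, and
using a stack-allocated pt_regs also sidesteps the kmalloc() in the posted
version, which is never freed and whose -ENOMEM return would be treated as
"optimizable" by the truthiness test in the caller:

	static int can_optimize(struct kprobe *p, unsigned long *nip)
	{
		struct pt_regs regs;

		/* Don't optimize the kprobe placed by kretprobe */
		if ((unsigned long)p->addr ==
		    (unsigned long)&kretprobe_trampoline)
			return 0;

		memcpy(&regs, current_pt_regs(), sizeof(regs));
		regs.nip = (unsigned long)p->addr;

		/* Ensure the instruction can be emulated */
		if (emulate_step(&regs, *p->ainsn.insn) != 1)
			return 0;

		*nip = regs.nip;	/* where execution resumes */
		return 1;
	}

	/* caller side, in arch_prepare_optimized_kprobe(): */
	unsigned long nip;

	if (!can_optimize(p, &nip))
		return -EILSEQ;
	/* ... */
	ret_chk = (long)(buff + TMPL_RET_IDX) - (long)nip;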
Thread overview: 4+ messages
  [not found] <1464692191-1167-1-git-send-email-anju@linux.vnet.ibm.com>
  2016-05-31 10:56 ` [RFC PATCH v3 1/3] arch/powerpc : Add detour buffer support for optprobes Anju T
  2016-05-31 10:56 ` [RFC PATCH v3 2/3] arch/powerpc : optprobes for powerpc core Anju T
  2016-05-31 10:56 ` [RFC PATCH v4 3/3] arch/powerpc : Enable optprobes support in powerpc Anju T
  [not found] ` <201605311058.u4VAsdah009164@mx0a-001b2d01.pphosted.com>
    2016-05-31 14:55 ` [RFC PATCH v3 2/3] arch/powerpc : optprobes for powerpc core Masami Hiramatsu