All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jason Baron <jbaron@redhat.com>
To: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>,
	Ingo Molnar <mingo@elte.hu>,
	Ananth N Mavinakayanahalli <ananth@in.ibm.com>,
	lkml <linux-kernel@vger.kernel.org>,
	systemtap <systemtap@sources.redhat.com>,
	DLE <dle-develop@lists.sourceforge.net>,
	Jim Keniston <jkenisto@us.ibm.com>,
	Srikar Dronamraju <srikar@linux.vnet.ibm.com>,
	Christoph Hellwig <hch@infradead.org>,
	Steven Rostedt <rostedt@goodmis.org>,
	"H. Peter Anvin" <hpa@zytor.com>,
	Anders Kaseorg <andersk@ksplice.com>,
	Tim Abbott <tabbott@ksplice.com>,
	Andi Kleen <andi@firstfloor.org>,
	Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Subject: Re: [PATCH -tip v5 07/10] kprobes/x86: Support kprobes jump optimization on x86
Date: Tue, 24 Nov 2009 11:27:09 -0500	[thread overview]
Message-ID: <20091124162708.GA29995@redhat.com> (raw)
In-Reply-To: <20091123232211.22071.58974.stgit@dhcp-100-2-132.bos.redhat.com>

On Mon, Nov 23, 2009 at 06:22:11PM -0500, Masami Hiramatsu wrote:
> Introduce x86 arch-specific optimization code, which supports both of
> x86-32 and x86-64.
> 
> This code also supports safety checking, which decodes whole of a function
> in which probe is inserted, and checks following conditions before
> optimization:
>  - The optimized instructions which will be replaced by a jump instruction
>    don't straddle the function boundary.
>  - There is no indirect jump instruction, because it may jump into
>    the address range which is replaced by the jump operand.
>  - There is no jump/loop instruction which jumps into the address range
>    which is replaced by jump operand.
>  - Don't optimize a kprobe if it is in a function into which fixup code
>    will jump.
> 
> This uses stop_machine() for cross-modifying code from int3 to jump.
> It doesn't allow us to modify code on NMI/SMI path. However, since
> kprobes itself doesn't support NMI/SMI code probing, it's not a
> problem.
> 
> Changes in v5:
>  - Introduce stop_machine-based jump replacing.
> 
> Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
> Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
> Cc: Ingo Molnar <mingo@elte.hu>
> Cc: Jim Keniston <jkenisto@us.ibm.com>
> Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
> Cc: Christoph Hellwig <hch@infradead.org>
> Cc: Steven Rostedt <rostedt@goodmis.org>
> Cc: Frederic Weisbecker <fweisbec@gmail.com>
> Cc: H. Peter Anvin <hpa@zytor.com>
> Cc: Anders Kaseorg <andersk@ksplice.com>
> Cc: Tim Abbott <tabbott@ksplice.com>
> Cc: Andi Kleen <andi@firstfloor.org>
> Cc: Jason Baron <jbaron@redhat.com>
> Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
> ---
> 
>  arch/x86/Kconfig               |    1 
>  arch/x86/include/asm/kprobes.h |   29 +++
>  arch/x86/kernel/kprobes.c      |  457 ++++++++++++++++++++++++++++++++++++++--
>  3 files changed, 465 insertions(+), 22 deletions(-)
> 
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 17abcfa..af0313e 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -31,6 +31,7 @@ config X86
>  	select ARCH_WANT_FRAME_POINTERS
>  	select HAVE_DMA_ATTRS
>  	select HAVE_KRETPROBES
> +	select HAVE_OPTPROBES
>  	select HAVE_FTRACE_MCOUNT_RECORD
>  	select HAVE_DYNAMIC_FTRACE
>  	select HAVE_FUNCTION_TRACER
> diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
> index eaec8ea..4ffa345 100644
> --- a/arch/x86/include/asm/kprobes.h
> +++ b/arch/x86/include/asm/kprobes.h
> @@ -33,6 +33,9 @@ struct kprobe;
>  typedef u8 kprobe_opcode_t;
>  #define BREAKPOINT_INSTRUCTION	0xcc
>  #define RELATIVEJUMP_OPCODE 0xe9
> +#define RELATIVEJUMP_SIZE 5
> +#define RELATIVECALL_OPCODE 0xe8
> +#define RELATIVE_ADDR_SIZE 4
>  #define MAX_INSN_SIZE 16
>  #define MAX_STACK_SIZE 64
>  #define MIN_STACK_SIZE(ADDR)					       \
> @@ -44,6 +47,17 @@ typedef u8 kprobe_opcode_t;
>  
>  #define flush_insn_slot(p)	do { } while (0)
>  
> +/* optinsn template addresses */
> +extern kprobe_opcode_t optprobe_template_entry;
> +extern kprobe_opcode_t optprobe_template_val;
> +extern kprobe_opcode_t optprobe_template_call;
> +extern kprobe_opcode_t optprobe_template_end;
> +#define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + RELATIVE_ADDR_SIZE)
> +#define MAX_OPTINSN_SIZE 				\
> +	(((unsigned long)&optprobe_template_end -	\
> +	  (unsigned long)&optprobe_template_entry) +	\
> +	 MAX_OPTIMIZED_LENGTH + RELATIVEJUMP_SIZE)
> +
>  extern const int kretprobe_blacklist_size;
>  
>  void arch_remove_kprobe(struct kprobe *p);
> @@ -64,6 +78,21 @@ struct arch_specific_insn {
>  	int boostable;
>  };
>  
> +struct arch_optimized_insn {
> +	/* copy of the original instructions */
> +	kprobe_opcode_t copied_insn[RELATIVE_ADDR_SIZE];
> +	/* detour code buffer */
> +	kprobe_opcode_t *insn;
> +	/* the size of instructions copied to detour code buffer */
> +	size_t size;
> +};
> +
> +/* Return true (!0) if optinsn is prepared for optimization. */
> +static inline int arch_prepared_optinsn(struct arch_optimized_insn *optinsn)
> +{
> +	return optinsn->size;
> +}
> +
>  struct prev_kprobe {
>  	struct kprobe *kp;
>  	unsigned long status;
> diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
> index 73ac21e..6d81c11 100644
> --- a/arch/x86/kernel/kprobes.c
> +++ b/arch/x86/kernel/kprobes.c
> @@ -49,6 +49,7 @@
>  #include <linux/module.h>
>  #include <linux/kdebug.h>
>  #include <linux/kallsyms.h>
> +#include <linux/stop_machine.h>
>  
>  #include <asm/cacheflush.h>
>  #include <asm/desc.h>
> @@ -106,16 +107,21 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = {
>  };
>  const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
>  
> -/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
> -static void __kprobes set_jmp_op(void *from, void *to)
> +static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
>  {
> -	struct __arch_jmp_op {
> -		char op;
> +	struct __arch_relative_insn {
> +		u8 op;
>  		s32 raddr;
> -	} __attribute__((packed)) * jop;
> -	jop = (struct __arch_jmp_op *)from;
> -	jop->raddr = (s32)((long)(to) - ((long)(from) + 5));
> -	jop->op = RELATIVEJUMP_OPCODE;
> +	} __attribute__((packed)) *insn;
> +	insn = (struct __arch_relative_insn *)from;
> +	insn->raddr = (s32)((long)(to) - ((long)(from) + 5));
> +	insn->op = op;
> +}
> +
> +/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
> +static void __kprobes synthesize_reljump(void *from, void *to)
> +{
> +	__synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE);
>  }
>  
>  /*
> @@ -202,7 +208,7 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
>  	/*
>  	 *  Basically, kp->ainsn.insn has an original instruction.
>  	 *  However, RIP-relative instruction can not do single-stepping
> -	 *  at different place, fix_riprel() tweaks the displacement of
> +	 *  at different place, __copy_instruction() tweaks the displacement of
>  	 *  that instruction. In that case, we can't recover the instruction
>  	 *  from the kp->ainsn.insn.
>  	 *
> @@ -284,21 +290,37 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
>  }
>  
>  /*
> - * Adjust the displacement if the instruction uses the %rip-relative
> - * addressing mode.
> + * Copy an instruction and adjust the displacement if the instruction
> + * uses the %rip-relative addressing mode.
>   * If it does, Return the address of the 32-bit displacement word.
>   * If not, return null.
>   * Only applicable to 64-bit x86.
>   */
> -static void __kprobes fix_riprel(struct kprobe *p)
> +static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover)
>  {
> -#ifdef CONFIG_X86_64
>  	struct insn insn;
> -	kernel_insn_init(&insn, p->ainsn.insn);
> +	int ret;
> +	kprobe_opcode_t buf[MAX_INSN_SIZE];
> +
> +	kernel_insn_init(&insn, src);
> +	if (recover) {
> +		insn_get_opcode(&insn);
> +		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
> +			ret = recover_probed_instruction(buf,
> +							 (unsigned long)src);
> +			if (ret)
> +				return 0;
> +			kernel_insn_init(&insn, buf);
> +		}
> +	}
> +	insn_get_length(&insn);
> +	memcpy(dest, insn.kaddr, insn.length);
>  
> +#ifdef CONFIG_X86_64
>  	if (insn_rip_relative(&insn)) {
>  		s64 newdisp;
>  		u8 *disp;
> +		kernel_insn_init(&insn, dest);
>  		insn_get_displacement(&insn);
>  		/*
>  		 * The copied instruction uses the %rip-relative addressing
> @@ -312,20 +334,23 @@ static void __kprobes fix_riprel(struct kprobe *p)
>  		 * extension of the original signed 32-bit displacement would
>  		 * have given.
>  		 */
> -		newdisp = (u8 *) p->addr + (s64) insn.displacement.value -
> -			  (u8 *) p->ainsn.insn;
> +		newdisp = (u8 *) src + (s64) insn.displacement.value -
> +			  (u8 *) dest;
>  		BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check.  */
> -		disp = (u8 *) p->ainsn.insn + insn_offset_displacement(&insn);
> +		disp = (u8 *) dest + insn_offset_displacement(&insn);
>  		*(s32 *) disp = (s32) newdisp;
>  	}
>  #endif
> +	return insn.length;
>  }
>  
>  static void __kprobes arch_copy_kprobe(struct kprobe *p)
>  {
> -	memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
> -
> -	fix_riprel(p);
> +	/*
> +	 * Copy an instruction without recovering int3, because it will be
> +	 * put by another subsystem.
> +	 */
> +	__copy_instruction(p->ainsn.insn, p->addr, 0);
>  
>  	if (can_boost(p->addr))
>  		p->ainsn.boostable = 0;
> @@ -414,9 +439,20 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
>  	*sara = (unsigned long) &kretprobe_trampoline;
>  }
>  
> +#ifdef CONFIG_OPTPROBES
> +static int  __kprobes setup_detour_execution(struct kprobe *p,
> +					     struct pt_regs *regs,
> +					     int reenter);
> +#else
> +#define setup_detour_execution(p, regs, reenter) (0)
> +#endif
> +
>  static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
>  				       struct kprobe_ctlblk *kcb, int reenter)
>  {
> +	if (setup_detour_execution(p, regs, reenter))
> +		return;
> +
>  #if !defined(CONFIG_PREEMPT) || defined(CONFIG_FREEZER)
>  	if (p->ainsn.boostable == 1 && !p->post_handler) {
>  		/* Boost up -- we can execute copied instructions directly */
> @@ -812,8 +848,8 @@ static void __kprobes resume_execution(struct kprobe *p,
>  			 * These instructions can be executed directly if it
>  			 * jumps back to correct address.
>  			 */
> -			set_jmp_op((void *)regs->ip,
> -				   (void *)orig_ip + (regs->ip - copy_ip));
> +			synthesize_reljump((void *)regs->ip,
> +				(void *)orig_ip + (regs->ip - copy_ip));
>  			p->ainsn.boostable = 1;
>  		} else {
>  			p->ainsn.boostable = -1;
> @@ -1040,6 +1076,383 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
>  	return 0;
>  }
>  
> +
> +#ifdef CONFIG_OPTPROBES
> +
> +/* Insert a call instruction at address 'from', which calls address 'to'.*/
> +static void __kprobes synthesize_relcall(void *from, void *to)
> +{
> +	__synthesize_relative_insn(from, to, RELATIVECALL_OPCODE);
> +}
> +
> +/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
> +static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr,
> +					  unsigned long val)
> +{
> +#ifdef CONFIG_X86_64
> +	*addr++ = 0x48;
> +	*addr++ = 0xbf;
> +#else
> +	*addr++ = 0xb8;
> +#endif
> +	*(unsigned long *)addr = val;
> +}
> +
> +void __kprobes kprobes_optinsn_template_holder(void)
> +{
> +	asm volatile (
> +			".global optprobe_template_entry\n"
> +			"optprobe_template_entry: \n"
> +#ifdef CONFIG_X86_64
> +			/* We don't bother saving the ss register */
> +			"	pushq %rsp\n"
> +			"	pushfq\n"
> +			SAVE_REGS_STRING
> +			"	movq %rsp, %rsi\n"
> +			".global optprobe_template_val\n"
> +			"optprobe_template_val: \n"
> +			ASM_NOP5
> +			ASM_NOP5
> +			".global optprobe_template_call\n"
> +			"optprobe_template_call: \n"
> +			ASM_NOP5
> +			/* Move flags to rsp */
> +			"	movq 144(%rsp), %rdx\n"
> +			"	movq %rdx, 152(%rsp)\n"
> +			RESTORE_REGS_STRING
> +			/* Skip flags entry */
> +			"	addq $8, %rsp\n"
> +			"	popfq\n"
> +#else /* CONFIG_X86_32 */
> +			"	pushf\n"
> +			SAVE_REGS_STRING
> +			"	movl %esp, %edx\n"
> +			".global optprobe_template_val\n"
> +			"optprobe_template_val: \n"
> +			ASM_NOP5
> +			".global optprobe_template_call\n"
> +			"optprobe_template_call: \n"
> +			ASM_NOP5
> +			RESTORE_REGS_STRING
> +			"	addl $4, %esp\n"	/* skip cs */
> +			"	popf\n"
> +#endif
> +			".global optprobe_template_end\n"
> +			"optprobe_template_end: \n");
> +}
> +
> +#define TMPL_MOVE_IDX \
> +	((long)&optprobe_template_val - (long)&optprobe_template_entry)
> +#define TMPL_CALL_IDX \
> +	((long)&optprobe_template_call - (long)&optprobe_template_entry)
> +#define TMPL_END_IDX \
> +	((long)&optprobe_template_end - (long)&optprobe_template_entry)
> +
> +#define INT3_SIZE sizeof(kprobe_opcode_t)
> +
> +/* Optimized kprobe call back function: called from optinsn */
> +static void __kprobes optimized_callback(struct optimized_kprobe *op,
> +					 struct pt_regs *regs)
> +{
> +	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
> +
> +	preempt_disable();
> +	if (kprobe_running()) {
> +		kprobes_inc_nmissed_count(&op->kp);
> +	} else {
> +		/* Save skipped registers */
> +#ifdef CONFIG_X86_64
> +		regs->cs = __KERNEL_CS;
> +#else
> +		regs->cs = __KERNEL_CS | get_kernel_rpl();
> +		regs->gs = 0;
> +#endif
> +		regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
> +		regs->orig_ax = ~0UL;
> +
> +		__get_cpu_var(current_kprobe) = &op->kp;
> +		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
> +		opt_pre_handler(&op->kp, regs);
> +		__get_cpu_var(current_kprobe) = NULL;
> +	}
> +	preempt_enable_no_resched();
> +}
> +
> +static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
> +{
> +	int len = 0, ret;
> +	while (len < RELATIVEJUMP_SIZE) {
> +		ret = __copy_instruction(dest + len, src + len, 1);
> +		if (!ret || !can_boost(dest + len))
> +			return -EINVAL;
> +		len += ret;
> +	}
> +	return len;
> +}
> +
> +/* Check whether insn is indirect jump */
> +static int __kprobes insn_is_indirect_jump(struct insn *insn)
> +{
> +	return (insn->opcode.bytes[0] == 0xff ||
> +		insn->opcode.bytes[0] == 0xea);
> +}
> +
> +/* Check whether insn jumps into specified address range */
> +static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
> +{
> +	unsigned long target = 0;
> +	switch (insn->opcode.bytes[0]) {
> +	case 0xe0:	/* loopne */
> +	case 0xe1:	/* loope */
> +	case 0xe2:	/* loop */
> +	case 0xe3:	/* jcxz */
> +	case 0xe9:	/* near relative jump */
> +	case 0xeb:	/* short relative jump */
> +		break;
> +	case 0x0f:
> +		if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
> +			break;
> +		return 0;
> +	default:
> +		if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
> +			break;
> +		return 0;
> +	}
> +	target = (unsigned long)insn->next_byte + insn->immediate.value;
> +	return (start <= target && target <= start + len);
> +}
> +
> +/* Decode whole function to ensure any instructions don't jump into target */
> +static int __kprobes can_optimize(unsigned long paddr)
> +{
> +	int ret;
> +	unsigned long addr, size = 0, offset = 0;
> +	struct insn insn;
> +	kprobe_opcode_t buf[MAX_INSN_SIZE];
> +	/* Dummy buffers for lookup_symbol_attrs */
> +	static char __dummy_buf[KSYM_NAME_LEN];
> +
> +	/* Lookup symbol including addr */
> +	if (!kallsyms_lookup(paddr, &size, &offset, NULL, __dummy_buf))
> +		return 0;
> +
> +	/* Check there is enough space for a relative jump. */
> +	if (size - offset < RELATIVEJUMP_SIZE)
> +		return 0;
> +
> +	/* Decode instructions */
> +	addr = paddr - offset;
> +	while (addr < paddr - offset + size) { /* Decode until function end */
> +		if (search_exception_tables(addr))
> +			/*
> +			 * Since some fixup code will jumps into this function,
> +			 * we can't optimize kprobe in this function.
> +			 */
> +			return 0;
> +		kernel_insn_init(&insn, (void *)addr);
> +		insn_get_opcode(&insn);
> +		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
> +			ret = recover_probed_instruction(buf, addr);
> +			if (ret)
> +				return 0;
> +			kernel_insn_init(&insn, buf);
> +		}
> +		insn_get_length(&insn);
> +		/* Recover address */
> +		insn.kaddr = (void *)addr;
> +		insn.next_byte = (void *)(addr + insn.length);
> +		/* Check any instructions don't jump into target */
> +		if (insn_is_indirect_jump(&insn) ||
> +		    insn_jump_into_range(&insn, paddr + INT3_SIZE,
> +					 RELATIVE_ADDR_SIZE))
> +			return 0;
> +		addr += insn.length;
> +	}
> +
> +	return 1;
> +}
> +
> +/* Check optimized_kprobe can actually be optimized. */
> +int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op)
> +{
> +	int i;
> +	for (i = 1; i < op->optinsn.size; i++)
> +		if (get_kprobe(op->kp.addr + i))
> +			return -EEXIST;
> +	return 0;
> +}
> +
> +/* Check the addr is within the optimized instructions. */
> +int __kprobes arch_within_optimized_kprobe(struct optimized_kprobe *op,
> +					   unsigned long addr)
> +{
> +	return ((unsigned long)op->kp.addr <= addr &&
> +		(unsigned long)op->kp.addr + op->optinsn.size > addr);
> +}
> +
> +/* Free optimized instruction slot */
> +static __kprobes
> +void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
> +{
> +	if (op->optinsn.insn) {
> +		free_optinsn_slot(op->optinsn.insn, dirty);
> +		op->optinsn.insn = NULL;
> +		op->optinsn.size = 0;
> +	}
> +}
> +
> +void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op)
> +{
> +	__arch_remove_optimized_kprobe(op, 1);
> +}
> +
> +/*
> + * Copy replacing target instructions
> + * Target instructions MUST be relocatable (checked inside)
> + */
> +int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
> +{
> +	u8 *buf;
> +	int ret;
> +
> +	if (!can_optimize((unsigned long)op->kp.addr))
> +		return -EILSEQ;
> +
> +	op->optinsn.insn = get_optinsn_slot();
> +	if (!op->optinsn.insn)
> +		return -ENOMEM;
> +
> +	buf = (u8 *)op->optinsn.insn;
> +
> +	/* Copy instructions into the out-of-line buffer */
> +	ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr);
> +	if (ret < 0) {
> +		__arch_remove_optimized_kprobe(op, 0);
> +		return ret;
> +	}
> +	op->optinsn.size = ret;
> +
> +	/* Backup instructions which will be replaced by jump address */
> +	memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
> +	       RELATIVE_ADDR_SIZE);
> +
> +	/* Copy arch-dep-instance from template */
> +	memcpy(buf, &optprobe_template_entry, TMPL_END_IDX);
> +
> +	/* Set probe information */
> +	synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);
> +
> +	/* Set probe function call */
> +	synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback);
> +
> +	/* Set returning jmp instruction at the tail of out-of-line buffer */
> +	synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
> +			   (u8 *)op->kp.addr + op->optinsn.size);
> +
> +	flush_icache_range((unsigned long) buf,
> +			   (unsigned long) buf + TMPL_END_IDX +
> +			   op->optinsn.size + RELATIVEJUMP_SIZE);
> +	return 0;
> +}
> +
> +/*
> + * Cross-modifying kernel text with stop_machine().
> + * This code originally comes from immediate value.
> + * This does _not_ protect against NMI and MCE. However,
> + * since kprobes can't probe NMI/MCE handler, it is OK for kprobes.
> + */
> +static atomic_t stop_machine_first;
> +static int wrote_text;
> +
> +struct text_poke_param {
> +	void *addr;
> +	const void *opcode;
> +	size_t len;
> +};
> +
> +static int __kprobes stop_machine_multibyte_poke(void *data)
> +{
> +	struct text_poke_param *tpp = data;
> +
> +	if (atomic_dec_and_test(&stop_machine_first)) {
> +		text_poke(tpp->addr, tpp->opcode, tpp->len);
> +		smp_wmb();	/* Make sure other cpus see that this has run */
> +		wrote_text = 1;
> +	} else {
> +		while (!wrote_text)
> +			smp_rmb();
> +		sync_core();
> +	}
> +
> +	flush_icache_range((unsigned long)tpp->addr,
> +			   (unsigned long)tpp->addr + tpp->len);
> +	return 0;
> +}
> +
> +static void *__kprobes __multibyte_poke(void *addr, const void *opcode,
> +					size_t len)
> +{
> +	struct text_poke_param tpp;
> +
> +	tpp.addr = addr;
> +	tpp.opcode = opcode;
> +	tpp.len = len;
> +	atomic_set(&stop_machine_first, 1);
> +	wrote_text = 0;
> +	stop_machine(stop_machine_multibyte_poke, (void *)&tpp, NULL);
> +	return addr;
> +}

As you know, I'd like to have the jump label optimization for
tracepoints make use of this '__multibyte_poke()' interface. So perhaps
it can be moved to arch/x86/kernel/alternative.c. This is where 'text_poke()'
and friends currently live.

Also, with multiple users we don't want to trample over each other's code
patching. Thus, each sub-system could register some type of
'is_reserved()' callback, and then we can call all these callbacks from
the '__multibyte_poke()' routine before we do any patching, to make sure
that we aren't trampling on each other's code. After a successful
patching, each sub-system can update its reserved set of code as
appropriate. I can code a prototype here, if this makes sense.

thanks,

-Jason


  parent reply	other threads:[~2009-11-24 16:29 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-11-23 23:21 [PATCH -tip v5 00/10] kprobes: Kprobes jump optimization support Masami Hiramatsu
2009-11-23 23:21 ` [PATCH -tip v5 01/10] kprobes/x86: Cleanup RELATIVEJUMP_INSTRUCTION to RELATIVEJUMP_OPCODE Masami Hiramatsu
2009-11-23 23:21 ` [PATCH -tip v5 02/10] kprobes: Introduce generic insn_slot framework Masami Hiramatsu
2009-11-23 23:21 ` [PATCH -tip v5 03/10] kprobes: Introduce kprobes jump optimization Masami Hiramatsu
2009-11-24  2:44   ` Frederic Weisbecker
2009-11-24  3:31     ` Frederic Weisbecker
2009-11-24 15:34       ` Masami Hiramatsu
2009-11-24 20:14         ` Frederic Weisbecker
2009-11-24 20:59           ` Masami Hiramatsu
2009-11-25 21:08             ` Steven Rostedt
2009-11-25 21:30               ` Masami Hiramatsu
2009-11-24 21:08           ` H. Peter Anvin
2009-11-24 15:34     ` Masami Hiramatsu
2009-11-24 19:45       ` Frederic Weisbecker
2009-11-24 21:15         ` Masami Hiramatsu
2009-11-23 23:21 ` [PATCH -tip v5 04/10] kprobes: Jump optimization sysctl interface Masami Hiramatsu
2009-11-23 23:21 ` [PATCH -tip v5 05/10] kprobes/x86: Boost probes when reentering Masami Hiramatsu
2009-11-23 23:22 ` [PATCH -tip v5 06/10] kprobes/x86: Cleanup save/restore registers Masami Hiramatsu
2009-11-24  2:51   ` Frederic Weisbecker
2009-11-24 15:39     ` Masami Hiramatsu
2009-11-24 20:19       ` Frederic Weisbecker
2009-11-24 15:40     ` Frank Ch. Eigler
2009-11-24 20:20       ` Frederic Weisbecker
2009-11-23 23:22 ` [PATCH -tip v5 07/10] kprobes/x86: Support kprobes jump optimization on x86 Masami Hiramatsu
2009-11-24  3:14   ` Frederic Weisbecker
2009-11-24 16:27   ` Jason Baron [this message]
2009-11-24 17:46     ` Masami Hiramatsu
2009-11-25 16:12       ` Masami Hiramatsu
2009-11-24 16:35   ` H. Peter Anvin
2009-11-24 17:00     ` Masami Hiramatsu
2009-11-23 23:22 ` [PATCH -tip v5 08/10] kprobes: Add documents of jump optimization Masami Hiramatsu
2009-11-23 23:22 ` [PATCH -tip v5 09/10] [RFC] x86: Introduce generic jump patching without stop_machine Masami Hiramatsu
2009-11-23 23:22 ` [PATCH -tip v5 10/10] [RFC] kprobes/x86: Use text_poke_fixup() for jump optimization Masami Hiramatsu
2009-11-24  2:03 ` [PATCH -tip v5 00/10] kprobes: Kprobes jump optimization support Frederic Weisbecker
2009-11-24  3:20   ` Frederic Weisbecker
2009-11-24  7:52     ` Ingo Molnar
2009-11-24 16:06       ` Masami Hiramatsu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20091124162708.GA29995@redhat.com \
    --to=jbaron@redhat.com \
    --cc=ananth@in.ibm.com \
    --cc=andersk@ksplice.com \
    --cc=andi@firstfloor.org \
    --cc=dle-develop@lists.sourceforge.net \
    --cc=fweisbec@gmail.com \
    --cc=hch@infradead.org \
    --cc=hpa@zytor.com \
    --cc=jkenisto@us.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mathieu.desnoyers@polymtl.ca \
    --cc=mhiramat@redhat.com \
    --cc=mingo@elte.hu \
    --cc=rostedt@goodmis.org \
    --cc=srikar@linux.vnet.ibm.com \
    --cc=systemtap@sources.redhat.com \
    --cc=tabbott@ksplice.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.