All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] MIPS: tracing: Optimize the implementation
@ 2010-03-11 18:07 Wu Zhangjin
  2010-03-12  8:50 ` Thomas Bogendoerfer
       [not found] ` <4B993B32.7000006@caviumnetworks.com>
  0 siblings, 2 replies; 8+ messages in thread
From: Wu Zhangjin @ 2010-03-11 18:07 UTC (permalink / raw)
  To: Ralf Baechle; +Cc: Steven Rostedt, linux-mips, David Daney, Wu Zhangjin

From: Wu Zhangjin <wuzhangjin@gmail.com>

(All of the stuff have been tested for kernel and module, including static
 function tracer, dynamic function tracer and function graph tracer.)

Changes:

  + Reduce the runtime overhead

    o Uses macros instead of variables for the fixed instructions to
    reduce memory access

    o Moves the Initilization of the instructions which will be fixed
    after linking from ftrace_make_nop/ftrace_make_call to
    ftrace_dyn_arch_init() and encodes the instructions through
    uasm(arch/mips/include/asm/uasm.h).

    o A common macro in_module() is defined to determine which space the
    instruction pointer stays in and several related conditional
    statements are converted to conditional operator(? :) statement.

  + Cleanup the whole stuff

    Lots of comments/macros have been cleaned up to let it look better.

Signed-off-by: Wu Zhangjin <wuzhangjin@gmail.com>
---
 arch/mips/include/asm/ftrace.h |   10 ++-
 arch/mips/kernel/ftrace.c      |  201 ++++++++++++++++++++++------------------
 2 files changed, 119 insertions(+), 92 deletions(-)

diff --git a/arch/mips/include/asm/ftrace.h b/arch/mips/include/asm/ftrace.h
index ce35c9a..e58f9ff 100644
--- a/arch/mips/include/asm/ftrace.h
+++ b/arch/mips/include/asm/ftrace.h
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive for
  * more details.
  *
- * Copyright (C) 2009 DSLab, Lanzhou University, China
+ * Copyright (C) 2009, 2010 DSLab, Lanzhou University, China
  * Author: Wu Zhangjin <wuzhangjin@gmail.com>
  */
 
@@ -19,6 +19,12 @@
 extern void _mcount(void);
 #define mcount _mcount
 
+/*
+ * If the Instruction Pointer is in module space (0xc0000000), return ture;
+ * otherwise, it is in kernel space (0x80000000), return false.
+ */
+#define in_module(ip) (unlikely((ip) & 0x40000000))
+
 #define safe_load(load, src, dst, error)		\
 do {							\
 	asm volatile (					\
@@ -83,8 +89,8 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
 
 struct dyn_arch_ftrace {
 };
+#endif /* CONFIG_DYNAMIC_FTRACE */
 
-#endif /*  CONFIG_DYNAMIC_FTRACE */
 #endif /* __ASSEMBLY__ */
 #endif /* CONFIG_FUNCTION_TRACER */
 #endif /* _ASM_MIPS_FTRACE_H */
diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c
index e9e64e0..c38e3d9 100644
--- a/arch/mips/kernel/ftrace.c
+++ b/arch/mips/kernel/ftrace.c
@@ -2,7 +2,7 @@
  * Code for replacing ftrace calls with jumps.
  *
  * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
- * Copyright (C) 2009 DSLab, Lanzhou University, China
+ * Copyright (C) 2009, 2010 DSLab, Lanzhou University, China
  * Author: Wu Zhangjin <wuzhangjin@gmail.com>
  *
  * Thanks goes to Steven Rostedt for writing the original x86 version.
@@ -15,15 +15,50 @@
 #include <asm/cacheflush.h>
 #include <asm/asm.h>
 #include <asm/asm-offsets.h>
+#include <asm/uasm.h>
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 
-#define JAL 0x0c000000		/* jump & link: ip --> ra, jump to target */
-#define ADDR_MASK 0x03ffffff	/*  op_code|addr : 31...26|25 ....0 */
-#define jump_insn_encode(op_code, addr) \
-	((unsigned int)((op_code) | (((addr) >> 2) & ADDR_MASK)))
+/* Before linking, the following instructions are fixed. */
+#ifdef CONFIG_CPU_LOONGSON2F
+/* insn: or at, at, zero */
+#define INSN_NOP 0x00200825
+#else
+/* insn: nop */
+#define INSN_NOP 0x00000000
+#endif
+
+/* insn: b 1f; offset = (16 >> 2) */
+#define INSN_B_1F 0x10000004
+
+/* After linking, the following instructions are fixed. */
+static unsigned int insn_jal_ftrace_caller __read_mostly;
+static unsigned int insn_lui_v1_hi16_mcount __read_mostly;
+static unsigned int insn_j_ftrace_graph_caller __read_mostly;
 
-static unsigned int ftrace_nop = 0x00000000;
+/* The following instructions are encoded in the run-time */
+/* insn: jal func; op_code|addr : 31...26|25 ....0 */
+#define jal(addr) \
+	((unsigned int)(0x0c000000 | (((addr) >> 2) & 0x03ffffff)))
+
+static inline void ftrace_dyn_arch_init_insns(void)
+{
+	u32 *buf;
+	unsigned int v1;
+
+	/* lui v1, hi16_mcount */
+	v1 = 3;
+	buf = (u32 *)&insn_lui_v1_hi16_mcount;
+	UASM_i_LA_mostly(&buf, v1, MCOUNT_ADDR);
+
+	/* jal (ftrace_caller + 8), jump over the first two instruction */
+	buf = (u32 *)&insn_jal_ftrace_caller;
+	uasm_i_jal(&buf, (FTRACE_ADDR + 8));
+
+	/* j ftrace_graph_caller */
+	buf = (u32 *)&insn_j_ftrace_graph_caller;
+	uasm_i_j(&buf, (unsigned long)ftrace_graph_caller);
+}
 
 static int ftrace_modify_code(unsigned long ip, unsigned int new_code)
 {
@@ -31,7 +66,6 @@ static int ftrace_modify_code(unsigned long ip, unsigned int new_code)
 
 	/* *(unsigned int *)ip = new_code; */
 	safe_store_code(new_code, ip, faulted);
-
 	if (unlikely(faulted))
 		return -EFAULT;
 
@@ -40,117 +74,101 @@ static int ftrace_modify_code(unsigned long ip, unsigned int new_code)
 	return 0;
 }
 
-static int lui_v1;
-static int jal_mcount;
-
 int ftrace_make_nop(struct module *mod,
 		    struct dyn_ftrace *rec, unsigned long addr)
 {
 	unsigned int new;
-	int faulted;
 	unsigned long ip = rec->ip;
 
-	/* We have compiled module with -mlong-calls, but compiled the kernel
-	 * without it, we need to cope with them respectively. */
-	if (ip & 0x40000000) {
-		/* record it for ftrace_make_call */
-		if (lui_v1 == 0) {
-			/* lui_v1 = *(unsigned int *)ip; */
-			safe_load_code(lui_v1, ip, faulted);
-
-			if (unlikely(faulted))
-				return -EFAULT;
-		}
-
-		/* lui v1, hi_16bit_of_mcount        --> b 1f (0x10000004)
-		 * addiu v1, v1, low_16bit_of_mcount
-		 * move at, ra
-		 * jalr v1
-		 * nop
-		 * 				     1f: (ip + 12)
-		 */
-		new = 0x10000004;
-	} else {
-		/* record/calculate it for ftrace_make_call */
-		if (jal_mcount == 0) {
-			/* We can record it directly like this:
-			 *     jal_mcount = *(unsigned int *)ip;
-			 * Herein, jump over the first two nop instructions */
-			jal_mcount = jump_insn_encode(JAL, (MCOUNT_ADDR + 8));
-		}
-
-		/* move at, ra
-		 * jalr v1		--> nop
-		 */
-		new = ftrace_nop;
-	}
+	/*
+	 * We have compiled modules with -mlong-calls, but compiled kernel
+	 * without it, therefore, need to cope with them respectively.
+	 *
+	 * For module:
+	 *
+	 *	lui	v1, hi16_mcount		--> b	1f
+	 *	addiu	v1, v1, lo16_mcount
+	 *	move	at, ra
+	 *	jalr	v1
+	 *	 nop
+	 *					1f: (ip + 16)
+	 * For kernel:
+	 *
+	 *	move	at, ra
+	 *	jal	_mcount			--> nop
+	 *
+	 */
+	new = in_module(ip) ? INSN_B_1F : INSN_NOP;
+
 	return ftrace_modify_code(ip, new);
 }
 
-static int modified;	/* initialized as 0 by default */
-
 int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 {
 	unsigned int new;
 	unsigned long ip = rec->ip;
 
-	/* We just need to remove the "b ftrace_stub" at the fist time! */
-	if (modified == 0) {
-		modified = 1;
-		ftrace_modify_code(addr, ftrace_nop);
-	}
-	/* ip, module: 0xc0000000, kernel: 0x80000000 */
-	new = (ip & 0x40000000) ? lui_v1 : jal_mcount;
+	new = in_module(ip) ? insn_lui_v1_hi16_mcount : insn_jal_ftrace_caller;
 
 	return ftrace_modify_code(ip, new);
 }
 
-#define FTRACE_CALL_IP ((unsigned long)(&ftrace_call))
-
 int ftrace_update_ftrace_func(ftrace_func_t func)
 {
 	unsigned int new;
 
-	new = jump_insn_encode(JAL, (unsigned long)func);
+	new = jal((unsigned long)func);
 
-	return ftrace_modify_code(FTRACE_CALL_IP, new);
+	return ftrace_modify_code((unsigned long)(&ftrace_call), new);
 }
 
 int __init ftrace_dyn_arch_init(void *data)
 {
+	ftrace_dyn_arch_init_insns();
+
+	/*
+	 * We are safe to remove the "b ftrace_stub" for the current
+	 * ftrace_caller() is almost empty (only the stack operations), and
+	 * most importantly, the calling to mcount will be disabled later in
+	 * ftrace_init(), then there is no 'big' overhead. And in the future,
+	 * we are hoping the function_trace_stop is initialized as 1 then we
+	 * can eventually remove that 'useless' "b ftrace_stub" and the
+	 * following nop. Currently, although we can call ftrace_stop() to set
+	 * function_trace_stop as 1, but it will change the behavior of the
+	 * Function Tracer.
+	 */
+	ftrace_modify_code(MCOUNT_ADDR, INSN_NOP);
+
 	/* The return code is retured via data */
 	*(unsigned long *)data = 0;
 
 	return 0;
 }
-#endif				/* CONFIG_DYNAMIC_FTRACE */
+#endif	/* CONFIG_DYNAMIC_FTRACE */
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 
 extern void ftrace_graph_call(void);
-#define JMP	0x08000000	/* jump to target directly */
-#define CALL_FTRACE_GRAPH_CALLER \
-	jump_insn_encode(JMP, (unsigned long)(&ftrace_graph_caller))
 #define FTRACE_GRAPH_CALL_IP	((unsigned long)(&ftrace_graph_call))
 
 int ftrace_enable_ftrace_graph_caller(void)
 {
 	return ftrace_modify_code(FTRACE_GRAPH_CALL_IP,
-				  CALL_FTRACE_GRAPH_CALLER);
+			insn_j_ftrace_graph_caller);
 }
 
 int ftrace_disable_ftrace_graph_caller(void)
 {
-	return ftrace_modify_code(FTRACE_GRAPH_CALL_IP, ftrace_nop);
+	return ftrace_modify_code(FTRACE_GRAPH_CALL_IP, INSN_NOP);
 }
 
-#endif				/* !CONFIG_DYNAMIC_FTRACE */
+#endif	/* !CONFIG_DYNAMIC_FTRACE */
 
 #ifndef KBUILD_MCOUNT_RA_ADDRESS
-#define S_RA_SP	(0xafbf << 16)	/* s{d,w} ra, offset(sp) */
-#define S_R_SP	(0xafb0 << 16)  /* s{d,w} R, offset(sp) */
+#define S_RA_SP	0xafbf0000	/* s{d,w} ra, offset(sp) */
+#define S_R_SP		0xafb00000	/* s{d,w} R, offset(sp) */
 #define OFFSET_MASK	0xffff	/* stack offset range: 0 ~ PT_SIZE */
 
 unsigned long ftrace_get_parent_addr(unsigned long self_addr,
@@ -162,32 +180,32 @@ unsigned long ftrace_get_parent_addr(unsigned long self_addr,
 	unsigned int code;
 	int faulted;
 
-	/* in module or kernel? */
-	if (self_addr & 0x40000000) {
-		/* module: move to the instruction "lui v1, HI_16BIT_OF_MCOUNT" */
-		ip = self_addr - 20;
-	} else {
-		/* kernel: move to the instruction "move ra, at" */
-		ip = self_addr - 12;
-	}
+	/*
+	 * For module, move the ip from calling site of mcount to the
+	 * instruction "lui v1, hi_16bit_of_mcount"(offset is 20), but for
+	 * kernel, move to the instruction "move ra, at"(offset is 12)
+	 */
+	ip = self_addr - (in_module(self_addr) ? 20 : 12);
 
-	/* search the text until finding the non-store instruction or "s{d,w}
-	 * ra, offset(sp)" instruction */
+	/*
+	 * search the text until finding the non-store instruction or "s{d,w}
+	 * ra, offset(sp)" instruction
+	 */
 	do {
 		ip -= 4;
 
 		/* get the code at "ip": code = *(unsigned int *)ip; */
 		safe_load_code(code, ip, faulted);
-
 		if (unlikely(faulted))
 			return 0;
 
-		/* If we hit the non-store instruction before finding where the
+		/*
+		 * If we hit the non-store instruction before finding where the
 		 * ra is stored, then this is a leaf function and it does not
-		 * store the ra on the stack. */
+		 * store the ra on the stack.
+		 */
 		if ((code & S_R_SP) != S_R_SP)
 			return parent_addr;
-
 	} while (((code & S_RA_SP) != S_RA_SP));
 
 	sp = fp + (code & OFFSET_MASK);
@@ -211,16 +229,17 @@ unsigned long ftrace_get_parent_addr(unsigned long self_addr,
 void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
 			   unsigned long fp)
 {
+	int faulted;
 	unsigned long old;
 	struct ftrace_graph_ent trace;
 	unsigned long return_hooker = (unsigned long)
 	    &return_to_handler;
-	int faulted;
 
 	if (unlikely(atomic_read(&current->tracing_graph_pause)))
 		return;
 
-	/* "parent" is the stack address saved the return address of the caller
+	/*
+	 * "parent" is the stack address saved the return address of the caller
 	 * of _mcount.
 	 *
 	 * if the gcc < 4.5, a leaf function does not save the return address
@@ -241,12 +260,14 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
 	if (unlikely(faulted))
 		goto out;
 #ifndef KBUILD_MCOUNT_RA_ADDRESS
-	parent = (unsigned long *)ftrace_get_parent_addr(self_addr, old,
-							 (unsigned long)parent,
-							 fp);
-	/* If fails when getting the stack address of the non-leaf function's
-	 * ra, stop function graph tracer and return */
-	if (parent == 0)
+	parent = (unsigned long *)ftrace_get_parent_addr(
+			self_addr, old, (unsigned long)parent, fp);
+
+	/*
+	 * If fails on getting the stack address of the non-leaf function's ra,
+	 * stop function graph tracer and return
+	 */
+	if (unlikely(parent == 0))
 		goto out;
 #endif
 	/* *parent = return_hooker; */
@@ -272,4 +293,4 @@ out:
 	ftrace_graph_stop();
 	WARN_ON(1);
 }
-#endif				/* CONFIG_FUNCTION_GRAPH_TRACER */
+#endif	/* CONFIG_FUNCTION_GRAPH_TRACER */
-- 
1.7.0.1

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] MIPS: tracing: Optimize the implementation
  2010-03-11 18:07 [PATCH] MIPS: tracing: Optimize the implementation Wu Zhangjin
@ 2010-03-12  8:50 ` Thomas Bogendoerfer
  2010-03-12 10:11   ` Wu Zhangjin
                     ` (2 more replies)
       [not found] ` <4B993B32.7000006@caviumnetworks.com>
  1 sibling, 3 replies; 8+ messages in thread
From: Thomas Bogendoerfer @ 2010-03-12  8:50 UTC (permalink / raw)
  To: Wu Zhangjin; +Cc: Ralf Baechle, Steven Rostedt, linux-mips, David Daney

On Fri, Mar 12, 2010 at 02:07:37AM +0800, Wu Zhangjin wrote:
> +/*
> + * If the Instruction Pointer is in module space (0xc0000000), return ture;
> + * otherwise, it is in kernel space (0x80000000), return false.
> + */
> +#define in_module(ip) (unlikely((ip) & 0x40000000))
> +

looks broken for 64bit, but maybe this is a 32bit only feature...

Thomas.

-- 
Crap can work. Given enough thrust pigs will fly, but it's not necessary a
good idea.                                                [ RFC1925, 2.3 ]

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] MIPS: tracing: Optimize the implementation
  2010-03-12  8:50 ` Thomas Bogendoerfer
@ 2010-03-12 10:11   ` Wu Zhangjin
  2010-03-12 10:14   ` Ralf Baechle
  2010-03-12 16:36   ` David Daney
  2 siblings, 0 replies; 8+ messages in thread
From: Wu Zhangjin @ 2010-03-12 10:11 UTC (permalink / raw)
  To: Thomas Bogendoerfer; +Cc: Ralf Baechle, Steven Rostedt, linux-mips, David Daney

On Fri, 2010-03-12 at 09:50 +0100, Thomas Bogendoerfer wrote:
> On Fri, Mar 12, 2010 at 02:07:37AM +0800, Wu Zhangjin wrote:
> > +/*
> > + * If the Instruction Pointer is in module space (0xc0000000), return ture;
> > + * otherwise, it is in kernel space (0x80000000), return false.
> > + */
> > +#define in_module(ip) (unlikely((ip) & 0x40000000))
> > +
> 
> looks broken for 64bit, but maybe this is a 32bit only feature...

Actually, this works well on my 64bit YeeLoong laptop ;)

But we should find another better solution.

Regards,
	Wu Zhangjin

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] MIPS: tracing: Optimize the implementation
  2010-03-12  8:50 ` Thomas Bogendoerfer
  2010-03-12 10:11   ` Wu Zhangjin
@ 2010-03-12 10:14   ` Ralf Baechle
  2010-03-12 16:36   ` David Daney
  2 siblings, 0 replies; 8+ messages in thread
From: Ralf Baechle @ 2010-03-12 10:14 UTC (permalink / raw)
  To: Thomas Bogendoerfer; +Cc: Wu Zhangjin, Steven Rostedt, linux-mips, David Daney

On Fri, Mar 12, 2010 at 09:50:53AM +0100, Thomas Bogendoerfer wrote:

> On Fri, Mar 12, 2010 at 02:07:37AM +0800, Wu Zhangjin wrote:
> > +/*
> > + * If the Instruction Pointer is in module space (0xc0000000), return ture;
> > + * otherwise, it is in kernel space (0x80000000), return false.
> > + */
> > +#define in_module(ip) (unlikely((ip) & 0x40000000))
> > +
> 
> looks broken for 64bit, but maybe this is a 32bit only feature...

This gem did already exist in the old code, so no regression:

	if (ip & 0x40000000) {

  Ralf

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] MIPS: tracing: Optimize the implementation
       [not found] ` <4B993B32.7000006@caviumnetworks.com>
@ 2010-03-12 11:43   ` Wu Zhangjin
  2010-03-12 14:57     ` Wu Zhangjin
  2010-03-13  4:10     ` Wu Zhangjin
  0 siblings, 2 replies; 8+ messages in thread
From: Wu Zhangjin @ 2010-03-12 11:43 UTC (permalink / raw)
  To: David Daney; +Cc: Ralf Baechle, Steven Rostedt, linux-mips

Hi,

On Thu, 2010-03-11 at 10:49 -0800, David Daney wrote:
[...]
> > +/*
> > + * If the Instruction Pointer is in module space (0xc0000000), return ture;
> 
> s/ture/true/
> 

yeah.

> > + * otherwise, it is in kernel space (0x80000000), return false.
> > + */
> > +#define in_module(ip) (unlikely((ip)&  0x40000000))
> > +
> 
> This isn't universally true, but it does hold for most configurations I 
> think.

Although I'm not sure who is the exception, we always need an universal
solution, what about this:

Compare module with kernel:

module:

        <saving registers>

        lui     v1, hi16_mcount                <--- ip
        addiu   v1, v1, lo16_mcount
        move    at, ra
        jalr    v1
         nop

kernel:

        <saving registers>

         move    at, ra
         jal     _mcount                       <--- ip

The above _ip_ is the address have been recorded into the __mcount_loc
section of the kernel by scripts/recordmcount.pl, as we can see, for
kernel, the *(ip - 4) is "move at, ra": 03e0082d, a certain instruction,
but for module, there is no possibility(?) of existing a "move at, ra"
at *(ip -4) but a register saving operation("s {d,w} rs, offset(sp)",
prefixed by 0xffb0 for 64bit and 0xafb0 for 32bit. ), and reversly, for
kernel, there is no such instruction there.

And consider the new option -mmcount-ra-address of gcc, some more
instructions will be inserted between "move at, ra" and the calling site
to mcount, so, *(ip-4) will not always be "move at, ra", then we need to
check if there is a "s {d,w} rs, offset(sp)" there, if yes, it is in
module, otherwise, it should be in kernel.

#define S_RS_SP          0xafb00000      /* s{d,w} rs, offset(sp) */

static inline int in_module(ip)
{
	insn = *(ip - 4); /* need to use safe_load_code instead, what about big
endian? */

	return ((insn & S_RS_SP) == S_RS_SP)
}

> 
> [...]
> 
> > +	/*
> > +	 * We have compiled modules with -mlong-calls, but compiled kernel
> > +	 * without it, therefore, need to cope with them respectively.
> > +	 *
> > +	 * For module:
> > +	 *
> > +	 *	lui	v1, hi16_mcount		-->  b	1f
> > +	 *	addiu	v1, v1, lo16_mcount
> > +	 *	move	at, ra
> > +	 *	jalr	v1
> > +	 *	 nop
> > +	 *					1f: (ip + 16)
> 
> 
> Have you thought about just overwriting the jalr here instead of 
> branching around it?  In any event, I don't think you can count on a 
> fixed size code sequence for calling _mcount.  We are passing the 
> address of the save location of RA to _mcount too.  The size of the code 
> will depend on the size of the functions stack frame *and* weather or 
> not it is a leaf function.  Although in the kernel we are unlikely to 
> see functions with large stack frames.

So even with "b 1f", we need to use the right offset, the original
version for module with -mmcount-ra-address should have bugs here for
the offset should be 16 + 8 or 4 (two instructions for leaf function,
one instruction for non-leaf function).

but for we only recorded the position of "lui v1, hi16_mcount" in the
__mcount_loc section, so we need to search the position of the real
calling site of mcount(jalr v1), this will goes to what you have
suggested below.

> 
> 
> > +	 * For kernel:
> > +	 *
> > +	 *	move	at, ra
> > +	 *	jal	_mcount			-->  nop
> > +	 *
> > +	 */
> > +	new = in_module(ip) ? INSN_B_1F : INSN_NOP;
> 
> 
> What would happen if you read the code to find the first JAL or JALR, 
> and then overwrote it with a NOP instead of relying on the function 
> address to figure out which type of prolog it has?
> 
> The reason I suggest this is that sometimes we place the entire kernel 
> in CSSEG.  When this is done, everything has the same (short) _mcount 
> calling sequence.

Right, then, we can search the JAL or JALR, for kernel, will get it
immediatly, for module, will only several instructions, we can do this
searching in ftrace_make_nop and ftrace_make_call at run-time, but just
found we can use the following function to do it in ftrace_init(), looks
good.

static inline int is_call_mcount(unsigned int insn)
{
	return ((insn & JAL) == JAL) || (insn == JALR_V1);
}

static inline unsinged long mcount_callsite(unsigned long addr)
{
	unsigned int insn;

	insn = *(unsigned int *)addr; /*need safe_load_code*/
	if (is_call_mcount(insn))
		return addr;

	do {
		addr += 4;	/* what about big endian? */
		insn = *(unsigned int *)addr; /*need safe_load_code*/
	} while (!is_call_mcount(insn));

	return addr;
}

static inline unsigned long ftrace_call_adjust(unsigned long addr)
{
        return mcount_callsite(addr);
}

With the above support, we only need this new ftrace_make_nop:

*(unsigned int *)ip = INSN_NOP;

(But for module, this may need more overhead than "b 1f". )

and ftrace_make_call:

*(unsigned int *)ip = in_module(ip) : INSN_JALR_V1 : insn_jal_mcount;

(And here, for module, we need more time to determine which space we
are.)

Any more suggestion?

Thanks & Regards,
	Wu Zhangjin

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] MIPS: tracing: Optimize the implementation
  2010-03-12 11:43   ` Wu Zhangjin
@ 2010-03-12 14:57     ` Wu Zhangjin
  2010-03-13  4:10     ` Wu Zhangjin
  1 sibling, 0 replies; 8+ messages in thread
From: Wu Zhangjin @ 2010-03-12 14:57 UTC (permalink / raw)
  To: David Daney; +Cc: Ralf Baechle, Steven Rostedt, linux-mips

On Fri, 2010-03-12 at 19:43 +0800, Wu Zhangjin wrote:
[...]
> Right, then, we can search the JAL or JALR, for kernel, will get it
> immediatly, for module, will only several instructions, we can do this
> searching in ftrace_make_nop and ftrace_make_call at run-time, but just
> found we can use the following function to do it in ftrace_init(), looks
> good.
> 
> static inline int is_call_mcount(unsigned int insn)
> {
> 	return ((insn & JAL) == JAL) || (insn == JALR_V1);
> }
> 
> static inline unsinged long mcount_callsite(unsigned long addr)
> {
> 	unsigned int insn;
> 
> 	insn = *(unsigned int *)addr; /*need safe_load_code*/
> 	if (is_call_mcount(insn))
> 		return addr;
> 
> 	do {
> 		addr += 4;	/* what about big endian? */
> 		insn = *(unsigned int *)addr; /*need safe_load_code*/
> 	} while (!is_call_mcount(insn));
> 

This is not possible for modules, for currently, the modules are not
loaded yet.

Regards,
	Wu Zhangjin

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] MIPS: tracing: Optimize the implementation
  2010-03-12  8:50 ` Thomas Bogendoerfer
  2010-03-12 10:11   ` Wu Zhangjin
  2010-03-12 10:14   ` Ralf Baechle
@ 2010-03-12 16:36   ` David Daney
  2 siblings, 0 replies; 8+ messages in thread
From: David Daney @ 2010-03-12 16:36 UTC (permalink / raw)
  To: Thomas Bogendoerfer; +Cc: Wu Zhangjin, Ralf Baechle, Steven Rostedt, linux-mips

On 03/12/2010 12:50 AM, Thomas Bogendoerfer wrote:
> On Fri, Mar 12, 2010 at 02:07:37AM +0800, Wu Zhangjin wrote:
>> +/*
>> + * If the Instruction Pointer is in module space (0xc0000000), return ture;
>> + * otherwise, it is in kernel space (0x80000000), return false.
>> + */
>> +#define in_module(ip) (unlikely((ip)&  0x40000000))
>> +
>
> looks broken for 64bit, but maybe this is a 32bit only feature...
>

I initially thought the same thing.  However for 64-bit kernels linked 
in ckseg0 it is still true.  If we use the -msym32 optimization, we are 
forced to be in the ckseg space, so for most cases it works.

David Daney.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] MIPS: tracing: Optimize the implementation
  2010-03-12 11:43   ` Wu Zhangjin
  2010-03-12 14:57     ` Wu Zhangjin
@ 2010-03-13  4:10     ` Wu Zhangjin
  1 sibling, 0 replies; 8+ messages in thread
From: Wu Zhangjin @ 2010-03-13  4:10 UTC (permalink / raw)
  To: David Daney; +Cc: Ralf Baechle, Steven Rostedt, linux-mips

On Fri, 2010-03-12 at 19:43 +0800, Wu Zhangjin wrote:
[...]
> > > + * otherwise, it is in kernel space (0x80000000), return false.
> > > + */
> > > +#define in_module(ip) (unlikely((ip)&  0x40000000))
> > > +
> > 
> > This isn't universally true, but it does hold for most configurations I 
> > think.
> 
> Although I'm not sure who is the exception, we always need an universal
> solution, what about this:
> 
> Compare module with kernel:
> 
> module:
> 
>         <saving registers>
> 
>         lui     v1, hi16_mcount                <--- ip
>         addiu   v1, v1, lo16_mcount
>         move    at, ra
>         jalr    v1
>          nop
> 
> kernel:
> 
>         <saving registers>
> 
>          move    at, ra
>          jal     _mcount                       <--- ip
> 
> The above _ip_ is the address have been recorded into the __mcount_loc
> section of the kernel by scripts/recordmcount.pl, as we can see, for
> kernel, the *(ip - 4) is "move at, ra": 03e0082d, a certain instruction,
> but for module, there is no possibility(?) of existing a "move at, ra"
> at *(ip -4) but a register saving operation("s {d,w} rs, offset(sp)",
> prefixed by 0xffb0 for 64bit and 0xafb0 for 32bit. ), and reversly, for
> kernel, there is no such instruction there.
> 
> And consider the new option -mmcount-ra-address of gcc, some more
> instructions will be inserted between "move at, ra" and the calling site
> to mcount, so, *(ip-4) will not always be "move at, ra", then we need to
> check if there is a "s {d,w} rs, offset(sp)" there, if yes, it is in
> module, otherwise, it should be in kernel.
> 
> #define S_RS_SP          0xafb00000      /* s{d,w} rs, offset(sp) */
> 
> static inline int in_module(ip)
> {
> 	insn = *(ip - 4); /* need to use safe_load_code instead, what about big
> endian? */
> 
> 	return ((insn & S_RS_SP) == S_RS_SP)
> }

The above method is not available for some cases, to avoid bring Ftrace
with extra overhead, currently, I will keep using the original version
although it may not work for some cases either.

And to let ftrace_make_nop/ftrace_make_call be lightweight, I will also
keep using the "b 1f" method in the old version.

Regards,
	Wu Zhangjin

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2010-03-13  4:17 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-03-11 18:07 [PATCH] MIPS: tracing: Optimize the implementation Wu Zhangjin
2010-03-12  8:50 ` Thomas Bogendoerfer
2010-03-12 10:11   ` Wu Zhangjin
2010-03-12 10:14   ` Ralf Baechle
2010-03-12 16:36   ` David Daney
     [not found] ` <4B993B32.7000006@caviumnetworks.com>
2010-03-12 11:43   ` Wu Zhangjin
2010-03-12 14:57     ` Wu Zhangjin
2010-03-13  4:10     ` Wu Zhangjin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.