* speed up kernel_thread
@ 2007-03-07 21:29 Anton Blanchard
2007-03-07 22:01 ` Scott Wood
0 siblings, 1 reply; 3+ messages in thread
From: Anton Blanchard @ 2007-03-07 21:29 UTC (permalink / raw)
To: linuxppc-dev; +Cc: paulus
When looking through Ingo's syslet code, I noticed our kernel_thread
implementation executed two syscalls. The patch below avoids these by
calling do_fork directly.
Of course after doing this I had to create kthreadbench to test the
change. Good news, it's over 3% faster to create 1000 kernel threads!
Tested on ppc64 only.
Signed-off-by: Anton Blanchard <anton@samba.org>
---
Index: linux-2.6/arch/powerpc/kernel/process.c
===================================================================
--- linux-2.6.orig/arch/powerpc/kernel/process.c 2007-03-07 14:27:57.000000000 -0600
+++ linux-2.6/arch/powerpc/kernel/process.c 2007-03-07 15:15:16.000000000 -0600
@@ -522,6 +522,7 @@
p->thread.regs = NULL; /* no user register state */
} else {
childregs->gpr[1] = usp;
+ childregs->gpr[3] = 0; /* Result from fork() */
p->thread.regs = childregs;
if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_PPC64
@@ -532,7 +533,6 @@
childregs->gpr[2] = childregs->gpr[6];
}
}
- childregs->gpr[3] = 0; /* Result from fork() */
sp -= STACK_FRAME_OVERHEAD;
/*
@@ -650,6 +650,30 @@
#endif /* CONFIG_SPE */
}
+long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
+{
+ extern void kernel_thread_helper(void);
+ struct pt_regs regs;
+
+ memset(&regs, 0, sizeof(regs));
+
+ /* use non volatile GPRs so they get restored in _switch */
+ regs.gpr[14] = (unsigned long)arg;
+ regs.msr = mfmsr();
+
+#ifdef CONFIG_PPC64
+ regs.gpr[15] = *((unsigned long *)fn);
+ regs.gpr[2] = *((unsigned long *)fn + 1);
+ regs.nip = *((unsigned long *)kernel_thread_helper);
+#else
+ regs.gpr[15] = (unsigned long)fn;
+ regs.nip = (unsigned long)kernel_thread_helper;
+#endif
+
+ return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0,
+ NULL, NULL);
+}
+
#define PR_FP_ALL_EXCEPT (PR_FP_EXC_DIV | PR_FP_EXC_OVF | PR_FP_EXC_UND \
| PR_FP_EXC_RES | PR_FP_EXC_INV)
Index: linux-2.6/arch/powerpc/kernel/misc_64.S
===================================================================
--- linux-2.6.orig/arch/powerpc/kernel/misc_64.S 2007-03-07 14:27:57.000000000 -0600
+++ linux-2.6/arch/powerpc/kernel/misc_64.S 2007-03-07 15:15:16.000000000 -0600
@@ -422,38 +422,14 @@
blr
#endif /* CONFIG_CPU_FREQ_PMAC64 */
-
-/*
- * Create a kernel thread
- * kernel_thread(fn, arg, flags)
- */
-_GLOBAL(kernel_thread)
- std r29,-24(r1)
- std r30,-16(r1)
- stdu r1,-STACK_FRAME_OVERHEAD(r1)
- mr r29,r3
- mr r30,r4
- ori r3,r5,CLONE_VM /* flags */
- oris r3,r3,(CLONE_UNTRACED>>16)
- li r4,0 /* new sp (unused) */
- li r0,__NR_clone
- sc
- cmpdi 0,r3,0 /* parent or child? */
- bne 1f /* return if parent */
+_GLOBAL(kernel_thread_helper)
li r0,0
+ mtctr r15
stdu r0,-STACK_FRAME_OVERHEAD(r1)
- ld r2,8(r29)
- ld r29,0(r29)
- mtlr r29 /* fn addr in lr */
- mr r3,r30 /* load arg and call fn */
- blrl
- li r0,__NR_exit /* exit after child exits */
- li r3,0
- sc
-1: addi r1,r1,STACK_FRAME_OVERHEAD
- ld r29,-24(r1)
- ld r30,-16(r1)
- blr
+ mr r3,r14
+ bctrl
+ li r3,0
+ bl .do_exit
/*
* disable_kernel_fp()
Index: linux-2.6/arch/powerpc/kernel/misc_32.S
===================================================================
--- linux-2.6.orig/arch/powerpc/kernel/misc_32.S 2007-03-07 14:27:57.000000000 -0600
+++ linux-2.6/arch/powerpc/kernel/misc_32.S 2007-03-07 15:15:16.000000000 -0600
@@ -738,36 +738,15 @@
mr r3,r1 /* Close enough */
blr
-/*
- * Create a kernel thread
- * kernel_thread(fn, arg, flags)
- */
-_GLOBAL(kernel_thread)
- stwu r1,-16(r1)
- stw r30,8(r1)
- stw r31,12(r1)
- mr r30,r3 /* function */
- mr r31,r4 /* argument */
- ori r3,r5,CLONE_VM /* flags */
- oris r3,r3,CLONE_UNTRACED>>16
- li r4,0 /* new sp (unused) */
- li r0,__NR_clone
- sc
- cmpwi 0,r3,0 /* parent or child? */
- bne 1f /* return if parent */
- li r0,0 /* make top-level stack frame */
- stwu r0,-16(r1)
- mtlr r30 /* fn addr in lr */
- mr r3,r31 /* load arg and call fn */
- PPC440EP_ERR42
- blrl
- li r0,__NR_exit /* exit if function returns */
- li r3,0
- sc
-1: lwz r30,8(r1)
- lwz r31,12(r1)
- addi r1,r1,16
- blr
+_GLOBAL(kernel_thread_helper)
+ li r0,0
+ mtctr r15
+ stdu r0,-STACK_FRAME_OVERHEAD(r1)
+ mr r3,r14
+ PPC440EP_ERR42 /* XXX required? */
+ bctrl
+ li r3,0
+ bl do_exit
_GLOBAL(kernel_execve)
li r0,__NR_execve
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: speed up kernel_thread
2007-03-07 21:29 speed up kernel_thread Anton Blanchard
@ 2007-03-07 22:01 ` Scott Wood
2007-03-08 5:32 ` Anton Blanchard
0 siblings, 1 reply; 3+ messages in thread
From: Scott Wood @ 2007-03-07 22:01 UTC (permalink / raw)
To: Anton Blanchard; +Cc: linuxppc-dev, paulus
On Wed, Mar 07, 2007 at 03:29:10PM -0600, Anton Blanchard wrote:
> Index: linux-2.6/arch/powerpc/kernel/misc_32.S
> ===================================================================
> --- linux-2.6.orig/arch/powerpc/kernel/misc_32.S 2007-03-07 14:27:57.000000000 -0600
> +++ linux-2.6/arch/powerpc/kernel/misc_32.S 2007-03-07 15:15:16.000000000 -0600
[snip]
> +_GLOBAL(kernel_thread_helper)
> + li r0,0
> + mtctr r15
> + stdu r0,-STACK_FRAME_OVERHEAD(r1)
Shouldn't this be stwu on 32-bit?
-Scott
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: speed up kernel_thread
2007-03-07 22:01 ` Scott Wood
@ 2007-03-08 5:32 ` Anton Blanchard
0 siblings, 0 replies; 3+ messages in thread
From: Anton Blanchard @ 2007-03-08 5:32 UTC (permalink / raw)
To: Scott Wood; +Cc: linuxppc-dev, paulus
Hi Scott,
> Shouldn't this be stwu on 32-bit?
Definitely, thanks for spotting it.
Anton
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2007-03-08 5:32 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-03-07 21:29 speed up kernel_thread Anton Blanchard
2007-03-07 22:01 ` Scott Wood
2007-03-08 5:32 ` Anton Blanchard
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).