* [PATCH 1/2] powerpc/pseries: Use jump labels for hcall tracepoints
@ 2014-07-03 5:52 Anton Blanchard
2014-07-03 5:52 ` [PATCH 2/2] powerpc/pseries: optimise " Anton Blanchard
0 siblings, 1 reply; 2+ messages in thread
From: Anton Blanchard @ 2014-07-03 5:52 UTC (permalink / raw)
To: benh, paulus, mpe; +Cc: linuxppc-dev
hcall tracepoints add quite a few instructions to our hcall path:
plpar_hcall:
mr r2,r2
mfcr r0
stw r0,8(r1)
b 164 <---- start
ld r12,0(r2)
std r12,32(r1)
cmpdi r12,0
beq 164 <---- end
...
That is an unconditional branch that gets nopped out during boot, followed
by a load/compare/branch sequence. We also store the tracepoint refcount to
the stack for the hcall exit path to use.
By using jump labels we can simplify this to just a single nop that
gets replaced with a branch when the tracepoint is enabled:
plpar_hcall:
mr r2,r2
mfcr r0
stw r0,8(r1)
nop <----
...
If jump labels are not enabled (CONFIG_JUMP_LABEL=n), we fall back to the old method of checking hcall_tracepoint_refcount.
Signed-off-by: Anton Blanchard <anton@samba.org>
---
Index: b/arch/powerpc/include/asm/jump_label.h
===================================================================
--- a/arch/powerpc/include/asm/jump_label.h
+++ b/arch/powerpc/include/asm/jump_label.h
@@ -10,6 +10,7 @@
* 2 of the License, or (at your option) any later version.
*/
+#ifndef __ASSEMBLY__
#include <linux/types.h>
#include <asm/feature-fixups.h>
@@ -42,4 +43,12 @@ struct jump_entry {
jump_label_t key;
};
+#else
+#define ARCH_STATIC_BRANCH(LABEL, KEY) \
+1098: nop; \
+ .pushsection __jump_table, "aw"; \
+ FTR_ENTRY_LONG 1098b, LABEL, KEY; \
+ .popsection
+#endif
+
#endif /* _ASM_POWERPC_JUMP_LABEL_H */
Index: b/arch/powerpc/platforms/pseries/hvCall.S
===================================================================
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -12,9 +12,13 @@
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/ptrace.h>
+#include <asm/jump_label.h>
+
+ .section ".text"
#ifdef CONFIG_TRACEPOINTS
+#ifndef CONFIG_JUMP_LABEL
.section ".toc","aw"
.globl hcall_tracepoint_refcount
@@ -22,21 +26,13 @@ hcall_tracepoint_refcount:
.llong 0
.section ".text"
+#endif
/*
* precall must preserve all registers. use unused STK_PARAM()
- * areas to save snapshots and opcode. We branch around this
- * in early init (eg when populating the MMU hashtable) by using an
- * unconditional cpu feature.
+ * areas to save snapshots and opcode.
*/
#define HCALL_INST_PRECALL(FIRST_REG) \
-BEGIN_FTR_SECTION; \
- b 1f; \
-END_FTR_SECTION(0, 1); \
- ld r12,hcall_tracepoint_refcount@toc(r2); \
- std r12,32(r1); \
- cmpdi r12,0; \
- beq+ 1f; \
mflr r0; \
std r3,STK_PARAM(R3)(r1); \
std r4,STK_PARAM(R4)(r1); \
@@ -60,22 +56,13 @@ END_FTR_SECTION(0, 1); \
ld r8,STK_PARAM(R8)(r1); \
ld r9,STK_PARAM(R9)(r1); \
ld r10,STK_PARAM(R10)(r1); \
- mtlr r0; \
-1:
+ mtlr r0
/*
* postcall is performed immediately before function return which
- * allows liberal use of volatile registers. We branch around this
- * in early init (eg when populating the MMU hashtable) by using an
- * unconditional cpu feature.
+ * allows liberal use of volatile registers.
*/
#define __HCALL_INST_POSTCALL \
-BEGIN_FTR_SECTION; \
- b 1f; \
-END_FTR_SECTION(0, 1); \
- ld r12,32(r1); \
- cmpdi r12,0; \
- beq+ 1f; \
mflr r0; \
ld r6,STK_PARAM(R3)(r1); \
std r3,STK_PARAM(R3)(r1); \
@@ -87,8 +74,7 @@ END_FTR_SECTION(0, 1); \
addi r1,r1,STACK_FRAME_OVERHEAD; \
ld r0,16(r1); \
ld r3,STK_PARAM(R3)(r1); \
- mtlr r0; \
-1:
+ mtlr r0
#define HCALL_INST_POSTCALL_NORETS \
li r5,0; \
@@ -98,37 +84,62 @@ END_FTR_SECTION(0, 1); \
mr r5,BUFREG; \
__HCALL_INST_POSTCALL
+#ifdef CONFIG_JUMP_LABEL
+#define HCALL_BRANCH(LABEL) \
+ ARCH_STATIC_BRANCH(LABEL, hcall_tracepoint_key)
+#else
+
+/*
+ * We branch around this in early init (eg when populating the MMU
+ * hashtable) by using an unconditional cpu feature.
+ */
+#define HCALL_BRANCH(LABEL) \
+BEGIN_FTR_SECTION; \
+ b 1f; \
+END_FTR_SECTION(0, 1); \
+ ld r12,hcall_tracepoint_refcount@toc(r2); \
+ std r12,32(r1); \
+ cmpdi r12,0; \
+ bne- LABEL; \
+1:
+#endif
+
#else
#define HCALL_INST_PRECALL(FIRST_ARG)
#define HCALL_INST_POSTCALL_NORETS
#define HCALL_INST_POSTCALL(BUFREG)
+#define HCALL_BRANCH(LABEL)
#endif
- .text
-
_GLOBAL_TOC(plpar_hcall_norets)
HMT_MEDIUM
mfcr r0
stw r0,8(r1)
-
- HCALL_INST_PRECALL(R4)
-
+ HCALL_BRANCH(plpar_hcall_norets_trace)
HVSC /* invoke the hypervisor */
- HCALL_INST_POSTCALL_NORETS
-
lwz r0,8(r1)
mtcrf 0xff,r0
blr /* return r3 = status */
+#ifdef CONFIG_TRACEPOINTS
+plpar_hcall_norets_trace:
+ HCALL_INST_PRECALL(R4)
+ HVSC
+ HCALL_INST_POSTCALL_NORETS
+ lwz r0,8(r1)
+ mtcrf 0xff,r0
+ blr
+#endif
+
_GLOBAL_TOC(plpar_hcall)
HMT_MEDIUM
mfcr r0
stw r0,8(r1)
- HCALL_INST_PRECALL(R5)
+ HCALL_BRANCH(plpar_hcall_trace)
std r4,STK_PARAM(R4)(r1) /* Save ret buffer */
@@ -147,12 +158,40 @@ _GLOBAL_TOC(plpar_hcall)
std r6, 16(r12)
std r7, 24(r12)
+ lwz r0,8(r1)
+ mtcrf 0xff,r0
+
+ blr /* return r3 = status */
+
+#ifdef CONFIG_TRACEPOINTS
+plpar_hcall_trace:
+ HCALL_INST_PRECALL(R5)
+
+ std r4,STK_PARAM(R4)(r1)
+ mr r0,r4
+
+ mr r4,r5
+ mr r5,r6
+ mr r6,r7
+ mr r7,r8
+ mr r8,r9
+ mr r9,r10
+
+ HVSC
+
+ ld r12,STK_PARAM(R4)(r1)
+ std r4,0(r12)
+ std r5,8(r12)
+ std r6,16(r12)
+ std r7,24(r12)
+
HCALL_INST_POSTCALL(r12)
lwz r0,8(r1)
mtcrf 0xff,r0
- blr /* return r3 = status */
+ blr
+#endif
/*
* plpar_hcall_raw can be called in real mode. kexec/kdump need some
@@ -194,7 +233,7 @@ _GLOBAL_TOC(plpar_hcall9)
mfcr r0
stw r0,8(r1)
- HCALL_INST_PRECALL(R5)
+ HCALL_BRANCH(plpar_hcall9_trace)
std r4,STK_PARAM(R4)(r1) /* Save ret buffer */
@@ -222,12 +261,49 @@ _GLOBAL_TOC(plpar_hcall9)
std r11,56(r12)
std r0, 64(r12)
+ lwz r0,8(r1)
+ mtcrf 0xff,r0
+
+ blr /* return r3 = status */
+
+#ifdef CONFIG_TRACEPOINTS
+plpar_hcall9_trace:
+ HCALL_INST_PRECALL(R5)
+
+ std r4,STK_PARAM(R4)(r1)
+ mr r0,r4
+
+ mr r4,r5
+ mr r5,r6
+ mr r6,r7
+ mr r7,r8
+ mr r8,r9
+ mr r9,r10
+ ld r10,STK_PARAM(R11)(r1)
+ ld r11,STK_PARAM(R12)(r1)
+ ld r12,STK_PARAM(R13)(r1)
+
+ HVSC
+
+ mr r0,r12
+ ld r12,STK_PARAM(R4)(r1)
+ std r4,0(r12)
+ std r5,8(r12)
+ std r6,16(r12)
+ std r7,24(r12)
+ std r8,32(r12)
+ std r9,40(r12)
+ std r10,48(r12)
+ std r11,56(r12)
+ std r0,64(r12)
+
HCALL_INST_POSTCALL(r12)
lwz r0,8(r1)
mtcrf 0xff,r0
- blr /* return r3 = status */
+ blr
+#endif
/* See plpar_hcall_raw to see why this is needed */
_GLOBAL(plpar_hcall9_raw)
Index: b/arch/powerpc/platforms/pseries/lpar.c
===================================================================
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -26,6 +26,7 @@
#include <linux/dma-mapping.h>
#include <linux/console.h>
#include <linux/export.h>
+#include <linux/static_key.h>
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/page.h>
@@ -649,6 +650,19 @@ EXPORT_SYMBOL(arch_free_page);
#endif
#ifdef CONFIG_TRACEPOINTS
+#ifdef CONFIG_JUMP_LABEL
+struct static_key hcall_tracepoint_key = STATIC_KEY_INIT;
+
+void hcall_tracepoint_regfunc(void)
+{
+ static_key_slow_inc(&hcall_tracepoint_key);
+}
+
+void hcall_tracepoint_unregfunc(void)
+{
+ static_key_slow_dec(&hcall_tracepoint_key);
+}
+#else
/*
* We optimise our hcall path by placing hcall_tracepoint_refcount
* directly in the TOC so we can check if the hcall tracepoints are
@@ -658,13 +672,6 @@ EXPORT_SYMBOL(arch_free_page);
/* NB: reg/unreg are called while guarded with the tracepoints_mutex */
extern long hcall_tracepoint_refcount;
-/*
- * Since the tracing code might execute hcalls we need to guard against
- * recursion. One example of this are spinlocks calling H_YIELD on
- * shared processor partitions.
- */
-static DEFINE_PER_CPU(unsigned int, hcall_trace_depth);
-
void hcall_tracepoint_regfunc(void)
{
hcall_tracepoint_refcount++;
@@ -674,6 +681,15 @@ void hcall_tracepoint_unregfunc(void)
{
hcall_tracepoint_refcount--;
}
+#endif
+
+/*
+ * Since the tracing code might execute hcalls we need to guard against
+ * recursion. One example of this are spinlocks calling H_YIELD on
+ * shared processor partitions.
+ */
+static DEFINE_PER_CPU(unsigned int, hcall_trace_depth);
+
void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
{
^ permalink raw reply [flat|nested] 2+ messages in thread
* [PATCH 2/2] powerpc/pseries: optimise hcall tracepoints
2014-07-03 5:52 [PATCH 1/2] powerpc/pseries: Use jump labels for hcall tracepoints Anton Blanchard
@ 2014-07-03 5:52 ` Anton Blanchard
0 siblings, 0 replies; 2+ messages in thread
From: Anton Blanchard @ 2014-07-03 5:52 UTC (permalink / raw)
To: benh, paulus, mpe; +Cc: linuxppc-dev
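Now that we execute the hcall tracepoint entry and exit code out of
line, we can allocate a single stack frame in the precall code and keep
it until the postcall code has finished, instead of creating and tearing
down a separate frame around each of the __trace_hcall_entry and
__trace_hcall_exit calls.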
Signed-off-by: Anton Blanchard <anton@samba.org>
---
Index: b/arch/powerpc/platforms/pseries/hvCall.S
===================================================================
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -46,33 +46,27 @@ hcall_tracepoint_refcount:
addi r4,r1,STK_PARAM(FIRST_REG); \
stdu r1,-STACK_FRAME_OVERHEAD(r1); \
bl __trace_hcall_entry; \
- addi r1,r1,STACK_FRAME_OVERHEAD; \
- ld r0,16(r1); \
- ld r3,STK_PARAM(R3)(r1); \
- ld r4,STK_PARAM(R4)(r1); \
- ld r5,STK_PARAM(R5)(r1); \
- ld r6,STK_PARAM(R6)(r1); \
- ld r7,STK_PARAM(R7)(r1); \
- ld r8,STK_PARAM(R8)(r1); \
- ld r9,STK_PARAM(R9)(r1); \
- ld r10,STK_PARAM(R10)(r1); \
- mtlr r0
+ ld r3,STACK_FRAME_OVERHEAD+STK_PARAM(R3)(r1); \
+ ld r4,STACK_FRAME_OVERHEAD+STK_PARAM(R4)(r1); \
+ ld r5,STACK_FRAME_OVERHEAD+STK_PARAM(R5)(r1); \
+ ld r6,STACK_FRAME_OVERHEAD+STK_PARAM(R6)(r1); \
+ ld r7,STACK_FRAME_OVERHEAD+STK_PARAM(R7)(r1); \
+ ld r8,STACK_FRAME_OVERHEAD+STK_PARAM(R8)(r1); \
+ ld r9,STACK_FRAME_OVERHEAD+STK_PARAM(R9)(r1); \
+ ld r10,STACK_FRAME_OVERHEAD+STK_PARAM(R10)(r1)
/*
* postcall is performed immediately before function return which
* allows liberal use of volatile registers.
*/
#define __HCALL_INST_POSTCALL \
- mflr r0; \
- ld r6,STK_PARAM(R3)(r1); \
- std r3,STK_PARAM(R3)(r1); \
+ ld r0,STACK_FRAME_OVERHEAD+STK_PARAM(R3)(r1); \
+ std r3,STACK_FRAME_OVERHEAD+STK_PARAM(R3)(r1); \
mr r4,r3; \
- mr r3,r6; \
- std r0,16(r1); \
- stdu r1,-STACK_FRAME_OVERHEAD(r1); \
+ mr r3,r0; \
bl __trace_hcall_exit; \
+ ld r0,STACK_FRAME_OVERHEAD+16(r1); \
addi r1,r1,STACK_FRAME_OVERHEAD; \
- ld r0,16(r1); \
ld r3,STK_PARAM(R3)(r1); \
mtlr r0
@@ -279,14 +273,14 @@ plpar_hcall9_trace:
mr r7,r8
mr r8,r9
mr r9,r10
- ld r10,STK_PARAM(R11)(r1)
- ld r11,STK_PARAM(R12)(r1)
- ld r12,STK_PARAM(R13)(r1)
+ ld r10,STACK_FRAME_OVERHEAD+STK_PARAM(R11)(r1)
+ ld r11,STACK_FRAME_OVERHEAD+STK_PARAM(R12)(r1)
+ ld r12,STACK_FRAME_OVERHEAD+STK_PARAM(R13)(r1)
HVSC
mr r0,r12
- ld r12,STK_PARAM(R4)(r1)
+ ld r12,STACK_FRAME_OVERHEAD+STK_PARAM(R4)(r1)
std r4,0(r12)
std r5,8(r12)
std r6,16(r12)
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2014-07-03 5:52 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-07-03 5:52 [PATCH 1/2] powerpc/pseries: Use jump labels for hcall tracepoints Anton Blanchard
2014-07-03 5:52 ` [PATCH 2/2] powerpc/pseries: optimise " Anton Blanchard
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).