* [Qemu-devel] [PATCH 1/2] tcg-ppc: fix qemu_ld/qemu_st for AIX ABI
2013-09-05 8:22 [Qemu-devel] [PATCH 0/2] tcg-ppc: use new return-argument ld/st helpers Paolo Bonzini
@ 2013-09-05 8:22 ` Paolo Bonzini
2013-09-05 8:22 ` [Qemu-devel] [PATCH 2/2] tcg-ppc: use new return-argument ld/st helpers Paolo Bonzini
2013-09-05 9:46 ` [Qemu-devel] [Qemu-ppc] [PATCH 0/2] " Alexander Graf
2 siblings, 0 replies; 6+ messages in thread
From: Paolo Bonzini @ 2013-09-05 8:22 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-ppc, aurelien, rth
For the AIX ABI, the helper's function pointer and small area (TOC)
pointer need to be loaded from its function descriptor inside the
trampoline. The trampoline itself, on the other hand, is called with
a normal BL instruction.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
tcg/ppc/tcg-target.c | 17 +++++++++--------
1 file changed, 9 insertions(+), 8 deletions(-)
diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index 2595556..204ffbe 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -490,7 +490,8 @@ static void tcg_out_b (TCGContext *s, int mask, tcg_target_long target)
}
}
-static void tcg_out_call (TCGContext *s, tcg_target_long arg, int const_arg)
+static void tcg_out_call (TCGContext *s, tcg_target_long arg, int const_arg,
+ int lk)
{
#ifdef _CALL_AIX
int reg;
@@ -504,14 +505,14 @@ static void tcg_out_call (TCGContext *s, tcg_target_long arg, int const_arg)
tcg_out32 (s, LWZ | RT (0) | RA (reg));
tcg_out32 (s, MTSPR | RA (0) | CTR);
tcg_out32 (s, LWZ | RT (2) | RA (reg) | 4);
- tcg_out32 (s, BCCTR | BO_ALWAYS | LK);
+ tcg_out32 (s, BCCTR | BO_ALWAYS | lk);
#else
if (const_arg) {
- tcg_out_b (s, LK, arg);
+ tcg_out_b (s, lk, arg);
}
else {
tcg_out32 (s, MTSPR | RS (arg) | LR);
- tcg_out32 (s, BCLR | BO_ALWAYS | LK);
+ tcg_out32 (s, BCLR | BO_ALWAYS | lk);
}
#endif
}
@@ -860,7 +861,7 @@ static void tcg_out_qemu_ld_slow_path (TCGContext *s, TCGLabelQemuLdst *label)
tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
#endif
tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index);
- tcg_out_call (s, (tcg_target_long) ld_trampolines[s_bits], 1);
+ tcg_out_b (s, LK, (tcg_target_long) ld_trampolines[s_bits]);
tcg_out32 (s, (tcg_target_long) raddr);
switch (opc) {
case 0|4:
@@ -954,7 +955,7 @@ static void tcg_out_qemu_st_slow_path (TCGContext *s, TCGLabelQemuLdst *label)
ir++;
tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index);
- tcg_out_call (s, (tcg_target_long) st_trampolines[opc], 1);
+ tcg_out_b (s, LK, (tcg_target_long) st_trampolines[opc]);
tcg_out32 (s, (tcg_target_long) raddr);
tcg_out_b (s, 0, (tcg_target_long) raddr);
}
@@ -984,7 +985,7 @@ static void emit_ldst_trampoline (TCGContext *s, const void *ptr)
tcg_out32 (s, ADDI | RT (3) | RA (3) | 4);
tcg_out32 (s, MTSPR | RS (3) | LR);
tcg_out_mov (s, TCG_TYPE_I32, 3, TCG_AREG0);
- tcg_out_b (s, 0, (tcg_target_long) ptr);
+ tcg_out_call (s, (tcg_target_long) ptr, 1, 0);
}
#endif
@@ -1493,7 +1494,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
}
break;
case INDEX_op_call:
- tcg_out_call (s, args[0], const_args[0]);
+ tcg_out_call (s, args[0], const_args[0], LK);
break;
case INDEX_op_movi_i32:
tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
--
1.8.3.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [Qemu-devel] [PATCH 2/2] tcg-ppc: use new return-argument ld/st helpers
2013-09-05 8:22 [Qemu-devel] [PATCH 0/2] tcg-ppc: use new return-argument ld/st helpers Paolo Bonzini
2013-09-05 8:22 ` [Qemu-devel] [PATCH 1/2] tcg-ppc: fix qemu_ld/qemu_st for AIX ABI Paolo Bonzini
@ 2013-09-05 8:22 ` Paolo Bonzini
2013-09-05 15:17 ` Richard Henderson
2013-09-05 9:46 ` [Qemu-devel] [Qemu-ppc] [PATCH 0/2] " Alexander Graf
2 siblings, 1 reply; 6+ messages in thread
From: Paolo Bonzini @ 2013-09-05 8:22 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-ppc, aurelien, rth
The new helpers take the return address as an explicit argument, so
the slow path can pass it with a 32-bit load-of-immediate and save the
mflr+addi+mtlr sequence in the trampoline.
Tested with a Windows 98 guest (pretty much the most recent thing I
could run on my PPC machine) and kvm-unit-tests's sieve.flat. The
speedup for sieve.flat is as high as 10% for qemu-system-i386 and 25%
(no kidding) for qemu-system-x86_64 on my PowerBook G4.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
include/exec/exec-all.h | 4 +---
tcg/ppc/tcg-target.c | 41 ++++++++++++++++++++---------------------
2 files changed, 21 insertions(+), 24 deletions(-)
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index beb4149..a81e805 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -324,9 +324,7 @@ extern uintptr_t tci_tb_ptr;
In some implementations, we pass the "logical" return address manually;
in others, we must infer the logical return from the true return. */
#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
-# if defined (_ARCH_PPC) && !defined (_ARCH_PPC64)
-# define GETRA_LDST(RA) (*(int32_t *)((RA) - 4))
-# elif defined(__arm__)
+# if defined(__arm__)
/* We define two insns between the return address and the branch back to
straight-line. Find and decode that branch insn. */
# define GETRA_LDST(RA) tcg_getra_ldst(RA)
diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index 204ffbe..24a8621 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -550,22 +550,24 @@ static void add_qemu_ldst_label (TCGContext *s,
label->label_ptr[0] = label_ptr;
}
-/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
- int mmu_idx) */
+/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
+ * int mmu_idx, uintptr_t ra)
+ */
static const void * const qemu_ld_helpers[4] = {
- helper_ldb_mmu,
- helper_ldw_mmu,
- helper_ldl_mmu,
- helper_ldq_mmu,
+ helper_ret_ldub_mmu,
+ helper_ret_lduw_mmu,
+ helper_ret_ldul_mmu,
+ helper_ret_ldq_mmu,
};
-/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
- uintxx_t val, int mmu_idx) */
+/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
+ * uintxx_t val, int mmu_idx, uintptr_t ra)
+ */
static const void * const qemu_st_helpers[4] = {
- helper_stb_mmu,
- helper_stw_mmu,
- helper_stl_mmu,
- helper_stq_mmu,
+ helper_ret_stb_mmu,
+ helper_ret_stw_mmu,
+ helper_ret_stl_mmu,
+ helper_ret_stq_mmu,
};
static void *ld_trampolines[4];
@@ -860,9 +862,9 @@ static void tcg_out_qemu_ld_slow_path (TCGContext *s, TCGLabelQemuLdst *label)
tcg_out_mov (s, TCG_TYPE_I32, ir++, label->addrhi_reg);
tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
#endif
- tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index);
+ tcg_out_movi (s, TCG_TYPE_I32, ir++, mem_index);
+ tcg_out_movi (s, TCG_TYPE_I32, ir, (tcg_target_long) raddr);
tcg_out_b (s, LK, (tcg_target_long) ld_trampolines[s_bits]);
- tcg_out32 (s, (tcg_target_long) raddr);
switch (opc) {
case 0|4:
tcg_out32 (s, EXTSB | RA (data_reg) | RS (3));
@@ -954,10 +956,10 @@ static void tcg_out_qemu_st_slow_path (TCGContext *s, TCGLabelQemuLdst *label)
}
ir++;
- tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index);
- tcg_out_b (s, LK, (tcg_target_long) st_trampolines[opc]);
- tcg_out32 (s, (tcg_target_long) raddr);
- tcg_out_b (s, 0, (tcg_target_long) raddr);
+ tcg_out_movi (s, TCG_TYPE_I32, ir++, mem_index);
+ tcg_out_movi (s, TCG_TYPE_I32, ir, (tcg_target_long) raddr);
+ tcg_out32 (s, MTSPR | RS (ir) | LR);
+ tcg_out_b (s, 0, (tcg_target_long) st_trampolines[opc]);
}
void tcg_out_tb_finalize(TCGContext *s)
@@ -981,9 +983,6 @@ void tcg_out_tb_finalize(TCGContext *s)
#ifdef CONFIG_SOFTMMU
static void emit_ldst_trampoline (TCGContext *s, const void *ptr)
{
- tcg_out32 (s, MFSPR | RT (3) | LR);
- tcg_out32 (s, ADDI | RT (3) | RA (3) | 4);
- tcg_out32 (s, MTSPR | RS (3) | LR);
tcg_out_mov (s, TCG_TYPE_I32, 3, TCG_AREG0);
tcg_out_call (s, (tcg_target_long) ptr, 1, 0);
}
--
1.8.3.1
^ permalink raw reply related [flat|nested] 6+ messages in thread