* [Qemu-devel] [PULL 01/21] configure: Allow command-line configure for ppc32
2013-09-25 16:27 [Qemu-devel] [PULL 00/21] TCG ppc and ppc64 updates Richard Henderson
@ 2013-09-25 16:27 ` Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 02/21] tcg-ppc: fix qemu_ld/qemu_st for AIX ABI Richard Henderson
` (19 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: Richard Henderson @ 2013-09-25 16:27 UTC (permalink / raw)
To: qemu-devel; +Cc: anthony
Similar to manually selecting i386 for an x86_64 host.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
configure | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/configure b/configure
index 05e16da..ef4d9bf 100755
--- a/configure
+++ b/configure
@@ -981,6 +981,14 @@ for opt do
done
case "$cpu" in
+ ppc)
+ CPU_CFLAGS="-m32"
+ LDFLAGS="-m32 $LDFLAGS"
+ ;;
+ ppc64)
+ CPU_CFLAGS="-m64"
+ LDFLAGS="-m64 $LDFLAGS"
+ ;;
sparc)
LDFLAGS="-m32 $LDFLAGS"
CPU_CFLAGS="-m32 -mcpu=ultrasparc"
--
1.8.1.4
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [Qemu-devel] [PULL 02/21] tcg-ppc: fix qemu_ld/qemu_st for AIX ABI
2013-09-25 16:27 [Qemu-devel] [PULL 00/21] TCG ppc and ppc64 updates Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 01/21] configure: Allow command-line configure for ppc32 Richard Henderson
@ 2013-09-25 16:27 ` Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 03/21] tcg-ppc: use new return-argument ld/st helpers Richard Henderson
` (18 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: Richard Henderson @ 2013-09-25 16:27 UTC (permalink / raw)
To: qemu-devel; +Cc: Paolo Bonzini, anthony
From: Paolo Bonzini <pbonzini@redhat.com>
For the AIX ABI, the function pointer and small area pointer need
to be loaded in the trampoline. The trampoline itself, by contrast,
is called with a normal BL instruction.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc/tcg-target.c | 17 +++++++++--------
1 file changed, 9 insertions(+), 8 deletions(-)
diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index 2595556..204ffbe 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -490,7 +490,8 @@ static void tcg_out_b (TCGContext *s, int mask, tcg_target_long target)
}
}
-static void tcg_out_call (TCGContext *s, tcg_target_long arg, int const_arg)
+static void tcg_out_call (TCGContext *s, tcg_target_long arg, int const_arg,
+ int lk)
{
#ifdef _CALL_AIX
int reg;
@@ -504,14 +505,14 @@ static void tcg_out_call (TCGContext *s, tcg_target_long arg, int const_arg)
tcg_out32 (s, LWZ | RT (0) | RA (reg));
tcg_out32 (s, MTSPR | RA (0) | CTR);
tcg_out32 (s, LWZ | RT (2) | RA (reg) | 4);
- tcg_out32 (s, BCCTR | BO_ALWAYS | LK);
+ tcg_out32 (s, BCCTR | BO_ALWAYS | lk);
#else
if (const_arg) {
- tcg_out_b (s, LK, arg);
+ tcg_out_b (s, lk, arg);
}
else {
tcg_out32 (s, MTSPR | RS (arg) | LR);
- tcg_out32 (s, BCLR | BO_ALWAYS | LK);
+ tcg_out32 (s, BCLR | BO_ALWAYS | lk);
}
#endif
}
@@ -860,7 +861,7 @@ static void tcg_out_qemu_ld_slow_path (TCGContext *s, TCGLabelQemuLdst *label)
tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
#endif
tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index);
- tcg_out_call (s, (tcg_target_long) ld_trampolines[s_bits], 1);
+ tcg_out_b (s, LK, (tcg_target_long) ld_trampolines[s_bits]);
tcg_out32 (s, (tcg_target_long) raddr);
switch (opc) {
case 0|4:
@@ -954,7 +955,7 @@ static void tcg_out_qemu_st_slow_path (TCGContext *s, TCGLabelQemuLdst *label)
ir++;
tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index);
- tcg_out_call (s, (tcg_target_long) st_trampolines[opc], 1);
+ tcg_out_b (s, LK, (tcg_target_long) st_trampolines[opc]);
tcg_out32 (s, (tcg_target_long) raddr);
tcg_out_b (s, 0, (tcg_target_long) raddr);
}
@@ -984,7 +985,7 @@ static void emit_ldst_trampoline (TCGContext *s, const void *ptr)
tcg_out32 (s, ADDI | RT (3) | RA (3) | 4);
tcg_out32 (s, MTSPR | RS (3) | LR);
tcg_out_mov (s, TCG_TYPE_I32, 3, TCG_AREG0);
- tcg_out_b (s, 0, (tcg_target_long) ptr);
+ tcg_out_call (s, (tcg_target_long) ptr, 1, 0);
}
#endif
@@ -1493,7 +1494,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
}
break;
case INDEX_op_call:
- tcg_out_call (s, args[0], const_args[0]);
+ tcg_out_call (s, args[0], const_args[0], LK);
break;
case INDEX_op_movi_i32:
tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
--
1.8.1.4
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [Qemu-devel] [PULL 03/21] tcg-ppc: use new return-argument ld/st helpers
2013-09-25 16:27 [Qemu-devel] [PULL 00/21] TCG ppc and ppc64 updates Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 01/21] configure: Allow command-line configure for ppc32 Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 02/21] tcg-ppc: fix qemu_ld/qemu_st for AIX ABI Richard Henderson
@ 2013-09-25 16:27 ` Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 04/21] tcg-ppc: Avoid code for nop move Richard Henderson
` (17 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: Richard Henderson @ 2013-09-25 16:27 UTC (permalink / raw)
To: qemu-devel; +Cc: Paolo Bonzini, anthony
From: Paolo Bonzini <pbonzini@redhat.com>
These use a 32-bit load-of-immediate to save a mflr+addi+mtlr sequence.
Tested with a Windows 98 guest (pretty much the most recent thing I
could run on my PPC machine) and kvm-unit-tests's sieve.flat. The
speed up for sieve.flat is as high as 10% for qemu-system-i386, 25%
(no kidding) for qemu-system-x86_64 on my PowerBook G4.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
include/exec/exec-all.h | 4 +---
tcg/ppc/tcg-target.c | 41 ++++++++++++++++++++---------------------
2 files changed, 21 insertions(+), 24 deletions(-)
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 77242e2..dc27f33 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -324,9 +324,7 @@ extern uintptr_t tci_tb_ptr;
In some implementations, we pass the "logical" return address manually;
in others, we must infer the logical return from the true return. */
#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
-# if defined (_ARCH_PPC) && !defined (_ARCH_PPC64)
-# define GETRA_LDST(RA) (*(int32_t *)((RA) - 4))
-# elif defined(__arm__)
+# if defined(__arm__)
/* We define two insns between the return address and the branch back to
straight-line. Find and decode that branch insn. */
# define GETRA_LDST(RA) tcg_getra_ldst(RA)
diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index 204ffbe..24a8621 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -550,22 +550,24 @@ static void add_qemu_ldst_label (TCGContext *s,
label->label_ptr[0] = label_ptr;
}
-/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
- int mmu_idx) */
+/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
+ * int mmu_idx, uintptr_t ra)
+ */
static const void * const qemu_ld_helpers[4] = {
- helper_ldb_mmu,
- helper_ldw_mmu,
- helper_ldl_mmu,
- helper_ldq_mmu,
+ helper_ret_ldub_mmu,
+ helper_ret_lduw_mmu,
+ helper_ret_ldul_mmu,
+ helper_ret_ldq_mmu,
};
-/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
- uintxx_t val, int mmu_idx) */
+/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
+ * uintxx_t val, int mmu_idx, uintptr_t ra)
+ */
static const void * const qemu_st_helpers[4] = {
- helper_stb_mmu,
- helper_stw_mmu,
- helper_stl_mmu,
- helper_stq_mmu,
+ helper_ret_stb_mmu,
+ helper_ret_stw_mmu,
+ helper_ret_stl_mmu,
+ helper_ret_stq_mmu,
};
static void *ld_trampolines[4];
@@ -860,9 +862,9 @@ static void tcg_out_qemu_ld_slow_path (TCGContext *s, TCGLabelQemuLdst *label)
tcg_out_mov (s, TCG_TYPE_I32, ir++, label->addrhi_reg);
tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
#endif
- tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index);
+ tcg_out_movi (s, TCG_TYPE_I32, ir++, mem_index);
+ tcg_out_movi (s, TCG_TYPE_I32, ir, (tcg_target_long) raddr);
tcg_out_b (s, LK, (tcg_target_long) ld_trampolines[s_bits]);
- tcg_out32 (s, (tcg_target_long) raddr);
switch (opc) {
case 0|4:
tcg_out32 (s, EXTSB | RA (data_reg) | RS (3));
@@ -954,10 +956,10 @@ static void tcg_out_qemu_st_slow_path (TCGContext *s, TCGLabelQemuLdst *label)
}
ir++;
- tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index);
- tcg_out_b (s, LK, (tcg_target_long) st_trampolines[opc]);
- tcg_out32 (s, (tcg_target_long) raddr);
- tcg_out_b (s, 0, (tcg_target_long) raddr);
+ tcg_out_movi (s, TCG_TYPE_I32, ir++, mem_index);
+ tcg_out_movi (s, TCG_TYPE_I32, ir, (tcg_target_long) raddr);
+ tcg_out32 (s, MTSPR | RS (ir) | LR);
+ tcg_out_b (s, 0, (tcg_target_long) st_trampolines[opc]);
}
void tcg_out_tb_finalize(TCGContext *s)
@@ -981,9 +983,6 @@ void tcg_out_tb_finalize(TCGContext *s)
#ifdef CONFIG_SOFTMMU
static void emit_ldst_trampoline (TCGContext *s, const void *ptr)
{
- tcg_out32 (s, MFSPR | RT (3) | LR);
- tcg_out32 (s, ADDI | RT (3) | RA (3) | 4);
- tcg_out32 (s, MTSPR | RS (3) | LR);
tcg_out_mov (s, TCG_TYPE_I32, 3, TCG_AREG0);
tcg_out_call (s, (tcg_target_long) ptr, 1, 0);
}
--
1.8.1.4
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [Qemu-devel] [PULL 04/21] tcg-ppc: Avoid code for nop move
2013-09-25 16:27 [Qemu-devel] [PULL 00/21] TCG ppc and ppc64 updates Richard Henderson
` (2 preceding siblings ...)
2013-09-25 16:27 ` [Qemu-devel] [PULL 03/21] tcg-ppc: use new return-argument ld/st helpers Richard Henderson
@ 2013-09-25 16:27 ` Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 05/21] tcg-ppc: Cleanup tcg_out_qemu_ld/st_slow_path Richard Henderson
` (16 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: Richard Henderson @ 2013-09-25 16:27 UTC (permalink / raw)
To: qemu-devel; +Cc: anthony
While nop moves are rare in code that's been through the optimizer,
they're not uncommon within the tcg backend itself.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc/tcg-target.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index 24a8621..965108b 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -450,7 +450,9 @@ static const uint32_t tcg_to_bc[] = {
static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
- tcg_out32 (s, OR | SAB (arg, ret, arg));
+ if (ret != arg) {
+ tcg_out32(s, OR | SAB(arg, ret, arg));
+ }
}
static void tcg_out_movi(TCGContext *s, TCGType type,
--
1.8.1.4
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [Qemu-devel] [PULL 05/21] tcg-ppc: Cleanup tcg_out_qemu_ld/st_slow_path
2013-09-25 16:27 [Qemu-devel] [PULL 00/21] TCG ppc and ppc64 updates Richard Henderson
` (3 preceding siblings ...)
2013-09-25 16:27 ` [Qemu-devel] [PULL 04/21] tcg-ppc: Avoid code for nop move Richard Henderson
@ 2013-09-25 16:27 ` Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 06/21] tcg-ppc: Use conditional branch and link to slow path Richard Henderson
` (15 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: Richard Henderson @ 2013-09-25 16:27 UTC (permalink / raw)
To: qemu-devel; +Cc: anthony
Coding style fixes. Use TCGReg enumeration values instead of raw
numbers. Don't needlessly pull the whole TCGLabelQemuLdst struct
into local variables. Less conditional compilation.
No functional changes.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc/tcg-target.c | 147 ++++++++++++++++++++-------------------------------
1 file changed, 58 insertions(+), 89 deletions(-)
diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index 965108b..a5f1f99 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -836,132 +836,101 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
}
#if defined(CONFIG_SOFTMMU)
-static void tcg_out_qemu_ld_slow_path (TCGContext *s, TCGLabelQemuLdst *label)
+static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
- int s_bits;
- int ir;
- int opc = label->opc;
- int mem_index = label->mem_index;
- int data_reg = label->datalo_reg;
- int data_reg2 = label->datahi_reg;
- int addr_reg = label->addrlo_reg;
- uint8_t *raddr = label->raddr;
- uint8_t **label_ptr = &label->label_ptr[0];
+ TCGReg ir, datalo, datahi;
- s_bits = opc & 3;
-
- /* resolve label address */
- reloc_pc14 (label_ptr[0], (tcg_target_long) s->code_ptr);
+ reloc_pc14 (l->label_ptr[0], (uintptr_t)s->code_ptr);
- /* slow path */
- ir = 4;
-#if TARGET_LONG_BITS == 32
- tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
-#else
+ ir = TCG_REG_R4;
+ if (TARGET_LONG_BITS == 32) {
+ tcg_out_mov(s, TCG_TYPE_I32, ir++, l->addrlo_reg);
+ } else {
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
- ir |= 1;
-#endif
- tcg_out_mov (s, TCG_TYPE_I32, ir++, label->addrhi_reg);
- tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
+ ir |= 1;
#endif
- tcg_out_movi (s, TCG_TYPE_I32, ir++, mem_index);
- tcg_out_movi (s, TCG_TYPE_I32, ir, (tcg_target_long) raddr);
- tcg_out_b (s, LK, (tcg_target_long) ld_trampolines[s_bits]);
- switch (opc) {
+ tcg_out_mov(s, TCG_TYPE_I32, ir++, l->addrhi_reg);
+ tcg_out_mov(s, TCG_TYPE_I32, ir++, l->addrlo_reg);
+ }
+ tcg_out_movi(s, TCG_TYPE_I32, ir++, l->mem_index);
+ tcg_out_movi(s, TCG_TYPE_PTR, ir, (uintptr_t)l->raddr);
+ tcg_out_b(s, LK, (uintptr_t)ld_trampolines[l->opc & 3]);
+
+ datalo = l->datalo_reg;
+ switch (l->opc) {
case 0|4:
- tcg_out32 (s, EXTSB | RA (data_reg) | RS (3));
+ tcg_out32(s, EXTSB | RA(datalo) | RS(TCG_REG_R3));
break;
case 1|4:
- tcg_out32 (s, EXTSH | RA (data_reg) | RS (3));
+ tcg_out32(s, EXTSH | RA(datalo) | RS(TCG_REG_R3));
break;
case 0:
case 1:
case 2:
- if (data_reg != 3)
- tcg_out_mov (s, TCG_TYPE_I32, data_reg, 3);
+ tcg_out_mov(s, TCG_TYPE_I32, datalo, TCG_REG_R3);
break;
case 3:
- if (data_reg == 3) {
- if (data_reg2 == 4) {
- tcg_out_mov (s, TCG_TYPE_I32, 0, 4);
- tcg_out_mov (s, TCG_TYPE_I32, 4, 3);
- tcg_out_mov (s, TCG_TYPE_I32, 3, 0);
- }
- else {
- tcg_out_mov (s, TCG_TYPE_I32, data_reg2, 3);
- tcg_out_mov (s, TCG_TYPE_I32, 3, 4);
- }
- }
- else {
- if (data_reg != 4) tcg_out_mov (s, TCG_TYPE_I32, data_reg, 4);
- if (data_reg2 != 3) tcg_out_mov (s, TCG_TYPE_I32, data_reg2, 3);
+ datahi = l->datahi_reg;
+ if (datalo != TCG_REG_R3) {
+ tcg_out_mov(s, TCG_TYPE_I32, datalo, TCG_REG_R4);
+ tcg_out_mov(s, TCG_TYPE_I32, datahi, TCG_REG_R3);
+ } else if (datahi != TCG_REG_R4) {
+ tcg_out_mov(s, TCG_TYPE_I32, datahi, TCG_REG_R3);
+ tcg_out_mov(s, TCG_TYPE_I32, datalo, TCG_REG_R4);
+ } else {
+ tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R0, TCG_REG_R4);
+ tcg_out_mov(s, TCG_TYPE_I32, datahi, TCG_REG_R3);
+ tcg_out_mov(s, TCG_TYPE_I32, datalo, TCG_REG_R0);
}
break;
}
- /* Jump to the code corresponding to next IR of qemu_st */
- tcg_out_b (s, 0, (tcg_target_long) raddr);
+ tcg_out_b (s, 0, (uintptr_t)l->raddr);
}
-static void tcg_out_qemu_st_slow_path (TCGContext *s, TCGLabelQemuLdst *label)
+static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
- int ir;
- int opc = label->opc;
- int mem_index = label->mem_index;
- int data_reg = label->datalo_reg;
- int data_reg2 = label->datahi_reg;
- int addr_reg = label->addrlo_reg;
- uint8_t *raddr = label->raddr;
- uint8_t **label_ptr = &label->label_ptr[0];
-
- /* resolve label address */
- reloc_pc14 (label_ptr[0], (tcg_target_long) s->code_ptr);
-
- /* slow path */
- ir = 4;
-#if TARGET_LONG_BITS == 32
- tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
-#else
+ TCGReg ir, datalo;
+
+ reloc_pc14 (l->label_ptr[0], (tcg_target_long) s->code_ptr);
+
+ ir = TCG_REG_R4;
+ if (TARGET_LONG_BITS == 32) {
+ tcg_out_mov (s, TCG_TYPE_I32, ir++, l->addrlo_reg);
+ } else {
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
- ir |= 1;
-#endif
- tcg_out_mov (s, TCG_TYPE_I32, ir++, label->addrhi_reg);
- tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
+ ir |= 1;
#endif
+ tcg_out_mov (s, TCG_TYPE_I32, ir++, l->addrhi_reg);
+ tcg_out_mov (s, TCG_TYPE_I32, ir++, l->addrlo_reg);
+ }
- switch (opc) {
+ datalo = l->datalo_reg;
+ switch (l->opc) {
case 0:
- tcg_out32 (s, (RLWINM
- | RA (ir)
- | RS (data_reg)
- | SH (0)
- | MB (24)
- | ME (31)));
+ tcg_out32(s, (RLWINM | RA (ir) | RS (datalo)
+ | SH (0) | MB (24) | ME (31)));
break;
case 1:
- tcg_out32 (s, (RLWINM
- | RA (ir)
- | RS (data_reg)
- | SH (0)
- | MB (16)
- | ME (31)));
+ tcg_out32(s, (RLWINM | RA (ir) | RS (datalo)
+ | SH (0) | MB (16) | ME (31)));
break;
case 2:
- tcg_out_mov (s, TCG_TYPE_I32, ir, data_reg);
+ tcg_out_mov(s, TCG_TYPE_I32, ir, datalo);
break;
case 3:
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
ir |= 1;
#endif
- tcg_out_mov (s, TCG_TYPE_I32, ir++, data_reg2);
- tcg_out_mov (s, TCG_TYPE_I32, ir, data_reg);
+ tcg_out_mov(s, TCG_TYPE_I32, ir++, l->datahi_reg);
+ tcg_out_mov(s, TCG_TYPE_I32, ir, datalo);
break;
}
ir++;
- tcg_out_movi (s, TCG_TYPE_I32, ir++, mem_index);
- tcg_out_movi (s, TCG_TYPE_I32, ir, (tcg_target_long) raddr);
- tcg_out32 (s, MTSPR | RS (ir) | LR);
- tcg_out_b (s, 0, (tcg_target_long) st_trampolines[opc]);
+ tcg_out_movi(s, TCG_TYPE_I32, ir++, l->mem_index);
+ tcg_out_movi(s, TCG_TYPE_PTR, ir, (uintptr_t)l->raddr);
+ tcg_out32(s, MTSPR | RS(ir) | LR);
+ tcg_out_b(s, 0, (uintptr_t)st_trampolines[l->opc]);
}
void tcg_out_tb_finalize(TCGContext *s)
--
1.8.1.4
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [Qemu-devel] [PULL 06/21] tcg-ppc: Use conditional branch and link to slow path
2013-09-25 16:27 [Qemu-devel] [PULL 00/21] TCG ppc and ppc64 updates Richard Henderson
` (4 preceding siblings ...)
2013-09-25 16:27 ` [Qemu-devel] [PULL 05/21] tcg-ppc: Cleanup tcg_out_qemu_ld/st_slow_path Richard Henderson
@ 2013-09-25 16:27 ` Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 07/21] tcg-ppc: Fix and cleanup tcg_out_tlb_check Richard Henderson
` (14 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: Richard Henderson @ 2013-09-25 16:27 UTC (permalink / raw)
To: qemu-devel; +Cc: anthony
Saves one insn per slow path. Note that we can no longer use
a tail call into the store helper.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc/tcg-target.c | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)
diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index a5f1f99..516d28f 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -611,9 +611,14 @@ static void tcg_out_tlb_check (TCGContext *s, int r0, int r1, int r2,
tcg_out32 (s, CMP | BF (6) | RA (addr_reg2) | RB (r1));
tcg_out32 (s, CRAND | BT (7, CR_EQ) | BA (6, CR_EQ) | BB (7, CR_EQ));
#endif
+
+ /* Use a conditional branch-and-link so that we load a pointer to
+ somewhere within the current opcode, for passing on to the helper.
+ This address cannot be used for a tail call, but it's shorter
+ than forming an address from scratch. */
*label_ptr = s->code_ptr;
retranst = ((uint16_t *) s->code_ptr)[1] & ~3;
- tcg_out32 (s, BC | BI (7, CR_EQ) | retranst | BO_COND_FALSE);
+ tcg_out32(s, BC | BI(7, CR_EQ) | retranst | BO_COND_FALSE | LK);
/* r0 now contains &env->tlb_table[mem_index][index].addr_x */
tcg_out32 (s, (LWZ
@@ -853,7 +858,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
tcg_out_mov(s, TCG_TYPE_I32, ir++, l->addrlo_reg);
}
tcg_out_movi(s, TCG_TYPE_I32, ir++, l->mem_index);
- tcg_out_movi(s, TCG_TYPE_PTR, ir, (uintptr_t)l->raddr);
+ tcg_out32(s, MFSPR | RT(ir++) | LR);
tcg_out_b(s, LK, (uintptr_t)ld_trampolines[l->opc & 3]);
datalo = l->datalo_reg;
@@ -928,9 +933,9 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
ir++;
tcg_out_movi(s, TCG_TYPE_I32, ir++, l->mem_index);
- tcg_out_movi(s, TCG_TYPE_PTR, ir, (uintptr_t)l->raddr);
- tcg_out32(s, MTSPR | RS(ir) | LR);
- tcg_out_b(s, 0, (uintptr_t)st_trampolines[l->opc]);
+ tcg_out32(s, MFSPR | RT(ir++) | LR);
+ tcg_out_b(s, LK, (uintptr_t)st_trampolines[l->opc]);
+ tcg_out_b(s, 0, (uintptr_t)l->raddr);
}
void tcg_out_tb_finalize(TCGContext *s)
--
1.8.1.4
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [Qemu-devel] [PULL 07/21] tcg-ppc: Fix and cleanup tcg_out_tlb_check
2013-09-25 16:27 [Qemu-devel] [PULL 00/21] TCG ppc and ppc64 updates Richard Henderson
` (5 preceding siblings ...)
2013-09-25 16:27 ` [Qemu-devel] [PULL 06/21] tcg-ppc: Use conditional branch and link to slow path Richard Henderson
@ 2013-09-25 16:27 ` Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 08/21] tcg-ppc64: Reformat tcg-target.c Richard Henderson
` (13 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: Richard Henderson @ 2013-09-25 16:27 UTC (permalink / raw)
To: qemu-devel; +Cc: anthony
The bug being fixed: sparc has so many mmu modes that the tlb_table offset
of the last one overflowed the 16-bit signed displacement we assumed would
fit. Handle this, and check the new assumption at compile time.
Load the tlb addend earlier for the fast path.
Remove the explicit address + addend and make use of index addressing.
Adjust constraints for qemu_ld64 such that we don't clobber the address
register or tlb addend before loading both values.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc/tcg-target.c | 302 ++++++++++++++++++++++-----------------------------
1 file changed, 127 insertions(+), 175 deletions(-)
diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index 516d28f..97e33ed 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -575,42 +575,72 @@ static const void * const qemu_st_helpers[4] = {
static void *ld_trampolines[4];
static void *st_trampolines[4];
-static void tcg_out_tlb_check (TCGContext *s, int r0, int r1, int r2,
- int addr_reg, int addr_reg2, int s_bits,
- int offset1, int offset2, uint8_t **label_ptr)
+/* Perform the TLB load and compare. Branches to the slow path, placing the
+ address of the branch in *LABEL_PTR. Loads the addend of the TLB into R0.
+ Clobbers R1 and R2. */
+
+static void tcg_out_tlb_check(TCGContext *s, TCGReg r0, TCGReg r1, TCGReg r2,
+ TCGReg addrlo, TCGReg addrhi, int s_bits,
+ int mem_index, int is_load, uint8_t **label_ptr)
{
+ int cmp_off =
+ (is_load
+ ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
+ : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
+ int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
uint16_t retranst;
+ TCGReg base = TCG_AREG0;
+
+ /* Extract the page index, shifted into place for tlb index. */
+ tcg_out32(s, (RLWINM
+ | RA(r0)
+ | RS(addrlo)
+ | SH(32 - (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS))
+ | MB(32 - (CPU_TLB_BITS + CPU_TLB_ENTRY_BITS))
+ | ME(31 - CPU_TLB_ENTRY_BITS)));
+
+ /* Compensate for very large offsets. */
+ if (add_off >= 0x8000) {
+ /* Most target env are smaller than 32k; none are larger than 64k.
+ Simplify the logic here merely to offset by 0x7ff0, giving us a
+ range just shy of 64k. Check this assumption. */
+ QEMU_BUILD_BUG_ON(offsetof(CPUArchState,
+ tlb_table[NB_MMU_MODES - 1][1])
+ > 0x7ff0 + 0x7fff);
+ tcg_out32(s, ADDI | RT(r1) | RA(base) | 0x7ff0);
+ base = r1;
+ cmp_off -= 0x7ff0;
+ add_off -= 0x7ff0;
+ }
- tcg_out32 (s, (RLWINM
- | RA (r0)
- | RS (addr_reg)
- | SH (32 - (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS))
- | MB (32 - (CPU_TLB_BITS + CPU_TLB_ENTRY_BITS))
- | ME (31 - CPU_TLB_ENTRY_BITS)
- )
- );
- tcg_out32 (s, ADD | RT (r0) | RA (r0) | RB (TCG_AREG0));
- tcg_out32 (s, (LWZU
- | RT (r1)
- | RA (r0)
- | offset1
- )
- );
- tcg_out32 (s, (RLWINM
- | RA (r2)
- | RS (addr_reg)
- | SH (0)
- | MB ((32 - s_bits) & 31)
- | ME (31 - TARGET_PAGE_BITS)
- )
- );
+ /* Clear the non-page, non-alignment bits from the address. */
+ tcg_out32(s, (RLWINM
+ | RA(r2)
+ | RS(addrlo)
+ | SH(0)
+ | MB((32 - s_bits) & 31)
+ | ME(31 - TARGET_PAGE_BITS)));
- tcg_out32 (s, CMP | BF (7) | RA (r2) | RB (r1));
-#if TARGET_LONG_BITS == 64
- tcg_out32 (s, LWZ | RT (r1) | RA (r0) | 4);
- tcg_out32 (s, CMP | BF (6) | RA (addr_reg2) | RB (r1));
- tcg_out32 (s, CRAND | BT (7, CR_EQ) | BA (6, CR_EQ) | BB (7, CR_EQ));
-#endif
+ tcg_out32(s, ADD | RT(r0) | RA(r0) | RB(base));
+ base = r0;
+
+ /* Load the tlb comparator. */
+ tcg_out32(s, LWZ | RT(r1) | RA(base) | (cmp_off & 0xffff));
+
+ tcg_out32(s, CMP | BF(7) | RA(r2) | RB(r1));
+
+ if (TARGET_LONG_BITS == 64) {
+ tcg_out32(s, LWZ | RT(r1) | RA(base) | ((cmp_off + 4) & 0xffff));
+ }
+
+ /* Load the tlb addend for use on the fast path.
+ Do this asap to minimize load delay. */
+ tcg_out32(s, LWZ | RT(r0) | RA(base) | (add_off & 0xffff));
+
+ if (TARGET_LONG_BITS == 64) {
+ tcg_out32(s, CMP | BF(6) | RA(addrhi) | RB(r1));
+ tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
+ }
/* Use a conditional branch-and-link so that we load a pointer to
somewhere within the current opcode, for passing on to the helper.
@@ -619,58 +649,31 @@ static void tcg_out_tlb_check (TCGContext *s, int r0, int r1, int r2,
*label_ptr = s->code_ptr;
retranst = ((uint16_t *) s->code_ptr)[1] & ~3;
tcg_out32(s, BC | BI(7, CR_EQ) | retranst | BO_COND_FALSE | LK);
-
- /* r0 now contains &env->tlb_table[mem_index][index].addr_x */
- tcg_out32 (s, (LWZ
- | RT (r0)
- | RA (r0)
- | offset2
- )
- );
- /* r0 = env->tlb_table[mem_index][index].addend */
- tcg_out32 (s, ADD | RT (r0) | RA (r0) | RB (addr_reg));
- /* r0 = env->tlb_table[mem_index][index].addend + addr */
-
}
#endif
static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
{
- int addr_reg, data_reg, data_reg2, r0, r1, rbase, bswap;
+ TCGReg addrlo, datalo, datahi, rbase;
+ int bswap;
#ifdef CONFIG_SOFTMMU
- int mem_index, s_bits, r2, addr_reg2;
+ int mem_index;
+ TCGReg addrhi;
uint8_t *label_ptr;
#endif
- data_reg = *args++;
- if (opc == 3)
- data_reg2 = *args++;
- else
- data_reg2 = 0;
- addr_reg = *args++;
+ datalo = *args++;
+ datahi = (opc == 3 ? *args++ : 0);
+ addrlo = *args++;
#ifdef CONFIG_SOFTMMU
-#if TARGET_LONG_BITS == 64
- addr_reg2 = *args++;
-#else
- addr_reg2 = 0;
-#endif
+ addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
mem_index = *args;
- s_bits = opc & 3;
- r0 = 3;
- r1 = 4;
- r2 = 0;
- rbase = 0;
-
- tcg_out_tlb_check (
- s, r0, r1, r2, addr_reg, addr_reg2, s_bits,
- offsetof (CPUArchState, tlb_table[mem_index][0].addr_read),
- offsetof (CPUTLBEntry, addend) - offsetof (CPUTLBEntry, addr_read),
- &label_ptr
- );
+
+ tcg_out_tlb_check(s, TCG_REG_R3, TCG_REG_R4, TCG_REG_R0, addrlo,
+ addrhi, opc & 3, mem_index, 0, &label_ptr);
+ rbase = TCG_REG_R3;
#else /* !CONFIG_SOFTMMU */
- r0 = addr_reg;
- r1 = 3;
rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0;
#endif
@@ -683,106 +686,72 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
switch (opc) {
default:
case 0:
- tcg_out32 (s, LBZX | TAB (data_reg, rbase, r0));
+ tcg_out32(s, LBZX | TAB(datalo, rbase, addrlo));
break;
case 0|4:
- tcg_out32 (s, LBZX | TAB (data_reg, rbase, r0));
- tcg_out32 (s, EXTSB | RA (data_reg) | RS (data_reg));
+ tcg_out32(s, LBZX | TAB(datalo, rbase, addrlo));
+ tcg_out32(s, EXTSB | RA(datalo) | RS(datalo));
break;
case 1:
- if (bswap)
- tcg_out32 (s, LHBRX | TAB (data_reg, rbase, r0));
- else
- tcg_out32 (s, LHZX | TAB (data_reg, rbase, r0));
+ tcg_out32(s, (bswap ? LHBRX : LHZX) | TAB(datalo, rbase, addrlo));
break;
case 1|4:
if (bswap) {
- tcg_out32 (s, LHBRX | TAB (data_reg, rbase, r0));
- tcg_out32 (s, EXTSH | RA (data_reg) | RS (data_reg));
+ tcg_out32(s, LHBRX | TAB(datalo, rbase, addrlo));
+ tcg_out32(s, EXTSH | RA(datalo) | RS(datalo));
+ } else {
+ tcg_out32(s, LHAX | TAB(datalo, rbase, addrlo));
}
- else tcg_out32 (s, LHAX | TAB (data_reg, rbase, r0));
break;
case 2:
- if (bswap)
- tcg_out32 (s, LWBRX | TAB (data_reg, rbase, r0));
- else
- tcg_out32 (s, LWZX | TAB (data_reg, rbase, r0));
+ tcg_out32(s, (bswap ? LWBRX : LWZX) | TAB(datalo, rbase, addrlo));
break;
case 3:
if (bswap) {
- tcg_out32 (s, ADDI | RT (r1) | RA (r0) | 4);
- tcg_out32 (s, LWBRX | TAB (data_reg, rbase, r0));
- tcg_out32 (s, LWBRX | TAB (data_reg2, rbase, r1));
- }
- else {
-#ifdef CONFIG_USE_GUEST_BASE
- tcg_out32 (s, ADDI | RT (r1) | RA (r0) | 4);
- tcg_out32 (s, LWZX | TAB (data_reg2, rbase, r0));
- tcg_out32 (s, LWZX | TAB (data_reg, rbase, r1));
-#else
- if (r0 == data_reg2) {
- tcg_out32 (s, LWZ | RT (0) | RA (r0));
- tcg_out32 (s, LWZ | RT (data_reg) | RA (r0) | 4);
- tcg_out_mov (s, TCG_TYPE_I32, data_reg2, 0);
- }
- else {
- tcg_out32 (s, LWZ | RT (data_reg2) | RA (r0));
- tcg_out32 (s, LWZ | RT (data_reg) | RA (r0) | 4);
- }
-#endif
+ tcg_out32(s, ADDI | RT(TCG_REG_R0) | RA(addrlo) | 4);
+ tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo));
+ tcg_out32(s, LWBRX | TAB(datahi, rbase, TCG_REG_R0));
+ } else if (rbase != 0) {
+ tcg_out32(s, ADDI | RT(TCG_REG_R0) | RA(addrlo) | 4);
+ tcg_out32(s, LWZX | TAB(datahi, rbase, addrlo));
+ tcg_out32(s, LWZX | TAB(datalo, rbase, TCG_REG_R0));
+ } else if (addrlo == datahi) {
+ tcg_out32(s, LWZ | RT(datalo) | RA(addrlo) | 4);
+ tcg_out32(s, LWZ | RT(datahi) | RA(addrlo));
+ } else {
+ tcg_out32(s, LWZ | RT(datahi) | RA(addrlo));
+ tcg_out32(s, LWZ | RT(datalo) | RA(addrlo) | 4);
}
break;
}
#ifdef CONFIG_SOFTMMU
- add_qemu_ldst_label (s,
- 1,
- opc,
- data_reg,
- data_reg2,
- addr_reg,
- addr_reg2,
- mem_index,
- s->code_ptr,
- label_ptr);
+ add_qemu_ldst_label(s, 1, opc, datalo, datahi, addrlo,
+ addrhi, mem_index, s->code_ptr, label_ptr);
#endif
}
static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
{
- int addr_reg, r0, r1, data_reg, data_reg2, bswap, rbase;
+ TCGReg addrlo, datalo, datahi, rbase;
+ int bswap;
#ifdef CONFIG_SOFTMMU
- int mem_index, r2, addr_reg2;
+ int mem_index;
+ TCGReg addrhi;
uint8_t *label_ptr;
#endif
- data_reg = *args++;
- if (opc == 3)
- data_reg2 = *args++;
- else
- data_reg2 = 0;
- addr_reg = *args++;
+ datalo = *args++;
+ datahi = (opc == 3 ? *args++ : 0);
+ addrlo = *args++;
#ifdef CONFIG_SOFTMMU
-#if TARGET_LONG_BITS == 64
- addr_reg2 = *args++;
-#else
- addr_reg2 = 0;
-#endif
+ addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
mem_index = *args;
- r0 = 3;
- r1 = 4;
- r2 = 0;
- rbase = 0;
-
- tcg_out_tlb_check (
- s, r0, r1, r2, addr_reg, addr_reg2, opc & 3,
- offsetof (CPUArchState, tlb_table[mem_index][0].addr_write),
- offsetof (CPUTLBEntry, addend) - offsetof (CPUTLBEntry, addr_write),
- &label_ptr
- );
+
+ tcg_out_tlb_check(s, TCG_REG_R3, TCG_REG_R4, TCG_REG_R0, addrlo,
+ addrhi, opc & 3, mem_index, 0, &label_ptr);
+ rbase = TCG_REG_R3;
#else /* !CONFIG_SOFTMMU */
- r0 = addr_reg;
- r1 = 3;
rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0;
#endif
@@ -793,50 +762,33 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
#endif
switch (opc) {
case 0:
- tcg_out32 (s, STBX | SAB (data_reg, rbase, r0));
+ tcg_out32(s, STBX | SAB(datalo, rbase, addrlo));
break;
case 1:
- if (bswap)
- tcg_out32 (s, STHBRX | SAB (data_reg, rbase, r0));
- else
- tcg_out32 (s, STHX | SAB (data_reg, rbase, r0));
+ tcg_out32(s, (bswap ? STHBRX : STHX) | SAB(datalo, rbase, addrlo));
break;
case 2:
- if (bswap)
- tcg_out32 (s, STWBRX | SAB (data_reg, rbase, r0));
- else
- tcg_out32 (s, STWX | SAB (data_reg, rbase, r0));
+ tcg_out32(s, (bswap ? STWBRX : STWX) | SAB(datalo, rbase, addrlo));
break;
case 3:
if (bswap) {
- tcg_out32 (s, ADDI | RT (r1) | RA (r0) | 4);
- tcg_out32 (s, STWBRX | SAB (data_reg, rbase, r0));
- tcg_out32 (s, STWBRX | SAB (data_reg2, rbase, r1));
- }
- else {
-#ifdef CONFIG_USE_GUEST_BASE
- tcg_out32 (s, STWX | SAB (data_reg2, rbase, r0));
- tcg_out32 (s, ADDI | RT (r1) | RA (r0) | 4);
- tcg_out32 (s, STWX | SAB (data_reg, rbase, r1));
-#else
- tcg_out32 (s, STW | RS (data_reg2) | RA (r0));
- tcg_out32 (s, STW | RS (data_reg) | RA (r0) | 4);
-#endif
+ tcg_out32(s, ADDI | RT(TCG_REG_R0) | RA(addrlo) | 4);
+ tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo));
+ tcg_out32(s, STWBRX | SAB(datahi, rbase, TCG_REG_R0));
+ } else if (rbase != 0) {
+ tcg_out32(s, ADDI | RT(TCG_REG_R0) | RA(addrlo) | 4);
+ tcg_out32(s, STWX | SAB(datahi, rbase, addrlo));
+ tcg_out32(s, STWX | SAB(datalo, rbase, TCG_REG_R0));
+ } else {
+ tcg_out32(s, STW | RS(datahi) | RA(addrlo));
+ tcg_out32(s, STW | RS(datalo) | RA(addrlo) | 4);
}
break;
}
#ifdef CONFIG_SOFTMMU
- add_qemu_ldst_label (s,
- 0,
- opc,
- data_reg,
- data_reg2,
- addr_reg,
- addr_reg2,
- mem_index,
- s->code_ptr,
- label_ptr);
+ add_qemu_ldst_label(s, 0, opc, datalo, datahi, addrlo, addrhi,
+ mem_index, s->code_ptr, label_ptr);
#endif
}
@@ -1994,7 +1946,7 @@ static const TCGTargetOpDef ppc_op_defs[] = {
{ INDEX_op_qemu_ld16u, { "r", "L" } },
{ INDEX_op_qemu_ld16s, { "r", "L" } },
{ INDEX_op_qemu_ld32, { "r", "L" } },
- { INDEX_op_qemu_ld64, { "r", "r", "L" } },
+ { INDEX_op_qemu_ld64, { "L", "L", "L" } },
{ INDEX_op_qemu_st8, { "K", "K" } },
{ INDEX_op_qemu_st16, { "K", "K" } },
@@ -2006,7 +1958,7 @@ static const TCGTargetOpDef ppc_op_defs[] = {
{ INDEX_op_qemu_ld16u, { "r", "L", "L" } },
{ INDEX_op_qemu_ld16s, { "r", "L", "L" } },
{ INDEX_op_qemu_ld32, { "r", "L", "L" } },
- { INDEX_op_qemu_ld64, { "r", "L", "L", "L" } },
+ { INDEX_op_qemu_ld64, { "L", "L", "L", "L" } },
{ INDEX_op_qemu_st8, { "K", "K", "K" } },
{ INDEX_op_qemu_st16, { "K", "K", "K" } },
--
1.8.1.4
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [Qemu-devel] [PULL 08/21] tcg-ppc64: Reformat tcg-target.c
2013-09-25 16:27 [Qemu-devel] [PULL 00/21] TCG ppc and ppc64 updates Richard Henderson
` (6 preceding siblings ...)
2013-09-25 16:27 ` [Qemu-devel] [PULL 07/21] tcg-ppc: Fix and cleanup tcg_out_tlb_check Richard Henderson
@ 2013-09-25 16:27 ` Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 09/21] tcg-ppc64: More use of TAI and SAI helper macros Richard Henderson
` (12 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: Richard Henderson @ 2013-09-25 16:27 UTC (permalink / raw)
To: qemu-devel; +Cc: anthony
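Whitespace and brace changes only, apart from two behavior-preserving tidy-ups: the "reg = arg" assignment in tcg_out_call is folded into the declaration, and the goto_tb comments are reworded.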
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 478 ++++++++++++++++++++++++-------------------------
1 file changed, 239 insertions(+), 239 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 0bd1e0c..b554b00 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -173,58 +173,59 @@ static const int tcg_target_callee_save_regs[] = {
TCG_REG_R31
};
-static uint32_t reloc_pc24_val (void *pc, tcg_target_long target)
+static uint32_t reloc_pc24_val(void *pc, tcg_target_long target)
{
tcg_target_long disp;
- disp = target - (tcg_target_long) pc;
- if ((disp << 38) >> 38 != disp)
- tcg_abort ();
+ disp = target - (tcg_target_long)pc;
+ if ((disp << 38) >> 38 != disp) {
+ tcg_abort();
+ }
return disp & 0x3fffffc;
}
-static void reloc_pc24 (void *pc, tcg_target_long target)
+static void reloc_pc24(void *pc, tcg_target_long target)
{
- *(uint32_t *) pc = (*(uint32_t *) pc & ~0x3fffffc)
- | reloc_pc24_val (pc, target);
+ *(uint32_t *)pc = (*(uint32_t *)pc & ~0x3fffffc)
+ | reloc_pc24_val(pc, target);
}
-static uint16_t reloc_pc14_val (void *pc, tcg_target_long target)
+static uint16_t reloc_pc14_val(void *pc, tcg_target_long target)
{
tcg_target_long disp;
- disp = target - (tcg_target_long) pc;
- if (disp != (int16_t) disp)
- tcg_abort ();
+ disp = target - (tcg_target_long)pc;
+ if (disp != (int16_t) disp) {
+ tcg_abort();
+ }
return disp & 0xfffc;
}
-static void reloc_pc14 (void *pc, tcg_target_long target)
+static void reloc_pc14(void *pc, tcg_target_long target)
{
- *(uint32_t *) pc = (*(uint32_t *) pc & ~0xfffc)
- | reloc_pc14_val (pc, target);
+ *(uint32_t *)pc = (*(uint32_t *)pc & ~0xfffc) | reloc_pc14_val(pc, target);
}
-static void patch_reloc (uint8_t *code_ptr, int type,
- intptr_t value, intptr_t addend)
+static void patch_reloc(uint8_t *code_ptr, int type,
+ intptr_t value, intptr_t addend)
{
value += addend;
switch (type) {
case R_PPC_REL14:
- reloc_pc14 (code_ptr, value);
+ reloc_pc14(code_ptr, value);
break;
case R_PPC_REL24:
- reloc_pc24 (code_ptr, value);
+ reloc_pc24(code_ptr, value);
break;
default:
- tcg_abort ();
+ tcg_abort();
}
}
/* parse target specific constraints */
-static int target_parse_constraint (TCGArgConstraint *ct, const char **pct_str)
+static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
const char *ct_str;
@@ -232,29 +233,29 @@ static int target_parse_constraint (TCGArgConstraint *ct, const char **pct_str)
switch (ct_str[0]) {
case 'A': case 'B': case 'C': case 'D':
ct->ct |= TCG_CT_REG;
- tcg_regset_set_reg (ct->u.regs, 3 + ct_str[0] - 'A');
+ tcg_regset_set_reg(ct->u.regs, 3 + ct_str[0] - 'A');
break;
case 'r':
ct->ct |= TCG_CT_REG;
- tcg_regset_set32 (ct->u.regs, 0, 0xffffffff);
+ tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
break;
case 'L': /* qemu_ld constraint */
ct->ct |= TCG_CT_REG;
- tcg_regset_set32 (ct->u.regs, 0, 0xffffffff);
- tcg_regset_reset_reg (ct->u.regs, TCG_REG_R3);
+ tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
#ifdef CONFIG_SOFTMMU
- tcg_regset_reset_reg (ct->u.regs, TCG_REG_R4);
- tcg_regset_reset_reg (ct->u.regs, TCG_REG_R5);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5);
#endif
break;
case 'S': /* qemu_st constraint */
ct->ct |= TCG_CT_REG;
- tcg_regset_set32 (ct->u.regs, 0, 0xffffffff);
- tcg_regset_reset_reg (ct->u.regs, TCG_REG_R3);
+ tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
#ifdef CONFIG_SOFTMMU
- tcg_regset_reset_reg (ct->u.regs, TCG_REG_R4);
- tcg_regset_reset_reg (ct->u.regs, TCG_REG_R5);
- tcg_regset_reset_reg (ct->u.regs, TCG_REG_R6);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R6);
#endif
break;
case 'I':
@@ -284,8 +285,8 @@ static int target_parse_constraint (TCGArgConstraint *ct, const char **pct_str)
}
/* test if a constant matches the constraint */
-static int tcg_target_const_match (tcg_target_long val,
- const TCGArgConstraint *arg_ct)
+static int tcg_target_const_match(tcg_target_long val,
+ const TCGArgConstraint *arg_ct)
{
int ct = arg_ct->ct;
if (ct & TCG_CT_CONST) {
@@ -425,7 +426,7 @@ static int tcg_target_const_match (tcg_target_long val,
#define STHX XO31(407)
#define STWX XO31(151)
-#define SPR(a,b) ((((a)<<5)|(b))<<11)
+#define SPR(a, b) ((((a)<<5)|(b))<<11)
#define LR SPR(8, 0)
#define CTR SPR(9, 0)
@@ -439,7 +440,7 @@ static int tcg_target_const_match (tcg_target_long val,
#define SRADI XO31(413<<1)
#define TW XO31( 4)
-#define TRAP (TW | TO (31))
+#define TRAP (TW | TO(31))
#define RT(r) ((r)<<21)
#define RS(r) ((r)<<21)
@@ -467,9 +468,9 @@ static int tcg_target_const_match (tcg_target_long val,
#define BB(n, c) (((c)+((n)*4))<<11)
#define BC_(n, c) (((c)+((n)*4))<<6)
-#define BO_COND_TRUE BO (12)
-#define BO_COND_FALSE BO ( 4)
-#define BO_ALWAYS BO (20)
+#define BO_COND_TRUE BO(12)
+#define BO_COND_FALSE BO( 4)
+#define BO_ALWAYS BO(20)
enum {
CR_LT,
@@ -479,16 +480,16 @@ enum {
};
static const uint32_t tcg_to_bc[] = {
- [TCG_COND_EQ] = BC | BI (7, CR_EQ) | BO_COND_TRUE,
- [TCG_COND_NE] = BC | BI (7, CR_EQ) | BO_COND_FALSE,
- [TCG_COND_LT] = BC | BI (7, CR_LT) | BO_COND_TRUE,
- [TCG_COND_GE] = BC | BI (7, CR_LT) | BO_COND_FALSE,
- [TCG_COND_LE] = BC | BI (7, CR_GT) | BO_COND_FALSE,
- [TCG_COND_GT] = BC | BI (7, CR_GT) | BO_COND_TRUE,
- [TCG_COND_LTU] = BC | BI (7, CR_LT) | BO_COND_TRUE,
- [TCG_COND_GEU] = BC | BI (7, CR_LT) | BO_COND_FALSE,
- [TCG_COND_LEU] = BC | BI (7, CR_GT) | BO_COND_FALSE,
- [TCG_COND_GTU] = BC | BI (7, CR_GT) | BO_COND_TRUE,
+ [TCG_COND_EQ] = BC | BI(7, CR_EQ) | BO_COND_TRUE,
+ [TCG_COND_NE] = BC | BI(7, CR_EQ) | BO_COND_FALSE,
+ [TCG_COND_LT] = BC | BI(7, CR_LT) | BO_COND_TRUE,
+ [TCG_COND_GE] = BC | BI(7, CR_LT) | BO_COND_FALSE,
+ [TCG_COND_LE] = BC | BI(7, CR_GT) | BO_COND_FALSE,
+ [TCG_COND_GT] = BC | BI(7, CR_GT) | BO_COND_TRUE,
+ [TCG_COND_LTU] = BC | BI(7, CR_LT) | BO_COND_TRUE,
+ [TCG_COND_GEU] = BC | BI(7, CR_LT) | BO_COND_FALSE,
+ [TCG_COND_LEU] = BC | BI(7, CR_GT) | BO_COND_FALSE,
+ [TCG_COND_GTU] = BC | BI(7, CR_GT) | BO_COND_TRUE,
};
/* The low bit here is set if the RA and RB fields must be inverted. */
@@ -508,15 +509,15 @@ static const uint32_t tcg_to_isel[] = {
static inline void tcg_out_mov(TCGContext *s, TCGType type,
TCGReg ret, TCGReg arg)
{
- tcg_out32 (s, OR | SAB (arg, ret, arg));
+ tcg_out32(s, OR | SAB(arg, ret, arg));
}
static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,
int sh, int mb)
{
- sh = SH (sh & 0x1f) | (((sh >> 5) & 1) << 1);
- mb = MB64 ((mb >> 5) | ((mb << 1) & 0x3f));
- tcg_out32 (s, op | RA (ra) | RS (rs) | sh | mb);
+ sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1);
+ mb = MB64((mb >> 5) | ((mb << 1) & 0x3f));
+ tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb);
}
static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs,
@@ -686,44 +687,42 @@ static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
tcg_out_zori32(s, dst, src, c, XORI, XORIS);
}
-static void tcg_out_b (TCGContext *s, int mask, tcg_target_long target)
+static void tcg_out_b(TCGContext *s, int mask, tcg_target_long target)
{
tcg_target_long disp;
- disp = target - (tcg_target_long) s->code_ptr;
- if ((disp << 38) >> 38 == disp)
- tcg_out32 (s, B | (disp & 0x3fffffc) | mask);
- else {
- tcg_out_movi (s, TCG_TYPE_I64, 0, (tcg_target_long) target);
- tcg_out32 (s, MTSPR | RS (0) | CTR);
- tcg_out32 (s, BCCTR | BO_ALWAYS | mask);
+ disp = target - (tcg_target_long)s->code_ptr;
+ if ((disp << 38) >> 38 == disp) {
+ tcg_out32(s, B | (disp & 0x3fffffc) | mask);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_I64, 0, (tcg_target_long)target);
+ tcg_out32(s, MTSPR | RS(0) | CTR);
+ tcg_out32(s, BCCTR | BO_ALWAYS | mask);
}
}
-static void tcg_out_call (TCGContext *s, tcg_target_long arg, int const_arg)
+static void tcg_out_call(TCGContext *s, tcg_target_long arg, int const_arg)
{
#ifdef __APPLE__
if (const_arg) {
- tcg_out_b (s, LK, arg);
- }
- else {
- tcg_out32 (s, MTSPR | RS (arg) | LR);
- tcg_out32 (s, BCLR | BO_ALWAYS | LK);
+ tcg_out_b(s, LK, arg);
+ } else {
+ tcg_out32(s, MTSPR | RS(arg) | LR);
+ tcg_out32(s, BCLR | BO_ALWAYS | LK);
}
#else
- int reg;
+ int reg = arg;
if (const_arg) {
reg = 2;
- tcg_out_movi (s, TCG_TYPE_I64, reg, arg);
+ tcg_out_movi(s, TCG_TYPE_I64, reg, arg);
}
- else reg = arg;
- tcg_out32 (s, LD | RT (0) | RA (reg));
- tcg_out32 (s, MTSPR | RA (0) | CTR);
- tcg_out32 (s, LD | RT (11) | RA (reg) | 16);
- tcg_out32 (s, LD | RT (2) | RA (reg) | 8);
- tcg_out32 (s, BCCTR | BO_ALWAYS | LK);
+ tcg_out32(s, LD | RT(0) | RA(reg));
+ tcg_out32(s, MTSPR | RA(0) | CTR);
+ tcg_out32(s, LD | RT(11) | RA(reg) | 16);
+ tcg_out32(s, LD | RT(2) | RA(reg) | 8);
+ tcg_out32(s, BCCTR | BO_ALWAYS | LK);
#endif
}
@@ -741,7 +740,7 @@ static void tcg_out_ldst(TCGContext *s, TCGReg ret, TCGReg addr,
static void tcg_out_ldsta(TCGContext *s, TCGReg ret, TCGReg addr,
int offset, int op1, int op2)
{
- if (offset == (int16_t) (offset & ~3)) {
+ if (offset == (int16_t)(offset & ~3)) {
tcg_out32(s, op1 | TAI(ret, addr, offset));
} else {
tcg_out_movi(s, TCG_TYPE_I64, 0, offset);
@@ -749,7 +748,7 @@ static void tcg_out_ldsta(TCGContext *s, TCGReg ret, TCGReg addr,
}
}
-#if defined (CONFIG_SOFTMMU)
+#if defined(CONFIG_SOFTMMU)
/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
int mmu_idx) */
static const void * const qemu_ld_helpers[4] = {
@@ -783,7 +782,7 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg r0, TCGReg r1, TCGReg r2,
tcg_out_rlw(s, RLWINM, r2, addr_reg, 0,
(32 - s_bits) & 31, 31 - TARGET_PAGE_BITS);
#else
- tcg_out_rld (s, RLDICL, r0, addr_reg,
+ tcg_out_rld(s, RLDICL, r0, addr_reg,
64 - TARGET_PAGE_BITS,
64 - CPU_TLB_BITS);
tcg_out_shli64(s, r0, r0, CPU_TLB_ENTRY_BITS);
@@ -792,13 +791,12 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg r0, TCGReg r1, TCGReg r2,
tcg_out32(s, LD_ADDR | TAI(r1, r0, offset));
if (!s_bits) {
- tcg_out_rld (s, RLDICR, r2, addr_reg, 0, 63 - TARGET_PAGE_BITS);
- }
- else {
- tcg_out_rld (s, RLDICL, r2, addr_reg,
- 64 - TARGET_PAGE_BITS,
- TARGET_PAGE_BITS - s_bits);
- tcg_out_rld (s, RLDICL, r2, r2, TARGET_PAGE_BITS, 0);
+ tcg_out_rld(s, RLDICR, r2, addr_reg, 0, 63 - TARGET_PAGE_BITS);
+ } else {
+ tcg_out_rld(s, RLDICL, r2, addr_reg,
+ 64 - TARGET_PAGE_BITS,
+ TARGET_PAGE_BITS - s_bits);
+ tcg_out_rld(s, RLDICL, r2, r2, TARGET_PAGE_BITS, 0);
}
#endif
}
@@ -826,7 +824,7 @@ static const uint32_t qemu_exts_opc[4] = {
EXTSB, EXTSH, EXTSW, 0
};
-static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
{
TCGReg addr_reg, data_reg, r0, r1, rbase;
uint32_t insn, s_bits;
@@ -848,23 +846,23 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
r2 = 0;
rbase = 0;
- tcg_out_tlb_read (s, r0, r1, r2, addr_reg, s_bits,
- offsetof (CPUArchState, tlb_table[mem_index][0].addr_read));
+ tcg_out_tlb_read(s, r0, r1, r2, addr_reg, s_bits,
+ offsetof(CPUArchState, tlb_table[mem_index][0].addr_read));
- tcg_out32 (s, CMP | BF (7) | RA (r2) | RB (r1) | CMP_L);
+ tcg_out32(s, CMP | BF(7) | RA(r2) | RB(r1) | CMP_L);
label1_ptr = s->code_ptr;
#ifdef FAST_PATH
- tcg_out32 (s, BC | BI (7, CR_EQ) | BO_COND_TRUE);
+ tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_TRUE);
#endif
/* slow path */
ir = 3;
- tcg_out_mov (s, TCG_TYPE_I64, ir++, TCG_AREG0);
- tcg_out_mov (s, TCG_TYPE_I64, ir++, addr_reg);
- tcg_out_movi (s, TCG_TYPE_I64, ir++, mem_index);
+ tcg_out_mov(s, TCG_TYPE_I64, ir++, TCG_AREG0);
+ tcg_out_mov(s, TCG_TYPE_I64, ir++, addr_reg);
+ tcg_out_movi(s, TCG_TYPE_I64, ir++, mem_index);
- tcg_out_call (s, (tcg_target_long) qemu_ld_helpers[s_bits], 1);
+ tcg_out_call(s, (tcg_target_long) qemu_ld_helpers[s_bits], 1);
if (opc & 4) {
insn = qemu_exts_opc[s_bits];
@@ -873,11 +871,11 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
tcg_out_mov(s, TCG_TYPE_I64, data_reg, 3);
}
label2_ptr = s->code_ptr;
- tcg_out32 (s, B);
+ tcg_out32(s, B);
/* label1: fast path */
#ifdef FAST_PATH
- reloc_pc14 (label1_ptr, (tcg_target_long) s->code_ptr);
+ reloc_pc14(label1_ptr, (tcg_target_long)s->code_ptr);
#endif
/* r0 now contains &env->tlb_table[mem_index][index].addr_read */
@@ -909,15 +907,15 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
insn = qemu_ldx_opc[s_bits];
tcg_out32(s, insn | TAB(data_reg, rbase, r0));
insn = qemu_exts_opc[s_bits];
- tcg_out32 (s, insn | RA(data_reg) | RS(data_reg));
+ tcg_out32(s, insn | RA(data_reg) | RS(data_reg));
}
#ifdef CONFIG_SOFTMMU
- reloc_pc24 (label2_ptr, (tcg_target_long) s->code_ptr);
+ reloc_pc24(label2_ptr, (tcg_target_long)s->code_ptr);
#endif
}
-static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
+static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
{
TCGReg addr_reg, r0, r1, rbase, data_reg;
uint32_t insn;
@@ -938,38 +936,38 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
r2 = 0;
rbase = 0;
- tcg_out_tlb_read (s, r0, r1, r2, addr_reg, opc,
- offsetof (CPUArchState, tlb_table[mem_index][0].addr_write));
+ tcg_out_tlb_read(s, r0, r1, r2, addr_reg, opc,
+ offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
- tcg_out32 (s, CMP | BF (7) | RA (r2) | RB (r1) | CMP_L);
+ tcg_out32(s, CMP | BF(7) | RA(r2) | RB(r1) | CMP_L);
label1_ptr = s->code_ptr;
#ifdef FAST_PATH
- tcg_out32 (s, BC | BI (7, CR_EQ) | BO_COND_TRUE);
+ tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_TRUE);
#endif
/* slow path */
ir = 3;
- tcg_out_mov (s, TCG_TYPE_I64, ir++, TCG_AREG0);
- tcg_out_mov (s, TCG_TYPE_I64, ir++, addr_reg);
- tcg_out_rld (s, RLDICL, ir++, data_reg, 0, 64 - (1 << (3 + opc)));
- tcg_out_movi (s, TCG_TYPE_I64, ir++, mem_index);
+ tcg_out_mov(s, TCG_TYPE_I64, ir++, TCG_AREG0);
+ tcg_out_mov(s, TCG_TYPE_I64, ir++, addr_reg);
+ tcg_out_rld(s, RLDICL, ir++, data_reg, 0, 64 - (1 << (3 + opc)));
+ tcg_out_movi(s, TCG_TYPE_I64, ir++, mem_index);
- tcg_out_call (s, (tcg_target_long) qemu_st_helpers[opc], 1);
+ tcg_out_call(s, (tcg_target_long)qemu_st_helpers[opc], 1);
label2_ptr = s->code_ptr;
- tcg_out32 (s, B);
+ tcg_out32(s, B);
/* label1: fast path */
#ifdef FAST_PATH
- reloc_pc14 (label1_ptr, (tcg_target_long) s->code_ptr);
+ reloc_pc14(label1_ptr, (tcg_target_long)s->code_ptr);
#endif
- tcg_out32 (s, (LD
- | RT (r0)
- | RA (r0)
- | (offsetof (CPUTLBEntry, addend)
- - offsetof (CPUTLBEntry, addr_write))
+ tcg_out32(s, (LD
+ | RT(r0)
+ | RA(r0)
+ | (offsetof(CPUTLBEntry, addend)
+ - offsetof(CPUTLBEntry, addr_write))
));
/* r0 = env->tlb_table[mem_index][index].addend */
tcg_out32(s, ADD | TAB(r0, r0, addr_reg));
@@ -995,11 +993,11 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
}
#ifdef CONFIG_SOFTMMU
- reloc_pc24 (label2_ptr, (tcg_target_long) s->code_ptr);
+ reloc_pc24(label2_ptr, (tcg_target_long)s->code_ptr);
#endif
}
-static void tcg_target_qemu_prologue (TCGContext *s)
+static void tcg_target_qemu_prologue(TCGContext *s)
{
int i, frame_size;
#ifndef __APPLE__
@@ -1014,52 +1012,52 @@ static void tcg_target_qemu_prologue (TCGContext *s)
+ 8 /* link editor doubleword */
+ 8 /* TOC save area */
+ TCG_STATIC_CALL_ARGS_SIZE
- + ARRAY_SIZE (tcg_target_callee_save_regs) * 8
+ + ARRAY_SIZE(tcg_target_callee_save_regs) * 8
+ CPU_TEMP_BUF_NLONGS * sizeof(long)
;
frame_size = (frame_size + 15) & ~15;
- tcg_set_frame (s, TCG_REG_CALL_STACK, frame_size
- - CPU_TEMP_BUF_NLONGS * sizeof (long),
- CPU_TEMP_BUF_NLONGS * sizeof (long));
+ tcg_set_frame(s, TCG_REG_CALL_STACK, frame_size
+ - CPU_TEMP_BUF_NLONGS * sizeof(long),
+ CPU_TEMP_BUF_NLONGS * sizeof(long));
#ifndef __APPLE__
/* First emit adhoc function descriptor */
addr = (uint64_t) s->code_ptr + 24;
- tcg_out32 (s, addr >> 32); tcg_out32 (s, addr); /* entry point */
+ tcg_out32(s, addr >> 32); tcg_out32(s, addr); /* entry point */
s->code_ptr += 16; /* skip TOC and environment pointer */
#endif
/* Prologue */
- tcg_out32 (s, MFSPR | RT (0) | LR);
- tcg_out32 (s, STDU | RS (1) | RA (1) | (-frame_size & 0xffff));
- for (i = 0; i < ARRAY_SIZE (tcg_target_callee_save_regs); ++i)
- tcg_out32 (s, (STD
- | RS (tcg_target_callee_save_regs[i])
- | RA (1)
+ tcg_out32(s, MFSPR | RT(0) | LR);
+ tcg_out32(s, STDU | RS(1) | RA(1) | (-frame_size & 0xffff));
+ for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i)
+ tcg_out32(s, (STD
+ | RS(tcg_target_callee_save_regs[i])
+ | RA(1)
| (i * 8 + 48 + TCG_STATIC_CALL_ARGS_SIZE)
)
);
- tcg_out32 (s, STD | RS (0) | RA (1) | (frame_size + 16));
+ tcg_out32(s, STD | RS(0) | RA(1) | (frame_size + 16));
#ifdef CONFIG_USE_GUEST_BASE
if (GUEST_BASE) {
- tcg_out_movi (s, TCG_TYPE_I64, TCG_GUEST_BASE_REG, GUEST_BASE);
- tcg_regset_set_reg (s->reserved_regs, TCG_GUEST_BASE_REG);
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_GUEST_BASE_REG, GUEST_BASE);
+ tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
}
#endif
- tcg_out_mov (s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
- tcg_out32 (s, MTSPR | RS (tcg_target_call_iarg_regs[1]) | CTR);
- tcg_out32 (s, BCCTR | BO_ALWAYS);
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
+ tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
+ tcg_out32(s, BCCTR | BO_ALWAYS);
/* Epilogue */
tb_ret_addr = s->code_ptr;
- for (i = 0; i < ARRAY_SIZE (tcg_target_callee_save_regs); ++i)
- tcg_out32 (s, (LD
- | RT (tcg_target_callee_save_regs[i])
- | RA (1)
+ for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i)
+ tcg_out32(s, (LD
+ | RT(tcg_target_callee_save_regs[i])
+ | RA(1)
| (i * 8 + 48 + TCG_STATIC_CALL_ARGS_SIZE)
)
);
@@ -1072,19 +1070,21 @@ static void tcg_target_qemu_prologue (TCGContext *s)
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
intptr_t arg2)
{
- if (type == TCG_TYPE_I32)
- tcg_out_ldst (s, ret, arg1, arg2, LWZ, LWZX);
- else
- tcg_out_ldsta (s, ret, arg1, arg2, LD, LDX);
+ if (type == TCG_TYPE_I32) {
+ tcg_out_ldst(s, ret, arg1, arg2, LWZ, LWZX);
+ } else {
+ tcg_out_ldsta(s, ret, arg1, arg2, LD, LDX);
+ }
}
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
intptr_t arg2)
{
- if (type == TCG_TYPE_I32)
- tcg_out_ldst (s, arg, arg1, arg2, STW, STWX);
- else
- tcg_out_ldsta (s, arg, arg1, arg2, STD, STDX);
+ if (type == TCG_TYPE_I32) {
+ tcg_out_ldst(s, arg, arg1, arg2, STW, STWX);
+ } else {
+ tcg_out_ldsta(s, arg, arg1, arg2, STD, STDX);
+ }
}
static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
@@ -1106,8 +1106,7 @@ static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
op = CMPI;
imm = 1;
break;
- }
- else if ((uint16_t) arg2 == arg2) {
+ } else if ((uint16_t) arg2 == arg2) {
op = CMPLI;
imm = 1;
break;
@@ -1148,7 +1147,7 @@ static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
break;
default:
- tcg_abort ();
+ tcg_abort();
}
op |= BF(cr) | ((type == TCG_TYPE_I64) << 21);
@@ -1292,13 +1291,13 @@ static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
case TCG_COND_GE:
case TCG_COND_GEU:
sh = 31;
- crop = CRNOR | BT (7, CR_EQ) | BA (7, CR_LT) | BB (7, CR_LT);
+ crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_LT) | BB(7, CR_LT);
goto crtest;
case TCG_COND_LE:
case TCG_COND_LEU:
sh = 31;
- crop = CRNOR | BT (7, CR_EQ) | BA (7, CR_GT) | BB (7, CR_GT);
+ crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_GT) | BB(7, CR_GT);
crtest:
tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
if (crop) {
@@ -1309,22 +1308,22 @@ static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
break;
default:
- tcg_abort ();
+ tcg_abort();
}
}
-static void tcg_out_bc (TCGContext *s, int bc, int label_index)
+static void tcg_out_bc(TCGContext *s, int bc, int label_index)
{
TCGLabel *l = &s->labels[label_index];
- if (l->has_value)
- tcg_out32 (s, bc | reloc_pc14_val (s->code_ptr, l->u.value));
- else {
+ if (l->has_value) {
+ tcg_out32(s, bc | reloc_pc14_val(s->code_ptr, l->u.value));
+ } else {
uint16_t val = *(uint16_t *) &s->code_ptr[2];
/* Thanks to Andrzej Zaborowski */
- tcg_out32 (s, bc | (val & 0xfffc));
- tcg_out_reloc (s, s->code_ptr - 4, R_PPC_REL14, label_index, 0);
+ tcg_out32(s, bc | (val & 0xfffc));
+ tcg_out_reloc(s, s->code_ptr - 4, R_PPC_REL14, label_index, 0);
}
}
@@ -1384,37 +1383,36 @@ static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond,
}
}
-void ppc_tb_set_jmp_target (unsigned long jmp_addr, unsigned long addr)
+void ppc_tb_set_jmp_target(unsigned long jmp_addr, unsigned long addr)
{
TCGContext s;
unsigned long patch_size;
s.code_ptr = (uint8_t *) jmp_addr;
- tcg_out_b (&s, 0, addr);
+ tcg_out_b(&s, 0, addr);
patch_size = s.code_ptr - (uint8_t *) jmp_addr;
- flush_icache_range (jmp_addr, jmp_addr + patch_size);
+ flush_icache_range(jmp_addr, jmp_addr + patch_size);
}
-static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
- const int *const_args)
+static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
+ const int *const_args)
{
TCGArg a0, a1, a2;
int c;
switch (opc) {
case INDEX_op_exit_tb:
- tcg_out_movi (s, TCG_TYPE_I64, TCG_REG_R3, args[0]);
- tcg_out_b (s, 0, (tcg_target_long) tb_ret_addr);
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R3, args[0]);
+ tcg_out_b(s, 0, (tcg_target_long)tb_ret_addr);
break;
case INDEX_op_goto_tb:
if (s->tb_jmp_offset) {
- /* direct jump method */
-
+ /* Direct jump method. */
s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
s->code_ptr += 28;
- }
- else {
- tcg_abort ();
+ } else {
+ /* Indirect jump method. */
+ tcg_abort();
}
s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
break;
@@ -1423,67 +1421,66 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
TCGLabel *l = &s->labels[args[0]];
if (l->has_value) {
- tcg_out_b (s, 0, l->u.value);
- }
- else {
+ tcg_out_b(s, 0, l->u.value);
+ } else {
uint32_t val = *(uint32_t *) s->code_ptr;
/* Thanks to Andrzej Zaborowski */
- tcg_out32 (s, B | (val & 0x3fffffc));
- tcg_out_reloc (s, s->code_ptr - 4, R_PPC_REL24, args[0], 0);
+ tcg_out32(s, B | (val & 0x3fffffc));
+ tcg_out_reloc(s, s->code_ptr - 4, R_PPC_REL24, args[0], 0);
}
}
break;
case INDEX_op_call:
- tcg_out_call (s, args[0], const_args[0]);
+ tcg_out_call(s, args[0], const_args[0]);
break;
case INDEX_op_movi_i32:
- tcg_out_movi (s, TCG_TYPE_I32, args[0], args[1]);
+ tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
break;
case INDEX_op_movi_i64:
- tcg_out_movi (s, TCG_TYPE_I64, args[0], args[1]);
+ tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
break;
case INDEX_op_ld8u_i32:
case INDEX_op_ld8u_i64:
- tcg_out_ldst (s, args[0], args[1], args[2], LBZ, LBZX);
+ tcg_out_ldst(s, args[0], args[1], args[2], LBZ, LBZX);
break;
case INDEX_op_ld8s_i32:
case INDEX_op_ld8s_i64:
- tcg_out_ldst (s, args[0], args[1], args[2], LBZ, LBZX);
- tcg_out32 (s, EXTSB | RS (args[0]) | RA (args[0]));
+ tcg_out_ldst(s, args[0], args[1], args[2], LBZ, LBZX);
+ tcg_out32(s, EXTSB | RS(args[0]) | RA(args[0]));
break;
case INDEX_op_ld16u_i32:
case INDEX_op_ld16u_i64:
- tcg_out_ldst (s, args[0], args[1], args[2], LHZ, LHZX);
+ tcg_out_ldst(s, args[0], args[1], args[2], LHZ, LHZX);
break;
case INDEX_op_ld16s_i32:
case INDEX_op_ld16s_i64:
- tcg_out_ldst (s, args[0], args[1], args[2], LHA, LHAX);
+ tcg_out_ldst(s, args[0], args[1], args[2], LHA, LHAX);
break;
case INDEX_op_ld_i32:
case INDEX_op_ld32u_i64:
- tcg_out_ldst (s, args[0], args[1], args[2], LWZ, LWZX);
+ tcg_out_ldst(s, args[0], args[1], args[2], LWZ, LWZX);
break;
case INDEX_op_ld32s_i64:
- tcg_out_ldsta (s, args[0], args[1], args[2], LWA, LWAX);
+ tcg_out_ldsta(s, args[0], args[1], args[2], LWA, LWAX);
break;
case INDEX_op_ld_i64:
- tcg_out_ldsta (s, args[0], args[1], args[2], LD, LDX);
+ tcg_out_ldsta(s, args[0], args[1], args[2], LD, LDX);
break;
case INDEX_op_st8_i32:
case INDEX_op_st8_i64:
- tcg_out_ldst (s, args[0], args[1], args[2], STB, STBX);
+ tcg_out_ldst(s, args[0], args[1], args[2], STB, STBX);
break;
case INDEX_op_st16_i32:
case INDEX_op_st16_i64:
- tcg_out_ldst (s, args[0], args[1], args[2], STH, STHX);
+ tcg_out_ldst(s, args[0], args[1], args[2], STH, STHX);
break;
case INDEX_op_st_i32:
case INDEX_op_st32_i64:
- tcg_out_ldst (s, args[0], args[1], args[2], STW, STWX);
+ tcg_out_ldst(s, args[0], args[1], args[2], STW, STWX);
break;
case INDEX_op_st_i64:
- tcg_out_ldsta (s, args[0], args[1], args[2], STD, STDX);
+ tcg_out_ldsta(s, args[0], args[1], args[2], STD, STDX);
break;
case INDEX_op_add_i32:
@@ -1607,32 +1604,33 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
break;
case INDEX_op_div_i32:
- tcg_out32 (s, DIVW | TAB (args[0], args[1], args[2]));
+ tcg_out32(s, DIVW | TAB(args[0], args[1], args[2]));
break;
case INDEX_op_divu_i32:
- tcg_out32 (s, DIVWU | TAB (args[0], args[1], args[2]));
+ tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2]));
break;
case INDEX_op_shl_i32:
if (const_args[2]) {
tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31 - args[2]);
} else {
- tcg_out32 (s, SLW | SAB (args[1], args[0], args[2]));
+ tcg_out32(s, SLW | SAB(args[1], args[0], args[2]));
}
break;
case INDEX_op_shr_i32:
if (const_args[2]) {
tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], args[2], 31);
} else {
- tcg_out32 (s, SRW | SAB (args[1], args[0], args[2]));
+ tcg_out32(s, SRW | SAB(args[1], args[0], args[2]));
}
break;
case INDEX_op_sar_i32:
- if (const_args[2])
- tcg_out32 (s, SRAWI | RS (args[1]) | RA (args[0]) | SH (args[2]));
- else
- tcg_out32 (s, SRAW | SAB (args[1], args[0], args[2]));
+ if (const_args[2]) {
+ tcg_out32(s, SRAWI | RS(args[1]) | RA(args[0]) | SH(args[2]));
+ } else {
+ tcg_out32(s, SRAW | SAB(args[1], args[0], args[2]));
+ }
break;
case INDEX_op_rotl_i32:
if (const_args[2]) {
@@ -1664,12 +1662,12 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
case INDEX_op_neg_i32:
case INDEX_op_neg_i64:
- tcg_out32 (s, NEG | RT (args[0]) | RA (args[1]));
+ tcg_out32(s, NEG | RT(args[0]) | RA(args[1]));
break;
case INDEX_op_not_i32:
case INDEX_op_not_i64:
- tcg_out32 (s, NOR | SAB (args[1], args[0], args[1]));
+ tcg_out32(s, NOR | SAB(args[1], args[0], args[1]));
break;
case INDEX_op_add_i64:
@@ -1722,24 +1720,26 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
break;
case INDEX_op_shl_i64:
- if (const_args[2])
+ if (const_args[2]) {
tcg_out_shli64(s, args[0], args[1], args[2]);
- else
- tcg_out32 (s, SLD | SAB (args[1], args[0], args[2]));
+ } else {
+ tcg_out32(s, SLD | SAB(args[1], args[0], args[2]));
+ }
break;
case INDEX_op_shr_i64:
- if (const_args[2])
+ if (const_args[2]) {
tcg_out_shri64(s, args[0], args[1], args[2]);
- else
- tcg_out32 (s, SRD | SAB (args[1], args[0], args[2]));
+ } else {
+ tcg_out32(s, SRD | SAB(args[1], args[0], args[2]));
+ }
break;
case INDEX_op_sar_i64:
if (const_args[2]) {
- int sh = SH (args[2] & 0x1f) | (((args[2] >> 5) & 1) << 1);
- tcg_out32 (s, SRADI | RA (args[0]) | RS (args[1]) | sh);
+ int sh = SH(args[2] & 0x1f) | (((args[2] >> 5) & 1) << 1);
+ tcg_out32(s, SRADI | RA(args[0]) | RS(args[1]) | sh);
+ } else {
+ tcg_out32(s, SRAD | SAB(args[1], args[0], args[2]));
}
- else
- tcg_out32 (s, SRAD | SAB (args[1], args[0], args[2]));
break;
case INDEX_op_rotl_i64:
if (const_args[2]) {
@@ -1766,45 +1766,45 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
}
break;
case INDEX_op_div_i64:
- tcg_out32 (s, DIVD | TAB (args[0], args[1], args[2]));
+ tcg_out32(s, DIVD | TAB(args[0], args[1], args[2]));
break;
case INDEX_op_divu_i64:
- tcg_out32 (s, DIVDU | TAB (args[0], args[1], args[2]));
+ tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2]));
break;
case INDEX_op_qemu_ld8u:
- tcg_out_qemu_ld (s, args, 0);
+ tcg_out_qemu_ld(s, args, 0);
break;
case INDEX_op_qemu_ld8s:
- tcg_out_qemu_ld (s, args, 0 | 4);
+ tcg_out_qemu_ld(s, args, 0 | 4);
break;
case INDEX_op_qemu_ld16u:
- tcg_out_qemu_ld (s, args, 1);
+ tcg_out_qemu_ld(s, args, 1);
break;
case INDEX_op_qemu_ld16s:
- tcg_out_qemu_ld (s, args, 1 | 4);
+ tcg_out_qemu_ld(s, args, 1 | 4);
break;
case INDEX_op_qemu_ld32:
case INDEX_op_qemu_ld32u:
- tcg_out_qemu_ld (s, args, 2);
+ tcg_out_qemu_ld(s, args, 2);
break;
case INDEX_op_qemu_ld32s:
- tcg_out_qemu_ld (s, args, 2 | 4);
+ tcg_out_qemu_ld(s, args, 2 | 4);
break;
case INDEX_op_qemu_ld64:
- tcg_out_qemu_ld (s, args, 3);
+ tcg_out_qemu_ld(s, args, 3);
break;
case INDEX_op_qemu_st8:
- tcg_out_qemu_st (s, args, 0);
+ tcg_out_qemu_st(s, args, 0);
break;
case INDEX_op_qemu_st16:
- tcg_out_qemu_st (s, args, 1);
+ tcg_out_qemu_st(s, args, 1);
break;
case INDEX_op_qemu_st32:
- tcg_out_qemu_st (s, args, 2);
+ tcg_out_qemu_st(s, args, 2);
break;
case INDEX_op_qemu_st64:
- tcg_out_qemu_st (s, args, 3);
+ tcg_out_qemu_st(s, args, 3);
break;
case INDEX_op_ext8s_i32:
@@ -1819,16 +1819,16 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
c = EXTSW;
goto gen_ext;
gen_ext:
- tcg_out32 (s, c | RS (args[1]) | RA (args[0]));
+ tcg_out32(s, c | RS(args[1]) | RA(args[0]));
break;
case INDEX_op_setcond_i32:
- tcg_out_setcond (s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
- const_args[2]);
+ tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
+ const_args[2]);
break;
case INDEX_op_setcond_i64:
- tcg_out_setcond (s, TCG_TYPE_I64, args[3], args[0], args[1], args[2],
- const_args[2]);
+ tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2],
+ const_args[2]);
break;
case INDEX_op_bswap16_i32:
@@ -1980,8 +1980,8 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
break;
default:
- tcg_dump_ops (s);
- tcg_abort ();
+ tcg_dump_ops(s);
+ tcg_abort();
}
}
@@ -2109,7 +2109,7 @@ static const TCGTargetOpDef ppc_op_defs[] = {
{ -1 },
};
-static void tcg_target_init (TCGContext *s)
+static void tcg_target_init(TCGContext *s)
{
#ifdef CONFIG_GETAUXVAL
unsigned long hwcap = getauxval(AT_HWCAP);
@@ -2118,9 +2118,9 @@ static void tcg_target_init (TCGContext *s)
}
#endif
- tcg_regset_set32 (tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
- tcg_regset_set32 (tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
- tcg_regset_set32 (tcg_target_call_clobber_regs, 0,
+ tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
+ tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
+ tcg_regset_set32(tcg_target_call_clobber_regs, 0,
(1 << TCG_REG_R0) |
#ifdef __APPLE__
(1 << TCG_REG_R2) |
@@ -2137,13 +2137,13 @@ static void tcg_target_init (TCGContext *s)
(1 << TCG_REG_R12)
);
- tcg_regset_clear (s->reserved_regs);
- tcg_regset_set_reg (s->reserved_regs, TCG_REG_R0);
- tcg_regset_set_reg (s->reserved_regs, TCG_REG_R1);
+ tcg_regset_clear(s->reserved_regs);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1);
#ifndef __APPLE__
- tcg_regset_set_reg (s->reserved_regs, TCG_REG_R2);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2);
#endif
- tcg_regset_set_reg (s->reserved_regs, TCG_REG_R13);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13);
- tcg_add_target_add_op_defs (ppc_op_defs);
+ tcg_add_target_add_op_defs(ppc_op_defs);
}
--
1.8.1.4
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [Qemu-devel] [PULL 09/21] tcg-ppc64: More use of TAI and SAI helper macros
2013-09-25 16:27 [Qemu-devel] [PULL 00/21] TCG ppc and ppc64 updates Richard Henderson
` (7 preceding siblings ...)
2013-09-25 16:27 ` [Qemu-devel] [PULL 08/21] tcg-ppc64: Reformat tcg-target.c Richard Henderson
@ 2013-09-25 16:27 ` Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 10/21] tcg-ppc64: Use TCG_REG_Rn constants Richard Henderson
` (11 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: Richard Henderson @ 2013-09-25 16:27 UTC (permalink / raw)
To: qemu-devel; +Cc: anthony
Finish conversion of all memory operations.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 41 ++++++++++++++++-------------------------
1 file changed, 16 insertions(+), 25 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index b554b00..114e23d 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -718,10 +718,10 @@ static void tcg_out_call(TCGContext *s, tcg_target_long arg, int const_arg)
tcg_out_movi(s, TCG_TYPE_I64, reg, arg);
}
- tcg_out32(s, LD | RT(0) | RA(reg));
+ tcg_out32(s, LD | TAI(0, reg, 0));
tcg_out32(s, MTSPR | RA(0) | CTR);
- tcg_out32(s, LD | RT(11) | RA(reg) | 16);
- tcg_out32(s, LD | RT(2) | RA(reg) | 8);
+ tcg_out32(s, LD | TAI(11, reg, 16));
+ tcg_out32(s, LD | TAI(2, reg, 8));
tcg_out32(s, BCCTR | BO_ALWAYS | LK);
#endif
}
@@ -963,12 +963,9 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
reloc_pc14(label1_ptr, (tcg_target_long)s->code_ptr);
#endif
- tcg_out32(s, (LD
- | RT(r0)
- | RA(r0)
- | (offsetof(CPUTLBEntry, addend)
- - offsetof(CPUTLBEntry, addr_write))
- ));
+ tcg_out32(s, LD | TAI(r0, r0,
+ offsetof(CPUTLBEntry, addend)
+ - offsetof(CPUTLBEntry, addr_write)));
/* r0 = env->tlb_table[mem_index][index].addend */
tcg_out32(s, ADD | TAB(r0, r0, addr_reg));
/* r0 = env->tlb_table[mem_index][index].addend + addr */
@@ -1030,15 +1027,12 @@ static void tcg_target_qemu_prologue(TCGContext *s)
/* Prologue */
tcg_out32(s, MFSPR | RT(0) | LR);
- tcg_out32(s, STDU | RS(1) | RA(1) | (-frame_size & 0xffff));
- for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i)
- tcg_out32(s, (STD
- | RS(tcg_target_callee_save_regs[i])
- | RA(1)
- | (i * 8 + 48 + TCG_STATIC_CALL_ARGS_SIZE)
- )
- );
- tcg_out32(s, STD | RS(0) | RA(1) | (frame_size + 16));
+ tcg_out32(s, STDU | SAI(1, 1, -frame_size));
+ for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
+ tcg_out32(s, STD | SAI(tcg_target_callee_save_regs[i], 1,
+ i * 8 + 48 + TCG_STATIC_CALL_ARGS_SIZE));
+ }
+ tcg_out32(s, STD | SAI(0, 1, frame_size + 16));
#ifdef CONFIG_USE_GUEST_BASE
if (GUEST_BASE) {
@@ -1054,13 +1048,10 @@ static void tcg_target_qemu_prologue(TCGContext *s)
/* Epilogue */
tb_ret_addr = s->code_ptr;
- for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i)
- tcg_out32(s, (LD
- | RT(tcg_target_callee_save_regs[i])
- | RA(1)
- | (i * 8 + 48 + TCG_STATIC_CALL_ARGS_SIZE)
- )
- );
+ for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
+ tcg_out32(s, LD | TAI(tcg_target_callee_save_regs[i], 1,
+ i * 8 + 48 + TCG_STATIC_CALL_ARGS_SIZE));
+ }
tcg_out32(s, LD | TAI(0, 1, frame_size + 16));
tcg_out32(s, MTSPR | RS(0) | LR);
tcg_out32(s, ADDI | TAI(1, 1, frame_size));
--
1.8.1.4
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [Qemu-devel] [PULL 10/21] tcg-ppc64: Use TCG_REG_Rn constants
2013-09-25 16:27 [Qemu-devel] [PULL 00/21] TCG ppc and ppc64 updates Richard Henderson
` (8 preceding siblings ...)
2013-09-25 16:27 ` [Qemu-devel] [PULL 09/21] tcg-ppc64: More use of TAI and SAI helper macros Richard Henderson
@ 2013-09-25 16:27 ` Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 11/21] tcg-ppc64: Use tcg_out64 Richard Henderson
` (10 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: Richard Henderson @ 2013-09-25 16:27 UTC (permalink / raw)
To: qemu-devel; +Cc: anthony
Use the TCG_REG_Rn constants instead of a bare N, for clarity. The only
(intentional) exception is for insns that encode R|0, i.e. when R0 encoded
into the insn is interpreted as zero, not as the contents of the register.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 96 +++++++++++++++++++++++++-------------------------
1 file changed, 48 insertions(+), 48 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 114e23d..848029f 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -637,8 +637,8 @@ static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
} else if (mask_operand(c, &mb, &me)) {
tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
} else {
- tcg_out_movi(s, TCG_TYPE_I32, 0, c);
- tcg_out32(s, AND | SAB(src, dst, 0));
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c);
+ tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
}
}
@@ -659,8 +659,8 @@ static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c)
tcg_out_rld(s, RLDICL, dst, src, 0, mb);
}
} else {
- tcg_out_movi(s, TCG_TYPE_I64, 0, c);
- tcg_out32(s, AND | SAB(src, dst, 0));
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c);
+ tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
}
}
@@ -695,8 +695,8 @@ static void tcg_out_b(TCGContext *s, int mask, tcg_target_long target)
if ((disp << 38) >> 38 == disp) {
tcg_out32(s, B | (disp & 0x3fffffc) | mask);
} else {
- tcg_out_movi(s, TCG_TYPE_I64, 0, (tcg_target_long)target);
- tcg_out32(s, MTSPR | RS(0) | CTR);
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, (tcg_target_long)target);
+ tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
tcg_out32(s, BCCTR | BO_ALWAYS | mask);
}
}
@@ -714,14 +714,14 @@ static void tcg_out_call(TCGContext *s, tcg_target_long arg, int const_arg)
int reg = arg;
if (const_arg) {
- reg = 2;
+ reg = TCG_REG_R2;
tcg_out_movi(s, TCG_TYPE_I64, reg, arg);
}
- tcg_out32(s, LD | TAI(0, reg, 0));
- tcg_out32(s, MTSPR | RA(0) | CTR);
- tcg_out32(s, LD | TAI(11, reg, 16));
- tcg_out32(s, LD | TAI(2, reg, 8));
+ tcg_out32(s, LD | TAI(TCG_REG_R0, reg, 0));
+ tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR);
+ tcg_out32(s, LD | TAI(TCG_REG_R11, reg, 16));
+ tcg_out32(s, LD | TAI(TCG_REG_R2, reg, 8));
tcg_out32(s, BCCTR | BO_ALWAYS | LK);
#endif
}
@@ -732,8 +732,8 @@ static void tcg_out_ldst(TCGContext *s, TCGReg ret, TCGReg addr,
if (offset == (int16_t) offset) {
tcg_out32(s, op1 | TAI(ret, addr, offset));
} else {
- tcg_out_movi(s, TCG_TYPE_I64, 0, offset);
- tcg_out32(s, op2 | TAB(ret, addr, 0));
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, offset);
+ tcg_out32(s, op2 | TAB(ret, addr, TCG_REG_R0));
}
}
@@ -743,8 +743,8 @@ static void tcg_out_ldsta(TCGContext *s, TCGReg ret, TCGReg addr,
if (offset == (int16_t)(offset & ~3)) {
tcg_out32(s, op1 | TAI(ret, addr, offset));
} else {
- tcg_out_movi(s, TCG_TYPE_I64, 0, offset);
- tcg_out32(s, op2 | TAB(ret, addr, 0));
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, offset);
+ tcg_out32(s, op2 | TAB(ret, addr, TCG_REG_R0));
}
}
@@ -841,9 +841,9 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
#ifdef CONFIG_SOFTMMU
mem_index = *args;
- r0 = 3;
- r1 = 4;
- r2 = 0;
+ r0 = TCG_REG_R3;
+ r1 = TCG_REG_R4;
+ r2 = TCG_REG_R0;
rbase = 0;
tcg_out_tlb_read(s, r0, r1, r2, addr_reg, s_bits,
@@ -857,7 +857,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
#endif
/* slow path */
- ir = 3;
+ ir = TCG_REG_R3;
tcg_out_mov(s, TCG_TYPE_I64, ir++, TCG_AREG0);
tcg_out_mov(s, TCG_TYPE_I64, ir++, addr_reg);
tcg_out_movi(s, TCG_TYPE_I64, ir++, mem_index);
@@ -866,9 +866,9 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
if (opc & 4) {
insn = qemu_exts_opc[s_bits];
- tcg_out32(s, insn | RA(data_reg) | RS(3));
- } else if (data_reg != 3) {
- tcg_out_mov(s, TCG_TYPE_I64, data_reg, 3);
+ tcg_out32(s, insn | RA(data_reg) | RS(TCG_REG_R3));
+ } else if (data_reg != TCG_REG_R3) {
+ tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R3);
}
label2_ptr = s->code_ptr;
tcg_out32(s, B);
@@ -891,7 +891,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
tcg_out_ext32u(s, addr_reg, addr_reg);
#endif
r0 = addr_reg;
- r1 = 3;
+ r1 = TCG_REG_R3;
rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0;
#endif
@@ -931,9 +931,9 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
#ifdef CONFIG_SOFTMMU
mem_index = *args;
- r0 = 3;
- r1 = 4;
- r2 = 0;
+ r0 = TCG_REG_R3;
+ r1 = TCG_REG_R4;
+ r2 = TCG_REG_R0;
rbase = 0;
tcg_out_tlb_read(s, r0, r1, r2, addr_reg, opc,
@@ -947,7 +947,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
#endif
/* slow path */
- ir = 3;
+ ir = TCG_REG_R3;
tcg_out_mov(s, TCG_TYPE_I64, ir++, TCG_AREG0);
tcg_out_mov(s, TCG_TYPE_I64, ir++, addr_reg);
tcg_out_rld(s, RLDICL, ir++, data_reg, 0, 64 - (1 << (3 + opc)));
@@ -974,7 +974,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
#if TARGET_LONG_BITS == 32
tcg_out_ext32u(s, addr_reg, addr_reg);
#endif
- r1 = 3;
+ r1 = TCG_REG_R3;
r0 = addr_reg;
rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0;
#endif
@@ -983,8 +983,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
if (!HAVE_ISA_2_06 && insn == STDBRX) {
tcg_out32(s, STWBRX | SAB(data_reg, rbase, r0));
tcg_out32(s, ADDI | TAI(r1, r0, 4));
- tcg_out_shri64(s, 0, data_reg, 32);
- tcg_out32(s, STWBRX | SAB(0, rbase, r1));
+ tcg_out_shri64(s, TCG_REG_R0, data_reg, 32);
+ tcg_out32(s, STWBRX | SAB(TCG_REG_R0, rbase, r1));
} else {
tcg_out32(s, insn | SAB(data_reg, rbase, r0));
}
@@ -1026,13 +1026,13 @@ static void tcg_target_qemu_prologue(TCGContext *s)
#endif
/* Prologue */
- tcg_out32(s, MFSPR | RT(0) | LR);
- tcg_out32(s, STDU | SAI(1, 1, -frame_size));
+ tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR);
+ tcg_out32(s, STDU | SAI(TCG_REG_R1, TCG_REG_R1, -frame_size));
for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
tcg_out32(s, STD | SAI(tcg_target_callee_save_regs[i], 1,
i * 8 + 48 + TCG_STATIC_CALL_ARGS_SIZE));
}
- tcg_out32(s, STD | SAI(0, 1, frame_size + 16));
+ tcg_out32(s, STD | SAI(TCG_REG_R0, TCG_REG_R1, frame_size + 16));
#ifdef CONFIG_USE_GUEST_BASE
if (GUEST_BASE) {
@@ -1049,12 +1049,12 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tb_ret_addr = s->code_ptr;
for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
- tcg_out32(s, LD | TAI(tcg_target_callee_save_regs[i], 1,
+ tcg_out32(s, LD | TAI(tcg_target_callee_save_regs[i], TCG_REG_R1,
i * 8 + 48 + TCG_STATIC_CALL_ARGS_SIZE));
}
- tcg_out32(s, LD | TAI(0, 1, frame_size + 16));
- tcg_out32(s, MTSPR | RS(0) | LR);
- tcg_out32(s, ADDI | TAI(1, 1, frame_size));
+ tcg_out32(s, LD | TAI(TCG_REG_R0, TCG_REG_R1, frame_size + 16));
+ tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR);
+ tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, frame_size));
tcg_out32(s, BCLR | BO_ALWAYS);
}
@@ -1146,8 +1146,8 @@ static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff));
} else {
if (const_arg2) {
- tcg_out_movi(s, type, 0, arg2);
- arg2 = 0;
+ tcg_out_movi(s, type, TCG_REG_R0, arg2);
+ arg2 = TCG_REG_R0;
}
tcg_out32(s, op | RA(arg1) | RB(arg2));
}
@@ -1168,8 +1168,8 @@ static void tcg_out_setcond_ne0(TCGContext *s, TCGReg dst, TCGReg src)
tcg_out32(s, ADDIC | TAI(dst, src, -1));
tcg_out32(s, SUBFE | TAB(dst, dst, src));
} else {
- tcg_out32(s, ADDIC | TAI(0, src, -1));
- tcg_out32(s, SUBFE | TAB(dst, 0, src));
+ tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
+ tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src));
}
}
@@ -1350,7 +1350,7 @@ static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond,
}
/* V1 == 0 is handled by isel; V2 == 0 must be handled by hand. */
if (v2 == 0) {
- tcg_out_movi(s, type, 0, 0);
+ tcg_out_movi(s, type, TCG_REG_R0, 0);
}
tcg_out32(s, isel | TAB(dest, v1, v2));
} else {
@@ -1635,8 +1635,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
if (const_args[2]) {
tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31);
} else {
- tcg_out32(s, SUBFIC | TAI(0, args[2], 32));
- tcg_out32(s, RLWNM | SAB(args[1], args[0], 0)
+ tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32));
+ tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0)
| MB(0) | ME(31));
}
break;
@@ -1743,8 +1743,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
if (const_args[2]) {
tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0);
} else {
- tcg_out32(s, SUBFIC | TAI(0, args[2], 64));
- tcg_out32(s, RLDCL | SAB(args[1], args[0], 0) | MB64(0));
+ tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64));
+ tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0));
}
break;
@@ -1861,9 +1861,9 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
break;
case INDEX_op_bswap64_i64:
- a0 = args[0], a1 = args[1], a2 = 0;
+ a0 = args[0], a1 = args[1], a2 = TCG_REG_R0;
if (a0 == a1) {
- a0 = 0;
+ a0 = TCG_REG_R0;
a2 = a1;
}
--
1.8.1.4
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [Qemu-devel] [PULL 11/21] tcg-ppc64: Use tcg_out64
2013-09-25 16:27 [Qemu-devel] [PULL 00/21] TCG ppc and ppc64 updates Richard Henderson
` (9 preceding siblings ...)
2013-09-25 16:27 ` [Qemu-devel] [PULL 10/21] tcg-ppc64: Use TCG_REG_Rn constants Richard Henderson
@ 2013-09-25 16:27 ` Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 12/21] tcg-ppc64: Avoid code for nop move Richard Henderson
` (9 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: Richard Henderson @ 2013-09-25 16:27 UTC (permalink / raw)
To: qemu-devel; +Cc: anthony
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 848029f..27a955b 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -997,9 +997,6 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
static void tcg_target_qemu_prologue(TCGContext *s)
{
int i, frame_size;
-#ifndef __APPLE__
- uint64_t addr;
-#endif
frame_size = 0
+ 8 /* back chain */
@@ -1020,8 +1017,7 @@ static void tcg_target_qemu_prologue(TCGContext *s)
#ifndef __APPLE__
/* First emit adhoc function descriptor */
- addr = (uint64_t) s->code_ptr + 24;
- tcg_out32(s, addr >> 32); tcg_out32(s, addr); /* entry point */
+ tcg_out64(s, (uint64_t)s->code_ptr + 24); /* entry point */
s->code_ptr += 16; /* skip TOC and environment pointer */
#endif
--
1.8.1.4
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [Qemu-devel] [PULL 12/21] tcg-ppc64: Avoid code for nop move
2013-09-25 16:27 [Qemu-devel] [PULL 00/21] TCG ppc and ppc64 updates Richard Henderson
` (10 preceding siblings ...)
2013-09-25 16:27 ` [Qemu-devel] [PULL 11/21] tcg-ppc64: Use tcg_out64 Richard Henderson
@ 2013-09-25 16:27 ` Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 13/21] tcg-ppc64: Don't load the static chain from TCG Richard Henderson
` (8 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: Richard Henderson @ 2013-09-25 16:27 UTC (permalink / raw)
To: qemu-devel; +Cc: anthony
While nop moves are rare in code that's been through the optimizer,
they're not uncommon within the tcg backend.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 27a955b..357f8c1 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -509,7 +509,9 @@ static const uint32_t tcg_to_isel[] = {
static inline void tcg_out_mov(TCGContext *s, TCGType type,
TCGReg ret, TCGReg arg)
{
- tcg_out32(s, OR | SAB(arg, ret, arg));
+ if (ret != arg) {
+ tcg_out32(s, OR | SAB(arg, ret, arg));
+ }
}
static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,
--
1.8.1.4
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [Qemu-devel] [PULL 13/21] tcg-ppc64: Don't load the static chain from TCG
2013-09-25 16:27 [Qemu-devel] [PULL 00/21] TCG ppc and ppc64 updates Richard Henderson
` (11 preceding siblings ...)
2013-09-25 16:27 ` [Qemu-devel] [PULL 12/21] tcg-ppc64: Avoid code for nop move Richard Henderson
@ 2013-09-25 16:27 ` Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 14/21] tcg-ppc64: Fold constant call address into descriptor load Richard Henderson
` (7 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: Richard Henderson @ 2013-09-25 16:27 UTC (permalink / raw)
To: qemu-devel; +Cc: anthony
There are no helpers that require the static chain.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 357f8c1..5ac62bf 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -722,7 +722,6 @@ static void tcg_out_call(TCGContext *s, tcg_target_long arg, int const_arg)
tcg_out32(s, LD | TAI(TCG_REG_R0, reg, 0));
tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR);
- tcg_out32(s, LD | TAI(TCG_REG_R11, reg, 16));
tcg_out32(s, LD | TAI(TCG_REG_R2, reg, 8));
tcg_out32(s, BCCTR | BO_ALWAYS | LK);
#endif
--
1.8.1.4
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [Qemu-devel] [PULL 14/21] tcg-ppc64: Fold constant call address into descriptor load
2013-09-25 16:27 [Qemu-devel] [PULL 00/21] TCG ppc and ppc64 updates Richard Henderson
` (12 preceding siblings ...)
2013-09-25 16:27 ` [Qemu-devel] [PULL 13/21] tcg-ppc64: Don't load the static chain from TCG Richard Henderson
@ 2013-09-25 16:27 ` Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 15/21] tcg-ppc64: Look through a constant function descriptor Richard Henderson
` (6 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: Richard Henderson @ 2013-09-25 16:27 UTC (permalink / raw)
To: qemu-devel; +Cc: anthony
Eliminates one insn per call:
: lis r2,4165
-: ori r2,r2,59616
-: ld r0,0(r2)
+: ld r0,-5920(r2)
: mtctr r0
-: ld r2,8(r2)
+: ld r2,-5912(r2)
: bctrl
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 14 +++++++++++---
1 file changed, 11 insertions(+), 3 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 5ac62bf..8eb0406 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -713,16 +713,24 @@ static void tcg_out_call(TCGContext *s, tcg_target_long arg, int const_arg)
tcg_out32(s, BCLR | BO_ALWAYS | LK);
}
#else
- int reg = arg;
+ TCGReg reg = arg;
+ int ofs = 0;
if (const_arg) {
+ /* Fold the low bits of the constant into the addresses below. */
+ ofs = (int16_t)arg;
+ if (ofs + 8 < 0x8000) {
+ arg -= ofs;
+ } else {
+ ofs = 0;
+ }
reg = TCG_REG_R2;
tcg_out_movi(s, TCG_TYPE_I64, reg, arg);
}
- tcg_out32(s, LD | TAI(TCG_REG_R0, reg, 0));
+ tcg_out32(s, LD | TAI(TCG_REG_R0, reg, ofs));
tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR);
- tcg_out32(s, LD | TAI(TCG_REG_R2, reg, 8));
+ tcg_out32(s, LD | TAI(TCG_REG_R2, reg, ofs + 8));
tcg_out32(s, BCCTR | BO_ALWAYS | LK);
#endif
}
--
1.8.1.4
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [Qemu-devel] [PULL 15/21] tcg-ppc64: Look through a constant function descriptor
2013-09-25 16:27 [Qemu-devel] [PULL 00/21] TCG ppc and ppc64 updates Richard Henderson
` (13 preceding siblings ...)
2013-09-25 16:27 ` [Qemu-devel] [PULL 14/21] tcg-ppc64: Fold constant call address into descriptor load Richard Henderson
@ 2013-09-25 16:27 ` Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 16/21] tcg-ppc64: Tidy register allocation order Richard Henderson
` (5 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: Richard Henderson @ 2013-09-25 16:27 UTC (permalink / raw)
To: qemu-devel; +Cc: anthony
Especially in the user-only configurations, a direct branch into
the executable may be in range.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 23 +++++++++++++++++++----
1 file changed, 19 insertions(+), 4 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 8eb0406..0659dd6 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -173,14 +173,17 @@ static const int tcg_target_callee_save_regs[] = {
TCG_REG_R31
};
+static inline bool in_range_b(tcg_target_long target)
+{
+ return target == sextract64(target, 0, 26);
+}
+
static uint32_t reloc_pc24_val(void *pc, tcg_target_long target)
{
tcg_target_long disp;
disp = target - (tcg_target_long)pc;
- if ((disp << 38) >> 38 != disp) {
- tcg_abort();
- }
+ assert(in_range_b(disp));
return disp & 0x3fffffc;
}
@@ -694,7 +697,7 @@ static void tcg_out_b(TCGContext *s, int mask, tcg_target_long target)
tcg_target_long disp;
disp = target - (tcg_target_long)s->code_ptr;
- if ((disp << 38) >> 38 == disp) {
+ if (in_range_b(disp)) {
tcg_out32(s, B | (disp & 0x3fffffc) | mask);
} else {
tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, (tcg_target_long)target);
@@ -717,6 +720,18 @@ static void tcg_out_call(TCGContext *s, tcg_target_long arg, int const_arg)
int ofs = 0;
if (const_arg) {
+ /* Look through the descriptor. If the branch is in range, and we
+ don't have to spend too much effort on building the toc. */
+ intptr_t tgt = ((intptr_t *)arg)[0];
+ intptr_t toc = ((intptr_t *)arg)[1];
+ intptr_t diff = tgt - (intptr_t)s->code_ptr;
+
+ if (in_range_b(diff) && toc == (uint32_t)toc) {
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R2, toc);
+ tcg_out_b(s, LK, tgt);
+ return;
+ }
+
/* Fold the low bits of the constant into the addresses below. */
ofs = (int16_t)arg;
if (ofs + 8 < 0x8000) {
--
1.8.1.4
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [Qemu-devel] [PULL 16/21] tcg-ppc64: Tidy register allocation order
2013-09-25 16:27 [Qemu-devel] [PULL 00/21] TCG ppc and ppc64 updates Richard Henderson
` (14 preceding siblings ...)
2013-09-25 16:27 ` [Qemu-devel] [PULL 15/21] tcg-ppc64: Look through a constant function descriptor Richard Henderson
@ 2013-09-25 16:27 ` Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 17/21] tcg-ppc64: Handle long offsets better Richard Henderson
` (4 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: Richard Henderson @ 2013-09-25 16:27 UTC (permalink / raw)
To: qemu-devel; +Cc: anthony
Remove conditionalization from tcg_target_reg_alloc_order, relying on
reserved_regs to prevent register allocation that shouldn't happen.
So R11 is now present in reg_alloc_order for __APPLE__, but also now
reserved.
Sort reg_alloc_order into call-saved, call-clobbered, and parameters.
This reduces the effect of values getting spilled and reloaded before
function calls.
Whether or not it is reserved, R2 (TOC) is always call-clobbered.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 49 ++++++++++++++++++++++---------------------------
1 file changed, 22 insertions(+), 27 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 0659dd6..c01a8bb 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -99,7 +99,7 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
#endif
static const int tcg_target_reg_alloc_order[] = {
- TCG_REG_R14,
+ TCG_REG_R14, /* call saved registers */
TCG_REG_R15,
TCG_REG_R16,
TCG_REG_R17,
@@ -109,29 +109,25 @@ static const int tcg_target_reg_alloc_order[] = {
TCG_REG_R21,
TCG_REG_R22,
TCG_REG_R23,
+ TCG_REG_R24,
+ TCG_REG_R25,
+ TCG_REG_R26,
+ TCG_REG_R27,
TCG_REG_R28,
TCG_REG_R29,
TCG_REG_R30,
TCG_REG_R31,
-#ifdef __APPLE__
+ TCG_REG_R12, /* call clobbered, non-arguments */
+ TCG_REG_R11,
TCG_REG_R2,
-#endif
- TCG_REG_R3,
- TCG_REG_R4,
- TCG_REG_R5,
- TCG_REG_R6,
- TCG_REG_R7,
- TCG_REG_R8,
+ TCG_REG_R10, /* call clobbered, arguments */
TCG_REG_R9,
- TCG_REG_R10,
-#ifndef __APPLE__
- TCG_REG_R11,
-#endif
- TCG_REG_R12,
- TCG_REG_R24,
- TCG_REG_R25,
- TCG_REG_R26,
- TCG_REG_R27
+ TCG_REG_R8,
+ TCG_REG_R7,
+ TCG_REG_R6,
+ TCG_REG_R5,
+ TCG_REG_R4,
+ TCG_REG_R3,
};
static const int tcg_target_call_iarg_regs[] = {
@@ -2133,9 +2129,7 @@ static void tcg_target_init(TCGContext *s)
tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
tcg_regset_set32(tcg_target_call_clobber_regs, 0,
(1 << TCG_REG_R0) |
-#ifdef __APPLE__
(1 << TCG_REG_R2) |
-#endif
(1 << TCG_REG_R3) |
(1 << TCG_REG_R4) |
(1 << TCG_REG_R5) |
@@ -2145,16 +2139,17 @@ static void tcg_target_init(TCGContext *s)
(1 << TCG_REG_R9) |
(1 << TCG_REG_R10) |
(1 << TCG_REG_R11) |
- (1 << TCG_REG_R12)
- );
+ (1 << TCG_REG_R12));
tcg_regset_clear(s->reserved_regs);
- tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0);
- tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1);
-#ifndef __APPLE__
- tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */
+#ifdef __APPLE__
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_R11); /* ??? */
+#else
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc */
#endif
- tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
tcg_add_target_add_op_defs(ppc_op_defs);
}
--
1.8.1.4
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [Qemu-devel] [PULL 17/21] tcg-ppc64: Handle long offsets better
2013-09-25 16:27 [Qemu-devel] [PULL 00/21] TCG ppc and ppc64 updates Richard Henderson
` (15 preceding siblings ...)
2013-09-25 16:27 ` [Qemu-devel] [PULL 16/21] tcg-ppc64: Tidy register allocation order Richard Henderson
@ 2013-09-25 16:27 ` Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 18/21] tcg-ppc64: Implement tcg_register_jit Richard Henderson
` (3 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: Richard Henderson @ 2013-09-25 16:27 UTC (permalink / raw)
To: qemu-devel; +Cc: anthony
Previously we'd only handle 16-bit offsets from memory operands without falling
back to the indexed form, but it's easy to use ADDIS to handle full 32-bit offsets.
This also lets us unify code that existed inline in tcg_out_op for handling
addition of large constants.
R2, the new temporary, was marked reserved for the AIX calling convention, but
the register really is call-clobbered and, since tcg-generated code has no use
for a TOC, it's available for use.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 147 +++++++++++++++++++++++++------------------------
1 file changed, 74 insertions(+), 73 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index c01a8bb..51d2b06 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -119,7 +119,6 @@ static const int tcg_target_reg_alloc_order[] = {
TCG_REG_R31,
TCG_REG_R12, /* call clobbered, non-arguments */
TCG_REG_R11,
- TCG_REG_R2,
TCG_REG_R10, /* call clobbered, arguments */
TCG_REG_R9,
TCG_REG_R8,
@@ -746,25 +745,55 @@ static void tcg_out_call(TCGContext *s, tcg_target_long arg, int const_arg)
#endif
}
-static void tcg_out_ldst(TCGContext *s, TCGReg ret, TCGReg addr,
- int offset, int op1, int op2)
+static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
+ TCGReg base, tcg_target_long offset)
{
- if (offset == (int16_t) offset) {
- tcg_out32(s, op1 | TAI(ret, addr, offset));
- } else {
- tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, offset);
- tcg_out32(s, op2 | TAB(ret, addr, TCG_REG_R0));
+ tcg_target_long orig = offset, l0, l1, extra = 0, align = 0;
+ TCGReg rs = TCG_REG_R2;
+
+ assert(rt != TCG_REG_R2 && base != TCG_REG_R2);
+
+ switch (opi) {
+ case LD: case LWA:
+ align = 3;
+ /* FALLTHRU */
+ default:
+ if (rt != TCG_REG_R0) {
+ rs = rt;
+ }
+ break;
+ case STD:
+ align = 3;
+ break;
+ case STB: case STH: case STW:
+ break;
}
-}
-static void tcg_out_ldsta(TCGContext *s, TCGReg ret, TCGReg addr,
- int offset, int op1, int op2)
-{
- if (offset == (int16_t)(offset & ~3)) {
- tcg_out32(s, op1 | TAI(ret, addr, offset));
- } else {
- tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, offset);
- tcg_out32(s, op2 | TAB(ret, addr, TCG_REG_R0));
+ /* For unaligned, or very large offsets, use the indexed form. */
+ if (offset & align || offset != (int32_t)offset) {
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R2, orig);
+ tcg_out32(s, opx | TAB(rt, base, TCG_REG_R2));
+ return;
+ }
+
+ l0 = (int16_t)offset;
+ offset = (offset - l0) >> 16;
+ l1 = (int16_t)offset;
+
+ if (l1 < 0 && orig >= 0) {
+ extra = 0x4000;
+ l1 = (int16_t)(offset - 0x4000);
+ }
+ if (l1) {
+ tcg_out32(s, ADDIS | TAI(rs, base, l1));
+ base = rs;
+ }
+ if (extra) {
+ tcg_out32(s, ADDIS | TAI(rs, base, extra));
+ base = rs;
+ }
+ if (opi != ADDI || base != rt || l0 != 0) {
+ tcg_out32(s, opi | TAI(rt, base, l0));
}
}
@@ -1074,24 +1103,30 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tcg_out32(s, BCLR | BO_ALWAYS);
}
-static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
- intptr_t arg2)
+static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
+ TCGReg arg1, intptr_t arg2)
{
+ int opi, opx;
+
if (type == TCG_TYPE_I32) {
- tcg_out_ldst(s, ret, arg1, arg2, LWZ, LWZX);
+ opi = LWZ, opx = LWZX;
} else {
- tcg_out_ldsta(s, ret, arg1, arg2, LD, LDX);
+ opi = LD, opx = LDX;
}
+ tcg_out_mem_long(s, opi, opx, ret, arg1, arg2);
}
-static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
- intptr_t arg2)
+static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
+ TCGReg arg1, intptr_t arg2)
{
+ int opi, opx;
+
if (type == TCG_TYPE_I32) {
- tcg_out_ldst(s, arg, arg1, arg2, STW, STWX);
+ opi = STW, opx = STWX;
} else {
- tcg_out_ldsta(s, arg, arg1, arg2, STD, STDX);
+ opi = STD, opx = STDX;
}
+ tcg_out_mem_long(s, opi, opx, arg, arg1, arg2);
}
static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
@@ -1449,61 +1484,52 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
break;
case INDEX_op_ld8u_i32:
case INDEX_op_ld8u_i64:
- tcg_out_ldst(s, args[0], args[1], args[2], LBZ, LBZX);
+ tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
break;
case INDEX_op_ld8s_i32:
case INDEX_op_ld8s_i64:
- tcg_out_ldst(s, args[0], args[1], args[2], LBZ, LBZX);
+ tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
tcg_out32(s, EXTSB | RS(args[0]) | RA(args[0]));
break;
case INDEX_op_ld16u_i32:
case INDEX_op_ld16u_i64:
- tcg_out_ldst(s, args[0], args[1], args[2], LHZ, LHZX);
+ tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]);
break;
case INDEX_op_ld16s_i32:
case INDEX_op_ld16s_i64:
- tcg_out_ldst(s, args[0], args[1], args[2], LHA, LHAX);
+ tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]);
break;
case INDEX_op_ld_i32:
case INDEX_op_ld32u_i64:
- tcg_out_ldst(s, args[0], args[1], args[2], LWZ, LWZX);
+ tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]);
break;
case INDEX_op_ld32s_i64:
- tcg_out_ldsta(s, args[0], args[1], args[2], LWA, LWAX);
+ tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]);
break;
case INDEX_op_ld_i64:
- tcg_out_ldsta(s, args[0], args[1], args[2], LD, LDX);
+ tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]);
break;
case INDEX_op_st8_i32:
case INDEX_op_st8_i64:
- tcg_out_ldst(s, args[0], args[1], args[2], STB, STBX);
+ tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]);
break;
case INDEX_op_st16_i32:
case INDEX_op_st16_i64:
- tcg_out_ldst(s, args[0], args[1], args[2], STH, STHX);
+ tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]);
break;
case INDEX_op_st_i32:
case INDEX_op_st32_i64:
- tcg_out_ldst(s, args[0], args[1], args[2], STW, STWX);
+ tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]);
break;
case INDEX_op_st_i64:
- tcg_out_ldsta(s, args[0], args[1], args[2], STD, STDX);
+ tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]);
break;
case INDEX_op_add_i32:
a0 = args[0], a1 = args[1], a2 = args[2];
if (const_args[2]) {
- int32_t l, h;
do_addi_32:
- l = (int16_t)a2;
- h = a2 - l;
- if (h) {
- tcg_out32(s, ADDIS | TAI(a0, a1, h >> 16));
- a1 = a0;
- }
- if (l || a0 != a1) {
- tcg_out32(s, ADDI | TAI(a0, a1, l));
- }
+ tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2);
} else {
tcg_out32(s, ADD | TAB(a0, a1, a2));
}
@@ -1680,32 +1706,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
case INDEX_op_add_i64:
a0 = args[0], a1 = args[1], a2 = args[2];
if (const_args[2]) {
- int32_t l0, h1, h2;
do_addi_64:
- /* We can always split any 32-bit signed constant into 3 pieces.
- Note the positive 0x80000000 coming from the sub_i64 path,
- handled with the same code we need for eg 0x7fff8000. */
- assert(a2 == (int32_t)a2 || a2 == 0x80000000);
- l0 = (int16_t)a2;
- h1 = a2 - l0;
- h2 = 0;
- if (h1 < 0 && (int64_t)a2 > 0) {
- h2 = 0x40000000;
- h1 = a2 - h2 - l0;
- }
- assert((TCGArg)h2 + h1 + l0 == a2);
-
- if (h2) {
- tcg_out32(s, ADDIS | TAI(a0, a1, h2 >> 16));
- a1 = a0;
- }
- if (h1) {
- tcg_out32(s, ADDIS | TAI(a0, a1, h1 >> 16));
- a1 = a0;
- }
- if (l0 || a0 != a1) {
- tcg_out32(s, ADDI | TAI(a0, a1, l0));
- }
+ tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2);
} else {
tcg_out32(s, ADD | TAB(a0, a1, a2));
}
@@ -2144,10 +2146,9 @@ static void tcg_target_init(TCGContext *s)
tcg_regset_clear(s->reserved_regs);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */
tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* mem temp */
#ifdef __APPLE__
tcg_regset_set_reg(s->reserved_regs, TCG_REG_R11); /* ??? */
-#else
- tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc */
#endif
tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
--
1.8.1.4
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [Qemu-devel] [PULL 18/21] tcg-ppc64: Implement tcg_register_jit
2013-09-25 16:27 [Qemu-devel] [PULL 00/21] TCG ppc and ppc64 updates Richard Henderson
` (16 preceding siblings ...)
2013-09-25 16:27 ` [Qemu-devel] [PULL 17/21] tcg-ppc64: Handle long offsets better Richard Henderson
@ 2013-09-25 16:27 ` Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 19/21] tcg-ppc64: Streamline tcg_out_tlb_read Richard Henderson
` (2 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: Richard Henderson @ 2013-09-25 16:27 UTC (permalink / raw)
To: qemu-devel; +Cc: anthony
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 96 ++++++++++++++++++++++++++++++++++++++------------
1 file changed, 73 insertions(+), 23 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 51d2b06..8f58831 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -1043,25 +1043,26 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
#endif
}
+#define FRAME_SIZE ((int) \
+ ((8 /* back chain */ \
+ + 8 /* CR */ \
+ + 8 /* LR */ \
+ + 8 /* compiler doubleword */ \
+ + 8 /* link editor doubleword */ \
+ + 8 /* TOC save area */ \
+ + TCG_STATIC_CALL_ARGS_SIZE \
+ + CPU_TEMP_BUF_NLONGS * sizeof(long) \
+ + ARRAY_SIZE(tcg_target_callee_save_regs) * 8 \
+ + 15) & ~15))
+
+#define REG_SAVE_BOT (FRAME_SIZE - ARRAY_SIZE(tcg_target_callee_save_regs) * 8)
+
static void tcg_target_qemu_prologue(TCGContext *s)
{
- int i, frame_size;
-
- frame_size = 0
- + 8 /* back chain */
- + 8 /* CR */
- + 8 /* LR */
- + 8 /* compiler doubleword */
- + 8 /* link editor doubleword */
- + 8 /* TOC save area */
- + TCG_STATIC_CALL_ARGS_SIZE
- + ARRAY_SIZE(tcg_target_callee_save_regs) * 8
- + CPU_TEMP_BUF_NLONGS * sizeof(long)
- ;
- frame_size = (frame_size + 15) & ~15;
-
- tcg_set_frame(s, TCG_REG_CALL_STACK, frame_size
- - CPU_TEMP_BUF_NLONGS * sizeof(long),
+ int i;
+
+ tcg_set_frame(s, TCG_REG_CALL_STACK,
+ REG_SAVE_BOT - CPU_TEMP_BUF_NLONGS * sizeof(long),
CPU_TEMP_BUF_NLONGS * sizeof(long));
#ifndef __APPLE__
@@ -1072,12 +1073,12 @@ static void tcg_target_qemu_prologue(TCGContext *s)
/* Prologue */
tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR);
- tcg_out32(s, STDU | SAI(TCG_REG_R1, TCG_REG_R1, -frame_size));
+ tcg_out32(s, STDU | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE));
for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
tcg_out32(s, STD | SAI(tcg_target_callee_save_regs[i], 1,
- i * 8 + 48 + TCG_STATIC_CALL_ARGS_SIZE));
+ REG_SAVE_BOT + i * 8));
}
- tcg_out32(s, STD | SAI(TCG_REG_R0, TCG_REG_R1, frame_size + 16));
+ tcg_out32(s, STD | SAI(TCG_REG_R0, TCG_REG_R1, FRAME_SIZE + 16));
#ifdef CONFIG_USE_GUEST_BASE
if (GUEST_BASE) {
@@ -1095,11 +1096,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
tcg_out32(s, LD | TAI(tcg_target_callee_save_regs[i], TCG_REG_R1,
- i * 8 + 48 + TCG_STATIC_CALL_ARGS_SIZE));
+ REG_SAVE_BOT + i * 8));
}
- tcg_out32(s, LD | TAI(TCG_REG_R0, TCG_REG_R1, frame_size + 16));
+ tcg_out32(s, LD | TAI(TCG_REG_R0, TCG_REG_R1, FRAME_SIZE + 16));
tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR);
- tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, frame_size));
+ tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE));
tcg_out32(s, BCLR | BO_ALWAYS);
}
@@ -2154,3 +2155,52 @@ static void tcg_target_init(TCGContext *s)
tcg_add_target_add_op_defs(ppc_op_defs);
}
+
+typedef struct {
+ DebugFrameCIE cie;
+ DebugFrameFDEHeader fde;
+ uint8_t fde_def_cfa[4];
+ uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3];
+} DebugFrame;
+
+/* We're expecting a 2 byte uleb128 encoded value. */
+QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
+
+#define ELF_HOST_MACHINE EM_PPC64
+
+static DebugFrame debug_frame = {
+ .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
+ .cie.id = -1,
+ .cie.version = 1,
+ .cie.code_align = 1,
+ .cie.data_align = 0x78, /* sleb128 -8 */
+ .cie.return_column = 65,
+
+ /* Total FDE size does not include the "len" member. */
+ .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
+
+ .fde_def_cfa = {
+ 12, 1, /* DW_CFA_def_cfa r1, ... */
+ (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
+ (FRAME_SIZE >> 7)
+ },
+ .fde_reg_ofs = {
+ 0x11, 65, 0x7e, /* DW_CFA_offset_extended_sf, lr, 16 */
+ }
+};
+
+void tcg_register_jit(void *buf, size_t buf_size)
+{
+ uint8_t *p = &debug_frame.fde_reg_ofs[3];
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) {
+ p[0] = 0x80 + tcg_target_callee_save_regs[i];
+ p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * 8)) / 8;
+ }
+
+ debug_frame.fde.func_start = (tcg_target_long) buf;
+ debug_frame.fde.func_len = buf_size;
+
+ tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
+}
--
1.8.1.4
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [Qemu-devel] [PULL 19/21] tcg-ppc64: Streamline tcg_out_tlb_read
2013-09-25 16:27 [Qemu-devel] [PULL 00/21] TCG ppc and ppc64 updates Richard Henderson
` (17 preceding siblings ...)
2013-09-25 16:27 ` [Qemu-devel] [PULL 18/21] tcg-ppc64: Implement tcg_register_jit Richard Henderson
@ 2013-09-25 16:27 ` Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 20/21] tcg-ppc64: Add _noaddr functions for emitting forward branches Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 21/21] tcg-ppc64: Implement CONFIG_QEMU_LDST_OPTIMIZATION Richard Henderson
20 siblings, 0 replies; 22+ messages in thread
From: Richard Henderson @ 2013-09-25 16:27 UTC (permalink / raw)
To: qemu-devel; +Cc: anthony
Less conditional compilation. Merge an add insn with the indexed
memory load insn. Load the tlb addend earlier. Avoid the address
update memory form.
Fix a bug where the permitted tlb offsets were not large enough for some guests.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 194 ++++++++++++++++++++++++-------------------------
1 file changed, 97 insertions(+), 97 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 8f58831..2076299 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -31,13 +31,11 @@
static uint8_t *tb_ret_addr;
-#define FAST_PATH
-
#if TARGET_LONG_BITS == 32
-#define LD_ADDR LWZU
+#define LD_ADDR LWZ
#define CMP_L 0
#else
-#define LD_ADDR LDU
+#define LD_ADDR LD
#define CMP_L (1<<21)
#endif
@@ -816,38 +814,78 @@ static const void * const qemu_st_helpers[4] = {
helper_stq_mmu,
};
-static void tcg_out_tlb_read(TCGContext *s, TCGReg r0, TCGReg r1, TCGReg r2,
- TCGReg addr_reg, int s_bits, int offset)
+/* Perform the TLB load and compare. Places the result of the comparison
+ in CR7, loads the addend of the TLB into R3, and returns the register
+ containing the guest address (zero-extended into R4). Clobbers R0 and R2. */
+
+static TCGReg tcg_out_tlb_read(TCGContext *s, int s_bits, TCGReg addr_reg,
+ int mem_index, bool is_read)
{
-#if TARGET_LONG_BITS == 32
- tcg_out_ext32u(s, addr_reg, addr_reg);
-
- tcg_out_rlw(s, RLWINM, r0, addr_reg,
- 32 - (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS),
- 32 - (CPU_TLB_BITS + CPU_TLB_ENTRY_BITS),
- 31 - CPU_TLB_ENTRY_BITS);
- tcg_out32(s, ADD | TAB(r0, r0, TCG_AREG0));
- tcg_out32(s, LWZU | TAI(r1, r0, offset));
- tcg_out_rlw(s, RLWINM, r2, addr_reg, 0,
- (32 - s_bits) & 31, 31 - TARGET_PAGE_BITS);
-#else
- tcg_out_rld(s, RLDICL, r0, addr_reg,
- 64 - TARGET_PAGE_BITS,
- 64 - CPU_TLB_BITS);
- tcg_out_shli64(s, r0, r0, CPU_TLB_ENTRY_BITS);
+ int cmp_off
+ = (is_read
+ ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
+ : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
+ int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
+ TCGReg base = TCG_AREG0;
+
+ /* Extract the page index, shifted into place for tlb index. */
+ if (TARGET_LONG_BITS == 32) {
+ /* Zero-extend the address into a place helpful for further use. */
+ tcg_out_ext32u(s, TCG_REG_R4, addr_reg);
+ addr_reg = TCG_REG_R4;
+ } else {
+ tcg_out_rld(s, RLDICL, TCG_REG_R3, addr_reg,
+ 64 - TARGET_PAGE_BITS, 64 - CPU_TLB_BITS);
+ }
- tcg_out32(s, ADD | TAB(r0, r0, TCG_AREG0));
- tcg_out32(s, LD_ADDR | TAI(r1, r0, offset));
+ /* Compensate for very large offsets. */
+ if (add_off >= 0x8000) {
+ /* Most target env are smaller than 32k; none are larger than 64k.
+ Simplify the logic here merely to offset by 0x7ff0, giving us a
+ range just shy of 64k. Check this assumption. */
+ QEMU_BUILD_BUG_ON(offsetof(CPUArchState,
+ tlb_table[NB_MMU_MODES - 1][1])
+ > 0x7ff0 + 0x7fff);
+ tcg_out32(s, ADDI | TAI(TCG_REG_R2, base, 0x7ff0));
+ base = TCG_REG_R2;
+ cmp_off -= 0x7ff0;
+ add_off -= 0x7ff0;
+ }
- if (!s_bits) {
- tcg_out_rld(s, RLDICR, r2, addr_reg, 0, 63 - TARGET_PAGE_BITS);
+ /* Extraction and shifting, part 2. */
+ if (TARGET_LONG_BITS == 32) {
+ tcg_out_rlw(s, RLWINM, TCG_REG_R3, addr_reg,
+ 32 - (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS),
+ 32 - (CPU_TLB_BITS + CPU_TLB_ENTRY_BITS),
+ 31 - CPU_TLB_ENTRY_BITS);
} else {
- tcg_out_rld(s, RLDICL, r2, addr_reg,
- 64 - TARGET_PAGE_BITS,
- TARGET_PAGE_BITS - s_bits);
- tcg_out_rld(s, RLDICL, r2, r2, TARGET_PAGE_BITS, 0);
+ tcg_out_shli64(s, TCG_REG_R3, TCG_REG_R3, CPU_TLB_ENTRY_BITS);
}
-#endif
+
+ tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, base));
+
+ /* Load the tlb comparator. */
+ tcg_out32(s, LD_ADDR | TAI(TCG_REG_R2, TCG_REG_R3, cmp_off));
+
+ /* Load the TLB addend for use on the fast path. Do this asap
+ to minimize any load use delay. */
+ tcg_out32(s, LD | TAI(TCG_REG_R3, TCG_REG_R3, add_off));
+
+ /* Clear the non-page, non-alignment bits from the address. */
+ if (TARGET_LONG_BITS == 32) {
+ tcg_out_rlw(s, RLWINM, TCG_REG_R0, addr_reg, 0,
+ (32 - s_bits) & 31, 31 - TARGET_PAGE_BITS);
+ } else if (!s_bits) {
+ tcg_out_rld(s, RLDICR, TCG_REG_R0, addr_reg, 0, 63 - TARGET_PAGE_BITS);
+ } else {
+ tcg_out_rld(s, RLDICL, TCG_REG_R0, addr_reg,
+ 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - s_bits);
+ tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
+ }
+
+ tcg_out32(s, CMP | BF(7) | RA(TCG_REG_R0) | RB(TCG_REG_R2) | CMP_L);
+
+ return addr_reg;
}
#endif
@@ -875,10 +913,10 @@ static const uint32_t qemu_exts_opc[4] = {
static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
{
- TCGReg addr_reg, data_reg, r0, r1, rbase;
+ TCGReg addr_reg, data_reg, rbase;
uint32_t insn, s_bits;
#ifdef CONFIG_SOFTMMU
- TCGReg r2, ir;
+ TCGReg ir;
int mem_index;
void *label1_ptr, *label2_ptr;
#endif
@@ -890,20 +928,10 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
#ifdef CONFIG_SOFTMMU
mem_index = *args;
- r0 = TCG_REG_R3;
- r1 = TCG_REG_R4;
- r2 = TCG_REG_R0;
- rbase = 0;
-
- tcg_out_tlb_read(s, r0, r1, r2, addr_reg, s_bits,
- offsetof(CPUArchState, tlb_table[mem_index][0].addr_read));
-
- tcg_out32(s, CMP | BF(7) | RA(r2) | RB(r1) | CMP_L);
+ addr_reg = tcg_out_tlb_read(s, s_bits, addr_reg, mem_index, true);
label1_ptr = s->code_ptr;
-#ifdef FAST_PATH
tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_TRUE);
-#endif
/* slow path */
ir = TCG_REG_R3;
@@ -919,42 +947,33 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
} else if (data_reg != TCG_REG_R3) {
tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R3);
}
+
label2_ptr = s->code_ptr;
tcg_out32(s, B);
/* label1: fast path */
-#ifdef FAST_PATH
reloc_pc14(label1_ptr, (tcg_target_long)s->code_ptr);
-#endif
-
- /* r0 now contains &env->tlb_table[mem_index][index].addr_read */
- tcg_out32(s, LD | TAI(r0, r0,
- offsetof(CPUTLBEntry, addend)
- - offsetof(CPUTLBEntry, addr_read)));
- /* r0 = env->tlb_table[mem_index][index].addend */
- tcg_out32(s, ADD | TAB(r0, r0, addr_reg));
- /* r0 = env->tlb_table[mem_index][index].addend + addr */
+ rbase = TCG_REG_R3;
#else /* !CONFIG_SOFTMMU */
-#if TARGET_LONG_BITS == 32
- tcg_out_ext32u(s, addr_reg, addr_reg);
-#endif
- r0 = addr_reg;
- r1 = TCG_REG_R3;
rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0;
+ if (TARGET_LONG_BITS == 32) {
+ tcg_out_ext32u(s, TCG_REG_R2, addr_reg);
+ addr_reg = TCG_REG_R2;
+ }
#endif
insn = qemu_ldx_opc[opc];
if (!HAVE_ISA_2_06 && insn == LDBRX) {
- tcg_out32(s, ADDI | TAI(r1, r0, 4));
- tcg_out32(s, LWBRX | TAB(data_reg, rbase, r0));
- tcg_out32(s, LWBRX | TAB( r1, rbase, r1));
- tcg_out_rld(s, RLDIMI, data_reg, r1, 32, 0);
+ tcg_out32(s, ADDI | TAI(TCG_REG_R0, addr_reg, 4));
+ tcg_out32(s, LWBRX | TAB(data_reg, rbase, addr_reg));
+ tcg_out32(s, LWBRX | TAB(TCG_REG_R0, rbase, TCG_REG_R0));
+ tcg_out_rld(s, RLDIMI, data_reg, TCG_REG_R0, 32, 0);
} else if (insn) {
- tcg_out32(s, insn | TAB(data_reg, rbase, r0));
+ tcg_out32(s, insn | TAB(data_reg, rbase, addr_reg));
} else {
insn = qemu_ldx_opc[s_bits];
- tcg_out32(s, insn | TAB(data_reg, rbase, r0));
+ tcg_out32(s, insn | TAB(data_reg, rbase, addr_reg));
insn = qemu_exts_opc[s_bits];
tcg_out32(s, insn | RA(data_reg) | RS(data_reg));
}
@@ -966,10 +985,10 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
{
- TCGReg addr_reg, r0, r1, rbase, data_reg;
+ TCGReg addr_reg, rbase, data_reg;
uint32_t insn;
#ifdef CONFIG_SOFTMMU
- TCGReg r2, ir;
+ TCGReg ir;
int mem_index;
void *label1_ptr, *label2_ptr;
#endif
@@ -980,20 +999,10 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
#ifdef CONFIG_SOFTMMU
mem_index = *args;
- r0 = TCG_REG_R3;
- r1 = TCG_REG_R4;
- r2 = TCG_REG_R0;
- rbase = 0;
-
- tcg_out_tlb_read(s, r0, r1, r2, addr_reg, opc,
- offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
-
- tcg_out32(s, CMP | BF(7) | RA(r2) | RB(r1) | CMP_L);
+ addr_reg = tcg_out_tlb_read(s, opc, addr_reg, mem_index, false);
label1_ptr = s->code_ptr;
-#ifdef FAST_PATH
tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_TRUE);
-#endif
/* slow path */
ir = TCG_REG_R3;
@@ -1008,34 +1017,25 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
tcg_out32(s, B);
/* label1: fast path */
-#ifdef FAST_PATH
- reloc_pc14(label1_ptr, (tcg_target_long)s->code_ptr);
-#endif
-
- tcg_out32(s, LD | TAI(r0, r0,
- offsetof(CPUTLBEntry, addend)
- - offsetof(CPUTLBEntry, addr_write)));
- /* r0 = env->tlb_table[mem_index][index].addend */
- tcg_out32(s, ADD | TAB(r0, r0, addr_reg));
- /* r0 = env->tlb_table[mem_index][index].addend + addr */
+ reloc_pc14(label1_ptr, (tcg_target_long) s->code_ptr);
+ rbase = TCG_REG_R3;
#else /* !CONFIG_SOFTMMU */
-#if TARGET_LONG_BITS == 32
- tcg_out_ext32u(s, addr_reg, addr_reg);
-#endif
- r1 = TCG_REG_R3;
- r0 = addr_reg;
rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0;
+ if (TARGET_LONG_BITS == 32) {
+ tcg_out_ext32u(s, TCG_REG_R2, addr_reg);
+ addr_reg = TCG_REG_R2;
+ }
#endif
insn = qemu_stx_opc[opc];
if (!HAVE_ISA_2_06 && insn == STDBRX) {
- tcg_out32(s, STWBRX | SAB(data_reg, rbase, r0));
- tcg_out32(s, ADDI | TAI(r1, r0, 4));
+ tcg_out32(s, STWBRX | SAB(data_reg, rbase, addr_reg));
+ tcg_out32(s, ADDI | TAI(TCG_REG_R2, addr_reg, 4));
tcg_out_shri64(s, TCG_REG_R0, data_reg, 32);
- tcg_out32(s, STWBRX | SAB(TCG_REG_R0, rbase, r1));
+ tcg_out32(s, STWBRX | SAB(TCG_REG_R0, rbase, TCG_REG_R2));
} else {
- tcg_out32(s, insn | SAB(data_reg, rbase, r0));
+ tcg_out32(s, insn | SAB(data_reg, rbase, addr_reg));
}
#ifdef CONFIG_SOFTMMU
--
1.8.1.4
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [Qemu-devel] [PULL 20/21] tcg-ppc64: Add _noaddr functions for emitting forward branches
2013-09-25 16:27 [Qemu-devel] [PULL 00/21] TCG ppc and ppc64 updates Richard Henderson
` (18 preceding siblings ...)
2013-09-25 16:27 ` [Qemu-devel] [PULL 19/21] tcg-ppc64: Streamline tcg_out_tlb_read Richard Henderson
@ 2013-09-25 16:27 ` Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 21/21] tcg-ppc64: Implement CONFIG_QEMU_LDST_OPTIMIZATION Richard Henderson
20 siblings, 0 replies; 22+ messages in thread
From: Richard Henderson @ 2013-09-25 16:27 UTC (permalink / raw)
To: qemu-devel; +Cc: anthony
... rather than open-coding this stuff throughout the file.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 26 ++++++++++++++++----------
1 file changed, 16 insertions(+), 10 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 2076299..c225c8e 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -204,6 +204,18 @@ static void reloc_pc14(void *pc, tcg_target_long target)
*(uint32_t *)pc = (*(uint32_t *)pc & ~0xfffc) | reloc_pc14_val(pc, target);
}
+static inline void tcg_out_b_noaddr(TCGContext *s, int insn)
+{
+ unsigned retrans = *(uint32_t *)s->code_ptr & 0x3fffffc;
+ tcg_out32(s, insn | retrans);
+}
+
+static inline void tcg_out_bc_noaddr(TCGContext *s, int insn)
+{
+ unsigned retrans = *(uint32_t *)s->code_ptr & 0xfffc;
+ tcg_out32(s, insn | retrans);
+}
+
static void patch_reloc(uint8_t *code_ptr, int type,
intptr_t value, intptr_t addend)
{
@@ -1362,11 +1374,8 @@ static void tcg_out_bc(TCGContext *s, int bc, int label_index)
if (l->has_value) {
tcg_out32(s, bc | reloc_pc14_val(s->code_ptr, l->u.value));
} else {
- uint16_t val = *(uint16_t *) &s->code_ptr[2];
-
- /* Thanks to Andrzej Zaborowski */
- tcg_out32(s, bc | (val & 0xfffc));
- tcg_out_reloc(s, s->code_ptr - 4, R_PPC_REL14, label_index, 0);
+ tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, label_index, 0);
+ tcg_out_bc_noaddr(s, bc);
}
}
@@ -1466,11 +1475,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
if (l->has_value) {
tcg_out_b(s, 0, l->u.value);
} else {
- uint32_t val = *(uint32_t *) s->code_ptr;
-
- /* Thanks to Andrzej Zaborowski */
- tcg_out32(s, B | (val & 0x3fffffc));
- tcg_out_reloc(s, s->code_ptr - 4, R_PPC_REL24, args[0], 0);
+ tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, args[0], 0);
+ tcg_out_b_noaddr(s, B);
}
}
break;
--
1.8.1.4
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [Qemu-devel] [PULL 21/21] tcg-ppc64: Implement CONFIG_QEMU_LDST_OPTIMIZATION
2013-09-25 16:27 [Qemu-devel] [PULL 00/21] TCG ppc and ppc64 updates Richard Henderson
` (19 preceding siblings ...)
2013-09-25 16:27 ` [Qemu-devel] [PULL 20/21] tcg-ppc64: Add _noaddr functions for emitting forward branches Richard Henderson
@ 2013-09-25 16:27 ` Richard Henderson
20 siblings, 0 replies; 22+ messages in thread
From: Richard Henderson @ 2013-09-25 16:27 UTC (permalink / raw)
To: qemu-devel; +Cc: anthony
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
configure | 2 +-
tcg/ppc64/tcg-target.c | 212 +++++++++++++++++++++++++++++++------------------
2 files changed, 136 insertions(+), 78 deletions(-)
diff --git a/configure b/configure
index ef4d9bf..ba2d2b0 100755
--- a/configure
+++ b/configure
@@ -3800,7 +3800,7 @@ echo "libs_softmmu=$libs_softmmu" >> $config_host_mak
echo "ARCH=$ARCH" >> $config_host_mak
case "$cpu" in
- arm|i386|x86_64|x32|ppc|aarch64)
+ aarch64 | arm | i386 | x86_64 | x32 | ppc*)
# The TCG interpreter currently does not support ld/st optimization.
if test "$tcg_interpreter" = "no" ; then
echo "CONFIG_QEMU_LDST_OPTIMIZATION=y" >> $config_host_mak
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index c225c8e..332f4d8 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -807,23 +807,47 @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
}
}
-#if defined(CONFIG_SOFTMMU)
+static const uint32_t qemu_ldx_opc[8] = {
+#ifdef TARGET_WORDS_BIGENDIAN
+ LBZX, LHZX, LWZX, LDX,
+ 0, LHAX, LWAX, LDX
+#else
+ LBZX, LHBRX, LWBRX, LDBRX,
+ 0, 0, 0, LDBRX,
+#endif
+};
+
+static const uint32_t qemu_stx_opc[4] = {
+#ifdef TARGET_WORDS_BIGENDIAN
+ STBX, STHX, STWX, STDX
+#else
+ STBX, STHBRX, STWBRX, STDBRX,
+#endif
+};
+
+static const uint32_t qemu_exts_opc[4] = {
+ EXTSB, EXTSH, EXTSW, 0
+};
+
+#if defined (CONFIG_SOFTMMU)
/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
- int mmu_idx) */
+ * int mmu_idx, uintptr_t ra)
+ */
static const void * const qemu_ld_helpers[4] = {
- helper_ldb_mmu,
- helper_ldw_mmu,
- helper_ldl_mmu,
- helper_ldq_mmu,
+ helper_ret_ldub_mmu,
+ helper_ret_lduw_mmu,
+ helper_ret_ldul_mmu,
+ helper_ret_ldq_mmu,
};
/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
- uintxx_t val, int mmu_idx) */
+ * uintxx_t val, int mmu_idx, uintptr_t ra)
+ */
static const void * const qemu_st_helpers[4] = {
- helper_stb_mmu,
- helper_stw_mmu,
- helper_stl_mmu,
- helper_stq_mmu,
+ helper_ret_stb_mmu,
+ helper_ret_stw_mmu,
+ helper_ret_stl_mmu,
+ helper_ret_stq_mmu,
};
/* Perform the TLB load and compare. Places the result of the comparison
@@ -899,38 +923,105 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, int s_bits, TCGReg addr_reg,
return addr_reg;
}
-#endif
-static const uint32_t qemu_ldx_opc[8] = {
-#ifdef TARGET_WORDS_BIGENDIAN
- LBZX, LHZX, LWZX, LDX,
- 0, LHAX, LWAX, LDX
-#else
- LBZX, LHBRX, LWBRX, LDBRX,
- 0, 0, 0, LDBRX,
-#endif
-};
+/* Record the context of a call to the out of line helper code for the slow
+ path for a load or store, so that we can later generate the correct
+ helper code. */
+static void add_qemu_ldst_label(TCGContext *s, bool is_ld, int opc,
+ int data_reg, int addr_reg, int mem_index,
+ uint8_t *raddr, uint8_t *label_ptr)
+{
+ int idx;
+ TCGLabelQemuLdst *label;
-static const uint32_t qemu_stx_opc[4] = {
-#ifdef TARGET_WORDS_BIGENDIAN
- STBX, STHX, STWX, STDX
-#else
- STBX, STHBRX, STWBRX, STDBRX,
-#endif
-};
+ if (s->nb_qemu_ldst_labels >= TCG_MAX_QEMU_LDST) {
+ tcg_abort();
+ }
-static const uint32_t qemu_exts_opc[4] = {
- EXTSB, EXTSH, EXTSW, 0
-};
+ idx = s->nb_qemu_ldst_labels++;
+ label = (TCGLabelQemuLdst *)&s->qemu_ldst_labels[idx];
+ label->is_ld = is_ld;
+ label->opc = opc;
+ label->datalo_reg = data_reg;
+ label->addrlo_reg = addr_reg;
+ label->mem_index = mem_index;
+ label->raddr = raddr;
+ label->label_ptr[0] = label_ptr;
+}
+
+static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
+{
+ int opc = lb->opc;
+ int s_bits = opc & 3;
+
+ reloc_pc14(lb->label_ptr[0], (uintptr_t)s->code_ptr);
+
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_AREG0);
+
+ /* If the address needed to be zero-extended, we'll have already
+ placed it in R4. The only remaining case is 64-bit guest. */
+ tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, lb->addrlo_reg);
+
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, lb->mem_index);
+ tcg_out32(s, MFSPR | RT(TCG_REG_R6) | LR);
+
+ tcg_out_call(s, (tcg_target_long)qemu_ld_helpers[s_bits], 1);
+
+ if (opc & 4) {
+ uint32_t insn = qemu_exts_opc[s_bits];
+ tcg_out32(s, insn | RA(lb->datalo_reg) | RS(TCG_REG_R3));
+ } else {
+ tcg_out_mov(s, TCG_TYPE_I64, lb->datalo_reg, TCG_REG_R3);
+ }
+
+ tcg_out_b(s, 0, (uintptr_t)lb->raddr);
+}
+
+static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
+{
+ int opc = lb->opc;
+
+ reloc_pc14(lb->label_ptr[0], (uintptr_t)s->code_ptr);
+
+ tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, TCG_AREG0);
+
+ /* If the address needed to be zero-extended, we'll have already
+ placed it in R4. The only remaining case is 64-bit guest. */
+ tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, lb->addrlo_reg);
+
+ tcg_out_rld(s, RLDICL, TCG_REG_R5, lb->datalo_reg,
+ 0, 64 - (1 << (3 + opc)));
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R6, lb->mem_index);
+ tcg_out32(s, MFSPR | RT(TCG_REG_R7) | LR);
+
+ tcg_out_call(s, (tcg_target_long)qemu_st_helpers[opc], 1);
+
+ tcg_out_b(s, 0, (uintptr_t)lb->raddr);
+}
+
+void tcg_out_tb_finalize(TCGContext *s)
+{
+ int i, n = s->nb_qemu_ldst_labels;
+
+ /* qemu_ld/st slow paths */
+ for (i = 0; i < n; i++) {
+ TCGLabelQemuLdst *label = &s->qemu_ldst_labels[i];
+ if (label->is_ld) {
+ tcg_out_qemu_ld_slow_path(s, label);
+ } else {
+ tcg_out_qemu_st_slow_path(s, label);
+ }
+ }
+}
+#endif /* SOFTMMU */
static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
{
TCGReg addr_reg, data_reg, rbase;
uint32_t insn, s_bits;
#ifdef CONFIG_SOFTMMU
- TCGReg ir;
int mem_index;
- void *label1_ptr, *label2_ptr;
+ void *label_ptr;
#endif
data_reg = *args++;
@@ -942,29 +1033,9 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
addr_reg = tcg_out_tlb_read(s, s_bits, addr_reg, mem_index, true);
- label1_ptr = s->code_ptr;
- tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_TRUE);
-
- /* slow path */
- ir = TCG_REG_R3;
- tcg_out_mov(s, TCG_TYPE_I64, ir++, TCG_AREG0);
- tcg_out_mov(s, TCG_TYPE_I64, ir++, addr_reg);
- tcg_out_movi(s, TCG_TYPE_I64, ir++, mem_index);
-
- tcg_out_call(s, (tcg_target_long) qemu_ld_helpers[s_bits], 1);
-
- if (opc & 4) {
- insn = qemu_exts_opc[s_bits];
- tcg_out32(s, insn | RA(data_reg) | RS(TCG_REG_R3));
- } else if (data_reg != TCG_REG_R3) {
- tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R3);
- }
-
- label2_ptr = s->code_ptr;
- tcg_out32(s, B);
-
- /* label1: fast path */
- reloc_pc14(label1_ptr, (tcg_target_long)s->code_ptr);
+ /* Load a pointer into the current opcode w/conditional branch-link. */
+ label_ptr = s->code_ptr;
+ tcg_out_bc_noaddr(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
rbase = TCG_REG_R3;
#else /* !CONFIG_SOFTMMU */
@@ -991,7 +1062,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
}
#ifdef CONFIG_SOFTMMU
- reloc_pc24(label2_ptr, (tcg_target_long)s->code_ptr);
+ add_qemu_ldst_label(s, true, opc, data_reg, addr_reg, mem_index,
+ s->code_ptr, label_ptr);
#endif
}
@@ -1000,9 +1072,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
TCGReg addr_reg, rbase, data_reg;
uint32_t insn;
#ifdef CONFIG_SOFTMMU
- TCGReg ir;
int mem_index;
- void *label1_ptr, *label2_ptr;
+ void *label_ptr;
#endif
data_reg = *args++;
@@ -1013,23 +1084,9 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
addr_reg = tcg_out_tlb_read(s, opc, addr_reg, mem_index, false);
- label1_ptr = s->code_ptr;
- tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_TRUE);
-
- /* slow path */
- ir = TCG_REG_R3;
- tcg_out_mov(s, TCG_TYPE_I64, ir++, TCG_AREG0);
- tcg_out_mov(s, TCG_TYPE_I64, ir++, addr_reg);
- tcg_out_rld(s, RLDICL, ir++, data_reg, 0, 64 - (1 << (3 + opc)));
- tcg_out_movi(s, TCG_TYPE_I64, ir++, mem_index);
-
- tcg_out_call(s, (tcg_target_long)qemu_st_helpers[opc], 1);
-
- label2_ptr = s->code_ptr;
- tcg_out32(s, B);
-
- /* label1: fast path */
- reloc_pc14(label1_ptr, (tcg_target_long) s->code_ptr);
+ /* Load a pointer into the current opcode w/conditional branch-link. */
+ label_ptr = s->code_ptr;
+ tcg_out_bc_noaddr(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
rbase = TCG_REG_R3;
#else /* !CONFIG_SOFTMMU */
@@ -1051,7 +1108,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
}
#ifdef CONFIG_SOFTMMU
- reloc_pc24(label2_ptr, (tcg_target_long)s->code_ptr);
+ add_qemu_ldst_label(s, false, opc, data_reg, addr_reg, mem_index,
+ s->code_ptr, label_ptr);
#endif
}
--
1.8.1.4
^ permalink raw reply related [flat|nested] 22+ messages in thread