* [Qemu-devel] [PATCH 1/5] tcg-arm: Use ldrd/strd for appropriate qemu_ld/st64
2013-08-28 22:33 [Qemu-devel] [PATCH 0/5] tcg-arm ldst improvements Richard Henderson
@ 2013-08-28 22:33 ` Richard Henderson
2013-08-28 22:33 ` [Qemu-devel] [PATCH 2/5] tcg-arm: Rearrange slow-path qemu_ld/st Richard Henderson
` (3 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Richard Henderson @ 2013-08-28 22:33 UTC (permalink / raw)
To: qemu-devel; +Cc: aurelien
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/arm/tcg-target.c | 48 +++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 43 insertions(+), 5 deletions(-)
diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index 6d084b3..9176930 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -320,6 +320,9 @@ typedef enum {
INSN_STRB_REG = 0x06400000,
INSN_LDRD_IMM = 0x004000d0,
+ INSN_LDRD_REG = 0x000000d0,
+ INSN_STRD_IMM = 0x004000f0,
+ INSN_STRD_REG = 0x000000f0,
} ARMInsn;
#define SHIFT_IMM_LSL(im) (((im) << 7) | 0x00)
@@ -810,6 +813,30 @@ static inline void tcg_out_st32_r(TCGContext *s, int cond, TCGReg rt,
tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 0);
}
+static inline void tcg_out_ldrd_8(TCGContext *s, int cond, TCGReg rt,
+ TCGReg rn, int imm8)
+{
+ tcg_out_memop_8(s, cond, INSN_LDRD_IMM, rt, rn, imm8, 1, 0);
+}
+
+static inline void tcg_out_ldrd_r(TCGContext *s, int cond, TCGReg rt,
+ TCGReg rn, TCGReg rm)
+{
+ tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 0);
+}
+
+static inline void tcg_out_strd_8(TCGContext *s, int cond, TCGReg rt,
+ TCGReg rn, int imm8)
+{
+ tcg_out_memop_8(s, cond, INSN_STRD_IMM, rt, rn, imm8, 1, 0);
+}
+
+static inline void tcg_out_strd_r(TCGContext *s, int cond, TCGReg rt,
+ TCGReg rn, TCGReg rm)
+{
+ tcg_out_memop_r(s, cond, INSN_STRD_REG, rt, rn, rm, 1, 1, 0);
+}
+
/* Register pre-increment with base writeback. */
static inline void tcg_out_ld32_rwb(TCGContext *s, int cond, TCGReg rt,
TCGReg rn, TCGReg rm)
@@ -1397,6 +1424,9 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
tcg_out_ld32_12(s, COND_AL, data_reg, TCG_REG_R1, 4);
tcg_out_bswap32(s, COND_AL, data_reg2, data_reg2);
tcg_out_bswap32(s, COND_AL, data_reg, data_reg);
+ } else if (use_armv6_instructions
+ && (data_reg & 1) == 0 && data_reg2 == data_reg + 1) {
+ tcg_out_ldrd_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
} else {
tcg_out_ld32_rwb(s, COND_AL, data_reg, TCG_REG_R1, addr_reg);
tcg_out_ld32_12(s, COND_AL, data_reg2, TCG_REG_R1, 4);
@@ -1450,9 +1480,13 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
}
break;
case 3:
- /* TODO: use block load -
- * check that data_reg2 > data_reg or the other way */
- if (data_reg == addr_reg) {
+ if (use_armv6_instructions && !bswap
+ && (data_reg & 1) == 0 && data_reg2 == data_reg + 1) {
+ tcg_out_ldrd_8(s, COND_AL, data_reg, addr_reg, 0);
+ } else if (use_armv6_instructions && bswap
+ && (data_reg2 & 1) == 0 && data_reg == data_reg2 + 1) {
+ tcg_out_ldrd_8(s, COND_AL, data_reg2, addr_reg, 0);
+ } else if (data_reg == addr_reg) {
tcg_out_ld32_12(s, COND_AL, data_reg2, addr_reg, bswap ? 0 : 4);
tcg_out_ld32_12(s, COND_AL, data_reg, addr_reg, bswap ? 4 : 0);
} else {
@@ -1529,6 +1563,9 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
tcg_out_st32_rwb(s, COND_AL, TCG_REG_R0, TCG_REG_R1, addr_reg);
tcg_out_bswap32(s, COND_AL, TCG_REG_R0, data_reg);
tcg_out_st32_12(s, COND_AL, TCG_REG_R0, TCG_REG_R1, 4);
+ } else if (use_armv6_instructions
+ && (data_reg & 1) == 0 && data_reg2 == data_reg + 1) {
+ tcg_out_strd_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
} else {
tcg_out_st32_rwb(s, COND_AL, data_reg, TCG_REG_R1, addr_reg);
tcg_out_st32_12(s, COND_AL, data_reg2, TCG_REG_R1, 4);
@@ -1576,13 +1613,14 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
}
break;
case 3:
- /* TODO: use block store -
- * check that data_reg2 > data_reg or the other way */
if (bswap) {
tcg_out_bswap32(s, COND_AL, TCG_REG_R0, data_reg2);
tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addr_reg, 0);
tcg_out_bswap32(s, COND_AL, TCG_REG_R0, data_reg);
tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addr_reg, 4);
+ } else if (use_armv6_instructions
+ && (data_reg & 1) == 0 && data_reg2 == data_reg + 1) {
+ tcg_out_strd_8(s, COND_AL, data_reg, addr_reg, 0);
} else {
tcg_out_st32_12(s, COND_AL, data_reg, addr_reg, 0);
tcg_out_st32_12(s, COND_AL, data_reg2, addr_reg, 4);
--
1.8.1.4
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [Qemu-devel] [PATCH 2/5] tcg-arm: Rearrange slow-path qemu_ld/st
2013-08-28 22:33 [Qemu-devel] [PATCH 0/5] tcg-arm ldst improvements Richard Henderson
2013-08-28 22:33 ` [Qemu-devel] [PATCH 1/5] tcg-arm: Use ldrd/strd for appropriate qemu_ld/st64 Richard Henderson
@ 2013-08-28 22:33 ` Richard Henderson
2013-08-28 22:33 ` [Qemu-devel] [PATCH 3/5] tcg-arm: Use strd for tcg_out_arg_reg64 Richard Henderson
` (2 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Richard Henderson @ 2013-08-28 22:33 UTC (permalink / raw)
To: qemu-devel; +Cc: aurelien
Instead of using a branch-call-branch sequence, arrange for a
call-branch sequence, using the ARM's conditional call insn.
This reduces the size of the slow-path within the TB, and makes
the GETPC_EXT implementation identical for TCG and not-TCG.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
include/exec/exec-all.h | 14 ---
tcg/arm/tcg-target.c | 270 +++++++++++++++++++++---------------------------
2 files changed, 120 insertions(+), 164 deletions(-)
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index ffee83f..60c025c 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -326,20 +326,6 @@ extern uintptr_t tci_tb_ptr;
#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
# if defined (_ARCH_PPC) && !defined (_ARCH_PPC64)
# define GETRA_LDST(RA) (*(int32_t *)((RA) - 4))
-# elif defined(__arm__)
-/* We define two insns between the return address and the branch back to
- straight-line. Find and decode that branch insn. */
-# define GETRA_LDST(RA) tcg_getra_ldst(RA)
-static inline uintptr_t tcg_getra_ldst(uintptr_t ra)
-{
- int32_t b;
- ra += 8; /* skip the two insns */
- b = *(int32_t *)ra; /* load the branch insn */
- b = (b << 8) >> (8 - 2); /* extract the displacement */
- ra += 8; /* branches are relative to pc+8 */
- ra += b; /* apply the displacement */
- return ra;
-}
# elif defined(__aarch64__)
# define GETRA_LDST(RA) tcg_getra_ldst(RA)
static inline uintptr_t tcg_getra_ldst(uintptr_t ra)
diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index 9176930..5f71f24 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -166,29 +166,37 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
break;
case 'r':
+#ifndef CONFIG_SOFTMMU
+ case 'a':
+ case 'b':
+#endif
ct->ct |= TCG_CT_REG;
tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
break;
+#ifdef CONFIG_SOFTMMU
+ /* qemu_ld data registers -- softmmu uses the return registers only. */
+ case 'a':
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set32(ct->u.regs, 0, 1);
+ break;
+ case 'b':
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set32(ct->u.regs, 0, 2);
+ break;
+#endif
+
/* qemu_ld address */
case 'l':
ct->ct |= TCG_CT_REG;
tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
#ifdef CONFIG_SOFTMMU
- /* r0-r2 will be overwritten when reading the tlb entry,
+ /* r0-r2,lr will be overwritten when reading the tlb entry,
so don't use these. */
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
-#endif
- break;
- case 'L':
- ct->ct |= TCG_CT_REG;
- tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
-#ifdef CONFIG_SOFTMMU
- /* r1 is still needed to load data_reg or data_reg2,
- so don't use it. */
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
#endif
break;
@@ -207,6 +215,7 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
/* Avoid clashes with registers being used for helper args */
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
#endif
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
#endif
break;
@@ -382,13 +391,17 @@ static inline void tcg_out_b_noaddr(TCGContext *s, int cond)
/* We pay attention here to not modify the branch target by skipping
the corresponding bytes. This ensure that caches and memory are
kept coherent during retranslation. */
-#ifdef HOST_WORDS_BIGENDIAN
- tcg_out8(s, (cond << 4) | 0x0a);
- s->code_ptr += 3;
-#else
s->code_ptr += 3;
tcg_out8(s, (cond << 4) | 0x0a);
-#endif
+}
+
+static inline void tcg_out_bl_noaddr(TCGContext *s, int cond)
+{
+ /* We pay attention here to not modify the branch target by skipping
+ the corresponding bytes. This ensure that caches and memory are
+ kept coherent during retranslation. */
+ s->code_ptr += 3;
+ tcg_out8(s, (cond << 4) | 0x0b);
}
static inline void tcg_out_bl(TCGContext *s, int cond, int32_t offset)
@@ -1002,34 +1015,27 @@ static inline void tcg_out_st8(TCGContext *s, int cond,
tcg_out_st8_12(s, cond, rd, rn, offset);
}
-/* The _goto case is normally between TBs within the same code buffer,
- * and with the code buffer limited to 16MB we shouldn't need the long
- * case.
- *
- * .... except to the prologue that is in its own buffer.
+/* The _goto case is normally between TBs within the same code buffer, and
+ * with the code buffer limited to 16MB we wouldn't need the long case.
+ * But we also use it for the tail-call to the qemu_ld/st helpers, which does.
*/
static inline void tcg_out_goto(TCGContext *s, int cond, uint32_t addr)
{
- int32_t val;
+ int32_t disp = addr - (tcg_target_long) s->code_ptr;
- if (addr & 1) {
- /* goto to a Thumb destination isn't supported */
- tcg_abort();
+ if ((addr & 1) == 0 && disp - 8 < 0x01fffffd && disp - 8 > -0x01fffffd) {
+ tcg_out_b(s, cond, disp);
+ return;
}
- val = addr - (tcg_target_long) s->code_ptr;
- if (val - 8 < 0x01fffffd && val - 8 > -0x01fffffd)
- tcg_out_b(s, cond, val);
- else {
- if (cond == COND_AL) {
- tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4);
- tcg_out32(s, addr);
- } else {
- tcg_out_movi32(s, cond, TCG_REG_TMP, val - 8);
- tcg_out_dat_reg(s, cond, ARITH_ADD,
- TCG_REG_PC, TCG_REG_PC,
- TCG_REG_TMP, SHIFT_IMM_LSL(0));
+ tcg_out_movi32(s, cond, TCG_REG_TMP, addr);
+ if (use_armv5t_instructions) {
+ tcg_out_bx(s, cond, TCG_REG_TMP);
+ } else {
+ if (addr & 1) {
+ tcg_abort();
}
+ tcg_out_mov_reg(s, cond, TCG_REG_PC, TCG_REG_TMP);
}
}
@@ -1084,23 +1090,29 @@ static inline void tcg_out_goto_label(TCGContext *s, int cond, int label_index)
}
#ifdef CONFIG_SOFTMMU
-
-/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
- int mmu_idx) */
-static const void * const qemu_ld_helpers[4] = {
- helper_ldb_mmu,
- helper_ldw_mmu,
- helper_ldl_mmu,
- helper_ldq_mmu,
+/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
+ * int mmu_idx, uintptr_t ra)
+ */
+static const void * const qemu_ld_helpers[8] = {
+ helper_ret_ldub_mmu,
+ helper_ret_lduw_mmu,
+ helper_ret_ldul_mmu,
+ helper_ret_ldq_mmu,
+
+ helper_ret_ldsb_mmu,
+ helper_ret_ldsw_mmu,
+ helper_ret_ldul_mmu,
+ helper_ret_ldq_mmu,
};
-/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
- uintxx_t val, int mmu_idx) */
+/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
+ * uintxx_t val, int mmu_idx, uintptr_t ra)
+ */
static const void * const qemu_st_helpers[4] = {
- helper_stb_mmu,
- helper_stw_mmu,
- helper_stl_mmu,
- helper_stq_mmu,
+ helper_ret_stb_mmu,
+ helper_ret_stw_mmu,
+ helper_ret_stl_mmu,
+ helper_ret_stq_mmu,
};
/* Helper routines for marshalling helper function arguments into
@@ -1258,8 +1270,7 @@ static void add_qemu_ldst_label(TCGContext *s, int is_ld, int opc,
static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
- TCGReg argreg, data_reg, data_reg2;
- uint8_t *start;
+ TCGReg argreg;
reloc_pc24(lb->label_ptr[0], (tcg_target_long)s->code_ptr);
@@ -1270,49 +1281,10 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
}
argreg = tcg_out_arg_imm32(s, argreg, lb->mem_index);
- tcg_out_call(s, (tcg_target_long) qemu_ld_helpers[lb->opc & 3]);
+ argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
- data_reg = lb->datalo_reg;
- data_reg2 = lb->datahi_reg;
-
- start = s->code_ptr;
- switch (lb->opc) {
- case 0 | 4:
- tcg_out_ext8s(s, COND_AL, data_reg, TCG_REG_R0);
- break;
- case 1 | 4:
- tcg_out_ext16s(s, COND_AL, data_reg, TCG_REG_R0);
- break;
- case 0:
- case 1:
- case 2:
- default:
- tcg_out_mov_reg(s, COND_AL, data_reg, TCG_REG_R0);
- break;
- case 3:
- tcg_out_mov_reg(s, COND_AL, data_reg, TCG_REG_R0);
- tcg_out_mov_reg(s, COND_AL, data_reg2, TCG_REG_R1);
- break;
- }
-
- /* For GETPC_LDST in exec-all.h, we architect exactly 2 insns between
- the call and the branch back to straight-line code. Note that the
- moves above could be elided by register allocation, nor do we know
- which code alternative we chose for extension. */
- switch (s->code_ptr - start) {
- case 0:
- tcg_out_nop(s);
- /* FALLTHRU */
- case 4:
- tcg_out_nop(s);
- /* FALLTHRU */
- case 8:
- break;
- default:
- abort();
- }
-
- tcg_out_goto(s, COND_AL, (tcg_target_long)lb->raddr);
+ /* Tail-call to the helper, which will return to the fast path. */
+ tcg_out_goto(s, COND_AL, (tcg_target_long) qemu_ld_helpers[lb->opc]);
}
static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
@@ -1321,8 +1293,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
reloc_pc24(lb->label_ptr[0], (tcg_target_long)s->code_ptr);
- argreg = TCG_REG_R0;
- argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
+ argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0);
if (TARGET_LONG_BITS == 64) {
argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
} else {
@@ -1347,13 +1318,10 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
}
argreg = tcg_out_arg_imm32(s, argreg, lb->mem_index);
- tcg_out_call(s, (tcg_target_long) qemu_st_helpers[lb->opc & 3]);
+ argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
- /* For GETPC_LDST in exec-all.h, we architect exactly 2 insns between
- the call and the branch back to straight-line code. */
- tcg_out_nop(s);
- tcg_out_nop(s);
- tcg_out_goto(s, COND_AL, (tcg_target_long)lb->raddr);
+ /* Tail-call to the helper, which will return to the fast path. */
+ tcg_out_goto(s, COND_AL, (tcg_target_long) qemu_st_helpers[lb->opc & 3]);
}
#endif /* SOFTMMU */
@@ -1383,57 +1351,58 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
tcg_out_tlb_read(s, addr_reg, addr_reg2, s_bits,
offsetof(CPUArchState, tlb_table[mem_index][0].addr_read));
- label_ptr = s->code_ptr;
- tcg_out_b_noaddr(s, COND_NE);
-
tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2,
offsetof(CPUTLBEntry, addend)
- offsetof(CPUTLBEntry, addr_read));
switch (opc) {
case 0:
- tcg_out_ld8_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
+ tcg_out_ld8_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
break;
case 0 | 4:
- tcg_out_ld8s_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
+ tcg_out_ld8s_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
break;
case 1:
- tcg_out_ld16u_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
+ tcg_out_ld16u_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
if (bswap) {
- tcg_out_bswap16(s, COND_AL, data_reg, data_reg);
+ tcg_out_bswap16(s, COND_EQ, data_reg, data_reg);
}
break;
case 1 | 4:
if (bswap) {
- tcg_out_ld16u_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
- tcg_out_bswap16s(s, COND_AL, data_reg, data_reg);
+ tcg_out_ld16u_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
+ tcg_out_bswap16s(s, COND_EQ, data_reg, data_reg);
} else {
- tcg_out_ld16s_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
+ tcg_out_ld16s_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
}
break;
case 2:
default:
- tcg_out_ld32_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
+ tcg_out_ld32_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
if (bswap) {
- tcg_out_bswap32(s, COND_AL, data_reg, data_reg);
+ tcg_out_bswap32(s, COND_EQ, data_reg, data_reg);
}
break;
case 3:
if (bswap) {
- tcg_out_ld32_rwb(s, COND_AL, data_reg2, TCG_REG_R1, addr_reg);
- tcg_out_ld32_12(s, COND_AL, data_reg, TCG_REG_R1, 4);
- tcg_out_bswap32(s, COND_AL, data_reg2, data_reg2);
- tcg_out_bswap32(s, COND_AL, data_reg, data_reg);
+ tcg_out_ld32_rwb(s, COND_EQ, data_reg2, TCG_REG_R1, addr_reg);
+ tcg_out_ld32_12(s, COND_EQ, data_reg, TCG_REG_R1, 4);
+ tcg_out_bswap32(s, COND_EQ, data_reg2, data_reg2);
+ tcg_out_bswap32(s, COND_EQ, data_reg, data_reg);
} else if (use_armv6_instructions
&& (data_reg & 1) == 0 && data_reg2 == data_reg + 1) {
- tcg_out_ldrd_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
+ tcg_out_ldrd_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
} else {
- tcg_out_ld32_rwb(s, COND_AL, data_reg, TCG_REG_R1, addr_reg);
- tcg_out_ld32_12(s, COND_AL, data_reg2, TCG_REG_R1, 4);
+ tcg_out_ld32_rwb(s, COND_EQ, data_reg, TCG_REG_R1, addr_reg);
+ tcg_out_ld32_12(s, COND_EQ, data_reg2, TCG_REG_R1, 4);
}
break;
}
+ /* The conditional call must come last, as we're going to return here. */
+ label_ptr = s->code_ptr;
+ tcg_out_bl_noaddr(s, COND_NE);
+
add_qemu_ldst_label(s, 1, opc, data_reg, data_reg2, addr_reg, addr_reg2,
mem_index, s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
@@ -1529,50 +1498,51 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
offsetof(CPUArchState,
tlb_table[mem_index][0].addr_write));
- label_ptr = s->code_ptr;
- tcg_out_b_noaddr(s, COND_NE);
-
tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2,
offsetof(CPUTLBEntry, addend)
- offsetof(CPUTLBEntry, addr_write));
switch (opc) {
case 0:
- tcg_out_st8_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
+ tcg_out_st8_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
break;
case 1:
if (bswap) {
- tcg_out_bswap16st(s, COND_AL, TCG_REG_R0, data_reg);
- tcg_out_st16_r(s, COND_AL, TCG_REG_R0, addr_reg, TCG_REG_R1);
+ tcg_out_bswap16st(s, COND_EQ, TCG_REG_R0, data_reg);
+ tcg_out_st16_r(s, COND_EQ, TCG_REG_R0, addr_reg, TCG_REG_R1);
} else {
- tcg_out_st16_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
+ tcg_out_st16_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
}
break;
case 2:
default:
if (bswap) {
- tcg_out_bswap32(s, COND_AL, TCG_REG_R0, data_reg);
- tcg_out_st32_r(s, COND_AL, TCG_REG_R0, addr_reg, TCG_REG_R1);
+ tcg_out_bswap32(s, COND_EQ, TCG_REG_R0, data_reg);
+ tcg_out_st32_r(s, COND_EQ, TCG_REG_R0, addr_reg, TCG_REG_R1);
} else {
- tcg_out_st32_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
+ tcg_out_st32_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
}
break;
case 3:
if (bswap) {
- tcg_out_bswap32(s, COND_AL, TCG_REG_R0, data_reg2);
- tcg_out_st32_rwb(s, COND_AL, TCG_REG_R0, TCG_REG_R1, addr_reg);
- tcg_out_bswap32(s, COND_AL, TCG_REG_R0, data_reg);
- tcg_out_st32_12(s, COND_AL, TCG_REG_R0, TCG_REG_R1, 4);
+ tcg_out_bswap32(s, COND_EQ, TCG_REG_R0, data_reg2);
+ tcg_out_st32_rwb(s, COND_EQ, TCG_REG_R0, TCG_REG_R1, addr_reg);
+ tcg_out_bswap32(s, COND_EQ, TCG_REG_R0, data_reg);
+ tcg_out_st32_12(s, COND_EQ, TCG_REG_R0, TCG_REG_R1, 4);
} else if (use_armv6_instructions
&& (data_reg & 1) == 0 && data_reg2 == data_reg + 1) {
- tcg_out_strd_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
+ tcg_out_strd_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
} else {
- tcg_out_st32_rwb(s, COND_AL, data_reg, TCG_REG_R1, addr_reg);
- tcg_out_st32_12(s, COND_AL, data_reg2, TCG_REG_R1, 4);
+ tcg_out_st32_rwb(s, COND_EQ, data_reg, TCG_REG_R1, addr_reg);
+ tcg_out_st32_12(s, COND_EQ, data_reg2, TCG_REG_R1, 4);
}
break;
}
+ /* The conditional call must come last, as we're going to return here. */
+ label_ptr = s->code_ptr;
+ tcg_out_bl_noaddr(s, COND_NE);
+
add_qemu_ldst_label(s, 0, opc, data_reg, data_reg2, addr_reg, addr_reg2,
mem_index, s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
@@ -2024,24 +1994,24 @@ static const TCGTargetOpDef arm_op_defs[] = {
{ INDEX_op_setcond2_i32, { "r", "r", "r", "rIN", "rIN" } },
#if TARGET_LONG_BITS == 32
- { INDEX_op_qemu_ld8u, { "r", "l" } },
- { INDEX_op_qemu_ld8s, { "r", "l" } },
- { INDEX_op_qemu_ld16u, { "r", "l" } },
- { INDEX_op_qemu_ld16s, { "r", "l" } },
- { INDEX_op_qemu_ld32, { "r", "l" } },
- { INDEX_op_qemu_ld64, { "L", "L", "l" } },
+ { INDEX_op_qemu_ld8u, { "a", "l" } },
+ { INDEX_op_qemu_ld8s, { "a", "l" } },
+ { INDEX_op_qemu_ld16u, { "a", "l" } },
+ { INDEX_op_qemu_ld16s, { "a", "l" } },
+ { INDEX_op_qemu_ld32, { "a", "l" } },
+ { INDEX_op_qemu_ld64, { "a", "b", "l" } },
{ INDEX_op_qemu_st8, { "s", "s" } },
{ INDEX_op_qemu_st16, { "s", "s" } },
{ INDEX_op_qemu_st32, { "s", "s" } },
{ INDEX_op_qemu_st64, { "s", "s", "s" } },
#else
- { INDEX_op_qemu_ld8u, { "r", "l", "l" } },
- { INDEX_op_qemu_ld8s, { "r", "l", "l" } },
- { INDEX_op_qemu_ld16u, { "r", "l", "l" } },
- { INDEX_op_qemu_ld16s, { "r", "l", "l" } },
- { INDEX_op_qemu_ld32, { "r", "l", "l" } },
- { INDEX_op_qemu_ld64, { "L", "L", "l", "l" } },
+ { INDEX_op_qemu_ld8u, { "a", "l", "l" } },
+ { INDEX_op_qemu_ld8s, { "a", "l", "l" } },
+ { INDEX_op_qemu_ld16u, { "a", "l", "l" } },
+ { INDEX_op_qemu_ld16s, { "a", "l", "l" } },
+ { INDEX_op_qemu_ld32, { "a", "l", "l" } },
+ { INDEX_op_qemu_ld64, { "a", "b", "l", "l" } },
{ INDEX_op_qemu_st8, { "s", "s", "s" } },
{ INDEX_op_qemu_st16, { "s", "s", "s" } },
--
1.8.1.4
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [Qemu-devel] [PATCH 3/5] tcg-arm: Use strd for tcg_out_arg_reg64
2013-08-28 22:33 [Qemu-devel] [PATCH 0/5] tcg-arm ldst improvements Richard Henderson
2013-08-28 22:33 ` [Qemu-devel] [PATCH 1/5] tcg-arm: Use ldrd/strd for appropriate qemu_ld/st64 Richard Henderson
2013-08-28 22:33 ` [Qemu-devel] [PATCH 2/5] tcg-arm: Rearrange slow-path qemu_ld/st Richard Henderson
@ 2013-08-28 22:33 ` Richard Henderson
2013-08-28 22:33 ` [Qemu-devel] [PATCH 4/5] tcg-arm: Use QEMU_BUILD_BUG_ON to verify constraints on tlb Richard Henderson
2013-08-28 22:33 ` [Qemu-devel] [PATCH 5/5] tcg-arm: Better pipeline for softmmu tlb access Richard Henderson
4 siblings, 0 replies; 6+ messages in thread
From: Richard Henderson @ 2013-08-28 22:33 UTC (permalink / raw)
To: qemu-devel; +Cc: aurelien
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/arm/tcg-target.c | 13 ++++++++++---
1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index 5f71f24..a0a7539 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -1156,9 +1156,16 @@ static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
if (argreg & 1) {
argreg++;
}
- argreg = tcg_out_arg_reg32(s, argreg, arglo);
- argreg = tcg_out_arg_reg32(s, argreg, arghi);
- return argreg;
+ if (use_armv6_instructions && argreg >= 4
+ && (arglo & 1) == 0 && arghi == arglo + 1) {
+ tcg_out_strd_8(s, COND_AL, arglo,
+ TCG_REG_CALL_STACK, (argreg - 4) * 4);
+ return argreg + 2;
+ } else {
+ argreg = tcg_out_arg_reg32(s, argreg, arglo);
+ argreg = tcg_out_arg_reg32(s, argreg, arghi);
+ return argreg;
+ }
}
#define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
--
1.8.1.4
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [Qemu-devel] [PATCH 4/5] tcg-arm: Use QEMU_BUILD_BUG_ON to verify constraints on tlb
2013-08-28 22:33 [Qemu-devel] [PATCH 0/5] tcg-arm ldst improvements Richard Henderson
` (2 preceding siblings ...)
2013-08-28 22:33 ` [Qemu-devel] [PATCH 3/5] tcg-arm: Use strd for tcg_out_arg_reg64 Richard Henderson
@ 2013-08-28 22:33 ` Richard Henderson
2013-08-28 22:33 ` [Qemu-devel] [PATCH 5/5] tcg-arm: Better pipeline for softmmu tlb access Richard Henderson
4 siblings, 0 replies; 6+ messages in thread
From: Richard Henderson @ 2013-08-28 22:33 UTC (permalink / raw)
To: qemu-devel; +Cc: aurelien
One of the two constraints we already checked via #if, but
the tlb offset distance was only checked at runtime.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/arm/tcg-target.c | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)
diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index a0a7539..f1e547f 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -1170,6 +1170,15 @@ static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
#define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
+/* We're expecting to use an 8-bit immediate and to mask. */
+QEMU_BUILD_BUG_ON(CPU_TLB_BITS > 8);
+
+/* We're expecting to use an 8-bit immediate add + 8-bit ldrd offset.
+ Using the offset of the second entry in the last tlb table ensures
+ that we can index all of the elements of the first entry. */
+QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
+ > 0xffff);
+
/* Load and compare a TLB entry, leaving the flags set. Leaves R2 pointing
to the tlb entry. Clobbers R1 and TMP. */
@@ -1197,14 +1206,10 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
* ldr r0, [r2, r0]! (3)
* cmp r0, tmp (4)
*/
-# if CPU_TLB_BITS > 8
-# error
-# endif
tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP,
0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
- /* We assume that the offset is contained within 16 bits. */
- assert((tlb_offset & ~0xffff) == 0);
+ /* We checked that the offset is contained within 16 bits above. */
if (tlb_offset > 0xff) {
tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
(24 << 7) | (tlb_offset >> 8));
--
1.8.1.4
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [Qemu-devel] [PATCH 5/5] tcg-arm: Better pipeline for softmmu tlb access
2013-08-28 22:33 [Qemu-devel] [PATCH 0/5] tcg-arm ldst improvements Richard Henderson
` (3 preceding siblings ...)
2013-08-28 22:33 ` [Qemu-devel] [PATCH 4/5] tcg-arm: Use QEMU_BUILD_BUG_ON to verify constraints on tlb Richard Henderson
@ 2013-08-28 22:33 ` Richard Henderson
4 siblings, 0 replies; 6+ messages in thread
From: Richard Henderson @ 2013-08-28 22:33 UTC (permalink / raw)
To: qemu-devel; +Cc: aurelien
Moves the load for the tlb addend earlier, to better load latency.
Avoids the writeback from the comparator load, since we know how
to adjust the offset between the two loads.
: e2862c03 add r2, r6, #768 ; 0x300
: e20c00ff and r0, ip, #255 ; 0xff
: e0822280 add r2, r2, r0, lsl #5
-: e1e209d8 ldrd r0, [r2, #152]!
+: e1c209d8 ldrd r0, [r2, #152]
: e31b0007 tst fp, #7 ; 0x7
+: e59220a8 ldr r2, [r2, #168]
: 0150068c cmpeq r0, ip, lsl #13
: 01510007 cmpeq r1, r7
-: e5921010 ldr r1, [r2, #16]
-: 018b40f1 strdeq r4, [fp, r1]
+: 018b40f2 strdeq r4, [fp, r2]
: 1b0000cb blne 0x75e7e6e4
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/arm/tcg-target.c | 97 +++++++++++++++++++++++-----------------------------
1 file changed, 42 insertions(+), 55 deletions(-)
diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index f1e547f..6d03d6b 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -1179,13 +1179,18 @@ QEMU_BUILD_BUG_ON(CPU_TLB_BITS > 8);
QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
> 0xffff);
-/* Load and compare a TLB entry, leaving the flags set. Leaves R2 pointing
- to the tlb entry. Clobbers R1 and TMP. */
+/* Load and compare a TLB entry, leaving the flags set. Leaves R2
+ containing the tlb addend. Clobbers R0, R1 and TMP. */
static void tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
- int s_bits, int tlb_offset)
+ int s_bits, int mem_index, bool is_load)
{
TCGReg base = TCG_AREG0;
+ int tlb_offset =
+ (is_load
+ ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
+ : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
+ int add_offset = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
/* Should generate something like the following:
* pre-v7:
@@ -1193,18 +1198,10 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
* add r2, env, #off & 0xff00
* and r0, tmp, #(CPU_TLB_SIZE - 1) (2)
* add r2, r2, r0, lsl #CPU_TLB_ENTRY_BITS (3)
- * ldr r0, [r2, #off & 0xff]! (4)
+ * ldr r0, [r2, #off & 0xff] (4)
* tst addr_reg, #s_mask
- * cmpeq r0, tmp, lsl #TARGET_PAGE_BITS (5)
- *
- * v7 (not implemented yet):
- * ubfx r2, addr_reg, #TARGET_PAGE_BITS, #CPU_TLB_BITS (1)
- * movw tmp, #~TARGET_PAGE_MASK & ~s_mask
- * movw r0, #off
- * add r2, env, r2, lsl #CPU_TLB_ENTRY_BITS (2)
- * bic tmp, addr_reg, tmp
- * ldr r0, [r2, r0]! (3)
- * cmp r0, tmp (4)
+ * ldr r2, [r2, #addoff] (5)
+ * cmpeq r0, tmp, lsl #TARGET_PAGE_BITS
*/
tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP,
0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
@@ -1213,7 +1210,6 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
if (tlb_offset > 0xff) {
tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
(24 << 7) | (tlb_offset >> 8));
- tlb_offset &= 0xff;
base = TCG_REG_R2;
}
@@ -1226,14 +1222,12 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
but due to how the pointer needs setting up, ldm isn't useful.
Base arm5 doesn't have ldrd, but armv5te does. */
if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
- tcg_out_memop_8(s, COND_AL, INSN_LDRD_IMM, TCG_REG_R0,
- TCG_REG_R2, tlb_offset, 1, 1);
+ tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_REG_R2, tlb_offset & 0xff);
} else {
- tcg_out_memop_12(s, COND_AL, INSN_LDR_IMM, TCG_REG_R0,
- TCG_REG_R2, tlb_offset, 1, 1);
+ tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_R2, tlb_offset & 0xff);
if (TARGET_LONG_BITS == 64) {
- tcg_out_memop_12(s, COND_AL, INSN_LDR_IMM, TCG_REG_R1,
- TCG_REG_R2, 4, 1, 0);
+ tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2,
+ (tlb_offset & 0xff) + 4);
}
}
@@ -1243,6 +1237,10 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
0, addrlo, (1 << s_bits) - 1);
}
+ /* Load the tlb addend. */
+ tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R2,
+ add_offset - (tlb_offset & 0xff00));
+
tcg_out_dat_reg(s, (s_bits ? COND_EQ : COND_AL), ARITH_CMP, 0,
TCG_REG_R0, TCG_REG_TMP, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
@@ -1360,53 +1358,48 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
mem_index = *args;
s_bits = opc & 3;
- tcg_out_tlb_read(s, addr_reg, addr_reg2, s_bits,
- offsetof(CPUArchState, tlb_table[mem_index][0].addr_read));
-
- tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2,
- offsetof(CPUTLBEntry, addend)
- - offsetof(CPUTLBEntry, addr_read));
+ tcg_out_tlb_read(s, addr_reg, addr_reg2, s_bits, mem_index, 1);
switch (opc) {
case 0:
- tcg_out_ld8_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
+ tcg_out_ld8_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R2);
break;
case 0 | 4:
- tcg_out_ld8s_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
+ tcg_out_ld8s_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R2);
break;
case 1:
- tcg_out_ld16u_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
+ tcg_out_ld16u_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R2);
if (bswap) {
tcg_out_bswap16(s, COND_EQ, data_reg, data_reg);
}
break;
case 1 | 4:
if (bswap) {
- tcg_out_ld16u_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
+ tcg_out_ld16u_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R2);
tcg_out_bswap16s(s, COND_EQ, data_reg, data_reg);
} else {
- tcg_out_ld16s_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
+ tcg_out_ld16s_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R2);
}
break;
case 2:
default:
- tcg_out_ld32_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
+ tcg_out_ld32_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R2);
if (bswap) {
tcg_out_bswap32(s, COND_EQ, data_reg, data_reg);
}
break;
case 3:
if (bswap) {
- tcg_out_ld32_rwb(s, COND_EQ, data_reg2, TCG_REG_R1, addr_reg);
- tcg_out_ld32_12(s, COND_EQ, data_reg, TCG_REG_R1, 4);
+ tcg_out_ld32_rwb(s, COND_EQ, data_reg2, TCG_REG_R2, addr_reg);
+ tcg_out_ld32_12(s, COND_EQ, data_reg, TCG_REG_R2, 4);
tcg_out_bswap32(s, COND_EQ, data_reg2, data_reg2);
tcg_out_bswap32(s, COND_EQ, data_reg, data_reg);
} else if (use_armv6_instructions
&& (data_reg & 1) == 0 && data_reg2 == data_reg + 1) {
- tcg_out_ldrd_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
+ tcg_out_ldrd_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R2);
} else {
- tcg_out_ld32_rwb(s, COND_EQ, data_reg, TCG_REG_R1, addr_reg);
- tcg_out_ld32_12(s, COND_EQ, data_reg2, TCG_REG_R1, 4);
+ tcg_out_ld32_rwb(s, COND_EQ, data_reg, TCG_REG_R2, addr_reg);
+ tcg_out_ld32_12(s, COND_EQ, data_reg2, TCG_REG_R2, 4);
}
break;
}
@@ -1506,47 +1499,41 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
mem_index = *args;
s_bits = opc & 3;
- tcg_out_tlb_read(s, addr_reg, addr_reg2, s_bits,
- offsetof(CPUArchState,
- tlb_table[mem_index][0].addr_write));
-
- tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2,
- offsetof(CPUTLBEntry, addend)
- - offsetof(CPUTLBEntry, addr_write));
+ tcg_out_tlb_read(s, addr_reg, addr_reg2, s_bits, mem_index, 0);
switch (opc) {
case 0:
- tcg_out_st8_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
+ tcg_out_st8_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R2);
break;
case 1:
if (bswap) {
tcg_out_bswap16st(s, COND_EQ, TCG_REG_R0, data_reg);
- tcg_out_st16_r(s, COND_EQ, TCG_REG_R0, addr_reg, TCG_REG_R1);
+ tcg_out_st16_r(s, COND_EQ, TCG_REG_R0, addr_reg, TCG_REG_R2);
} else {
- tcg_out_st16_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
+ tcg_out_st16_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R2);
}
break;
case 2:
default:
if (bswap) {
tcg_out_bswap32(s, COND_EQ, TCG_REG_R0, data_reg);
- tcg_out_st32_r(s, COND_EQ, TCG_REG_R0, addr_reg, TCG_REG_R1);
+ tcg_out_st32_r(s, COND_EQ, TCG_REG_R0, addr_reg, TCG_REG_R2);
} else {
- tcg_out_st32_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
+ tcg_out_st32_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R2);
}
break;
case 3:
if (bswap) {
tcg_out_bswap32(s, COND_EQ, TCG_REG_R0, data_reg2);
- tcg_out_st32_rwb(s, COND_EQ, TCG_REG_R0, TCG_REG_R1, addr_reg);
+ tcg_out_st32_rwb(s, COND_EQ, TCG_REG_R0, TCG_REG_R2, addr_reg);
tcg_out_bswap32(s, COND_EQ, TCG_REG_R0, data_reg);
- tcg_out_st32_12(s, COND_EQ, TCG_REG_R0, TCG_REG_R1, 4);
+ tcg_out_st32_12(s, COND_EQ, TCG_REG_R0, TCG_REG_R2, 4);
} else if (use_armv6_instructions
&& (data_reg & 1) == 0 && data_reg2 == data_reg + 1) {
- tcg_out_strd_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
+ tcg_out_strd_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R2);
} else {
- tcg_out_st32_rwb(s, COND_EQ, data_reg, TCG_REG_R1, addr_reg);
- tcg_out_st32_12(s, COND_EQ, data_reg2, TCG_REG_R1, 4);
+ tcg_out_st32_rwb(s, COND_EQ, data_reg, TCG_REG_R2, addr_reg);
+ tcg_out_st32_12(s, COND_EQ, data_reg2, TCG_REG_R2, 4);
}
break;
}
--
1.8.1.4
^ permalink raw reply related [flat|nested] 6+ messages in thread