* [Qemu-devel] [PATCH 1/8] target-sh4: use bit number for SR constants
2013-12-21 16:58 [Qemu-devel] [PATCH 0/8] target-sh4: optimizations and cleanups Aurelien Jarno
@ 2013-12-21 16:58 ` Aurelien Jarno
2013-12-21 16:58 ` [Qemu-devel] [PATCH 2/8] target-sh4: Split out T from SR Aurelien Jarno
` (6 subsequent siblings)
7 siblings, 0 replies; 11+ messages in thread
From: Aurelien Jarno @ 2013-12-21 16:58 UTC (permalink / raw)
To: qemu-devel; +Cc: Aurelien Jarno
Use the bit number for SR constants instead of using a bit mask. This
make possible to also use the constants for shifts.
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
target-sh4/cpu.c | 3 +-
target-sh4/cpu.h | 30 +++++++++----------
target-sh4/gdbstub.c | 4 +--
target-sh4/helper.c | 27 ++++++++---------
target-sh4/op_helper.c | 26 ++++++++---------
target-sh4/translate.c | 75 +++++++++++++++++++++++++-----------------------
6 files changed, 85 insertions(+), 80 deletions(-)
diff --git a/target-sh4/cpu.c b/target-sh4/cpu.c
index c23294d..1d80f47 100644
--- a/target-sh4/cpu.c
+++ b/target-sh4/cpu.c
@@ -56,7 +56,8 @@ static void superh_cpu_reset(CPUState *s)
env->fpscr = FPSCR_PR; /* value for userspace according to the kernel */
set_float_rounding_mode(float_round_nearest_even, &env->fp_status); /* ?! */
#else
- env->sr = SR_MD | SR_RB | SR_BL | SR_I3 | SR_I2 | SR_I1 | SR_I0;
+ env->sr = (1u << SR_MD) | (1u << SR_RB) | (1u << SR_BL) |
+ (1u << SR_I3) | (1u << SR_I2) | (1u << SR_I1) | (1u << SR_I0);
env->fpscr = FPSCR_DN | FPSCR_RM_ZERO; /* CPU reset value according to SH4 manual */
set_float_rounding_mode(float_round_to_zero, &env->fp_status);
set_flush_to_zero(1, &env->fp_status);
diff --git a/target-sh4/cpu.h b/target-sh4/cpu.h
index c181dda..c8ba70f 100644
--- a/target-sh4/cpu.h
+++ b/target-sh4/cpu.h
@@ -48,18 +48,18 @@
#define TARGET_PHYS_ADDR_SPACE_BITS 32
#define TARGET_VIRT_ADDR_SPACE_BITS 32
-#define SR_MD (1 << 30)
-#define SR_RB (1 << 29)
-#define SR_BL (1 << 28)
-#define SR_FD (1 << 15)
-#define SR_M (1 << 9)
-#define SR_Q (1 << 8)
-#define SR_I3 (1 << 7)
-#define SR_I2 (1 << 6)
-#define SR_I1 (1 << 5)
-#define SR_I0 (1 << 4)
-#define SR_S (1 << 1)
-#define SR_T (1 << 0)
+#define SR_MD 30
+#define SR_RB 29
+#define SR_BL 28
+#define SR_FD 15
+#define SR_M 9
+#define SR_Q 8
+#define SR_I3 7
+#define SR_I2 6
+#define SR_I1 5
+#define SR_I0 4
+#define SR_S 1
+#define SR_T 0
#define FPSCR_MASK (0x003fffff)
#define FPSCR_FR (1 << 21)
@@ -242,7 +242,7 @@ static inline CPUSH4State *cpu_init(const char *cpu_model)
#define MMU_USER_IDX 1
static inline int cpu_mmu_index (CPUSH4State *env)
{
- return (env->sr & SR_MD) == 0 ? 1 : 0;
+ return (env->sr & (1u << SR_MD)) == 0 ? 1 : 0;
}
#include "exec/cpu-all.h"
@@ -347,8 +347,8 @@ static inline void cpu_get_tb_cpu_state(CPUSH4State *env, target_ulong *pc,
*flags = (env->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL
| DELAY_SLOT_TRUE | DELAY_SLOT_CLEARME)) /* Bits 0- 3 */
| (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR)) /* Bits 19-21 */
- | (env->sr & (SR_MD | SR_RB)) /* Bits 29-30 */
- | (env->sr & SR_FD) /* Bit 15 */
+ | (env->sr & ((1u << SR_MD) | (1u << SR_RB))) /* Bits 29-30 */
+ | (env->sr & (1u << SR_FD)) /* Bit 15 */
| (env->movcal_backup ? TB_FLAG_PENDING_MOVCA : 0); /* Bit 4 */
}
diff --git a/target-sh4/gdbstub.c b/target-sh4/gdbstub.c
index df4fa2a..05ba728 100644
--- a/target-sh4/gdbstub.c
+++ b/target-sh4/gdbstub.c
@@ -31,7 +31,7 @@ int superh_cpu_gdb_read_register(CPUState *cs, uint8_t *mem_buf, int n)
switch (n) {
case 0 ... 7:
- if ((env->sr & (SR_MD | SR_RB)) == (SR_MD | SR_RB)) {
+ if ((env->sr & (1u << SR_MD)) && (env->sr & (1u << SR_RB))) {
return gdb_get_regl(mem_buf, env->gregs[n + 16]);
} else {
return gdb_get_regl(mem_buf, env->gregs[n]);
@@ -83,7 +83,7 @@ int superh_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
switch (n) {
case 0 ... 7:
- if ((env->sr & (SR_MD | SR_RB)) == (SR_MD | SR_RB)) {
+ if ((env->sr & (1u << SR_MD)) && (env->sr & (1u << SR_RB))) {
env->gregs[n + 16] = ldl_p(mem_buf);
} else {
env->gregs[n] = ldl_p(mem_buf);
diff --git a/target-sh4/helper.c b/target-sh4/helper.c
index 9ac2825..07f8e91 100644
--- a/target-sh4/helper.c
+++ b/target-sh4/helper.c
@@ -93,7 +93,7 @@ void superh_cpu_do_interrupt(CPUState *cs)
do_exp = env->exception_index != -1;
do_irq = do_irq && (env->exception_index == -1);
- if (env->sr & SR_BL) {
+ if (env->sr & (1u << SR_BL)) {
if (do_exp && env->exception_index != 0x1e0) {
env->exception_index = 0x000; /* masked exception -> reset */
}
@@ -165,7 +165,7 @@ void superh_cpu_do_interrupt(CPUState *cs)
env->ssr = env->sr;
env->spc = env->pc;
env->sgr = env->gregs[15];
- env->sr |= SR_BL | SR_MD | SR_RB;
+ env->sr |= (1u << SR_BL) | (1u << SR_MD) | (1u << SR_RB);
if (env->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) {
/* Branch instruction should be executed again before delay slot. */
@@ -182,7 +182,7 @@ void superh_cpu_do_interrupt(CPUState *cs)
case 0x000:
case 0x020:
case 0x140:
- env->sr &= ~SR_FD;
+ env->sr &= ~(1u << SR_FD);
env->sr |= 0xf << 4; /* IMASK */
env->pc = 0xa0000000;
break;
@@ -349,23 +349,24 @@ static int get_mmu_address(CPUSH4State * env, target_ulong * physical,
int use_asid, n;
tlb_t *matching = NULL;
- use_asid = (env->mmucr & MMUCR_SV) == 0 || (env->sr & SR_MD) == 0;
+ use_asid = !(env->mmucr & MMUCR_SV) || !(env->sr & (1u << SR_MD));
if (rw == 2) {
n = find_itlb_entry(env, address, use_asid);
if (n >= 0) {
matching = &env->itlb[n];
- if (!(env->sr & SR_MD) && !(matching->pr & 2))
+ if (!(env->sr & (1u << SR_MD)) && !(matching->pr & 2)) {
n = MMU_ITLB_VIOLATION;
- else
+ } else {
*prot = PAGE_EXEC;
+ }
} else {
n = find_utlb_entry(env, address, use_asid);
if (n >= 0) {
n = copy_utlb_entry_itlb(env, n);
matching = &env->itlb[n];
- if (!(env->sr & SR_MD) && !(matching->pr & 2)) {
- n = MMU_ITLB_VIOLATION;
+ if (!(env->sr & (1u << SR_MD)) && !(matching->pr & 2)) {
+ n = MMU_ITLB_VIOLATION;
} else {
*prot = PAGE_READ | PAGE_EXEC;
if ((matching->pr & 1) && matching->d) {
@@ -382,7 +383,7 @@ static int get_mmu_address(CPUSH4State * env, target_ulong * physical,
n = find_utlb_entry(env, address, use_asid);
if (n >= 0) {
matching = &env->utlb[n];
- if (!(env->sr & SR_MD) && !(matching->pr & 2)) {
+ if (!(env->sr & (1u << SR_MD)) && !(matching->pr & 2)) {
n = (rw == 1) ? MMU_DTLB_VIOLATION_WRITE :
MMU_DTLB_VIOLATION_READ;
} else if ((rw == 1) && !(matching->pr & 1)) {
@@ -415,7 +416,7 @@ static int get_physical_address(CPUSH4State * env, target_ulong * physical,
/* P1, P2 and P4 areas do not use translation */
if ((address >= 0x80000000 && address < 0xc0000000) ||
address >= 0xe0000000) {
- if (!(env->sr & SR_MD)
+ if (!(env->sr & (1u << SR_MD))
&& (address < 0xe0000000 || address >= 0xe4000000)) {
/* Unauthorized access in user mode (only store queues are available) */
fprintf(stderr, "Unauthorized access\n");
@@ -681,7 +682,7 @@ void cpu_sh4_write_mmaped_utlb_addr(CPUSH4State *s, hwaddr addr,
uint8_t d = (uint8_t)((mem_value & 0x00000200) >> 9);
uint8_t v = (uint8_t)((mem_value & 0x00000100) >> 8);
uint8_t asid = (uint8_t)(mem_value & 0x000000ff);
- int use_asid = (s->mmucr & MMUCR_SV) == 0 || (s->sr & SR_MD) == 0;
+ int use_asid = !(s->mmucr & MMUCR_SV) || !(s->sr & (1u << SR_MD));
if (associate) {
int i;
@@ -807,10 +808,10 @@ void cpu_sh4_write_mmaped_utlb_data(CPUSH4State *s, hwaddr addr,
int cpu_sh4_is_cached(CPUSH4State * env, target_ulong addr)
{
int n;
- int use_asid = (env->mmucr & MMUCR_SV) == 0 || (env->sr & SR_MD) == 0;
+ int use_asid = !(env->mmucr & MMUCR_SV) || !(env->sr & (1u << SR_MD));
/* check area */
- if (env->sr & SR_MD) {
+ if (env->sr & (1u << SR_MD)) {
/* For previledged mode, P2 and P4 area is not cachable. */
if ((0xA0000000 <= addr && addr < 0xC0000000) || 0xE0000000 <= addr)
return 0;
diff --git a/target-sh4/op_helper.c b/target-sh4/op_helper.c
index e955e81..6d56df2 100644
--- a/target-sh4/op_helper.c
+++ b/target-sh4/op_helper.c
@@ -166,15 +166,15 @@ void helper_ocbi(CPUSH4State *env, uint32_t address)
}
}
-#define T (env->sr & SR_T)
-#define Q (env->sr & SR_Q ? 1 : 0)
-#define M (env->sr & SR_M ? 1 : 0)
-#define SETT env->sr |= SR_T
-#define CLRT env->sr &= ~SR_T
-#define SETQ env->sr |= SR_Q
-#define CLRQ env->sr &= ~SR_Q
-#define SETM env->sr |= SR_M
-#define CLRM env->sr &= ~SR_M
+#define T (env->sr & (1u << SR_T))
+#define Q (env->sr & (1u << SR_Q) ? 1 : 0)
+#define M (env->sr & (1u << SR_M) ? 1 : 0)
+#define SETT (env->sr |= (1u << SR_T))
+#define CLRT (env->sr &= ~(1u << SR_T))
+#define SETQ (env->sr |= (1u << SR_Q))
+#define CLRQ (env->sr &= ~(1u << SR_Q))
+#define SETM (env->sr |= (1u << SR_M))
+#define CLRM (env->sr &= ~(1u << SR_M))
uint32_t helper_div1(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
{
@@ -292,7 +292,7 @@ void helper_macl(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
res += (int64_t) (int32_t) arg0 *(int64_t) (int32_t) arg1;
env->mach = (res >> 32) & 0xffffffff;
env->macl = res & 0xffffffff;
- if (env->sr & SR_S) {
+ if (env->sr & (1u << SR_S)) {
if (res < 0)
env->mach |= 0xffff0000;
else
@@ -308,7 +308,7 @@ void helper_macw(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
res += (int64_t) (int16_t) arg0 *(int64_t) (int16_t) arg1;
env->mach = (res >> 32) & 0xffffffff;
env->macl = res & 0xffffffff;
- if (env->sr & SR_S) {
+ if (env->sr & (1u << SR_S)) {
if (res < -0x80000000) {
env->mach = 1;
env->macl = 0x80000000;
@@ -321,12 +321,12 @@ void helper_macw(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
static inline void set_t(CPUSH4State *env)
{
- env->sr |= SR_T;
+ env->sr |= (1u << SR_T);
}
static inline void clr_t(CPUSH4State *env)
{
- env->sr &= ~SR_T;
+ env->sr &= ~(1u << SR_T);
}
void helper_ld_fpscr(CPUSH4State *env, uint32_t val)
diff --git a/target-sh4/translate.c b/target-sh4/translate.c
index 2272eb0..74425c3 100644
--- a/target-sh4/translate.c
+++ b/target-sh4/translate.c
@@ -44,7 +44,7 @@ typedef struct DisasContext {
#if defined(CONFIG_USER_ONLY)
#define IS_USER(ctx) 1
#else
-#define IS_USER(ctx) (!(ctx->flags & SR_MD))
+#define IS_USER(ctx) (!(ctx->flags & (1u << SR_MD)))
#endif
enum {
@@ -211,7 +211,7 @@ static inline void gen_branch_slot(uint32_t delayed_pc, int t)
int label = gen_new_label();
tcg_gen_movi_i32(cpu_delayed_pc, delayed_pc);
sr = tcg_temp_new();
- tcg_gen_andi_i32(sr, cpu_sr, SR_T);
+ tcg_gen_andi_i32(sr, cpu_sr, (1u << SR_T));
tcg_gen_brcondi_i32(t ? TCG_COND_EQ:TCG_COND_NE, sr, 0, label);
tcg_gen_ori_i32(cpu_flags, cpu_flags, DELAY_SLOT_TRUE);
gen_set_label(label);
@@ -226,7 +226,7 @@ static void gen_conditional_jump(DisasContext * ctx,
l1 = gen_new_label();
sr = tcg_temp_new();
- tcg_gen_andi_i32(sr, cpu_sr, SR_T);
+ tcg_gen_andi_i32(sr, cpu_sr, (1u << SR_T));
tcg_gen_brcondi_i32(TCG_COND_NE, sr, 0, l1);
gen_goto_tb(ctx, 0, ifnott);
gen_set_label(l1);
@@ -255,7 +255,7 @@ static inline void gen_cmp(int cond, TCGv t0, TCGv t1)
t = tcg_temp_new();
tcg_gen_setcond_i32(cond, t, t1, t0);
- tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T);
+ tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T));
tcg_gen_or_i32(cpu_sr, cpu_sr, t);
tcg_temp_free(t);
@@ -267,7 +267,7 @@ static inline void gen_cmp_imm(int cond, TCGv t0, int32_t imm)
t = tcg_temp_new();
tcg_gen_setcondi_i32(cond, t, t0, imm);
- tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T);
+ tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T));
tcg_gen_or_i32(cpu_sr, cpu_sr, t);
tcg_temp_free(t);
@@ -323,10 +323,12 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg)
#define B11_8 ((ctx->opcode >> 8) & 0xf)
#define B15_12 ((ctx->opcode >> 12) & 0xf)
-#define REG(x) ((x) < 8 && (ctx->flags & (SR_MD | SR_RB)) == (SR_MD | SR_RB) \
+#define REG(x) ((x) < 8 && (ctx->flags & (1u << SR_MD))\
+ && (ctx->flags & (1u << SR_RB))\
? (cpu_gregs[x + 16]) : (cpu_gregs[x]))
-#define ALTREG(x) ((x) < 8 && (ctx->flags & (SR_MD | SR_RB)) != (SR_MD | SR_RB)\
+#define ALTREG(x) ((x) < 8 && (!(ctx->flags & (1u << SR_MD))\
+ || !(ctx->flags & (1u << SR_RB)))\
? (cpu_gregs[x + 16]) : (cpu_gregs[x]))
#define FREG(x) (ctx->flags & FPSCR_FR ? (x) ^ 0x10 : (x))
@@ -356,7 +358,7 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg)
}
#define CHECK_FPU_ENABLED \
- if (ctx->flags & SR_FD) { \
+ if (ctx->flags & (1u << SR_FD)) { \
tcg_gen_movi_i32(cpu_pc, ctx->pc); \
if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { \
gen_helper_raise_slot_fpu_disable(cpu_env); \
@@ -406,7 +408,8 @@ static void _decode_opc(DisasContext * ctx)
switch (ctx->opcode) {
case 0x0019: /* div0u */
- tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(SR_M | SR_Q | SR_T));
+ tcg_gen_andi_i32(cpu_sr, cpu_sr,
+ ~((1u << SR_M) | (1u << SR_Q) | (1u << SR_T)));
return;
case 0x000b: /* rts */
CHECK_NOT_DELAY_SLOT
@@ -419,10 +422,10 @@ static void _decode_opc(DisasContext * ctx)
tcg_gen_movi_i32(cpu_macl, 0);
return;
case 0x0048: /* clrs */
- tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_S);
+ tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_S));
return;
case 0x0008: /* clrt */
- tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T);
+ tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T));
return;
case 0x0038: /* ldtlb */
CHECK_PRIVILEGED
@@ -437,10 +440,10 @@ static void _decode_opc(DisasContext * ctx)
ctx->delayed_pc = (uint32_t) - 1;
return;
case 0x0058: /* sets */
- tcg_gen_ori_i32(cpu_sr, cpu_sr, SR_S);
+ tcg_gen_ori_i32(cpu_sr, cpu_sr, (1u << SR_S));
return;
case 0x0018: /* sett */
- tcg_gen_ori_i32(cpu_sr, cpu_sr, SR_T);
+ tcg_gen_ori_i32(cpu_sr, cpu_sr, (1u << SR_T));
return;
case 0xfbfd: /* frchg */
tcg_gen_xori_i32(cpu_fpscr, cpu_fpscr, FPSCR_FR);
@@ -657,7 +660,7 @@ static void _decode_opc(DisasContext * ctx)
{
TCGv t0, t1, t2;
t0 = tcg_temp_new();
- tcg_gen_andi_i32(t0, cpu_sr, SR_T);
+ tcg_gen_andi_i32(t0, cpu_sr, (1u << SR_T));
t1 = tcg_temp_new();
tcg_gen_add_i32(t1, REG(B7_4), REG(B11_8));
tcg_gen_add_i32(t0, t0, t1);
@@ -666,7 +669,7 @@ static void _decode_opc(DisasContext * ctx)
tcg_gen_setcond_i32(TCG_COND_GTU, t1, t1, t0);
tcg_gen_or_i32(t1, t1, t2);
tcg_temp_free(t2);
- tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T);
+ tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T));
tcg_gen_or_i32(cpu_sr, cpu_sr, t1);
tcg_temp_free(t1);
tcg_gen_mov_i32(REG(B11_8), t0);
@@ -685,7 +688,7 @@ static void _decode_opc(DisasContext * ctx)
tcg_gen_andc_i32(t1, t1, t2);
tcg_temp_free(t2);
tcg_gen_shri_i32(t1, t1, 31);
- tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T);
+ tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T));
tcg_gen_or_i32(cpu_sr, cpu_sr, t1);
tcg_temp_free(t1);
tcg_gen_mov_i32(REG(B7_4), t0);
@@ -714,7 +717,7 @@ static void _decode_opc(DisasContext * ctx)
{
TCGv cmp1 = tcg_temp_new();
TCGv cmp2 = tcg_temp_new();
- tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T);
+ tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T));
tcg_gen_xor_i32(cmp1, REG(B7_4), REG(B11_8));
tcg_gen_andi_i32(cmp2, cmp1, 0xff000000);
tcg_gen_setcondi_i32(TCG_COND_EQ, cmp2, cmp2, 0);
@@ -734,11 +737,11 @@ static void _decode_opc(DisasContext * ctx)
return;
case 0x2007: /* div0s Rm,Rn */
{
- gen_copy_bit_i32(cpu_sr, 8, REG(B11_8), 31); /* SR_Q */
- gen_copy_bit_i32(cpu_sr, 9, REG(B7_4), 31); /* SR_M */
+ gen_copy_bit_i32(cpu_sr, SR_Q, REG(B11_8), 31); /* SR_Q */
+ gen_copy_bit_i32(cpu_sr, SR_M, REG(B7_4), 31); /* SR_M */
TCGv val = tcg_temp_new();
tcg_gen_xor_i32(val, REG(B7_4), REG(B11_8));
- gen_copy_bit_i32(cpu_sr, 0, val, 31); /* SR_T */
+ gen_copy_bit_i32(cpu_sr, SR_T, val, 31); /* SR_T */
tcg_temp_free(val);
}
return;
@@ -827,9 +830,9 @@ static void _decode_opc(DisasContext * ctx)
t0 = tcg_temp_new();
tcg_gen_neg_i32(t0, REG(B7_4));
t1 = tcg_temp_new();
- tcg_gen_andi_i32(t1, cpu_sr, SR_T);
+ tcg_gen_andi_i32(t1, cpu_sr, (1u << SR_T));
tcg_gen_sub_i32(REG(B11_8), t0, t1);
- tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T);
+ tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T));
tcg_gen_setcondi_i32(TCG_COND_GTU, t1, t0, 0);
tcg_gen_or_i32(cpu_sr, cpu_sr, t1);
tcg_gen_setcond_i32(TCG_COND_GTU, t1, REG(B11_8), t0);
@@ -916,7 +919,7 @@ static void _decode_opc(DisasContext * ctx)
{
TCGv t0, t1, t2;
t0 = tcg_temp_new();
- tcg_gen_andi_i32(t0, cpu_sr, SR_T);
+ tcg_gen_andi_i32(t0, cpu_sr, (1u << SR_T));
t1 = tcg_temp_new();
tcg_gen_sub_i32(t1, REG(B11_8), REG(B7_4));
tcg_gen_sub_i32(t0, t1, t0);
@@ -925,7 +928,7 @@ static void _decode_opc(DisasContext * ctx)
tcg_gen_setcond_i32(TCG_COND_LTU, t1, t1, t0);
tcg_gen_or_i32(t1, t1, t2);
tcg_temp_free(t2);
- tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T);
+ tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T));
tcg_gen_or_i32(cpu_sr, cpu_sr, t1);
tcg_temp_free(t1);
tcg_gen_mov_i32(REG(B11_8), t0);
@@ -944,7 +947,7 @@ static void _decode_opc(DisasContext * ctx)
tcg_gen_and_i32(t1, t1, t2);
tcg_temp_free(t2);
tcg_gen_shri_i32(t1, t1, 31);
- tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T);
+ tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T));
tcg_gen_or_i32(cpu_sr, cpu_sr, t1);
tcg_temp_free(t1);
tcg_gen_mov_i32(REG(B11_8), t0);
@@ -1529,7 +1532,7 @@ static void _decode_opc(DisasContext * ctx)
tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
return;
case 0x0029: /* movt Rn */
- tcg_gen_andi_i32(REG(B11_8), cpu_sr, SR_T);
+ tcg_gen_andi_i32(REG(B11_8), cpu_sr, (1u << SR_T));
return;
case 0x0073:
/* MOVCO.L
@@ -1539,7 +1542,7 @@ static void _decode_opc(DisasContext * ctx)
*/
if (ctx->features & SH_FEATURE_SH4A) {
int label = gen_new_label();
- tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T);
+ tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T));
tcg_gen_or_i32(cpu_sr, cpu_sr, cpu_ldst);
tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ldst, 0, label);
tcg_gen_qemu_st32(REG(0), REG(B11_8), ctx->memidx);
@@ -1594,9 +1597,9 @@ static void _decode_opc(DisasContext * ctx)
{
TCGv tmp = tcg_temp_new();
tcg_gen_mov_i32(tmp, cpu_sr);
- gen_copy_bit_i32(cpu_sr, 0, REG(B11_8), 31);
+ gen_copy_bit_i32(cpu_sr, SR_T, REG(B11_8), 31);
tcg_gen_shli_i32(REG(B11_8), REG(B11_8), 1);
- gen_copy_bit_i32(REG(B11_8), 0, tmp, 0);
+ gen_copy_bit_i32(REG(B11_8), SR_T, tmp, 0);
tcg_temp_free(tmp);
}
return;
@@ -1604,7 +1607,7 @@ static void _decode_opc(DisasContext * ctx)
{
TCGv tmp = tcg_temp_new();
tcg_gen_mov_i32(tmp, cpu_sr);
- gen_copy_bit_i32(cpu_sr, 0, REG(B11_8), 0);
+ gen_copy_bit_i32(cpu_sr, SR_T, REG(B11_8), 0);
tcg_gen_shri_i32(REG(B11_8), REG(B11_8), 1);
gen_copy_bit_i32(REG(B11_8), 31, tmp, 0);
tcg_temp_free(tmp);
@@ -1612,23 +1615,23 @@ static void _decode_opc(DisasContext * ctx)
return;
case 0x4004: /* rotl Rn */
tcg_gen_rotli_i32(REG(B11_8), REG(B11_8), 1);
- gen_copy_bit_i32(cpu_sr, 0, REG(B11_8), 0);
+ gen_copy_bit_i32(cpu_sr, SR_T, REG(B11_8), 0);
return;
case 0x4005: /* rotr Rn */
- gen_copy_bit_i32(cpu_sr, 0, REG(B11_8), 0);
+ gen_copy_bit_i32(cpu_sr, SR_T, REG(B11_8), 0);
tcg_gen_rotri_i32(REG(B11_8), REG(B11_8), 1);
return;
case 0x4000: /* shll Rn */
case 0x4020: /* shal Rn */
- gen_copy_bit_i32(cpu_sr, 0, REG(B11_8), 31);
+ gen_copy_bit_i32(cpu_sr, SR_T, REG(B11_8), 31);
tcg_gen_shli_i32(REG(B11_8), REG(B11_8), 1);
return;
case 0x4021: /* shar Rn */
- gen_copy_bit_i32(cpu_sr, 0, REG(B11_8), 0);
+ gen_copy_bit_i32(cpu_sr, SR_T, REG(B11_8), 0);
tcg_gen_sari_i32(REG(B11_8), REG(B11_8), 1);
return;
case 0x4001: /* shlr Rn */
- gen_copy_bit_i32(cpu_sr, 0, REG(B11_8), 0);
+ gen_copy_bit_i32(cpu_sr, SR_T, REG(B11_8), 0);
tcg_gen_shri_i32(REG(B11_8), REG(B11_8), 1);
return;
case 0x4008: /* shll2 Rn */
@@ -1860,7 +1863,7 @@ gen_intermediate_code_internal(SuperHCPU *cpu, TranslationBlock *tb,
ctx.pc = pc_start;
ctx.flags = (uint32_t)tb->flags;
ctx.bstate = BS_NONE;
- ctx.memidx = (ctx.flags & SR_MD) == 0 ? 1 : 0;
+ ctx.memidx = (ctx.flags & (1u << SR_MD)) == 0 ? 1 : 0;
/* We don't know if the delayed pc came from a dynamic or static branch,
so assume it is a dynamic branch. */
ctx.delayed_pc = -1; /* use delayed pc from env pointer */
--
1.7.10.4
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [Qemu-devel] [PATCH 2/8] target-sh4: Split out T from SR
2013-12-21 16:58 [Qemu-devel] [PATCH 0/8] target-sh4: optimizations and cleanups Aurelien Jarno
2013-12-21 16:58 ` [Qemu-devel] [PATCH 1/8] target-sh4: use bit number for SR constants Aurelien Jarno
@ 2013-12-21 16:58 ` Aurelien Jarno
2013-12-21 16:59 ` [Qemu-devel] [PATCH 3/8] target-sh4: optimize addc using add2 Aurelien Jarno
` (5 subsequent siblings)
7 siblings, 0 replies; 11+ messages in thread
From: Aurelien Jarno @ 2013-12-21 16:58 UTC (permalink / raw)
To: qemu-devel; +Cc: Aurelien Jarno
In preparation for more efficient setting of this field.
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
target-sh4/cpu.h | 14 +++-
target-sh4/gdbstub.c | 4 +-
target-sh4/helper.c | 2 +-
target-sh4/op_helper.c | 32 ++------
target-sh4/translate.c | 205 ++++++++++++++++++++----------------------------
5 files changed, 110 insertions(+), 147 deletions(-)
diff --git a/target-sh4/cpu.h b/target-sh4/cpu.h
index c8ba70f..b7dd7ab 100644
--- a/target-sh4/cpu.h
+++ b/target-sh4/cpu.h
@@ -139,7 +139,8 @@ typedef struct CPUSH4State {
uint32_t flags; /* general execution flags */
uint32_t gregs[24]; /* general registers */
float32 fregs[32]; /* floating point registers */
- uint32_t sr; /* status register */
+ uint32_t sr; /* status register (with T split out) */
+ uint32_t sr_t; /* T bit of status register */
uint32_t ssr; /* saved status register */
uint32_t spc; /* saved program counter */
uint32_t gbr; /* global base register */
@@ -339,6 +340,17 @@ static inline int cpu_ptel_pr (uint32_t ptel)
#define TB_FLAG_PENDING_MOVCA (1 << 4)
+static inline target_ulong cpu_read_sr(CPUSH4State *env)
+{
+ return (env->sr & ~(1u << SR_T)) | (env->sr_t << SR_T);
+}
+
+static inline void cpu_write_sr(CPUSH4State *env, target_ulong sr)
+{
+ env->sr_t = sr & (1u << SR_T);
+ env->sr = sr & ~(1u << SR_T);
+}
+
static inline void cpu_get_tb_cpu_state(CPUSH4State *env, target_ulong *pc,
target_ulong *cs_base, int *flags)
{
diff --git a/target-sh4/gdbstub.c b/target-sh4/gdbstub.c
index 05ba728..a365a27 100644
--- a/target-sh4/gdbstub.c
+++ b/target-sh4/gdbstub.c
@@ -51,7 +51,7 @@ int superh_cpu_gdb_read_register(CPUState *cs, uint8_t *mem_buf, int n)
case 21:
return gdb_get_regl(mem_buf, env->macl);
case 22:
- return gdb_get_regl(mem_buf, env->sr);
+ return gdb_get_regl(mem_buf, cpu_read_sr(env));
case 23:
return gdb_get_regl(mem_buf, env->fpul);
case 24:
@@ -111,7 +111,7 @@ int superh_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
env->macl = ldl_p(mem_buf);
break;
case 22:
- env->sr = ldl_p(mem_buf);
+ cpu_write_sr(env, ldl_p(mem_buf));
break;
case 23:
env->fpul = ldl_p(mem_buf);
diff --git a/target-sh4/helper.c b/target-sh4/helper.c
index 07f8e91..a33db4d 100644
--- a/target-sh4/helper.c
+++ b/target-sh4/helper.c
@@ -162,7 +162,7 @@ void superh_cpu_do_interrupt(CPUState *cs)
log_cpu_state(cs, 0);
}
- env->ssr = env->sr;
+ env->ssr = cpu_read_sr(env);
env->spc = env->pc;
env->sgr = env->gregs[15];
env->sr |= (1u << SR_BL) | (1u << SR_MD) | (1u << SR_RB);
diff --git a/target-sh4/op_helper.c b/target-sh4/op_helper.c
index 6d56df2..0e881a8 100644
--- a/target-sh4/op_helper.c
+++ b/target-sh4/op_helper.c
@@ -166,11 +166,11 @@ void helper_ocbi(CPUSH4State *env, uint32_t address)
}
}
-#define T (env->sr & (1u << SR_T))
+#define T (env->sr_t)
#define Q (env->sr & (1u << SR_Q) ? 1 : 0)
#define M (env->sr & (1u << SR_M) ? 1 : 0)
-#define SETT (env->sr |= (1u << SR_T))
-#define CLRT (env->sr &= ~(1u << SR_T))
+#define SETT (env->sr_t = 1)
+#define CLRT (env->sr_t = 0)
#define SETQ (env->sr |= (1u << SR_Q))
#define CLRQ (env->sr &= ~(1u << SR_Q))
#define SETM (env->sr |= (1u << SR_M))
@@ -319,16 +319,6 @@ void helper_macw(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
}
}
-static inline void set_t(CPUSH4State *env)
-{
- env->sr |= (1u << SR_T);
-}
-
-static inline void clr_t(CPUSH4State *env)
-{
- env->sr &= ~(1u << SR_T);
-}
-
void helper_ld_fpscr(CPUSH4State *env, uint32_t val)
{
env->fpscr = val & FPSCR_MASK;
@@ -413,10 +403,8 @@ void helper_fcmp_eq_FT(CPUSH4State *env, float32 t0, float32 t1)
relation = float32_compare(t0, t1, &env->fp_status);
if (unlikely(relation == float_relation_unordered)) {
update_fpscr(env, GETPC());
- } else if (relation == float_relation_equal) {
- set_t(env);
} else {
- clr_t(env);
+ env->sr_t = (relation == float_relation_equal);
}
}
@@ -428,10 +416,8 @@ void helper_fcmp_eq_DT(CPUSH4State *env, float64 t0, float64 t1)
relation = float64_compare(t0, t1, &env->fp_status);
if (unlikely(relation == float_relation_unordered)) {
update_fpscr(env, GETPC());
- } else if (relation == float_relation_equal) {
- set_t(env);
} else {
- clr_t(env);
+ env->sr_t = (relation == float_relation_equal);
}
}
@@ -443,10 +429,8 @@ void helper_fcmp_gt_FT(CPUSH4State *env, float32 t0, float32 t1)
relation = float32_compare(t0, t1, &env->fp_status);
if (unlikely(relation == float_relation_unordered)) {
update_fpscr(env, GETPC());
- } else if (relation == float_relation_greater) {
- set_t(env);
} else {
- clr_t(env);
+ env->sr_t = (relation == float_relation_greater);
}
}
@@ -458,10 +442,8 @@ void helper_fcmp_gt_DT(CPUSH4State *env, float64 t0, float64 t1)
relation = float64_compare(t0, t1, &env->fp_status);
if (unlikely(relation == float_relation_unordered)) {
update_fpscr(env, GETPC());
- } else if (relation == float_relation_greater) {
- set_t(env);
} else {
- clr_t(env);
+ env->sr_t = (relation == float_relation_greater);
}
}
diff --git a/target-sh4/translate.c b/target-sh4/translate.c
index 74425c3..d816936 100644
--- a/target-sh4/translate.c
+++ b/target-sh4/translate.c
@@ -59,7 +59,7 @@ enum {
/* global register indexes */
static TCGv_ptr cpu_env;
static TCGv cpu_gregs[24];
-static TCGv cpu_pc, cpu_sr, cpu_ssr, cpu_spc, cpu_gbr;
+static TCGv cpu_pc, cpu_sr, cpu_sr_t, cpu_ssr, cpu_spc, cpu_gbr;
static TCGv cpu_vbr, cpu_sgr, cpu_dbr, cpu_mach, cpu_macl;
static TCGv cpu_pr, cpu_fpscr, cpu_fpul, cpu_ldst;
static TCGv cpu_fregs[32];
@@ -107,6 +107,8 @@ void sh4_translate_init(void)
offsetof(CPUSH4State, pc), "PC");
cpu_sr = tcg_global_mem_new_i32(TCG_AREG0,
offsetof(CPUSH4State, sr), "SR");
+ cpu_sr_t = tcg_global_mem_new_i32(TCG_AREG0,
+ offsetof(CPUSH4State, sr_t), "SR_T");
cpu_ssr = tcg_global_mem_new_i32(TCG_AREG0,
offsetof(CPUSH4State, ssr), "SSR");
cpu_spc = tcg_global_mem_new_i32(TCG_AREG0,
@@ -153,7 +155,7 @@ void superh_cpu_dump_state(CPUState *cs, FILE *f,
CPUSH4State *env = &cpu->env;
int i;
cpu_fprintf(f, "pc=0x%08x sr=0x%08x pr=0x%08x fpscr=0x%08x\n",
- env->pc, env->sr, env->pr, env->fpscr);
+ env->pc, cpu_read_sr(env), env->pr, env->fpscr);
cpu_fprintf(f, "spc=0x%08x ssr=0x%08x gbr=0x%08x vbr=0x%08x\n",
env->spc, env->ssr, env->gbr, env->vbr);
cpu_fprintf(f, "sgr=0x%08x dbr=0x%08x delayed_pc=0x%08x fpul=0x%08x\n",
@@ -171,6 +173,17 @@ void superh_cpu_dump_state(CPUState *cs, FILE *f,
env->delayed_pc);
}
}
+static void gen_read_sr(TCGv dst)
+{
+ tcg_gen_andi_i32(dst, cpu_sr, ~(1u << SR_T));
+ tcg_gen_or_i32(dst, dst, cpu_sr_t);
+}
+
+static void gen_write_sr(TCGv src)
+{
+ tcg_gen_andi_i32(cpu_sr, src, ~(1u << SR_T));
+ tcg_gen_andi_i32(cpu_sr_t, src, (1u << SR_T));
+}
static void gen_goto_tb(DisasContext * ctx, int n, target_ulong dest)
{
@@ -207,12 +220,9 @@ static void gen_jump(DisasContext * ctx)
static inline void gen_branch_slot(uint32_t delayed_pc, int t)
{
- TCGv sr;
int label = gen_new_label();
tcg_gen_movi_i32(cpu_delayed_pc, delayed_pc);
- sr = tcg_temp_new();
- tcg_gen_andi_i32(sr, cpu_sr, (1u << SR_T));
- tcg_gen_brcondi_i32(t ? TCG_COND_EQ:TCG_COND_NE, sr, 0, label);
+ tcg_gen_brcondi_i32(t ? TCG_COND_EQ : TCG_COND_NE, cpu_sr_t, 0, label);
tcg_gen_ori_i32(cpu_flags, cpu_flags, DELAY_SLOT_TRUE);
gen_set_label(label);
}
@@ -221,13 +231,8 @@ static inline void gen_branch_slot(uint32_t delayed_pc, int t)
static void gen_conditional_jump(DisasContext * ctx,
target_ulong ift, target_ulong ifnott)
{
- int l1;
- TCGv sr;
-
- l1 = gen_new_label();
- sr = tcg_temp_new();
- tcg_gen_andi_i32(sr, cpu_sr, (1u << SR_T));
- tcg_gen_brcondi_i32(TCG_COND_NE, sr, 0, l1);
+ int l1 = gen_new_label();
+ tcg_gen_brcondi_i32(TCG_COND_NE, cpu_sr_t, 0, l1);
gen_goto_tb(ctx, 0, ifnott);
gen_set_label(l1);
gen_goto_tb(ctx, 1, ift);
@@ -249,30 +254,6 @@ static void gen_delayed_conditional_jump(DisasContext * ctx)
gen_jump(ctx);
}
-static inline void gen_cmp(int cond, TCGv t0, TCGv t1)
-{
- TCGv t;
-
- t = tcg_temp_new();
- tcg_gen_setcond_i32(cond, t, t1, t0);
- tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T));
- tcg_gen_or_i32(cpu_sr, cpu_sr, t);
-
- tcg_temp_free(t);
-}
-
-static inline void gen_cmp_imm(int cond, TCGv t0, int32_t imm)
-{
- TCGv t;
-
- t = tcg_temp_new();
- tcg_gen_setcondi_i32(cond, t, t0, imm);
- tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T));
- tcg_gen_or_i32(cpu_sr, cpu_sr, t);
-
- tcg_temp_free(t);
-}
-
static inline void gen_store_flags(uint32_t flags)
{
tcg_gen_andi_i32(cpu_flags, cpu_flags, DELAY_SLOT_TRUE);
@@ -408,8 +389,8 @@ static void _decode_opc(DisasContext * ctx)
switch (ctx->opcode) {
case 0x0019: /* div0u */
- tcg_gen_andi_i32(cpu_sr, cpu_sr,
- ~((1u << SR_M) | (1u << SR_Q) | (1u << SR_T)));
+ tcg_gen_andi_i32(cpu_sr, cpu_sr, ~((1u << SR_M) | (1u << SR_Q)));
+ tcg_gen_movi_i32(cpu_sr_t, 0);
return;
case 0x000b: /* rts */
CHECK_NOT_DELAY_SLOT
@@ -425,7 +406,7 @@ static void _decode_opc(DisasContext * ctx)
tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_S));
return;
case 0x0008: /* clrt */
- tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T));
+ tcg_gen_movi_i32(cpu_sr_t, 0);
return;
case 0x0038: /* ldtlb */
CHECK_PRIVILEGED
@@ -434,7 +415,7 @@ static void _decode_opc(DisasContext * ctx)
case 0x002b: /* rte */
CHECK_PRIVILEGED
CHECK_NOT_DELAY_SLOT
- tcg_gen_mov_i32(cpu_sr, cpu_ssr);
+ gen_write_sr(cpu_ssr);
tcg_gen_mov_i32(cpu_delayed_pc, cpu_spc);
ctx->flags |= DELAY_SLOT;
ctx->delayed_pc = (uint32_t) - 1;
@@ -443,7 +424,7 @@ static void _decode_opc(DisasContext * ctx)
tcg_gen_ori_i32(cpu_sr, cpu_sr, (1u << SR_S));
return;
case 0x0018: /* sett */
- tcg_gen_ori_i32(cpu_sr, cpu_sr, (1u << SR_T));
+ tcg_gen_movi_i32(cpu_sr_t, 1);
return;
case 0xfbfd: /* frchg */
tcg_gen_xori_i32(cpu_fpscr, cpu_fpscr, FPSCR_FR);
@@ -658,22 +639,17 @@ static void _decode_opc(DisasContext * ctx)
return;
case 0x300e: /* addc Rm,Rn */
{
- TCGv t0, t1, t2;
+ TCGv t0, t1;
t0 = tcg_temp_new();
- tcg_gen_andi_i32(t0, cpu_sr, (1u << SR_T));
t1 = tcg_temp_new();
- tcg_gen_add_i32(t1, REG(B7_4), REG(B11_8));
- tcg_gen_add_i32(t0, t0, t1);
- t2 = tcg_temp_new();
- tcg_gen_setcond_i32(TCG_COND_GTU, t2, REG(B11_8), t1);
- tcg_gen_setcond_i32(TCG_COND_GTU, t1, t1, t0);
- tcg_gen_or_i32(t1, t1, t2);
- tcg_temp_free(t2);
- tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T));
- tcg_gen_or_i32(cpu_sr, cpu_sr, t1);
- tcg_temp_free(t1);
- tcg_gen_mov_i32(REG(B11_8), t0);
+ tcg_gen_add_i32(t0, REG(B7_4), REG(B11_8));
+ tcg_gen_add_i32(t1, cpu_sr_t, t0);
+ tcg_gen_setcond_i32(TCG_COND_GTU, cpu_sr_t, REG(B11_8), t0);
+ tcg_gen_setcond_i32(TCG_COND_GTU, t0, t0, t1);
+ tcg_gen_or_i32(cpu_sr_t, cpu_sr_t, t0);
tcg_temp_free(t0);
+ tcg_gen_mov_i32(REG(B11_8), t1);
+ tcg_temp_free(t1);
}
return;
case 0x300f: /* addv Rm,Rn */
@@ -685,11 +661,9 @@ static void _decode_opc(DisasContext * ctx)
tcg_gen_xor_i32(t1, t0, REG(B11_8));
t2 = tcg_temp_new();
tcg_gen_xor_i32(t2, REG(B7_4), REG(B11_8));
- tcg_gen_andc_i32(t1, t1, t2);
+ tcg_gen_andc_i32(cpu_sr_t, t1, t2);
tcg_temp_free(t2);
- tcg_gen_shri_i32(t1, t1, 31);
- tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T));
- tcg_gen_or_i32(cpu_sr, cpu_sr, t1);
+ tcg_gen_shri_i32(cpu_sr_t, cpu_sr_t, 31);
tcg_temp_free(t1);
tcg_gen_mov_i32(REG(B7_4), t0);
tcg_temp_free(t0);
@@ -699,38 +673,36 @@ static void _decode_opc(DisasContext * ctx)
tcg_gen_and_i32(REG(B11_8), REG(B11_8), REG(B7_4));
return;
case 0x3000: /* cmp/eq Rm,Rn */
- gen_cmp(TCG_COND_EQ, REG(B7_4), REG(B11_8));
+ tcg_gen_setcond_i32(TCG_COND_EQ, cpu_sr_t, REG(B11_8), REG(B7_4));
return;
case 0x3003: /* cmp/ge Rm,Rn */
- gen_cmp(TCG_COND_GE, REG(B7_4), REG(B11_8));
+ tcg_gen_setcond_i32(TCG_COND_GE, cpu_sr_t, REG(B11_8), REG(B7_4));
return;
case 0x3007: /* cmp/gt Rm,Rn */
- gen_cmp(TCG_COND_GT, REG(B7_4), REG(B11_8));
+ tcg_gen_setcond_i32(TCG_COND_GT, cpu_sr_t, REG(B11_8), REG(B7_4));
return;
case 0x3006: /* cmp/hi Rm,Rn */
- gen_cmp(TCG_COND_GTU, REG(B7_4), REG(B11_8));
+ tcg_gen_setcond_i32(TCG_COND_GTU, cpu_sr_t, REG(B11_8), REG(B7_4));
return;
case 0x3002: /* cmp/hs Rm,Rn */
- gen_cmp(TCG_COND_GEU, REG(B7_4), REG(B11_8));
+ tcg_gen_setcond_i32(TCG_COND_GEU, cpu_sr_t, REG(B11_8), REG(B7_4));
return;
case 0x200c: /* cmp/str Rm,Rn */
{
TCGv cmp1 = tcg_temp_new();
TCGv cmp2 = tcg_temp_new();
- tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T));
tcg_gen_xor_i32(cmp1, REG(B7_4), REG(B11_8));
tcg_gen_andi_i32(cmp2, cmp1, 0xff000000);
- tcg_gen_setcondi_i32(TCG_COND_EQ, cmp2, cmp2, 0);
- tcg_gen_or_i32(cpu_sr, cpu_sr, cmp2);
+ tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, cmp2, 0);
tcg_gen_andi_i32(cmp2, cmp1, 0x00ff0000);
tcg_gen_setcondi_i32(TCG_COND_EQ, cmp2, cmp2, 0);
- tcg_gen_or_i32(cpu_sr, cpu_sr, cmp2);
+ tcg_gen_or_i32(cpu_sr_t, cpu_sr_t, cmp2);
tcg_gen_andi_i32(cmp2, cmp1, 0x0000ff00);
tcg_gen_setcondi_i32(TCG_COND_EQ, cmp2, cmp2, 0);
- tcg_gen_or_i32(cpu_sr, cpu_sr, cmp2);
+ tcg_gen_or_i32(cpu_sr_t, cpu_sr_t, cmp2);
tcg_gen_andi_i32(cmp2, cmp1, 0x000000ff);
tcg_gen_setcondi_i32(TCG_COND_EQ, cmp2, cmp2, 0);
- tcg_gen_or_i32(cpu_sr, cpu_sr, cmp2);
+ tcg_gen_or_i32(cpu_sr_t, cpu_sr_t, cmp2);
tcg_temp_free(cmp2);
tcg_temp_free(cmp1);
}
@@ -740,8 +712,8 @@ static void _decode_opc(DisasContext * ctx)
gen_copy_bit_i32(cpu_sr, SR_Q, REG(B11_8), 31); /* SR_Q */
gen_copy_bit_i32(cpu_sr, SR_M, REG(B7_4), 31); /* SR_M */
TCGv val = tcg_temp_new();
- tcg_gen_xor_i32(val, REG(B7_4), REG(B11_8));
- gen_copy_bit_i32(cpu_sr, SR_T, val, 31); /* SR_T */
+ tcg_gen_xor_i32(cpu_sr_t, REG(B7_4), REG(B11_8));
+ tcg_gen_shri_i32(cpu_sr_t, cpu_sr_t, 31); /* SR_T */
tcg_temp_free(val);
}
return;
@@ -826,19 +798,13 @@ static void _decode_opc(DisasContext * ctx)
return;
case 0x600a: /* negc Rm,Rn */
{
- TCGv t0, t1;
- t0 = tcg_temp_new();
+ TCGv t0 = tcg_temp_new();
tcg_gen_neg_i32(t0, REG(B7_4));
- t1 = tcg_temp_new();
- tcg_gen_andi_i32(t1, cpu_sr, (1u << SR_T));
- tcg_gen_sub_i32(REG(B11_8), t0, t1);
- tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T));
- tcg_gen_setcondi_i32(TCG_COND_GTU, t1, t0, 0);
- tcg_gen_or_i32(cpu_sr, cpu_sr, t1);
- tcg_gen_setcond_i32(TCG_COND_GTU, t1, REG(B11_8), t0);
- tcg_gen_or_i32(cpu_sr, cpu_sr, t1);
+ tcg_gen_sub_i32(REG(B11_8), t0, cpu_sr_t);
+ tcg_gen_setcondi_i32(TCG_COND_GTU, cpu_sr_t, t0, 0);
+ tcg_gen_setcond_i32(TCG_COND_GTU, t0, REG(B11_8), t0);
+ tcg_gen_or_i32(cpu_sr_t, cpu_sr_t, t0);
tcg_temp_free(t0);
- tcg_temp_free(t1);
}
return;
case 0x6007: /* not Rm,Rn */
@@ -919,17 +885,14 @@ static void _decode_opc(DisasContext * ctx)
{
TCGv t0, t1, t2;
t0 = tcg_temp_new();
- tcg_gen_andi_i32(t0, cpu_sr, (1u << SR_T));
t1 = tcg_temp_new();
tcg_gen_sub_i32(t1, REG(B11_8), REG(B7_4));
- tcg_gen_sub_i32(t0, t1, t0);
+ tcg_gen_sub_i32(t0, t1, cpu_sr_t);
t2 = tcg_temp_new();
tcg_gen_setcond_i32(TCG_COND_LTU, t2, REG(B11_8), t1);
tcg_gen_setcond_i32(TCG_COND_LTU, t1, t1, t0);
- tcg_gen_or_i32(t1, t1, t2);
+ tcg_gen_or_i32(cpu_sr_t, t1, t2);
tcg_temp_free(t2);
- tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T));
- tcg_gen_or_i32(cpu_sr, cpu_sr, t1);
tcg_temp_free(t1);
tcg_gen_mov_i32(REG(B11_8), t0);
tcg_temp_free(t0);
@@ -946,9 +909,7 @@ static void _decode_opc(DisasContext * ctx)
tcg_gen_xor_i32(t2, REG(B11_8), REG(B7_4));
tcg_gen_and_i32(t1, t1, t2);
tcg_temp_free(t2);
- tcg_gen_shri_i32(t1, t1, 31);
- tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T));
- tcg_gen_or_i32(cpu_sr, cpu_sr, t1);
+ tcg_gen_shri_i32(cpu_sr_t, t1, 31);
tcg_temp_free(t1);
tcg_gen_mov_i32(REG(B11_8), t0);
tcg_temp_free(t0);
@@ -958,7 +919,7 @@ static void _decode_opc(DisasContext * ctx)
{
TCGv val = tcg_temp_new();
tcg_gen_and_i32(val, REG(B7_4), REG(B11_8));
- gen_cmp_imm(TCG_COND_EQ, val, 0);
+ tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, val, 0);
tcg_temp_free(val);
}
return;
@@ -1197,7 +1158,7 @@ static void _decode_opc(DisasContext * ctx)
ctx->flags |= DELAY_SLOT_CONDITIONAL;
return;
case 0x8800: /* cmp/eq #imm,R0 */
- gen_cmp_imm(TCG_COND_EQ, REG(0), B7_0s);
+ tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, REG(0), B7_0s);
return;
case 0xc400: /* mov.b @(disp,GBR),R0 */
{
@@ -1313,7 +1274,7 @@ static void _decode_opc(DisasContext * ctx)
{
TCGv val = tcg_temp_new();
tcg_gen_andi_i32(val, REG(0), B7_0);
- gen_cmp_imm(TCG_COND_EQ, val, 0);
+ tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, val, 0);
tcg_temp_free(val);
}
return;
@@ -1323,7 +1284,7 @@ static void _decode_opc(DisasContext * ctx)
tcg_gen_add_i32(val, REG(0), cpu_gbr);
tcg_gen_qemu_ld8u(val, val, ctx->memidx);
tcg_gen_andi_i32(val, val, B7_0);
- gen_cmp_imm(TCG_COND_EQ, val, 0);
+ tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, val, 0);
tcg_temp_free(val);
}
return;
@@ -1386,14 +1347,14 @@ static void _decode_opc(DisasContext * ctx)
ctx->delayed_pc = (uint32_t) - 1;
return;
case 0x4015: /* cmp/pl Rn */
- gen_cmp_imm(TCG_COND_GT, REG(B11_8), 0);
+ tcg_gen_setcondi_i32(TCG_COND_GT, cpu_sr_t, REG(B11_8), 0);
return;
case 0x4011: /* cmp/pz Rn */
- gen_cmp_imm(TCG_COND_GE, REG(B11_8), 0);
+ tcg_gen_setcondi_i32(TCG_COND_GE, cpu_sr_t, REG(B11_8), 0);
return;
case 0x4010: /* dt Rn */
tcg_gen_subi_i32(REG(B11_8), REG(B11_8), 1);
- gen_cmp_imm(TCG_COND_EQ, REG(B11_8), 0);
+ tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, REG(B11_8), 0);
return;
case 0x402b: /* jmp @Rn */
CHECK_NOT_DELAY_SLOT
@@ -1410,15 +1371,21 @@ static void _decode_opc(DisasContext * ctx)
return;
case 0x400e: /* ldc Rm,SR */
CHECK_PRIVILEGED
- tcg_gen_andi_i32(cpu_sr, REG(B11_8), 0x700083f3);
- ctx->bstate = BS_STOP;
+ {
+ TCGv val = tcg_temp_new();
+ tcg_gen_andi_i32(val, REG(B11_8), 0x700083f3);
+ gen_write_sr(val);
+ tcg_temp_free(val);
+ ctx->bstate = BS_STOP;
+ }
return;
case 0x4007: /* ldc.l @Rm+,SR */
CHECK_PRIVILEGED
{
TCGv val = tcg_temp_new();
tcg_gen_qemu_ld32s(val, REG(B11_8), ctx->memidx);
- tcg_gen_andi_i32(cpu_sr, val, 0x700083f3);
+ tcg_gen_andi_i32(val, val, 0x700083f3);
+ gen_write_sr(val);
tcg_temp_free(val);
tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
ctx->bstate = BS_STOP;
@@ -1426,15 +1393,18 @@ static void _decode_opc(DisasContext * ctx)
return;
case 0x0002: /* stc SR,Rn */
CHECK_PRIVILEGED
- tcg_gen_mov_i32(REG(B11_8), cpu_sr);
+ gen_read_sr(REG(B11_8));
return;
case 0x4003: /* stc SR,@-Rn */
CHECK_PRIVILEGED
{
TCGv addr = tcg_temp_new();
+ TCGv val = tcg_temp_new();
tcg_gen_subi_i32(addr, REG(B11_8), 4);
- tcg_gen_qemu_st32(cpu_sr, addr, ctx->memidx);
+ gen_read_sr(val);
+ tcg_gen_qemu_st32(val, addr, ctx->memidx);
tcg_gen_mov_i32(REG(B11_8), addr);
+ tcg_temp_free(val);
tcg_temp_free(addr);
}
return;
@@ -1532,7 +1502,7 @@ static void _decode_opc(DisasContext * ctx)
tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
return;
case 0x0029: /* movt Rn */
- tcg_gen_andi_i32(REG(B11_8), cpu_sr, (1u << SR_T));
+ tcg_gen_mov_i32(REG(B11_8), cpu_sr_t);
return;
case 0x0073:
/* MOVCO.L
@@ -1542,8 +1512,7 @@ static void _decode_opc(DisasContext * ctx)
*/
if (ctx->features & SH_FEATURE_SH4A) {
int label = gen_new_label();
- tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T));
- tcg_gen_or_i32(cpu_sr, cpu_sr, cpu_ldst);
+ tcg_gen_mov_i32(cpu_sr, cpu_ldst);
tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ldst, 0, label);
tcg_gen_qemu_st32(REG(0), REG(B11_8), ctx->memidx);
gen_set_label(label);
@@ -1596,42 +1565,42 @@ static void _decode_opc(DisasContext * ctx)
case 0x4024: /* rotcl Rn */
{
TCGv tmp = tcg_temp_new();
- tcg_gen_mov_i32(tmp, cpu_sr);
- gen_copy_bit_i32(cpu_sr, SR_T, REG(B11_8), 31);
+ tcg_gen_mov_i32(tmp, cpu_sr_t);
+ tcg_gen_shri_i32(cpu_sr_t, REG(B11_8), 31);
tcg_gen_shli_i32(REG(B11_8), REG(B11_8), 1);
- gen_copy_bit_i32(REG(B11_8), SR_T, tmp, 0);
+ tcg_gen_or_i32(REG(B11_8), REG(B11_8), tmp);
tcg_temp_free(tmp);
}
return;
case 0x4025: /* rotcr Rn */
{
TCGv tmp = tcg_temp_new();
- tcg_gen_mov_i32(tmp, cpu_sr);
- gen_copy_bit_i32(cpu_sr, SR_T, REG(B11_8), 0);
+ tcg_gen_shli_i32(tmp, cpu_sr_t, 31);
+ tcg_gen_andi_i32(cpu_sr_t, REG(B11_8), 1);
tcg_gen_shri_i32(REG(B11_8), REG(B11_8), 1);
- gen_copy_bit_i32(REG(B11_8), 31, tmp, 0);
+ tcg_gen_or_i32(REG(B11_8), REG(B11_8), tmp);
tcg_temp_free(tmp);
}
return;
case 0x4004: /* rotl Rn */
tcg_gen_rotli_i32(REG(B11_8), REG(B11_8), 1);
- gen_copy_bit_i32(cpu_sr, SR_T, REG(B11_8), 0);
+ tcg_gen_andi_i32(cpu_sr_t, REG(B11_8), 0);
return;
case 0x4005: /* rotr Rn */
- gen_copy_bit_i32(cpu_sr, SR_T, REG(B11_8), 0);
+ tcg_gen_andi_i32(cpu_sr_t, REG(B11_8), 0);
tcg_gen_rotri_i32(REG(B11_8), REG(B11_8), 1);
return;
case 0x4000: /* shll Rn */
case 0x4020: /* shal Rn */
- gen_copy_bit_i32(cpu_sr, SR_T, REG(B11_8), 31);
+ tcg_gen_shri_i32(cpu_sr_t, REG(B11_8), 31);
tcg_gen_shli_i32(REG(B11_8), REG(B11_8), 1);
return;
case 0x4021: /* shar Rn */
- gen_copy_bit_i32(cpu_sr, SR_T, REG(B11_8), 0);
+ tcg_gen_andi_i32(cpu_sr_t, REG(B11_8), 1);
tcg_gen_sari_i32(REG(B11_8), REG(B11_8), 1);
return;
case 0x4001: /* shlr Rn */
- gen_copy_bit_i32(cpu_sr, SR_T, REG(B11_8), 0);
+ tcg_gen_andi_i32(cpu_sr_t, REG(B11_8), 1);
tcg_gen_shri_i32(REG(B11_8), REG(B11_8), 1);
return;
case 0x4008: /* shll2 Rn */
@@ -1659,7 +1628,7 @@ static void _decode_opc(DisasContext * ctx)
tcg_gen_mov_i32(addr, REG(B11_8));
val = tcg_temp_local_new();
tcg_gen_qemu_ld8u(val, addr, ctx->memidx);
- gen_cmp_imm(TCG_COND_EQ, val, 0);
+ tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, val, 0);
tcg_gen_ori_i32(val, val, 0x80);
tcg_gen_qemu_st8(val, addr, ctx->memidx);
tcg_temp_free(val);
--
1.7.10.4
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [Qemu-devel] [PATCH 3/8] target-sh4: optimize addc using add2
2013-12-21 16:58 [Qemu-devel] [PATCH 0/8] target-sh4: optimizations and cleanups Aurelien Jarno
2013-12-21 16:58 ` [Qemu-devel] [PATCH 1/8] target-sh4: use bit number for SR constants Aurelien Jarno
2013-12-21 16:58 ` [Qemu-devel] [PATCH 2/8] target-sh4: Split out T from SR Aurelien Jarno
@ 2013-12-21 16:59 ` Aurelien Jarno
2013-12-21 16:59 ` [Qemu-devel] [PATCH 4/8] target-sh4: optimize subc using sub2 Aurelien Jarno
` (4 subsequent siblings)
7 siblings, 0 replies; 11+ messages in thread
From: Aurelien Jarno @ 2013-12-21 16:59 UTC (permalink / raw)
To: qemu-devel; +Cc: Aurelien Jarno
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
target-sh4/translate.c | 14 ++++++--------
1 file changed, 6 insertions(+), 8 deletions(-)
diff --git a/target-sh4/translate.c b/target-sh4/translate.c
index d816936..17b4abe 100644
--- a/target-sh4/translate.c
+++ b/target-sh4/translate.c
@@ -639,17 +639,15 @@ static void _decode_opc(DisasContext * ctx)
return;
case 0x300e: /* addc Rm,Rn */
{
- TCGv t0, t1;
- t0 = tcg_temp_new();
+ TCGv t0, t1, t2;
+ t0 = tcg_const_tl(0);
t1 = tcg_temp_new();
- tcg_gen_add_i32(t0, REG(B7_4), REG(B11_8));
- tcg_gen_add_i32(t1, cpu_sr_t, t0);
- tcg_gen_setcond_i32(TCG_COND_GTU, cpu_sr_t, REG(B11_8), t0);
- tcg_gen_setcond_i32(TCG_COND_GTU, t0, t0, t1);
- tcg_gen_or_i32(cpu_sr_t, cpu_sr_t, t0);
+ t2 = tcg_temp_new();
+ tcg_gen_add2_i32(t1, t2, REG(B11_8), t0, REG(B7_4), t0);
+ tcg_gen_add2_i32(REG(B11_8), cpu_sr_t, t1, t2, cpu_sr_t, t0);
tcg_temp_free(t0);
- tcg_gen_mov_i32(REG(B11_8), t1);
tcg_temp_free(t1);
+ tcg_temp_free(t2);
}
return;
case 0x300f: /* addv Rm,Rn */
--
1.7.10.4
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [Qemu-devel] [PATCH 4/8] target-sh4: optimize subc using sub2
2013-12-21 16:58 [Qemu-devel] [PATCH 0/8] target-sh4: optimizations and cleanups Aurelien Jarno
` (2 preceding siblings ...)
2013-12-21 16:59 ` [Qemu-devel] [PATCH 3/8] target-sh4: optimize addc using add2 Aurelien Jarno
@ 2013-12-21 16:59 ` Aurelien Jarno
2013-12-21 16:59 ` [Qemu-devel] [PATCH 5/8] target-sh4: optimize negc using add2 and sub2 Aurelien Jarno
` (3 subsequent siblings)
7 siblings, 0 replies; 11+ messages in thread
From: Aurelien Jarno @ 2013-12-21 16:59 UTC (permalink / raw)
To: qemu-devel; +Cc: Aurelien Jarno
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
target-sh4/translate.c | 15 ++++++---------
1 file changed, 6 insertions(+), 9 deletions(-)
diff --git a/target-sh4/translate.c b/target-sh4/translate.c
index 17b4abe..3af9ccd 100644
--- a/target-sh4/translate.c
+++ b/target-sh4/translate.c
@@ -882,18 +882,15 @@ static void _decode_opc(DisasContext * ctx)
case 0x300a: /* subc Rm,Rn */
{
TCGv t0, t1, t2;
- t0 = tcg_temp_new();
+ t0 = tcg_const_tl(0);
t1 = tcg_temp_new();
- tcg_gen_sub_i32(t1, REG(B11_8), REG(B7_4));
- tcg_gen_sub_i32(t0, t1, cpu_sr_t);
t2 = tcg_temp_new();
- tcg_gen_setcond_i32(TCG_COND_LTU, t2, REG(B11_8), t1);
- tcg_gen_setcond_i32(TCG_COND_LTU, t1, t1, t0);
- tcg_gen_or_i32(cpu_sr_t, t1, t2);
- tcg_temp_free(t2);
- tcg_temp_free(t1);
- tcg_gen_mov_i32(REG(B11_8), t0);
+ tcg_gen_sub2_i32(t1, t2, REG(B11_8), t0, REG(B7_4), t0);
+ tcg_gen_sub2_i32(REG(B11_8), cpu_sr_t, t1, t2, cpu_sr_t, t0);
+ tcg_gen_andi_i32(cpu_sr_t, cpu_sr_t, 1);
tcg_temp_free(t0);
+ tcg_temp_free(t1);
+ tcg_temp_free(t2);
}
return;
case 0x300b: /* subv Rm,Rn */
--
1.7.10.4
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [Qemu-devel] [PATCH 5/8] target-sh4: optimize negc using add2 and sub2
2013-12-21 16:58 [Qemu-devel] [PATCH 0/8] target-sh4: optimizations and cleanups Aurelien Jarno
` (3 preceding siblings ...)
2013-12-21 16:59 ` [Qemu-devel] [PATCH 4/8] target-sh4: optimize subc using sub2 Aurelien Jarno
@ 2013-12-21 16:59 ` Aurelien Jarno
2013-12-21 16:59 ` [Qemu-devel] [PATCH 6/8] target-sh4: split out Q and M from of SR and optimize div1 Aurelien Jarno
` (2 subsequent siblings)
7 siblings, 0 replies; 11+ messages in thread
From: Aurelien Jarno @ 2013-12-21 16:59 UTC (permalink / raw)
To: qemu-devel; +Cc: Aurelien Jarno
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
target-sh4/translate.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/target-sh4/translate.c b/target-sh4/translate.c
index 3af9ccd..3a3b2b6 100644
--- a/target-sh4/translate.c
+++ b/target-sh4/translate.c
@@ -796,12 +796,12 @@ static void _decode_opc(DisasContext * ctx)
return;
case 0x600a: /* negc Rm,Rn */
{
- TCGv t0 = tcg_temp_new();
- tcg_gen_neg_i32(t0, REG(B7_4));
- tcg_gen_sub_i32(REG(B11_8), t0, cpu_sr_t);
- tcg_gen_setcondi_i32(TCG_COND_GTU, cpu_sr_t, t0, 0);
- tcg_gen_setcond_i32(TCG_COND_GTU, t0, REG(B11_8), t0);
- tcg_gen_or_i32(cpu_sr_t, cpu_sr_t, t0);
+ TCGv t0 = tcg_const_i32(0);
+ tcg_gen_add2_i32(REG(B11_8), cpu_sr_t,
+ REG(B7_4), t0, cpu_sr_t, t0);
+ tcg_gen_sub2_i32(REG(B11_8), cpu_sr_t,
+ t0, t0, REG(B11_8), cpu_sr_t);
+ tcg_gen_andi_i32(cpu_sr_t, cpu_sr_t, 1);
tcg_temp_free(t0);
}
return;
--
1.7.10.4
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [Qemu-devel] [PATCH 6/8] target-sh4: split out Q and M from of SR and optimize div1
2013-12-21 16:58 [Qemu-devel] [PATCH 0/8] target-sh4: optimizations and cleanups Aurelien Jarno
` (4 preceding siblings ...)
2013-12-21 16:59 ` [Qemu-devel] [PATCH 5/8] target-sh4: optimize negc using add2 and sub2 Aurelien Jarno
@ 2013-12-21 16:59 ` Aurelien Jarno
2013-12-21 16:59 ` [Qemu-devel] [PATCH 7/8] target-sh4: factorize fmov implementation Aurelien Jarno
2013-12-21 16:59 ` [Qemu-devel] [PATCH 8/8] target-sh4: remove dead code Aurelien Jarno
7 siblings, 0 replies; 11+ messages in thread
From: Aurelien Jarno @ 2013-12-21 16:59 UTC (permalink / raw)
To: qemu-devel; +Cc: Aurelien Jarno
Splitting Q and M out of SR, it's possible to optimize div1 by using
TCG code instead of an helper.
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
target-sh4/cpu.h | 13 ++++--
target-sh4/helper.h | 1 -
target-sh4/op_helper.c | 118 ------------------------------------------------
target-sh4/translate.c | 67 +++++++++++++++++++++++----
4 files changed, 68 insertions(+), 131 deletions(-)
diff --git a/target-sh4/cpu.h b/target-sh4/cpu.h
index b7dd7ab..82221c8 100644
--- a/target-sh4/cpu.h
+++ b/target-sh4/cpu.h
@@ -140,6 +140,8 @@ typedef struct CPUSH4State {
uint32_t gregs[24]; /* general registers */
float32 fregs[32]; /* floating point registers */
uint32_t sr; /* status register (with T split out) */
+ uint32_t sr_m; /* M bit of status register */
+ uint32_t sr_q; /* Q bit of status register */
uint32_t sr_t; /* T bit of status register */
uint32_t ssr; /* saved status register */
uint32_t spc; /* saved program counter */
@@ -342,13 +344,18 @@ static inline int cpu_ptel_pr (uint32_t ptel)
static inline target_ulong cpu_read_sr(CPUSH4State *env)
{
- return (env->sr & ~(1u << SR_T)) | (env->sr_t << SR_T);
+ return (env->sr & ~((1u << SR_M) | (1u << SR_Q) | (1u << SR_T))) |
+ (env->sr_m << SR_M) |
+ (env->sr_q << SR_Q) |
+ (env->sr_t << SR_T);
}
static inline void cpu_write_sr(CPUSH4State *env, target_ulong sr)
{
- env->sr_t = sr & (1u << SR_T);
- env->sr = sr & ~(1u << SR_T);
+ env->sr_m = (sr >> SR_M) & 1;
+ env->sr_q = (sr >> SR_Q) & 1;
+ env->sr_t = (sr >> SR_T) & 1;
+ env->sr = sr & ~((1u << SR_M) | (1u << SR_Q) | (1u << SR_T));
}
static inline void cpu_get_tb_cpu_state(CPUSH4State *env, target_ulong *pc,
diff --git a/target-sh4/helper.h b/target-sh4/helper.h
index 7162448..fbbe264 100644
--- a/target-sh4/helper.h
+++ b/target-sh4/helper.h
@@ -13,7 +13,6 @@ DEF_HELPER_3(movcal, void, env, i32, i32)
DEF_HELPER_1(discard_movcal_backup, void, env)
DEF_HELPER_2(ocbi, void, env, i32)
-DEF_HELPER_3(div1, i32, env, i32, i32)
DEF_HELPER_3(macl, void, env, i32, i32)
DEF_HELPER_3(macw, void, env, i32, i32)
diff --git a/target-sh4/op_helper.c b/target-sh4/op_helper.c
index 0e881a8..3ed0e2d 100644
--- a/target-sh4/op_helper.c
+++ b/target-sh4/op_helper.c
@@ -166,124 +166,6 @@ void helper_ocbi(CPUSH4State *env, uint32_t address)
}
}
-#define T (env->sr_t)
-#define Q (env->sr & (1u << SR_Q) ? 1 : 0)
-#define M (env->sr & (1u << SR_M) ? 1 : 0)
-#define SETT (env->sr_t = 1)
-#define CLRT (env->sr_t = 0)
-#define SETQ (env->sr |= (1u << SR_Q))
-#define CLRQ (env->sr &= ~(1u << SR_Q))
-#define SETM (env->sr |= (1u << SR_M))
-#define CLRM (env->sr &= ~(1u << SR_M))
-
-uint32_t helper_div1(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
-{
- uint32_t tmp0, tmp2;
- uint8_t old_q, tmp1 = 0xff;
-
- //printf("div1 arg0=0x%08x arg1=0x%08x M=%d Q=%d T=%d\n", arg0, arg1, M, Q, T);
- old_q = Q;
- if ((0x80000000 & arg1) != 0)
- SETQ;
- else
- CLRQ;
- tmp2 = arg0;
- arg1 <<= 1;
- arg1 |= T;
- switch (old_q) {
- case 0:
- switch (M) {
- case 0:
- tmp0 = arg1;
- arg1 -= tmp2;
- tmp1 = arg1 > tmp0;
- switch (Q) {
- case 0:
- if (tmp1)
- SETQ;
- else
- CLRQ;
- break;
- case 1:
- if (tmp1 == 0)
- SETQ;
- else
- CLRQ;
- break;
- }
- break;
- case 1:
- tmp0 = arg1;
- arg1 += tmp2;
- tmp1 = arg1 < tmp0;
- switch (Q) {
- case 0:
- if (tmp1 == 0)
- SETQ;
- else
- CLRQ;
- break;
- case 1:
- if (tmp1)
- SETQ;
- else
- CLRQ;
- break;
- }
- break;
- }
- break;
- case 1:
- switch (M) {
- case 0:
- tmp0 = arg1;
- arg1 += tmp2;
- tmp1 = arg1 < tmp0;
- switch (Q) {
- case 0:
- if (tmp1)
- SETQ;
- else
- CLRQ;
- break;
- case 1:
- if (tmp1 == 0)
- SETQ;
- else
- CLRQ;
- break;
- }
- break;
- case 1:
- tmp0 = arg1;
- arg1 -= tmp2;
- tmp1 = arg1 > tmp0;
- switch (Q) {
- case 0:
- if (tmp1 == 0)
- SETQ;
- else
- CLRQ;
- break;
- case 1:
- if (tmp1)
- SETQ;
- else
- CLRQ;
- break;
- }
- break;
- }
- break;
- }
- if (Q == M)
- SETT;
- else
- CLRT;
- //printf("Output: arg1=0x%08x M=%d Q=%d T=%d\n", arg1, M, Q, T);
- return arg1;
-}
-
void helper_macl(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
{
int64_t res;
diff --git a/target-sh4/translate.c b/target-sh4/translate.c
index 3a3b2b6..26b45c1 100644
--- a/target-sh4/translate.c
+++ b/target-sh4/translate.c
@@ -59,7 +59,8 @@ enum {
/* global register indexes */
static TCGv_ptr cpu_env;
static TCGv cpu_gregs[24];
-static TCGv cpu_pc, cpu_sr, cpu_sr_t, cpu_ssr, cpu_spc, cpu_gbr;
+static TCGv cpu_sr, cpu_sr_m, cpu_sr_q, cpu_sr_t;
+static TCGv cpu_pc, cpu_ssr, cpu_spc, cpu_gbr;
static TCGv cpu_vbr, cpu_sgr, cpu_dbr, cpu_mach, cpu_macl;
static TCGv cpu_pr, cpu_fpscr, cpu_fpul, cpu_ldst;
static TCGv cpu_fregs[32];
@@ -107,6 +108,10 @@ void sh4_translate_init(void)
offsetof(CPUSH4State, pc), "PC");
cpu_sr = tcg_global_mem_new_i32(TCG_AREG0,
offsetof(CPUSH4State, sr), "SR");
+ cpu_sr_m = tcg_global_mem_new_i32(TCG_AREG0,
+ offsetof(CPUSH4State, sr_m), "SR_M");
+ cpu_sr_q = tcg_global_mem_new_i32(TCG_AREG0,
+ offsetof(CPUSH4State, sr_q), "SR_Q");
cpu_sr_t = tcg_global_mem_new_i32(TCG_AREG0,
offsetof(CPUSH4State, sr_t), "SR_T");
cpu_ssr = tcg_global_mem_new_i32(TCG_AREG0,
@@ -175,14 +180,28 @@ void superh_cpu_dump_state(CPUState *cs, FILE *f,
}
static void gen_read_sr(TCGv dst)
{
- tcg_gen_andi_i32(dst, cpu_sr, ~(1u << SR_T));
- tcg_gen_or_i32(dst, dst, cpu_sr_t);
+ TCGv t0 = tcg_temp_new();
+ tcg_gen_andi_i32(dst, cpu_sr,
+ ~((1u << SR_Q) | (1u << SR_M) | (1u << SR_T)));
+ tcg_gen_shli_i32(t0, cpu_sr_q, SR_Q);
+ tcg_gen_or_i32(dst, dst, t0);
+ tcg_gen_shli_i32(t0, cpu_sr_m, SR_M);
+ tcg_gen_or_i32(dst, dst, t0);
+ tcg_gen_shli_i32(t0, cpu_sr_t, SR_T);
+ tcg_gen_or_i32(dst, dst, t0);
+ tcg_temp_free_i32(t0);
}
static void gen_write_sr(TCGv src)
{
- tcg_gen_andi_i32(cpu_sr, src, ~(1u << SR_T));
- tcg_gen_andi_i32(cpu_sr_t, src, (1u << SR_T));
+ tcg_gen_andi_i32(cpu_sr, src,
+ ~((1u << SR_Q) | (1u << SR_M) | (1u << SR_T)));
+ tcg_gen_shri_i32(cpu_sr_q, src, SR_Q);
+ tcg_gen_andi_i32(cpu_sr_q, cpu_sr_q, 1);
+ tcg_gen_shri_i32(cpu_sr_m, src, SR_M);
+ tcg_gen_andi_i32(cpu_sr_m, cpu_sr_m, 1);
+ tcg_gen_shri_i32(cpu_sr_t, src, SR_T);
+ tcg_gen_andi_i32(cpu_sr_t, cpu_sr_t, 1);
}
static void gen_goto_tb(DisasContext * ctx, int n, target_ulong dest)
@@ -389,7 +408,8 @@ static void _decode_opc(DisasContext * ctx)
switch (ctx->opcode) {
case 0x0019: /* div0u */
- tcg_gen_andi_i32(cpu_sr, cpu_sr, ~((1u << SR_M) | (1u << SR_Q)));
+ tcg_gen_movi_i32(cpu_sr_m, 0);
+ tcg_gen_movi_i32(cpu_sr_q, 0);
tcg_gen_movi_i32(cpu_sr_t, 0);
return;
case 0x000b: /* rts */
@@ -707,8 +727,8 @@ static void _decode_opc(DisasContext * ctx)
return;
case 0x2007: /* div0s Rm,Rn */
{
- gen_copy_bit_i32(cpu_sr, SR_Q, REG(B11_8), 31); /* SR_Q */
- gen_copy_bit_i32(cpu_sr, SR_M, REG(B7_4), 31); /* SR_M */
+ tcg_gen_shri_i32(cpu_sr_q, REG(B11_8), 31); /* SR_Q */
+ tcg_gen_mov_i32(cpu_sr_m, cpu_sr_q); /* SR_M */
TCGv val = tcg_temp_new();
tcg_gen_xor_i32(cpu_sr_t, REG(B7_4), REG(B11_8));
tcg_gen_shri_i32(cpu_sr_t, cpu_sr_t, 31); /* SR_T */
@@ -716,7 +736,36 @@ static void _decode_opc(DisasContext * ctx)
}
return;
case 0x3004: /* div1 Rm,Rn */
- gen_helper_div1(REG(B11_8), cpu_env, REG(B7_4), REG(B11_8));
+ {
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv zero = tcg_const_i32(0);
+
+ /* shift left arg1, saving the bit being pushed out and inserting
+ T on the right */
+ tcg_gen_shri_i32(t0, REG(B11_8), 31);
+ tcg_gen_shli_i32(REG(B11_8), REG(B11_8), 1);
+ tcg_gen_or_i32(REG(B11_8), REG(B11_8), cpu_sr_t);
+
+ /* add or subtract arg0 from arg1 depending if Q == M */
+ tcg_gen_xor_i32(t1, cpu_sr_q, cpu_sr_m);
+ tcg_gen_subi_i32(t1, t1, 1);
+ tcg_gen_neg_i32(t2, REG(B7_4));
+ tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, zero, REG(B7_4), t2);
+ tcg_gen_add2_i32(REG(B11_8), t1, REG(B11_8), zero, t2, t1);
+
+ /* compute T and Q depending on carry */
+ tcg_gen_andi_i32(t1, t1, 1);
+ tcg_gen_xor_i32(t1, t1, t0);
+ tcg_gen_xori_i32(cpu_sr_t, t1, 1);
+ tcg_gen_xor_i32(cpu_sr_q, cpu_sr_m, t1);
+
+ tcg_temp_free(zero);
+ tcg_temp_free(t2);
+ tcg_temp_free(t1);
+ tcg_temp_free(t0);
+ }
return;
case 0x300d: /* dmuls.l Rm,Rn */
tcg_gen_muls2_i32(cpu_macl, cpu_mach, REG(B7_4), REG(B11_8));
--
1.7.10.4
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [Qemu-devel] [PATCH 7/8] target-sh4: factorize fmov implementation
2013-12-21 16:58 [Qemu-devel] [PATCH 0/8] target-sh4: optimizations and cleanups Aurelien Jarno
` (5 preceding siblings ...)
2013-12-21 16:59 ` [Qemu-devel] [PATCH 6/8] target-sh4: split out Q and M from of SR and optimize div1 Aurelien Jarno
@ 2013-12-21 16:59 ` Aurelien Jarno
2013-12-21 18:52 ` Peter Maydell
2013-12-21 16:59 ` [Qemu-devel] [PATCH 8/8] target-sh4: remove dead code Aurelien Jarno
7 siblings, 1 reply; 11+ messages in thread
From: Aurelien Jarno @ 2013-12-21 16:59 UTC (permalink / raw)
To: qemu-devel; +Cc: Aurelien Jarno
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
target-sh4/translate.c | 31 ++++++++++++++-----------------
1 file changed, 14 insertions(+), 17 deletions(-)
diff --git a/target-sh4/translate.c b/target-sh4/translate.c
index 26b45c1..23d51c6 100644
--- a/target-sh4/translate.c
+++ b/target-sh4/translate.c
@@ -1024,23 +1024,20 @@ static void _decode_opc(DisasContext * ctx)
return;
case 0xf00b: /* fmov {F,D,X}Rm,@-Rn - FPSCR: Nothing */
CHECK_FPU_ENABLED
- if (ctx->flags & FPSCR_SZ) {
- TCGv addr = tcg_temp_new_i32();
- int fr = XREG(B7_4);
- tcg_gen_subi_i32(addr, REG(B11_8), 4);
- tcg_gen_qemu_st32(cpu_fregs[fr+1], addr, ctx->memidx);
- tcg_gen_subi_i32(addr, addr, 4);
- tcg_gen_qemu_st32(cpu_fregs[fr ], addr, ctx->memidx);
- tcg_gen_mov_i32(REG(B11_8), addr);
- tcg_temp_free(addr);
- } else {
- TCGv addr;
- addr = tcg_temp_new_i32();
- tcg_gen_subi_i32(addr, REG(B11_8), 4);
- tcg_gen_qemu_st32(cpu_fregs[FREG(B7_4)], addr, ctx->memidx);
- tcg_gen_mov_i32(REG(B11_8), addr);
- tcg_temp_free(addr);
- }
+ {
+ const int fr = XREG(B7_4);
+ TCGv addr = tcg_temp_new_i32();
+ tcg_gen_subi_i32(addr, REG(B11_8), 4);
+ if (ctx->flags & FPSCR_SZ) {
+ tcg_gen_qemu_st32(cpu_fregs[fr+1], addr, ctx->memidx);
+ tcg_gen_subi_i32(addr, addr, 4);
+ tcg_gen_qemu_st32(cpu_fregs[fr], addr, ctx->memidx);
+ } else {
+ tcg_gen_qemu_st32(cpu_fregs[fr], addr, ctx->memidx);
+ }
+ tcg_gen_mov_i32(REG(B11_8), addr);
+ tcg_temp_free(addr);
+ }
return;
case 0xf006: /* fmov @(R0,Rm),{F,D,X}Rm - FPSCR: Nothing */
CHECK_FPU_ENABLED
--
1.7.10.4
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [Qemu-devel] [PATCH 7/8] target-sh4: factorize fmov implementation
2013-12-21 16:59 ` [Qemu-devel] [PATCH 7/8] target-sh4: factorize fmov implementation Aurelien Jarno
@ 2013-12-21 18:52 ` Peter Maydell
2013-12-22 11:25 ` Aurelien Jarno
0 siblings, 1 reply; 11+ messages in thread
From: Peter Maydell @ 2013-12-21 18:52 UTC (permalink / raw)
To: Aurelien Jarno; +Cc: QEMU Developers
On 21 December 2013 16:59, Aurelien Jarno <aurelien@aurel32.net> wrote:
> Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
> ---
> target-sh4/translate.c | 31 ++++++++++++++-----------------
> 1 file changed, 14 insertions(+), 17 deletions(-)
>
> diff --git a/target-sh4/translate.c b/target-sh4/translate.c
> index 26b45c1..23d51c6 100644
> --- a/target-sh4/translate.c
> +++ b/target-sh4/translate.c
> @@ -1024,23 +1024,20 @@ static void _decode_opc(DisasContext * ctx)
> return;
> case 0xf00b: /* fmov {F,D,X}Rm,@-Rn - FPSCR: Nothing */
> CHECK_FPU_ENABLED
> - if (ctx->flags & FPSCR_SZ) {
> - TCGv addr = tcg_temp_new_i32();
> - int fr = XREG(B7_4);
> - tcg_gen_subi_i32(addr, REG(B11_8), 4);
> - tcg_gen_qemu_st32(cpu_fregs[fr+1], addr, ctx->memidx);
> - tcg_gen_subi_i32(addr, addr, 4);
> - tcg_gen_qemu_st32(cpu_fregs[fr ], addr, ctx->memidx);
> - tcg_gen_mov_i32(REG(B11_8), addr);
> - tcg_temp_free(addr);
> - } else {
> - TCGv addr;
> - addr = tcg_temp_new_i32();
> - tcg_gen_subi_i32(addr, REG(B11_8), 4);
> - tcg_gen_qemu_st32(cpu_fregs[FREG(B7_4)], addr, ctx->memidx);
> - tcg_gen_mov_i32(REG(B11_8), addr);
> - tcg_temp_free(addr);
> - }
> + {
> + const int fr = XREG(B7_4);
> + TCGv addr = tcg_temp_new_i32();
> + tcg_gen_subi_i32(addr, REG(B11_8), 4);
> + if (ctx->flags & FPSCR_SZ) {
> + tcg_gen_qemu_st32(cpu_fregs[fr+1], addr, ctx->memidx);
> + tcg_gen_subi_i32(addr, addr, 4);
> + tcg_gen_qemu_st32(cpu_fregs[fr], addr, ctx->memidx);
> + } else {
> + tcg_gen_qemu_st32(cpu_fregs[fr], addr, ctx->memidx);
> + }
> + tcg_gen_mov_i32(REG(B11_8), addr);
> + tcg_temp_free(addr);
> + }
> return;
> case 0xf006: /* fmov @(R0,Rm),{F,D,X}Rm - FPSCR: Nothing */
> CHECK_FPU_ENABLED
Isn't this going to conflict with the "update to new qemu ld/st ops"
patch you posted earlier?
thanks
-- PMM
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [Qemu-devel] [PATCH 7/8] target-sh4: factorize fmov implementation
2013-12-21 18:52 ` Peter Maydell
@ 2013-12-22 11:25 ` Aurelien Jarno
0 siblings, 0 replies; 11+ messages in thread
From: Aurelien Jarno @ 2013-12-22 11:25 UTC (permalink / raw)
To: Peter Maydell; +Cc: QEMU Developers
On Sat, Dec 21, 2013 at 06:52:26PM +0000, Peter Maydell wrote:
> On 21 December 2013 16:59, Aurelien Jarno <aurelien@aurel32.net> wrote:
> > Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
> > ---
> > target-sh4/translate.c | 31 ++++++++++++++-----------------
> > 1 file changed, 14 insertions(+), 17 deletions(-)
> >
> > diff --git a/target-sh4/translate.c b/target-sh4/translate.c
> > index 26b45c1..23d51c6 100644
> > --- a/target-sh4/translate.c
> > +++ b/target-sh4/translate.c
> > @@ -1024,23 +1024,20 @@ static void _decode_opc(DisasContext * ctx)
> > return;
> > case 0xf00b: /* fmov {F,D,X}Rm,@-Rn - FPSCR: Nothing */
> > CHECK_FPU_ENABLED
> > - if (ctx->flags & FPSCR_SZ) {
> > - TCGv addr = tcg_temp_new_i32();
> > - int fr = XREG(B7_4);
> > - tcg_gen_subi_i32(addr, REG(B11_8), 4);
> > - tcg_gen_qemu_st32(cpu_fregs[fr+1], addr, ctx->memidx);
> > - tcg_gen_subi_i32(addr, addr, 4);
> > - tcg_gen_qemu_st32(cpu_fregs[fr ], addr, ctx->memidx);
> > - tcg_gen_mov_i32(REG(B11_8), addr);
> > - tcg_temp_free(addr);
> > - } else {
> > - TCGv addr;
> > - addr = tcg_temp_new_i32();
> > - tcg_gen_subi_i32(addr, REG(B11_8), 4);
> > - tcg_gen_qemu_st32(cpu_fregs[FREG(B7_4)], addr, ctx->memidx);
> > - tcg_gen_mov_i32(REG(B11_8), addr);
> > - tcg_temp_free(addr);
> > - }
> > + {
> > + const int fr = XREG(B7_4);
> > + TCGv addr = tcg_temp_new_i32();
> > + tcg_gen_subi_i32(addr, REG(B11_8), 4);
> > + if (ctx->flags & FPSCR_SZ) {
> > + tcg_gen_qemu_st32(cpu_fregs[fr+1], addr, ctx->memidx);
> > + tcg_gen_subi_i32(addr, addr, 4);
> > + tcg_gen_qemu_st32(cpu_fregs[fr], addr, ctx->memidx);
> > + } else {
> > + tcg_gen_qemu_st32(cpu_fregs[fr], addr, ctx->memidx);
> > + }
> > + tcg_gen_mov_i32(REG(B11_8), addr);
> > + tcg_temp_free(addr);
> > + }
> > return;
> > case 0xf006: /* fmov @(R0,Rm),{F,D,X}Rm - FPSCR: Nothing */
> > CHECK_FPU_ENABLED
>
> Isn't this going to conflict with the "update to new qemu ld/st ops"
> patch you posted earlier?
They indeed conflicts, and I haven't realized that before as I
developed them separately. This patchset was actually sitting for quite
some time in my git. I'll post a new version soon.
--
Aurelien Jarno GPG: 1024D/F1BCDB73
aurelien@aurel32.net http://www.aurel32.net
^ permalink raw reply [flat|nested] 11+ messages in thread
* [Qemu-devel] [PATCH 8/8] target-sh4: remove dead code
2013-12-21 16:58 [Qemu-devel] [PATCH 0/8] target-sh4: optimizations and cleanups Aurelien Jarno
` (6 preceding siblings ...)
2013-12-21 16:59 ` [Qemu-devel] [PATCH 7/8] target-sh4: factorize fmov implementation Aurelien Jarno
@ 2013-12-21 16:59 ` Aurelien Jarno
7 siblings, 0 replies; 11+ messages in thread
From: Aurelien Jarno @ 2013-12-21 16:59 UTC (permalink / raw)
To: qemu-devel; +Cc: Aurelien Jarno
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
target-sh4/translate.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/target-sh4/translate.c b/target-sh4/translate.c
index 23d51c6..68767c6 100644
--- a/target-sh4/translate.c
+++ b/target-sh4/translate.c
@@ -18,7 +18,6 @@
*/
#define DEBUG_DISAS
-//#define SH4_SINGLE_STEP
#include "cpu.h"
#include "disas/disas.h"
--
1.7.10.4
^ permalink raw reply related [flat|nested] 11+ messages in thread