From: Laurent Desnogues <laurent.desnogues@gmail.com>
To: qemu-devel@nongnu.org
Subject: [Qemu-devel] [PATCH][RFC] x86: use globals for CPU registers
Date: Sun, 13 Sep 2009 23:00:08 +0200
Message-ID: <761ea48b0909131400i33efc212nce026adb75a4f5d2@mail.gmail.com>
[-- Attachment #1: Type: text/plain, Size: 1637 bytes --]
Hello,
this patch proposes using TCG globals for the 8 or 16 CPU
registers of the i386 and x86_64 targets, replacing the explicit
loads and stores to the env structure on every access.
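To give the idea up front, here is a condensed sketch of the
transformation (see gen_op_mov_v_reg in the attached diff for the
real code): a register read changes from a memory load into a
plain TCG move, which the TCG register allocator can then keep in
a host register across ops:

    /* before: every access goes through the env structure */
    tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));

    /* after: a move between TCG values; cpu_regs[reg] is a global
       that the register allocator may cache in a host register */
    tcg_gen_mov_tl(t0, cpu_regs[reg]);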
I measured the improvement in the following conditions:
- Machine: i7 920
- Software: Fedora 11 x86_64, gcc 4.4.1
- Benchmark: SPEC2000 gcc with expr.i input
- User mode
- i386 and x86_64 hosts and targets, with and without the patch
(8 combinations)
The results are:
qemu-i386_on-i386 15.82user 0.05system 0:15.91elapsed
qemu-i386_on-i386-reg 15.40user 0.02system 0:15.43elapsed
qemu-i386_on-x86_64 15.65user 0.05system 0:15.71elapsed
qemu-i386_on-x86_64-reg 15.11user 0.03system 0:15.15elapsed
qemu-x86_64_on-i386 mmap: No such device or address
qemu-x86_64_on-i386-reg mmap: No such device or address
qemu-x86_64_on-x86_64 18.42user 0.07system 0:18.49elapsed
qemu-x86_64_on-x86_64-reg 13.22user 0.06system 0:13.31elapsed
(The x86_64-on-i386 combination fails to start with or without
the patch, hence no numbers for it.)
Given my lack of knowledge of system QEMU, I will leave
measuring the system emulation speedup to someone else.
A previous version of this patch, which only handled the i386
target, was tested by Malc, who measured a speedup running
OpenSSL on his G4. It was also sent to Fabrice, who asked me to
post it to the mailing list.
The use of globals is controlled by the USE_REGS define so that
reviewers can quickly test the benefit (or lack thereof).
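All new code paths are guarded and the old ld/st paths are kept
under #else, so commenting out the define at the top of
translate.c restores the current behaviour:

    #define USE_REGS   /* comment out to get the old ld/st code back */

    #ifdef USE_REGS
    static TCGv cpu_regs[CPU_NB_REGS];
    #endif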
Comments are welcome (except about the obvious presence of
//-style commented-out code, which is only temporary). A few
sequences still need optimizing once I am sure the temporaries
(cpu_tmp0, ...) are not used outside the modified functions.
The x86_64 part was written in a hurry and may be buggy.
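One pattern worth calling out for review: partial-register
writes can no longer use st8/st16 into env, so with a global
they become mask-and-merge sequences, e.g. for a write to
AH/CH/DH/BH (taken from gen_op_mov_reg_v below; these are among
the sequences that still need optimizing):

    tmp = tcg_temp_new();
    tcg_gen_andi_tl(tmp, t0, 0xff);
    tcg_gen_shli_tl(tmp, tmp, 8);
    tcg_gen_andi_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], ~0xff00);
    tcg_gen_or_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], tmp);
    tcg_temp_free(tmp);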
Laurent
Signed-off-by: Laurent Desnogues <laurent.desnogues@gmail.com>
[-- Attachment #2: i386-reg2.patch --]
[-- Type: text/x-diff, Size: 22135 bytes --]
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 335fc08..dc2fcde 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -58,10 +58,15 @@
//#define MACRO_TEST 1
+#define USE_REGS
+
/* global register indexes */
static TCGv_ptr cpu_env;
static TCGv cpu_A0, cpu_cc_src, cpu_cc_dst, cpu_cc_tmp;
static TCGv_i32 cpu_cc_op;
+#ifdef USE_REGS
+static TCGv cpu_regs[CPU_NB_REGS];
+#endif
/* local temps */
static TCGv cpu_T[2], cpu_T3;
/* local register indexes (only used inside old micro ops) */
@@ -269,70 +274,95 @@ static inline void gen_op_andl_A0_ffff(void)
#define REG_LH_OFFSET 4
#endif
+#ifdef USE_REGS
+#ifdef TARGET_X86_64
+/* #warning NYI */
+#endif
+
static inline void gen_op_mov_reg_v(int ot, int reg, TCGv t0)
{
+ TCGv tmp;
+
switch(ot) {
case OT_BYTE:
+ tmp = tcg_temp_new();
+ tcg_gen_andi_tl(tmp, t0, 0xff);
if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
- tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_B_OFFSET);
+ tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xff);
+ tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
+ //tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_B_OFFSET);
} else {
- tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
+ tcg_gen_shli_tl(tmp, tmp, 8);
+ tcg_gen_andi_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], ~0xff00);
+ tcg_gen_or_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], tmp);
+ //tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
}
+ tcg_temp_free(tmp);
break;
case OT_WORD:
- tcg_gen_st16_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
+ tmp = tcg_temp_new();
+ tcg_gen_andi_tl(tmp, t0, 0xffff);
+ tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff);
+ tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
+ tcg_temp_free(tmp);
+ //tcg_gen_st16_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
break;
#ifdef TARGET_X86_64
case OT_LONG:
- tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
/* high part of register set to zero */
- tcg_gen_movi_tl(cpu_tmp0, 0);
- tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
+ tcg_gen_ext32u_tl(cpu_regs[reg], t0);
+ //tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
+ /* high part of register set to zero */
+ //tcg_gen_movi_tl(cpu_tmp0, 0);
+ //tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
break;
default:
case OT_QUAD:
- tcg_gen_st_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
+ tcg_gen_mov_tl(cpu_regs[reg], t0);
+ //tcg_gen_st_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
break;
#else
default:
case OT_LONG:
- tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
+ tcg_gen_mov_tl(cpu_regs[reg], t0);
+ //tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
break;
#endif
}
}
-static inline void gen_op_mov_reg_T0(int ot, int reg)
-{
- gen_op_mov_reg_v(ot, reg, cpu_T[0]);
-}
-
-static inline void gen_op_mov_reg_T1(int ot, int reg)
-{
- gen_op_mov_reg_v(ot, reg, cpu_T[1]);
-}
-
static inline void gen_op_mov_reg_A0(int size, int reg)
{
+ TCGv tmp;
+
switch(size) {
case 0:
- tcg_gen_st16_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
+ tmp = tcg_temp_new();
+ tcg_gen_andi_tl(tmp, cpu_A0, 0xffff);
+ tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff);
+ tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
+ tcg_temp_free(tmp);
+ //tcg_gen_st16_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
break;
#ifdef TARGET_X86_64
case 1:
- tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
/* high part of register set to zero */
- tcg_gen_movi_tl(cpu_tmp0, 0);
- tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
+ tcg_gen_ext32u_tl(cpu_regs[reg], cpu_A0);
+ //tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
+ /* high part of register set to zero */
+ //tcg_gen_movi_tl(cpu_tmp0, 0);
+ //tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
break;
default:
case 2:
- tcg_gen_st_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]));
+ tcg_gen_mov_tl(cpu_regs[reg], cpu_A0);
+ //tcg_gen_st_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]));
break;
#else
default:
case 1:
- tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
+ tcg_gen_mov_tl(cpu_regs[reg], cpu_A0);
+ //tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
break;
#endif
}
@@ -345,59 +375,213 @@ static inline void gen_op_mov_v_reg(int ot, TCGv t0, int reg)
if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
goto std_case;
} else {
- tcg_gen_ld8u_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
+ tcg_gen_shri_tl(t0, cpu_regs[reg - 4], 8);
+ tcg_gen_andi_tl(t0, t0, 0xff);
+ //tcg_gen_ld8u_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
}
break;
default:
std_case:
- tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
+ tcg_gen_mov_tl(t0, cpu_regs[reg]);
+ //tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
break;
}
}
-static inline void gen_op_mov_TN_reg(int ot, int t_index, int reg)
+static inline void gen_op_movl_A0_reg(int reg)
{
- gen_op_mov_v_reg(ot, cpu_T[t_index], reg);
+ tcg_gen_mov_tl(cpu_A0, cpu_regs[reg]);
+ //tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
}
-static inline void gen_op_movl_A0_reg(int reg)
+static inline void gen_op_add_reg_im(int size, int reg, int32_t val)
{
- tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
+ TCGv tmp;
+
+ switch(size) {
+ case 0:
+ // TODO optimize
+ tmp = tcg_temp_new();
+ tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
+ tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
+ tcg_gen_andi_tl(tmp, cpu_tmp0, 0xffff);
+ tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff);
+ tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
+ tcg_temp_free(tmp);
+ //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
+ //tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
+ //tcg_gen_st16_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
+ break;
+ case 1:
+ // TODO optimize
+ tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
+ tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
+#ifdef TARGET_X86_64
+ tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0xffffffff);
+#endif
+ tcg_gen_mov_tl(cpu_regs[reg], cpu_tmp0);
+ //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
+ //tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
+ //#ifdef TARGET_X86_64
+ //tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0xffffffff);
+ //#endif
+ //tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
+ break;
+#ifdef TARGET_X86_64
+ case 2:
+ tcg_gen_addi_tl(cpu_regs[reg], cpu_regs[reg], val);
+ //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
+ //tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
+ //tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
+ break;
+#endif
+ }
}
-static inline void gen_op_addl_A0_im(int32_t val)
+static inline void gen_op_add_reg_T0(int size, int reg)
{
- tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
+ TCGv tmp;
+
+ switch(size) {
+ case 0:
+ // TODO optimize
+ tmp = tcg_temp_new();
+ tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
+ tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
+ tcg_gen_andi_tl(tmp, cpu_tmp0, 0xffff);
+ tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff);
+ tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
+ tcg_temp_free(tmp);
+ //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
+ //tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
+ //tcg_gen_st16_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
+ break;
+ case 1:
+ // TODO optimize
+ tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
+ tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
#ifdef TARGET_X86_64
- tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
+ tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0xffffffff);
+#endif
+ tcg_gen_mov_tl(cpu_regs[reg], cpu_tmp0);
+ //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
+ //tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
+ //#ifdef TARGET_X86_64
+ //tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0xffffffff);
+ //#endif
+ //tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
+ break;
+#ifdef TARGET_X86_64
+ case 2:
+ tcg_gen_add_tl(cpu_regs[reg], cpu_regs[reg], cpu_T[0]);
+ //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
+ //tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
+ //tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
+ break;
#endif
+ }
}
-#ifdef TARGET_X86_64
-static inline void gen_op_addq_A0_im(int64_t val)
+static inline void gen_op_addl_A0_reg_sN(int shift, int reg)
{
- tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
+ tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
+ if (shift != 0)
+ tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
+ tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
+#ifdef TARGET_X86_64
+ tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
+#endif
+
+ //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
+ //if (shift != 0)
+ // tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
+ //tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
+ //#ifdef TARGET_X86_64
+ //tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
+ //#endif
}
+
+#else
+
+static inline void gen_op_mov_reg_v(int ot, int reg, TCGv t0)
+{
+ switch(ot) {
+ case OT_BYTE:
+ if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
+ tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_B_OFFSET);
+ } else {
+ tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
+ }
+ break;
+ case OT_WORD:
+ tcg_gen_st16_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
+ break;
+#ifdef TARGET_X86_64
+ case OT_LONG:
+ tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
+ /* high part of register set to zero */
+ tcg_gen_movi_tl(cpu_tmp0, 0);
+ tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
+ break;
+ default:
+ case OT_QUAD:
+ tcg_gen_st_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
+ break;
+#else
+ default:
+ case OT_LONG:
+ tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
+ break;
#endif
-
-static void gen_add_A0_im(DisasContext *s, int val)
+ }
+}
+
+static inline void gen_op_mov_reg_A0(int size, int reg)
{
+ switch(size) {
+ case 0:
+ tcg_gen_st16_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
+ break;
#ifdef TARGET_X86_64
- if (CODE64(s))
- gen_op_addq_A0_im(val);
- else
+ case 1:
+ tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
+ /* high part of register set to zero */
+ tcg_gen_movi_tl(cpu_tmp0, 0);
+ tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
+ break;
+ default:
+ case 2:
+ tcg_gen_st_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]));
+ break;
+#else
+ default:
+ case 1:
+ tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
+ break;
#endif
- gen_op_addl_A0_im(val);
+ }
}
-static inline void gen_op_addl_T0_T1(void)
+static inline void gen_op_mov_v_reg(int ot, TCGv t0, int reg)
{
- tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+ switch(ot) {
+ case OT_BYTE:
+ if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
+ goto std_case;
+ } else {
+ tcg_gen_ld8u_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
+ }
+ break;
+ default:
+ std_case:
+ tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
+ break;
+ }
}
-static inline void gen_op_jmp_T0(void)
+static inline void gen_op_movl_A0_reg(int reg)
{
- tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUState, eip));
+ tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
}
static inline void gen_op_add_reg_im(int size, int reg, int32_t val)
@@ -452,15 +636,10 @@ static inline void gen_op_add_reg_T0(int size, int reg)
}
}
-static inline void gen_op_set_cc_op(int32_t val)
-{
- tcg_gen_movi_i32(cpu_cc_op, val);
-}
-
static inline void gen_op_addl_A0_reg_sN(int shift, int reg)
{
tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
- if (shift != 0)
+ if (shift != 0)
tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
#ifdef TARGET_X86_64
@@ -468,6 +647,63 @@ static inline void gen_op_addl_A0_reg_sN(int shift, int reg)
#endif
}
+#endif
+
+static inline void gen_op_mov_reg_T0(int ot, int reg)
+{
+ gen_op_mov_reg_v(ot, reg, cpu_T[0]);
+}
+
+static inline void gen_op_mov_reg_T1(int ot, int reg)
+{
+ gen_op_mov_reg_v(ot, reg, cpu_T[1]);
+}
+
+static inline void gen_op_mov_TN_reg(int ot, int t_index, int reg)
+{
+ gen_op_mov_v_reg(ot, cpu_T[t_index], reg);
+}
+
+static inline void gen_op_addl_A0_im(int32_t val)
+{
+ tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
+#ifdef TARGET_X86_64
+ tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
+#endif
+}
+
+#ifdef TARGET_X86_64
+static inline void gen_op_addq_A0_im(int64_t val)
+{
+ tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
+}
+#endif
+
+static void gen_add_A0_im(DisasContext *s, int val)
+{
+#ifdef TARGET_X86_64
+ if (CODE64(s))
+ gen_op_addq_A0_im(val);
+ else
+#endif
+ gen_op_addl_A0_im(val);
+}
+
+static inline void gen_op_addl_T0_T1(void)
+{
+ tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+}
+
+static inline void gen_op_jmp_T0(void)
+{
+ tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUState, eip));
+}
+
+static inline void gen_op_set_cc_op(int32_t val)
+{
+ tcg_gen_movi_i32(cpu_cc_op, val);
+}
+
static inline void gen_op_movl_A0_seg(int reg)
{
tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, segs[reg].base) + REG_L_OFFSET);
@@ -496,13 +732,21 @@ static inline void gen_op_addq_A0_seg(int reg)
static inline void gen_op_movq_A0_reg(int reg)
{
+#ifdef USE_REGS
+ tcg_gen_mov_tl(cpu_A0, cpu_regs[reg]);
+#else
tcg_gen_ld_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]));
+#endif
}
static inline void gen_op_addq_A0_reg_sN(int shift, int reg)
{
+#ifdef USE_REGS
+ tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
+#else
tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
- if (shift != 0)
+#endif
+ if (shift != 0)
tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
}
@@ -701,14 +945,22 @@ static void gen_exts(int ot, TCGv reg)
static inline void gen_op_jnz_ecx(int size, int label1)
{
+#ifdef USE_REGS
+ tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
+#else
tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[R_ECX]));
+#endif
gen_extu(size + 1, cpu_tmp0);
tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, label1);
}
static inline void gen_op_jz_ecx(int size, int label1)
{
+#ifdef USE_REGS
+ tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
+#else
tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[R_ECX]));
+#endif
gen_extu(size + 1, cpu_tmp0);
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
}
@@ -4834,7 +5086,11 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
rm = 0; /* avoid warning */
}
label1 = gen_new_label();
+#ifdef USE_REGS
+ tcg_gen_mov_tl(t2, cpu_regs[R_EAX]);
+#else
tcg_gen_ld_tl(t2, cpu_env, offsetof(CPUState, regs[R_EAX]));
+#endif
tcg_gen_sub_tl(t2, t2, t0);
gen_extu(ot, t2);
tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1);
@@ -5409,7 +5665,11 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
val = ldub_code(s->pc++);
tcg_gen_movi_tl(cpu_T3, val);
} else {
+#ifdef USE_REGS
+ tcg_gen_mov_tl(cpu_T3, cpu_regs[R_ECX]);
+#else
tcg_gen_ld_tl(cpu_T3, cpu_env, offsetof(CPUState, regs[R_ECX]));
+#endif
}
gen_shiftd_rm_T1_T3(s, ot, opreg, op);
break;
@@ -6317,10 +6577,18 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
/* XXX: specific Intel behaviour ? */
l1 = gen_new_label();
gen_jcc1(s, s->cc_op, b ^ 1, l1);
+#ifdef USE_REGS
+ tcg_gen_mov_tl(cpu_regs[reg], t0);
+#else
tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
+#endif
gen_set_label(l1);
+#ifdef USE_REGS
+ tcg_gen_ext32u_tl(cpu_regs[reg], cpu_regs[reg]);
+#else
tcg_gen_movi_tl(cpu_tmp0, 0);
tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
+#endif
} else
#endif
{
@@ -7588,6 +7856,60 @@ void optimize_flags_init(void)
cpu_cc_tmp = tcg_global_mem_new(TCG_AREG0, offsetof(CPUState, cc_tmp),
"cc_tmp");
+#ifdef USE_REGS
+#ifdef TARGET_X86_64
+ cpu_regs[R_EAX] = tcg_global_mem_new_i64(TCG_AREG0,
+ offsetof(CPUState, regs[R_EAX]), "rax");
+ cpu_regs[R_ECX] = tcg_global_mem_new_i64(TCG_AREG0,
+ offsetof(CPUState, regs[R_ECX]), "rcx");
+ cpu_regs[R_EDX] = tcg_global_mem_new_i64(TCG_AREG0,
+ offsetof(CPUState, regs[R_EDX]), "rdx");
+ cpu_regs[R_EBX] = tcg_global_mem_new_i64(TCG_AREG0,
+ offsetof(CPUState, regs[R_EBX]), "rbx");
+ cpu_regs[R_ESP] = tcg_global_mem_new_i64(TCG_AREG0,
+ offsetof(CPUState, regs[R_ESP]), "rsp");
+ cpu_regs[R_EBP] = tcg_global_mem_new_i64(TCG_AREG0,
+ offsetof(CPUState, regs[R_EBP]), "rbp");
+ cpu_regs[R_ESI] = tcg_global_mem_new_i64(TCG_AREG0,
+ offsetof(CPUState, regs[R_ESI]), "rsi");
+ cpu_regs[R_EDI] = tcg_global_mem_new_i64(TCG_AREG0,
+ offsetof(CPUState, regs[R_EDI]), "rdi");
+ cpu_regs[8] = tcg_global_mem_new_i64(TCG_AREG0,
+ offsetof(CPUState, regs[8]), "r8");
+ cpu_regs[9] = tcg_global_mem_new_i64(TCG_AREG0,
+ offsetof(CPUState, regs[9]), "r9");
+ cpu_regs[10] = tcg_global_mem_new_i64(TCG_AREG0,
+ offsetof(CPUState, regs[10]), "r10");
+ cpu_regs[11] = tcg_global_mem_new_i64(TCG_AREG0,
+ offsetof(CPUState, regs[11]), "r11");
+ cpu_regs[12] = tcg_global_mem_new_i64(TCG_AREG0,
+ offsetof(CPUState, regs[12]), "r12");
+ cpu_regs[13] = tcg_global_mem_new_i64(TCG_AREG0,
+ offsetof(CPUState, regs[13]), "r13");
+ cpu_regs[14] = tcg_global_mem_new_i64(TCG_AREG0,
+ offsetof(CPUState, regs[14]), "r14");
+ cpu_regs[15] = tcg_global_mem_new_i64(TCG_AREG0,
+ offsetof(CPUState, regs[15]), "r15");
+#else
+ cpu_regs[R_EAX] = tcg_global_mem_new_i32(TCG_AREG0,
+ offsetof(CPUState, regs[R_EAX]), "eax");
+ cpu_regs[R_ECX] = tcg_global_mem_new_i32(TCG_AREG0,
+ offsetof(CPUState, regs[R_ECX]), "ecx");
+ cpu_regs[R_EDX] = tcg_global_mem_new_i32(TCG_AREG0,
+ offsetof(CPUState, regs[R_EDX]), "edx");
+ cpu_regs[R_EBX] = tcg_global_mem_new_i32(TCG_AREG0,
+ offsetof(CPUState, regs[R_EBX]), "ebx");
+ cpu_regs[R_ESP] = tcg_global_mem_new_i32(TCG_AREG0,
+ offsetof(CPUState, regs[R_ESP]), "esp");
+ cpu_regs[R_EBP] = tcg_global_mem_new_i32(TCG_AREG0,
+ offsetof(CPUState, regs[R_EBP]), "ebp");
+ cpu_regs[R_ESI] = tcg_global_mem_new_i32(TCG_AREG0,
+ offsetof(CPUState, regs[R_ESI]), "esi");
+ cpu_regs[R_EDI] = tcg_global_mem_new_i32(TCG_AREG0,
+ offsetof(CPUState, regs[R_EDI]), "edi");
+#endif
+#endif
+
/* register helpers */
#define GEN_HELPER 2
#include "helper.h"