qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH][RFC] x86: use globals for CPU registers
@ 2009-09-13 21:00 Laurent Desnogues
  2009-09-26 22:41 ` Aurelien Jarno
  2009-09-27 12:51 ` Aurelien Jarno
  0 siblings, 2 replies; 3+ messages in thread
From: Laurent Desnogues @ 2009-09-13 21:00 UTC (permalink / raw)
  To: qemu-devel

[-- Attachment #1: Type: text/plain, Size: 1637 bytes --]

Hello,

This patch proposes using TCG globals for the 8 (i386) or 16
(x86_64) general-purpose CPU registers.

I measured the improvement in the following conditions:

  - Machine:  i7 920
  - Software:  Fedora11 x86_64 gcc 4.4.1
  - Benchmark: SPEC2000 gcc with expr.i input
  - User mode
  - i386 and x86_64 hosts and targets, with and without the patch
    (8 combinations)

The results are:

qemu-i386_on-i386          15.82user 0.05system 0:15.91elapsed
qemu-i386_on-i386-reg      15.40user 0.02system 0:15.43elapsed
qemu-i386_on-x86_64        15.65user 0.05system 0:15.71elapsed
qemu-i386_on-x86_64-reg    15.11user 0.03system 0:15.15elapsed
qemu-x86_64_on-i386        mmap: No such device or address
qemu-x86_64_on-i386-reg    mmap: No such device or address
qemu-x86_64_on-x86_64      18.42user 0.07system 0:18.49elapsed
qemu-x86_64_on-x86_64-reg  13.22user 0.06system 0:13.31elapsed

Given my lack of knowledge of system-mode QEMU, I will leave it to
someone else to measure the speedup there.

A previous version of this patch, which only handled the i386
target, was tested by Malc, who got a speedup running OpenSSL on
his G4.  It was also sent to Fabrice, who asked me to send it to
the mailing list.

The usage of globals is controlled by USE_REGS so that reviewers
can quickly test the benefit (or the lack of it).

Comments are welcome (except about the obvious presence of //
comments, which are only temporary).  I need to optimize a few
things once I'm sure the temporaries (cpu_tmp0, ...) are not used
outside of the modified functions.  The x86_64 part was coded in a
hurry and is perhaps buggy.


Laurent

Signed-off-by: Laurent Desnogues <laurent.desnogues@gmail.com>

[-- Attachment #2: i386-reg2.patch --]
[-- Type: text/x-diff, Size: 22135 bytes --]

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 335fc08..dc2fcde 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -58,10 +58,15 @@
 
 //#define MACRO_TEST   1
 
+#define USE_REGS
+
 /* global register indexes */
 static TCGv_ptr cpu_env;
 static TCGv cpu_A0, cpu_cc_src, cpu_cc_dst, cpu_cc_tmp;
 static TCGv_i32 cpu_cc_op;
+#ifdef USE_REGS
+static TCGv cpu_regs[CPU_NB_REGS];
+#endif
 /* local temps */
 static TCGv cpu_T[2], cpu_T3;
 /* local register indexes (only used inside old micro ops) */
@@ -269,70 +274,95 @@ static inline void gen_op_andl_A0_ffff(void)
 #define REG_LH_OFFSET 4
 #endif
 
+#ifdef USE_REGS
+#ifdef TARGET_X86_64
+/* #warning NYI */
+#endif
+
 static inline void gen_op_mov_reg_v(int ot, int reg, TCGv t0)
 {
+    TCGv tmp;
+
     switch(ot) {
     case OT_BYTE:
+        tmp = tcg_temp_new();
+        tcg_gen_andi_tl(tmp, t0, 0xff);
         if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
-            tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_B_OFFSET);
+            tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xff);
+            tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
+            //tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_B_OFFSET);
         } else {
-            tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
+            tcg_gen_shli_tl(tmp, tmp, 8);
+            tcg_gen_andi_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], ~0xff00);
+            tcg_gen_or_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], tmp);
+            //tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
         }
+        tcg_temp_free(tmp);
         break;
     case OT_WORD:
-        tcg_gen_st16_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
+        tmp = tcg_temp_new();
+        tcg_gen_andi_tl(tmp, t0, 0xffff);
+        tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff);
+        tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
+        tcg_temp_free(tmp);
+        //tcg_gen_st16_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
         break;
 #ifdef TARGET_X86_64
     case OT_LONG:
-        tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
         /* high part of register set to zero */
-        tcg_gen_movi_tl(cpu_tmp0, 0);
-        tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
+        tcg_gen_ext32u_tl(cpu_regs[reg], t0);
+        //tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
+        /* high part of register set to zero */
+        //tcg_gen_movi_tl(cpu_tmp0, 0);
+        //tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
         break;
     default:
     case OT_QUAD:
-        tcg_gen_st_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
+        tcg_gen_mov_tl(cpu_regs[reg], t0);
+        //tcg_gen_st_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
         break;
 #else
     default:
     case OT_LONG:
-        tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
+        tcg_gen_mov_tl(cpu_regs[reg], t0);
+        //tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
         break;
 #endif
     }
 }
 
-static inline void gen_op_mov_reg_T0(int ot, int reg)
-{
-    gen_op_mov_reg_v(ot, reg, cpu_T[0]);
-}
-
-static inline void gen_op_mov_reg_T1(int ot, int reg)
-{
-    gen_op_mov_reg_v(ot, reg, cpu_T[1]);
-}
-
 static inline void gen_op_mov_reg_A0(int size, int reg)
 {
+    TCGv tmp;
+
     switch(size) {
     case 0:
-        tcg_gen_st16_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
+        tmp = tcg_temp_new();
+        tcg_gen_andi_tl(tmp, cpu_A0, 0xffff);
+        tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff);
+        tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
+        tcg_temp_free(tmp);
+        //tcg_gen_st16_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
         break;
 #ifdef TARGET_X86_64
     case 1:
-        tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
         /* high part of register set to zero */
-        tcg_gen_movi_tl(cpu_tmp0, 0);
-        tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
+        tcg_gen_ext32u_tl(cpu_regs[reg], cpu_A0);
+        //tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
+        /* high part of register set to zero */
+        //tcg_gen_movi_tl(cpu_tmp0, 0);
+        //tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
         break;
     default:
     case 2:
-        tcg_gen_st_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]));
+        tcg_gen_mov_tl(cpu_regs[reg], cpu_A0);
+        //tcg_gen_st_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]));
         break;
 #else
     default:
     case 1:
-        tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
+        tcg_gen_mov_tl(cpu_regs[reg], cpu_A0);
+        //tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
         break;
 #endif
     }
@@ -345,59 +375,213 @@ static inline void gen_op_mov_v_reg(int ot, TCGv t0, int reg)
         if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
             goto std_case;
         } else {
-            tcg_gen_ld8u_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
+            tcg_gen_shri_tl(t0, cpu_regs[reg - 4], 8);
+            tcg_gen_andi_tl(t0, t0, 0xff);
+            //tcg_gen_ld8u_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
         }
         break;
     default:
     std_case:
-        tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
+        tcg_gen_mov_tl(t0, cpu_regs[reg]);
+        //tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
         break;
     }
 }
 
-static inline void gen_op_mov_TN_reg(int ot, int t_index, int reg)
+static inline void gen_op_movl_A0_reg(int reg)
 {
-    gen_op_mov_v_reg(ot, cpu_T[t_index], reg);
+    tcg_gen_mov_tl(cpu_A0, cpu_regs[reg]);
+    //tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
 }
 
-static inline void gen_op_movl_A0_reg(int reg)
+static inline void gen_op_add_reg_im(int size, int reg, int32_t val)
 {
-    tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
+    TCGv tmp;
+
+    switch(size) {
+    case 0:
+        // TODO optimize
+        tmp = tcg_temp_new();
+        tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
+        tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
+        tcg_gen_andi_tl(tmp, cpu_tmp0, 0xffff);
+        tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff);
+        tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
+        tcg_temp_free(tmp);
+        //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
+        //tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
+        //tcg_gen_st16_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
+        break;
+    case 1:
+        // TODO optimize
+        tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
+        tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
+#ifdef TARGET_X86_64
+        tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0xffffffff);
+#endif
+        tcg_gen_mov_tl(cpu_regs[reg], cpu_tmp0);
+        //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
+        //tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
+        //#ifdef TARGET_X86_64
+        //tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0xffffffff);
+        //#endif
+        //tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
+        break;
+#ifdef TARGET_X86_64
+    case 2:
+        tcg_gen_addi_tl(cpu_regs[reg], cpu_regs[reg], val);
+        //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
+        //tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
+        //tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
+        break;
+#endif
+    }
 }
 
-static inline void gen_op_addl_A0_im(int32_t val)
+static inline void gen_op_add_reg_T0(int size, int reg)
 {
-    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
+    TCGv tmp;
+
+    switch(size) {
+    case 0:
+        // TODO optimize
+        tmp = tcg_temp_new();
+        tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
+        tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
+        tcg_gen_andi_tl(tmp, cpu_tmp0, 0xffff);
+        tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff);
+        tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
+        tcg_temp_free(tmp);
+        //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
+        //tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
+        //tcg_gen_st16_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
+        break;
+    case 1:
+        // TODO optimize
+        tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
+        tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
 #ifdef TARGET_X86_64
-    tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
+        tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0xffffffff);
+#endif
+        tcg_gen_mov_tl(cpu_regs[reg], cpu_tmp0);
+        //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
+        //tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
+        //#ifdef TARGET_X86_64
+        //tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0xffffffff);
+        //#endif
+        //tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
+        break;
+#ifdef TARGET_X86_64
+    case 2:
+        tcg_gen_add_tl(cpu_regs[reg], cpu_regs[reg], cpu_T[0]);
+        //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
+        //tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
+        //tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
+        break;
 #endif
+    }
 }
 
-#ifdef TARGET_X86_64
-static inline void gen_op_addq_A0_im(int64_t val)
+static inline void gen_op_addl_A0_reg_sN(int shift, int reg)
 {
-    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
+    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
+    if (shift != 0)
+        tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
+    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
+#ifdef TARGET_X86_64
+    tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
+#endif
+
+    //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
+    //if (shift != 0)
+    //    tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
+    //tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
+    //#ifdef TARGET_X86_64
+    //tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
+    //#endif
 }
+
+#else
+
+static inline void gen_op_mov_reg_v(int ot, int reg, TCGv t0)
+{
+    switch(ot) {
+    case OT_BYTE:
+        if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
+            tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_B_OFFSET);
+        } else {
+            tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
+        }
+        break;
+    case OT_WORD:
+        tcg_gen_st16_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
+        break;
+#ifdef TARGET_X86_64
+    case OT_LONG:
+        tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
+        /* high part of register set to zero */
+        tcg_gen_movi_tl(cpu_tmp0, 0);
+        tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
+        break;
+    default:
+    case OT_QUAD:
+        tcg_gen_st_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
+        break;
+#else
+    default:
+    case OT_LONG:
+        tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
+        break;
 #endif
-    
-static void gen_add_A0_im(DisasContext *s, int val)
+    }
+}
+
+static inline void gen_op_mov_reg_A0(int size, int reg)
 {
+    switch(size) {
+    case 0:
+        tcg_gen_st16_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
+        break;
 #ifdef TARGET_X86_64
-    if (CODE64(s))
-        gen_op_addq_A0_im(val);
-    else
+    case 1:
+        tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
+        /* high part of register set to zero */
+        tcg_gen_movi_tl(cpu_tmp0, 0);
+        tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
+        break;
+    default:
+    case 2:
+        tcg_gen_st_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]));
+        break;
+#else
+    default:
+    case 1:
+        tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
+        break;
 #endif
-        gen_op_addl_A0_im(val);
+    }
 }
 
-static inline void gen_op_addl_T0_T1(void)
+static inline void gen_op_mov_v_reg(int ot, TCGv t0, int reg)
 {
-    tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+    switch(ot) {
+    case OT_BYTE:
+        if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
+            goto std_case;
+        } else {
+            tcg_gen_ld8u_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
+        }
+        break;
+    default:
+    std_case:
+        tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
+        break;
+    }
 }
 
-static inline void gen_op_jmp_T0(void)
+static inline void gen_op_movl_A0_reg(int reg)
 {
-    tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUState, eip));
+    tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
 }
 
 static inline void gen_op_add_reg_im(int size, int reg, int32_t val)
@@ -452,15 +636,10 @@ static inline void gen_op_add_reg_T0(int size, int reg)
     }
 }
 
-static inline void gen_op_set_cc_op(int32_t val)
-{
-    tcg_gen_movi_i32(cpu_cc_op, val);
-}
-
 static inline void gen_op_addl_A0_reg_sN(int shift, int reg)
 {
     tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
-    if (shift != 0) 
+    if (shift != 0)
         tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
     tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
 #ifdef TARGET_X86_64
@@ -468,6 +647,63 @@ static inline void gen_op_addl_A0_reg_sN(int shift, int reg)
 #endif
 }
 
+#endif
+
+static inline void gen_op_mov_reg_T0(int ot, int reg)
+{
+    gen_op_mov_reg_v(ot, reg, cpu_T[0]);
+}
+
+static inline void gen_op_mov_reg_T1(int ot, int reg)
+{
+    gen_op_mov_reg_v(ot, reg, cpu_T[1]);
+}
+
+static inline void gen_op_mov_TN_reg(int ot, int t_index, int reg)
+{
+    gen_op_mov_v_reg(ot, cpu_T[t_index], reg);
+}
+
+static inline void gen_op_addl_A0_im(int32_t val)
+{
+    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
+#ifdef TARGET_X86_64
+    tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
+#endif
+}
+
+#ifdef TARGET_X86_64
+static inline void gen_op_addq_A0_im(int64_t val)
+{
+    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
+}
+#endif
+    
+static void gen_add_A0_im(DisasContext *s, int val)
+{
+#ifdef TARGET_X86_64
+    if (CODE64(s))
+        gen_op_addq_A0_im(val);
+    else
+#endif
+        gen_op_addl_A0_im(val);
+}
+
+static inline void gen_op_addl_T0_T1(void)
+{
+    tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+}
+
+static inline void gen_op_jmp_T0(void)
+{
+    tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUState, eip));
+}
+
+static inline void gen_op_set_cc_op(int32_t val)
+{
+    tcg_gen_movi_i32(cpu_cc_op, val);
+}
+
 static inline void gen_op_movl_A0_seg(int reg)
 {
     tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, segs[reg].base) + REG_L_OFFSET);
@@ -496,13 +732,21 @@ static inline void gen_op_addq_A0_seg(int reg)
 
 static inline void gen_op_movq_A0_reg(int reg)
 {
+#ifdef USE_REGS
+    tcg_gen_mov_tl(cpu_A0, cpu_regs[reg]);
+#else
     tcg_gen_ld_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]));
+#endif
 }
 
 static inline void gen_op_addq_A0_reg_sN(int shift, int reg)
 {
+#ifdef USE_REGS
+    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
+#else
     tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
-    if (shift != 0) 
+#endif
+    if (shift != 0)
         tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
     tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
 }
@@ -701,14 +945,22 @@ static void gen_exts(int ot, TCGv reg)
 
 static inline void gen_op_jnz_ecx(int size, int label1)
 {
+#ifdef USE_REGS
+    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
+#else
     tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[R_ECX]));
+#endif
     gen_extu(size + 1, cpu_tmp0);
     tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, label1);
 }
 
 static inline void gen_op_jz_ecx(int size, int label1)
 {
+#ifdef USE_REGS
+    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
+#else
     tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[R_ECX]));
+#endif
     gen_extu(size + 1, cpu_tmp0);
     tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
 }
@@ -4834,7 +5086,11 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                 rm = 0; /* avoid warning */
             }
             label1 = gen_new_label();
+#ifdef USE_REGS
+            tcg_gen_mov_tl(t2, cpu_regs[R_EAX]);
+#else
             tcg_gen_ld_tl(t2, cpu_env, offsetof(CPUState, regs[R_EAX]));
+#endif
             tcg_gen_sub_tl(t2, t2, t0);
             gen_extu(ot, t2);
             tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1);
@@ -5409,7 +5665,11 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             val = ldub_code(s->pc++);
             tcg_gen_movi_tl(cpu_T3, val);
         } else {
+#ifdef USE_REGS
+            tcg_gen_mov_tl(cpu_T3, cpu_regs[R_ECX]);
+#else
             tcg_gen_ld_tl(cpu_T3, cpu_env, offsetof(CPUState, regs[R_ECX]));
+#endif
         }
         gen_shiftd_rm_T1_T3(s, ot, opreg, op);
         break;
@@ -6317,10 +6577,18 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                 /* XXX: specific Intel behaviour ? */
                 l1 = gen_new_label();
                 gen_jcc1(s, s->cc_op, b ^ 1, l1);
+#ifdef USE_REGS
+                tcg_gen_mov_tl(cpu_regs[reg], t0);
+#else
                 tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
+#endif
                 gen_set_label(l1);
+#ifdef USE_REGS
+                tcg_gen_ext32u_tl(cpu_regs[reg], cpu_regs[reg]);
+#else
                 tcg_gen_movi_tl(cpu_tmp0, 0);
                 tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
+#endif
             } else
 #endif
             {
@@ -7588,6 +7856,60 @@ void optimize_flags_init(void)
     cpu_cc_tmp = tcg_global_mem_new(TCG_AREG0, offsetof(CPUState, cc_tmp),
                                     "cc_tmp");
 
+#ifdef USE_REGS
+#ifdef TARGET_X86_64
+    cpu_regs[R_EAX] = tcg_global_mem_new_i64(TCG_AREG0,
+                                             offsetof(CPUState, regs[R_EAX]), "rax");
+    cpu_regs[R_ECX] = tcg_global_mem_new_i64(TCG_AREG0,
+                                             offsetof(CPUState, regs[R_ECX]), "rcx");
+    cpu_regs[R_EDX] = tcg_global_mem_new_i64(TCG_AREG0,
+                                             offsetof(CPUState, regs[R_EDX]), "rdx");
+    cpu_regs[R_EBX] = tcg_global_mem_new_i64(TCG_AREG0,
+                                             offsetof(CPUState, regs[R_EBX]), "rbx");
+    cpu_regs[R_ESP] = tcg_global_mem_new_i64(TCG_AREG0,
+                                             offsetof(CPUState, regs[R_ESP]), "rsp");
+    cpu_regs[R_EBP] = tcg_global_mem_new_i64(TCG_AREG0,
+                                             offsetof(CPUState, regs[R_EBP]), "rbp");
+    cpu_regs[R_ESI] = tcg_global_mem_new_i64(TCG_AREG0,
+                                             offsetof(CPUState, regs[R_ESI]), "rsi");
+    cpu_regs[R_EDI] = tcg_global_mem_new_i64(TCG_AREG0,
+                                             offsetof(CPUState, regs[R_EDI]), "rdi");
+    cpu_regs[8] = tcg_global_mem_new_i64(TCG_AREG0,
+                                         offsetof(CPUState, regs[8]), "r8");
+    cpu_regs[9] = tcg_global_mem_new_i64(TCG_AREG0,
+                                          offsetof(CPUState, regs[9]), "r9");
+    cpu_regs[10] = tcg_global_mem_new_i64(TCG_AREG0,
+                                          offsetof(CPUState, regs[10]), "r10");
+    cpu_regs[11] = tcg_global_mem_new_i64(TCG_AREG0,
+                                          offsetof(CPUState, regs[11]), "r11");
+    cpu_regs[12] = tcg_global_mem_new_i64(TCG_AREG0,
+                                          offsetof(CPUState, regs[12]), "r12");
+    cpu_regs[13] = tcg_global_mem_new_i64(TCG_AREG0,
+                                          offsetof(CPUState, regs[13]), "r13");
+    cpu_regs[14] = tcg_global_mem_new_i64(TCG_AREG0,
+                                          offsetof(CPUState, regs[14]), "r14");
+    cpu_regs[15] = tcg_global_mem_new_i64(TCG_AREG0,
+                                          offsetof(CPUState, regs[15]), "r15");
+#else
+    cpu_regs[R_EAX] = tcg_global_mem_new_i32(TCG_AREG0,
+                                             offsetof(CPUState, regs[R_EAX]), "eax");
+    cpu_regs[R_ECX] = tcg_global_mem_new_i32(TCG_AREG0,
+                                             offsetof(CPUState, regs[R_ECX]), "ecx");
+    cpu_regs[R_EDX] = tcg_global_mem_new_i32(TCG_AREG0,
+                                             offsetof(CPUState, regs[R_EDX]), "edx");
+    cpu_regs[R_EBX] = tcg_global_mem_new_i32(TCG_AREG0,
+                                             offsetof(CPUState, regs[R_EBX]), "ebx");
+    cpu_regs[R_ESP] = tcg_global_mem_new_i32(TCG_AREG0,
+                                             offsetof(CPUState, regs[R_ESP]), "esp");
+    cpu_regs[R_EBP] = tcg_global_mem_new_i32(TCG_AREG0,
+                                             offsetof(CPUState, regs[R_EBP]), "ebp");
+    cpu_regs[R_ESI] = tcg_global_mem_new_i32(TCG_AREG0,
+                                             offsetof(CPUState, regs[R_ESI]), "esi");
+    cpu_regs[R_EDI] = tcg_global_mem_new_i32(TCG_AREG0,
+                                             offsetof(CPUState, regs[R_EDI]), "edi");
+#endif
+#endif
+
     /* register helpers */
 #define GEN_HELPER 2
 #include "helper.h"

^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2009-09-27 12:51 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-09-13 21:00 [Qemu-devel] [PATCH][RFC] x86: use globals for CPU registers Laurent Desnogues
2009-09-26 22:41 ` Aurelien Jarno
2009-09-27 12:51 ` Aurelien Jarno

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).