qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg
@ 2018-09-11 20:28 Emilio G. Cota
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 01/13] target/i386: move cpu_cc_srcT to DisasContext Emilio G. Cota
                   ` (13 more replies)
  0 siblings, 14 replies; 32+ messages in thread
From: Emilio G. Cota @ 2018-09-11 20:28 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée

v2: https://lists.gnu.org/archive/html/qemu-devel/2018-09/msg01122.html

Changes since v2:

- Add rth's R-b tag to the last patch
- Drop v2's first 10 patches, since Paolo already picked those up
- Move TCG temps + x86_64_hregs to DisasContext
  + While at it, drop the cpu_ prefix from the TCG temps,
    e.g. cpu_A0 -> s->A0
  + Split the conversion into separate patches to ease review.
    The patches are quite boring and long because the temps
    are everywhere, and I had to add DisasContext *s to quite a few
    functions

The series is checkpatch-clean.

You can fetch these patches from:
  https://github.com/cota/qemu/tree/i386-mttcg-v3

Thanks,

		Emilio

^ permalink raw reply	[flat|nested] 32+ messages in thread

* [Qemu-devel] [PATCH v3 01/13] target/i386: move cpu_cc_srcT to DisasContext
  2018-09-11 20:28 [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg Emilio G. Cota
@ 2018-09-11 20:28 ` Emilio G. Cota
  2018-09-11 20:44   ` Richard Henderson
  2018-09-13 14:21   ` Alex Bennée
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 02/13] target/i386: move cpu_A0 " Emilio G. Cota
                   ` (12 subsequent siblings)
  13 siblings, 2 replies; 32+ messages in thread
From: Emilio G. Cota @ 2018-09-11 20:28 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 target/i386/translate.c | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 1f9d1d9b24..e9f512472e 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -73,7 +73,7 @@
 
 /* global register indexes */
 static TCGv cpu_A0;
-static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT;
+static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2;
 static TCGv_i32 cpu_cc_op;
 static TCGv cpu_regs[CPU_NB_REGS];
 static TCGv cpu_seg_base[6];
@@ -135,6 +135,10 @@ typedef struct DisasContext {
     int cpuid_ext3_features;
     int cpuid_7_0_ebx_features;
     int cpuid_xsave_features;
+
+    /* TCG local temps */
+    TCGv cc_srcT;
+
     sigjmp_buf jmpbuf;
 } DisasContext;
 
@@ -244,7 +248,7 @@ static void set_cc_op(DisasContext *s, CCOp op)
         tcg_gen_discard_tl(cpu_cc_src2);
     }
     if (dead & USES_CC_SRCT) {
-        tcg_gen_discard_tl(cpu_cc_srcT);
+        tcg_gen_discard_tl(s->cc_srcT);
     }
 
     if (op == CC_OP_DYNAMIC) {
@@ -667,11 +671,11 @@ static inline void gen_op_testl_T0_T1_cc(void)
     tcg_gen_and_tl(cpu_cc_dst, cpu_T0, cpu_T1);
 }
 
-static void gen_op_update_neg_cc(void)
+static void gen_op_update_neg_cc(DisasContext *s)
 {
     tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
     tcg_gen_neg_tl(cpu_cc_src, cpu_T0);
-    tcg_gen_movi_tl(cpu_cc_srcT, 0);
+    tcg_gen_movi_tl(s->cc_srcT, 0);
 }
 
 /* compute all eflags to cc_src */
@@ -742,7 +746,7 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
         t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
         /* If no temporary was used, be careful not to alias t1 and t0.  */
         t0 = t1 == cpu_cc_src ? cpu_tmp0 : reg;
-        tcg_gen_mov_tl(t0, cpu_cc_srcT);
+        tcg_gen_mov_tl(t0, s->cc_srcT);
         gen_extu(size, t0);
         goto add_sub;
 
@@ -899,7 +903,7 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
         size = s->cc_op - CC_OP_SUBB;
         switch (jcc_op) {
         case JCC_BE:
-            tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
+            tcg_gen_mov_tl(cpu_tmp4, s->cc_srcT);
             gen_extu(size, cpu_tmp4);
             t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
             cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = cpu_tmp4,
@@ -912,7 +916,7 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
         case JCC_LE:
             cond = TCG_COND_LE;
         fast_jcc_l:
-            tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
+            tcg_gen_mov_tl(cpu_tmp4, s->cc_srcT);
             gen_exts(size, cpu_tmp4);
             t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true);
             cc = (CCPrepare) { .cond = cond, .reg = cpu_tmp4,
@@ -1309,11 +1313,11 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
     case OP_SUBL:
         if (s1->prefix & PREFIX_LOCK) {
             tcg_gen_neg_tl(cpu_T0, cpu_T1);
-            tcg_gen_atomic_fetch_add_tl(cpu_cc_srcT, cpu_A0, cpu_T0,
+            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, cpu_A0, cpu_T0,
                                         s1->mem_index, ot | MO_LE);
-            tcg_gen_sub_tl(cpu_T0, cpu_cc_srcT, cpu_T1);
+            tcg_gen_sub_tl(cpu_T0, s1->cc_srcT, cpu_T1);
         } else {
-            tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0);
+            tcg_gen_mov_tl(s1->cc_srcT, cpu_T0);
             tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
@@ -1356,7 +1360,7 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
         break;
     case OP_CMPL:
         tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
-        tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0);
+        tcg_gen_mov_tl(s1->cc_srcT, cpu_T0);
         tcg_gen_sub_tl(cpu_cc_dst, cpu_T0, cpu_T1);
         set_cc_op(s1, CC_OP_SUBB + ot);
         break;
@@ -4823,7 +4827,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                     gen_op_mov_reg_v(ot, rm, cpu_T0);
                 }
             }
-            gen_op_update_neg_cc();
+            gen_op_update_neg_cc(s);
             set_cc_op(s, CC_OP_SUBB + ot);
             break;
         case 4: /* mul */
@@ -5283,7 +5287,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 }
             }
             tcg_gen_mov_tl(cpu_cc_src, oldv);
-            tcg_gen_mov_tl(cpu_cc_srcT, cmpv);
+            tcg_gen_mov_tl(s->cc_srcT, cmpv);
             tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv);
             set_cc_op(s, CC_OP_SUBB + ot);
             tcg_temp_free(oldv);
@@ -8463,7 +8467,7 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
     cpu_tmp4 = tcg_temp_new();
     cpu_ptr0 = tcg_temp_new_ptr();
     cpu_ptr1 = tcg_temp_new_ptr();
-    cpu_cc_srcT = tcg_temp_local_new();
+    dc->cc_srcT = tcg_temp_local_new();
 }
 
 static void i386_tr_tb_start(DisasContextBase *db, CPUState *cpu)
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [Qemu-devel] [PATCH v3 02/13] target/i386: move cpu_A0 to DisasContext
  2018-09-11 20:28 [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg Emilio G. Cota
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 01/13] target/i386: move cpu_cc_srcT to DisasContext Emilio G. Cota
@ 2018-09-11 20:28 ` Emilio G. Cota
  2018-09-11 20:45   ` Richard Henderson
  2018-09-13 14:23   ` Alex Bennée
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 03/13] target/i386: move cpu_T0 " Emilio G. Cota
                   ` (11 subsequent siblings)
  13 siblings, 2 replies; 32+ messages in thread
From: Emilio G. Cota @ 2018-09-11 20:28 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 target/i386/translate.c | 472 ++++++++++++++++++++--------------------
 1 file changed, 236 insertions(+), 236 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index e9f512472e..c6b1baab9d 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -72,7 +72,6 @@
 //#define MACRO_TEST   1
 
 /* global register indexes */
-static TCGv cpu_A0;
 static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2;
 static TCGv_i32 cpu_cc_op;
 static TCGv cpu_regs[CPU_NB_REGS];
@@ -138,6 +137,7 @@ typedef struct DisasContext {
 
     /* TCG local temps */
     TCGv cc_srcT;
+    TCGv A0;
 
     sigjmp_buf jmpbuf;
 } DisasContext;
@@ -395,9 +395,9 @@ static inline void gen_op_mov_v_reg(TCGMemOp ot, TCGv t0, int reg)
 
 static void gen_add_A0_im(DisasContext *s, int val)
 {
-    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
+    tcg_gen_addi_tl(s->A0, s->A0, val);
     if (!CODE64(s)) {
-        tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
+        tcg_gen_ext32u_tl(s->A0, s->A0);
     }
 }
 
@@ -431,7 +431,7 @@ static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
 static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
 {
     if (d == OR_TMP0) {
-        gen_op_st_v(s, idx, cpu_T0, cpu_A0);
+        gen_op_st_v(s, idx, cpu_T0, s->A0);
     } else {
         gen_op_mov_reg_v(idx, d, cpu_T0);
     }
@@ -453,7 +453,7 @@ static void gen_lea_v_seg(DisasContext *s, TCGMemOp aflag, TCGv a0,
 #ifdef TARGET_X86_64
     case MO_64:
         if (ovr_seg < 0) {
-            tcg_gen_mov_tl(cpu_A0, a0);
+            tcg_gen_mov_tl(s->A0, a0);
             return;
         }
         break;
@@ -464,14 +464,14 @@ static void gen_lea_v_seg(DisasContext *s, TCGMemOp aflag, TCGv a0,
             ovr_seg = def_seg;
         }
         if (ovr_seg < 0) {
-            tcg_gen_ext32u_tl(cpu_A0, a0);
+            tcg_gen_ext32u_tl(s->A0, a0);
             return;
         }
         break;
     case MO_16:
         /* 16 bit address */
-        tcg_gen_ext16u_tl(cpu_A0, a0);
-        a0 = cpu_A0;
+        tcg_gen_ext16u_tl(s->A0, a0);
+        a0 = s->A0;
         if (ovr_seg < 0) {
             if (s->addseg) {
                 ovr_seg = def_seg;
@@ -488,13 +488,13 @@ static void gen_lea_v_seg(DisasContext *s, TCGMemOp aflag, TCGv a0,
         TCGv seg = cpu_seg_base[ovr_seg];
 
         if (aflag == MO_64) {
-            tcg_gen_add_tl(cpu_A0, a0, seg);
+            tcg_gen_add_tl(s->A0, a0, seg);
         } else if (CODE64(s)) {
-            tcg_gen_ext32u_tl(cpu_A0, a0);
-            tcg_gen_add_tl(cpu_A0, cpu_A0, seg);
+            tcg_gen_ext32u_tl(s->A0, a0);
+            tcg_gen_add_tl(s->A0, s->A0, seg);
         } else {
-            tcg_gen_add_tl(cpu_A0, a0, seg);
-            tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
+            tcg_gen_add_tl(s->A0, a0, seg);
+            tcg_gen_ext32u_tl(s->A0, s->A0);
         }
     }
 }
@@ -640,9 +640,9 @@ static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
 static inline void gen_movs(DisasContext *s, TCGMemOp ot)
 {
     gen_string_movl_A0_ESI(s);
-    gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+    gen_op_ld_v(s, ot, cpu_T0, s->A0);
     gen_string_movl_A0_EDI(s);
-    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+    gen_op_st_v(s, ot, cpu_T0, s->A0);
     gen_op_movl_T0_Dshift(ot);
     gen_op_add_reg_T0(s->aflag, R_ESI);
     gen_op_add_reg_T0(s->aflag, R_EDI);
@@ -1072,7 +1072,7 @@ static inline void gen_stos(DisasContext *s, TCGMemOp ot)
 {
     gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
     gen_string_movl_A0_EDI(s);
-    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+    gen_op_st_v(s, ot, cpu_T0, s->A0);
     gen_op_movl_T0_Dshift(ot);
     gen_op_add_reg_T0(s->aflag, R_EDI);
 }
@@ -1080,7 +1080,7 @@ static inline void gen_stos(DisasContext *s, TCGMemOp ot)
 static inline void gen_lods(DisasContext *s, TCGMemOp ot)
 {
     gen_string_movl_A0_ESI(s);
-    gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+    gen_op_ld_v(s, ot, cpu_T0, s->A0);
     gen_op_mov_reg_v(ot, R_EAX, cpu_T0);
     gen_op_movl_T0_Dshift(ot);
     gen_op_add_reg_T0(s->aflag, R_ESI);
@@ -1089,7 +1089,7 @@ static inline void gen_lods(DisasContext *s, TCGMemOp ot)
 static inline void gen_scas(DisasContext *s, TCGMemOp ot)
 {
     gen_string_movl_A0_EDI(s);
-    gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
+    gen_op_ld_v(s, ot, cpu_T1, s->A0);
     gen_op(s, OP_CMPL, ot, R_EAX);
     gen_op_movl_T0_Dshift(ot);
     gen_op_add_reg_T0(s->aflag, R_EDI);
@@ -1098,7 +1098,7 @@ static inline void gen_scas(DisasContext *s, TCGMemOp ot)
 static inline void gen_cmps(DisasContext *s, TCGMemOp ot)
 {
     gen_string_movl_A0_EDI(s);
-    gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
+    gen_op_ld_v(s, ot, cpu_T1, s->A0);
     gen_string_movl_A0_ESI(s);
     gen_op(s, OP_CMPL, ot, OR_TMP0);
     gen_op_movl_T0_Dshift(ot);
@@ -1128,11 +1128,11 @@ static inline void gen_ins(DisasContext *s, TCGMemOp ot)
     /* Note: we must do this dummy write first to be restartable in
        case of page fault. */
     tcg_gen_movi_tl(cpu_T0, 0);
-    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+    gen_op_st_v(s, ot, cpu_T0, s->A0);
     tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
     tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
     gen_helper_in_func(ot, cpu_T0, cpu_tmp2_i32);
-    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+    gen_op_st_v(s, ot, cpu_T0, s->A0);
     gen_op_movl_T0_Dshift(ot);
     gen_op_add_reg_T0(s->aflag, R_EDI);
     gen_bpt_io(s, cpu_tmp2_i32, ot);
@@ -1147,7 +1147,7 @@ static inline void gen_outs(DisasContext *s, TCGMemOp ot)
         gen_io_start();
     }
     gen_string_movl_A0_ESI(s);
-    gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+    gen_op_ld_v(s, ot, cpu_T0, s->A0);
 
     tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
     tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
@@ -1267,14 +1267,14 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
     if (d != OR_TMP0) {
         gen_op_mov_v_reg(ot, cpu_T0, d);
     } else if (!(s1->prefix & PREFIX_LOCK)) {
-        gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
+        gen_op_ld_v(s1, ot, cpu_T0, s1->A0);
     }
     switch(op) {
     case OP_ADCL:
         gen_compute_eflags_c(s1, cpu_tmp4);
         if (s1->prefix & PREFIX_LOCK) {
             tcg_gen_add_tl(cpu_T0, cpu_tmp4, cpu_T1);
-            tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
+            tcg_gen_atomic_add_fetch_tl(cpu_T0, s1->A0, cpu_T0,
                                         s1->mem_index, ot | MO_LE);
         } else {
             tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
@@ -1289,7 +1289,7 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
         if (s1->prefix & PREFIX_LOCK) {
             tcg_gen_add_tl(cpu_T0, cpu_T1, cpu_tmp4);
             tcg_gen_neg_tl(cpu_T0, cpu_T0);
-            tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
+            tcg_gen_atomic_add_fetch_tl(cpu_T0, s1->A0, cpu_T0,
                                         s1->mem_index, ot | MO_LE);
         } else {
             tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
@@ -1301,7 +1301,7 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
         break;
     case OP_ADDL:
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
+            tcg_gen_atomic_add_fetch_tl(cpu_T0, s1->A0, cpu_T1,
                                         s1->mem_index, ot | MO_LE);
         } else {
             tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
@@ -1313,7 +1313,7 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
     case OP_SUBL:
         if (s1->prefix & PREFIX_LOCK) {
             tcg_gen_neg_tl(cpu_T0, cpu_T1);
-            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, cpu_A0, cpu_T0,
+            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, cpu_T0,
                                         s1->mem_index, ot | MO_LE);
             tcg_gen_sub_tl(cpu_T0, s1->cc_srcT, cpu_T1);
         } else {
@@ -1327,7 +1327,7 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
     default:
     case OP_ANDL:
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_atomic_and_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
+            tcg_gen_atomic_and_fetch_tl(cpu_T0, s1->A0, cpu_T1,
                                         s1->mem_index, ot | MO_LE);
         } else {
             tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
@@ -1338,7 +1338,7 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
         break;
     case OP_ORL:
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_atomic_or_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
+            tcg_gen_atomic_or_fetch_tl(cpu_T0, s1->A0, cpu_T1,
                                        s1->mem_index, ot | MO_LE);
         } else {
             tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
@@ -1349,7 +1349,7 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
         break;
     case OP_XORL:
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_atomic_xor_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
+            tcg_gen_atomic_xor_fetch_tl(cpu_T0, s1->A0, cpu_T1,
                                         s1->mem_index, ot | MO_LE);
         } else {
             tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
@@ -1372,13 +1372,13 @@ static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c)
 {
     if (s1->prefix & PREFIX_LOCK) {
         tcg_gen_movi_tl(cpu_T0, c > 0 ? 1 : -1);
-        tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
+        tcg_gen_atomic_add_fetch_tl(cpu_T0, s1->A0, cpu_T0,
                                     s1->mem_index, ot | MO_LE);
     } else {
         if (d != OR_TMP0) {
             gen_op_mov_v_reg(ot, cpu_T0, d);
         } else {
-            gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
+            gen_op_ld_v(s1, ot, cpu_T0, s1->A0);
         }
         tcg_gen_addi_tl(cpu_T0, cpu_T0, (c > 0 ? 1 : -1));
         gen_op_st_rm_T0_A0(s1, ot, d);
@@ -1441,7 +1441,7 @@ static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
 
     /* load */
     if (op1 == OR_TMP0) {
-        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+        gen_op_ld_v(s, ot, cpu_T0, s->A0);
     } else {
         gen_op_mov_v_reg(ot, cpu_T0, op1);
     }
@@ -1477,7 +1477,7 @@ static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
 
     /* load */
     if (op1 == OR_TMP0)
-        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+        gen_op_ld_v(s, ot, cpu_T0, s->A0);
     else
         gen_op_mov_v_reg(ot, cpu_T0, op1);
 
@@ -1517,7 +1517,7 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
 
     /* load */
     if (op1 == OR_TMP0) {
-        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+        gen_op_ld_v(s, ot, cpu_T0, s->A0);
     } else {
         gen_op_mov_v_reg(ot, cpu_T0, op1);
     }
@@ -1603,7 +1603,7 @@ static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
 
     /* load */
     if (op1 == OR_TMP0) {
-        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+        gen_op_ld_v(s, ot, cpu_T0, s->A0);
     } else {
         gen_op_mov_v_reg(ot, cpu_T0, op1);
     }
@@ -1681,7 +1681,7 @@ static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
 
     /* load */
     if (op1 == OR_TMP0)
-        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+        gen_op_ld_v(s, ot, cpu_T0, s->A0);
     else
         gen_op_mov_v_reg(ot, cpu_T0, op1);
     
@@ -1737,7 +1737,7 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
 
     /* load */
     if (op1 == OR_TMP0) {
-        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+        gen_op_ld_v(s, ot, cpu_T0, s->A0);
     } else {
         gen_op_mov_v_reg(ot, cpu_T0, op1);
     }
@@ -2052,7 +2052,7 @@ static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
 }
 
 /* Compute the address, with a minimum number of TCG ops.  */
-static TCGv gen_lea_modrm_1(AddressParts a)
+static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a)
 {
     TCGv ea = NULL;
 
@@ -2060,22 +2060,22 @@ static TCGv gen_lea_modrm_1(AddressParts a)
         if (a.scale == 0) {
             ea = cpu_regs[a.index];
         } else {
-            tcg_gen_shli_tl(cpu_A0, cpu_regs[a.index], a.scale);
-            ea = cpu_A0;
+            tcg_gen_shli_tl(s->A0, cpu_regs[a.index], a.scale);
+            ea = s->A0;
         }
         if (a.base >= 0) {
-            tcg_gen_add_tl(cpu_A0, ea, cpu_regs[a.base]);
-            ea = cpu_A0;
+            tcg_gen_add_tl(s->A0, ea, cpu_regs[a.base]);
+            ea = s->A0;
         }
     } else if (a.base >= 0) {
         ea = cpu_regs[a.base];
     }
     if (!ea) {
-        tcg_gen_movi_tl(cpu_A0, a.disp);
-        ea = cpu_A0;
+        tcg_gen_movi_tl(s->A0, a.disp);
+        ea = s->A0;
     } else if (a.disp != 0) {
-        tcg_gen_addi_tl(cpu_A0, ea, a.disp);
-        ea = cpu_A0;
+        tcg_gen_addi_tl(s->A0, ea, a.disp);
+        ea = s->A0;
     }
 
     return ea;
@@ -2084,7 +2084,7 @@ static TCGv gen_lea_modrm_1(AddressParts a)
 static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
 {
     AddressParts a = gen_lea_modrm_0(env, s, modrm);
-    TCGv ea = gen_lea_modrm_1(a);
+    TCGv ea = gen_lea_modrm_1(s, a);
     gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
 }
 
@@ -2097,7 +2097,7 @@ static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
 static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
                       TCGCond cond, TCGv_i64 bndv)
 {
-    TCGv ea = gen_lea_modrm_1(gen_lea_modrm_0(env, s, modrm));
+    TCGv ea = gen_lea_modrm_1(s, gen_lea_modrm_0(env, s, modrm));
 
     tcg_gen_extu_tl_i64(cpu_tmp1_i64, ea);
     if (!CODE64(s)) {
@@ -2111,7 +2111,7 @@ static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
 /* used for LEA and MOV AX, mem */
 static void gen_add_A0_ds_seg(DisasContext *s)
 {
-    gen_lea_v_seg(s, s->aflag, cpu_A0, R_DS, s->override);
+    gen_lea_v_seg(s, s->aflag, s->A0, R_DS, s->override);
 }
 
 /* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
@@ -2138,9 +2138,9 @@ static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
         if (is_store) {
             if (reg != OR_TMP0)
                 gen_op_mov_v_reg(ot, cpu_T0, reg);
-            gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+            gen_op_st_v(s, ot, cpu_T0, s->A0);
         } else {
-            gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+            gen_op_ld_v(s, ot, cpu_T0, s->A0);
             if (reg != OR_TMP0)
                 gen_op_mov_reg_v(ot, reg, cpu_T0);
         }
@@ -2334,19 +2334,19 @@ static void gen_push_v(DisasContext *s, TCGv val)
     TCGMemOp d_ot = mo_pushpop(s, s->dflag);
     TCGMemOp a_ot = mo_stacksize(s);
     int size = 1 << d_ot;
-    TCGv new_esp = cpu_A0;
+    TCGv new_esp = s->A0;
 
-    tcg_gen_subi_tl(cpu_A0, cpu_regs[R_ESP], size);
+    tcg_gen_subi_tl(s->A0, cpu_regs[R_ESP], size);
 
     if (!CODE64(s)) {
         if (s->addseg) {
             new_esp = cpu_tmp4;
-            tcg_gen_mov_tl(new_esp, cpu_A0);
+            tcg_gen_mov_tl(new_esp, s->A0);
         }
-        gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
+        gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
     }
 
-    gen_op_st_v(s, d_ot, val, cpu_A0);
+    gen_op_st_v(s, d_ot, val, s->A0);
     gen_op_mov_reg_v(a_ot, R_ESP, new_esp);
 }
 
@@ -2356,7 +2356,7 @@ static TCGMemOp gen_pop_T0(DisasContext *s)
     TCGMemOp d_ot = mo_pushpop(s, s->dflag);
 
     gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
-    gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
+    gen_op_ld_v(s, d_ot, cpu_T0, s->A0);
 
     return d_ot;
 }
@@ -2379,9 +2379,9 @@ static void gen_pusha(DisasContext *s)
     int i;
 
     for (i = 0; i < 8; i++) {
-        tcg_gen_addi_tl(cpu_A0, cpu_regs[R_ESP], (i - 8) * size);
-        gen_lea_v_seg(s, s_ot, cpu_A0, R_SS, -1);
-        gen_op_st_v(s, d_ot, cpu_regs[7 - i], cpu_A0);
+        tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], (i - 8) * size);
+        gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
+        gen_op_st_v(s, d_ot, cpu_regs[7 - i], s->A0);
     }
 
     gen_stack_update(s, -8 * size);
@@ -2399,9 +2399,9 @@ static void gen_popa(DisasContext *s)
         if (7 - i == R_ESP) {
             continue;
         }
-        tcg_gen_addi_tl(cpu_A0, cpu_regs[R_ESP], i * size);
-        gen_lea_v_seg(s, s_ot, cpu_A0, R_SS, -1);
-        gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
+        tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], i * size);
+        gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
+        gen_op_ld_v(s, d_ot, cpu_T0, s->A0);
         gen_op_mov_reg_v(d_ot, 7 - i, cpu_T0);
     }
 
@@ -2417,7 +2417,7 @@ static void gen_enter(DisasContext *s, int esp_addend, int level)
     /* Push BP; compute FrameTemp into T1.  */
     tcg_gen_subi_tl(cpu_T1, cpu_regs[R_ESP], size);
     gen_lea_v_seg(s, a_ot, cpu_T1, R_SS, -1);
-    gen_op_st_v(s, d_ot, cpu_regs[R_EBP], cpu_A0);
+    gen_op_st_v(s, d_ot, cpu_regs[R_EBP], s->A0);
 
     level &= 31;
     if (level != 0) {
@@ -2425,19 +2425,19 @@ static void gen_enter(DisasContext *s, int esp_addend, int level)
 
         /* Copy level-1 pointers from the previous frame.  */
         for (i = 1; i < level; ++i) {
-            tcg_gen_subi_tl(cpu_A0, cpu_regs[R_EBP], size * i);
-            gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
-            gen_op_ld_v(s, d_ot, cpu_tmp0, cpu_A0);
+            tcg_gen_subi_tl(s->A0, cpu_regs[R_EBP], size * i);
+            gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
+            gen_op_ld_v(s, d_ot, cpu_tmp0, s->A0);
 
-            tcg_gen_subi_tl(cpu_A0, cpu_T1, size * i);
-            gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
-            gen_op_st_v(s, d_ot, cpu_tmp0, cpu_A0);
+            tcg_gen_subi_tl(s->A0, cpu_T1, size * i);
+            gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
+            gen_op_st_v(s, d_ot, cpu_tmp0, s->A0);
         }
 
         /* Push the current FrameTemp as the last level.  */
-        tcg_gen_subi_tl(cpu_A0, cpu_T1, size * level);
-        gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
-        gen_op_st_v(s, d_ot, cpu_T1, cpu_A0);
+        tcg_gen_subi_tl(s->A0, cpu_T1, size * level);
+        gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
+        gen_op_st_v(s, d_ot, cpu_T1, s->A0);
     }
 
     /* Copy the FrameTemp value to EBP.  */
@@ -2454,7 +2454,7 @@ static void gen_leave(DisasContext *s)
     TCGMemOp a_ot = mo_stacksize(s);
 
     gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
-    gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
+    gen_op_ld_v(s, d_ot, cpu_T0, s->A0);
 
     tcg_gen_addi_tl(cpu_T1, cpu_regs[R_EBP], 1 << d_ot);
 
@@ -2633,22 +2633,22 @@ static void gen_jmp(DisasContext *s, target_ulong eip)
 
 static inline void gen_ldq_env_A0(DisasContext *s, int offset)
 {
-    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
+    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0, s->mem_index, MO_LEQ);
     tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset);
 }
 
 static inline void gen_stq_env_A0(DisasContext *s, int offset)
 {
     tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset);
-    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
+    tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->A0, s->mem_index, MO_LEQ);
 }
 
 static inline void gen_ldo_env_A0(DisasContext *s, int offset)
 {
     int mem_index = s->mem_index;
-    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
+    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0, mem_index, MO_LEQ);
     tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
-    tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
+    tcg_gen_addi_tl(cpu_tmp0, s->A0, 8);
     tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
     tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
 }
@@ -2657,8 +2657,8 @@ static inline void gen_sto_env_A0(DisasContext *s, int offset)
 {
     int mem_index = s->mem_index;
     tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
-    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
-    tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
+    tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->A0, mem_index, MO_LEQ);
+    tcg_gen_addi_tl(cpu_tmp0, s->A0, 8);
     tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
     tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
 }
@@ -3128,7 +3128,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             } else {
                 tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
                     xmm_regs[reg].ZMM_L(0)));
-                gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
+                gen_op_st_v(s, MO_32, cpu_T0, s->A0);
             }
             break;
         case 0x6e: /* movd mm, ea */
@@ -3193,7 +3193,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x210: /* movss xmm, ea */
             if (mod != 3) {
                 gen_lea_modrm(env, s, modrm);
-                gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
+                gen_op_ld_v(s, MO_32, cpu_T0, s->A0);
                 tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
                 tcg_gen_movi_tl(cpu_T0, 0);
                 tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
@@ -3380,7 +3380,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             if (mod != 3) {
                 gen_lea_modrm(env, s, modrm);
                 tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
-                gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
+                gen_op_st_v(s, MO_32, cpu_T0, s->A0);
             } else {
                 rm = (modrm & 7) | REX_B(s);
                 gen_op_movl(offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)),
@@ -3555,7 +3555,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 if ((b >> 8) & 1) {
                     gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0)));
                 } else {
-                    gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
+                    gen_op_ld_v(s, MO_32, cpu_T0, s->A0);
                     tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
                 }
                 op2_offset = offsetof(CPUX86State,xmm_t0);
@@ -3694,13 +3694,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                         break;
                     case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
                     case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
                         tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, op2_offset +
                                         offsetof(ZMMReg, ZMM_L(0)));
                         break;
                     case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
-                        tcg_gen_qemu_ld_tl(cpu_tmp0, cpu_A0,
+                        tcg_gen_qemu_ld_tl(cpu_tmp0, s->A0,
                                            s->mem_index, MO_LEUW);
                         tcg_gen_st16_tl(cpu_tmp0, cpu_env, op2_offset +
                                         offsetof(ZMMReg, ZMM_W(0)));
@@ -3789,11 +3789,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
 
                 gen_lea_modrm(env, s, modrm);
                 if ((b & 1) == 0) {
-                    tcg_gen_qemu_ld_tl(cpu_T0, cpu_A0,
+                    tcg_gen_qemu_ld_tl(cpu_T0, s->A0,
                                        s->mem_index, ot | MO_BE);
                     gen_op_mov_reg_v(ot, reg, cpu_T0);
                 } else {
-                    tcg_gen_qemu_st_tl(cpu_regs[reg], cpu_A0,
+                    tcg_gen_qemu_st_tl(cpu_regs[reg], s->A0,
                                        s->mem_index, ot | MO_BE);
                 }
                 break;
@@ -3825,23 +3825,23 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                     /* Extract START, and shift the operand.
                        Shifts larger than operand size get zeros.  */
-                    tcg_gen_ext8u_tl(cpu_A0, cpu_regs[s->vex_v]);
-                    tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_A0);
+                    tcg_gen_ext8u_tl(s->A0, cpu_regs[s->vex_v]);
+                    tcg_gen_shr_tl(cpu_T0, cpu_T0, s->A0);
 
                     bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
                     zero = tcg_const_tl(0);
-                    tcg_gen_movcond_tl(TCG_COND_LEU, cpu_T0, cpu_A0, bound,
+                    tcg_gen_movcond_tl(TCG_COND_LEU, cpu_T0, s->A0, bound,
                                        cpu_T0, zero);
                     tcg_temp_free(zero);
 
                     /* Extract the LEN into a mask.  Lengths larger than
                        operand size get all ones.  */
-                    tcg_gen_extract_tl(cpu_A0, cpu_regs[s->vex_v], 8, 8);
-                    tcg_gen_movcond_tl(TCG_COND_LEU, cpu_A0, cpu_A0, bound,
-                                       cpu_A0, bound);
+                    tcg_gen_extract_tl(s->A0, cpu_regs[s->vex_v], 8, 8);
+                    tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound,
+                                       s->A0, bound);
                     tcg_temp_free(bound);
                     tcg_gen_movi_tl(cpu_T1, 1);
-                    tcg_gen_shl_tl(cpu_T1, cpu_T1, cpu_A0);
+                    tcg_gen_shl_tl(cpu_T1, cpu_T1, s->A0);
                     tcg_gen_subi_tl(cpu_T1, cpu_T1, 1);
                     tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
 
@@ -3870,9 +3870,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                                        bound, bound, cpu_T1);
                     tcg_temp_free(bound);
                 }
-                tcg_gen_movi_tl(cpu_A0, -1);
-                tcg_gen_shl_tl(cpu_A0, cpu_A0, cpu_T1);
-                tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_A0);
+                tcg_gen_movi_tl(s->A0, -1);
+                tcg_gen_shl_tl(s->A0, s->A0, cpu_T1);
+                tcg_gen_andc_tl(cpu_T0, cpu_T0, s->A0);
                 gen_op_mov_reg_v(ot, reg, cpu_T0);
                 gen_op_update1_cc();
                 set_cc_op(s, CC_OP_BMILGB + ot);
@@ -4124,7 +4124,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     if (mod == 3) {
                         gen_op_mov_reg_v(ot, rm, cpu_T0);
                     } else {
-                        tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
+                        tcg_gen_qemu_st_tl(cpu_T0, s->A0,
                                            s->mem_index, MO_UB);
                     }
                     break;
@@ -4134,7 +4134,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     if (mod == 3) {
                         gen_op_mov_reg_v(ot, rm, cpu_T0);
                     } else {
-                        tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
+                        tcg_gen_qemu_st_tl(cpu_T0, s->A0,
                                            s->mem_index, MO_LEUW);
                     }
                     break;
@@ -4146,7 +4146,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                         if (mod == 3) {
                             tcg_gen_extu_i32_tl(cpu_regs[rm], cpu_tmp2_i32);
                         } else {
-                            tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
+                            tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
                                                 s->mem_index, MO_LEUL);
                         }
                     } else { /* pextrq */
@@ -4157,7 +4157,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                         if (mod == 3) {
                             tcg_gen_mov_i64(cpu_regs[rm], cpu_tmp1_i64);
                         } else {
-                            tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
+                            tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->A0,
                                                 s->mem_index, MO_LEQ);
                         }
 #else
@@ -4171,7 +4171,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     if (mod == 3) {
                         gen_op_mov_reg_v(ot, rm, cpu_T0);
                     } else {
-                        tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
+                        tcg_gen_qemu_st_tl(cpu_T0, s->A0,
                                            s->mem_index, MO_LEUL);
                     }
                     break;
@@ -4179,7 +4179,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     if (mod == 3) {
                         gen_op_mov_v_reg(MO_32, cpu_T0, rm);
                     } else {
-                        tcg_gen_qemu_ld_tl(cpu_T0, cpu_A0,
+                        tcg_gen_qemu_ld_tl(cpu_T0, s->A0,
                                            s->mem_index, MO_UB);
                     }
                     tcg_gen_st8_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
@@ -4191,7 +4191,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                                         offsetof(CPUX86State,xmm_regs[rm]
                                                 .ZMM_L((val >> 6) & 3)));
                     } else {
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
                     }
                     tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
@@ -4219,7 +4219,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                         if (mod == 3) {
                             tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[rm]);
                         } else {
-                            tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                            tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
                                                 s->mem_index, MO_LEUL);
                         }
                         tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
@@ -4230,7 +4230,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                         if (mod == 3) {
                             gen_op_mov_v_reg(ot, cpu_tmp1_i64, rm);
                         } else {
-                            tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
+                            tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0,
                                                 s->mem_index, MO_LEQ);
                         }
                         tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
@@ -4360,7 +4360,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 switch (sz) {
                 case 2:
                     /* 32 bit access */
-                    gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
+                    gen_op_ld_v(s, MO_32, cpu_T0, s->A0);
                     tcg_gen_st32_tl(cpu_T0, cpu_env,
                                     offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
                     break;
@@ -4426,15 +4426,15 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             /* maskmov : we must prepare A0 */
             if (mod != 3)
                 goto illegal_op;
-            tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EDI]);
-            gen_extu(s->aflag, cpu_A0);
+            tcg_gen_mov_tl(s->A0, cpu_regs[R_EDI]);
+            gen_extu(s->aflag, s->A0);
             gen_add_A0_ds_seg(s);
 
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
             /* XXX: introduce a new table? */
             sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
-            sse_fn_eppt(cpu_env, cpu_ptr0, cpu_ptr1, cpu_A0);
+            sse_fn_eppt(cpu_env, cpu_ptr0, cpu_ptr1, s->A0);
             break;
         default:
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
@@ -4673,7 +4673,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 rm = (modrm & 7) | REX_B(s);
                 if (mod != 3) {
                     gen_lea_modrm(env, s, modrm);
-                    gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
+                    gen_op_ld_v(s, ot, cpu_T1, s->A0);
                 } else if (op == OP_XORL && rm == reg) {
                     goto xor_zero;
                 } else {
@@ -4760,7 +4760,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             /* For those below that handle locked memory, don't load here.  */
             if (!(s->prefix & PREFIX_LOCK)
                 || op != 2) {
-                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+                gen_op_ld_v(s, ot, cpu_T0, s->A0);
             }
         } else {
             gen_op_mov_v_reg(ot, cpu_T0, rm);
@@ -4779,12 +4779,12 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                     goto illegal_op;
                 }
                 tcg_gen_movi_tl(cpu_T0, ~0);
-                tcg_gen_atomic_xor_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
+                tcg_gen_atomic_xor_fetch_tl(cpu_T0, s->A0, cpu_T0,
                                             s->mem_index, ot | MO_LE);
             } else {
                 tcg_gen_not_tl(cpu_T0, cpu_T0);
                 if (mod != 3) {
-                    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+                    gen_op_st_v(s, ot, cpu_T0, s->A0);
                 } else {
                     gen_op_mov_reg_v(ot, rm, cpu_T0);
                 }
@@ -4802,7 +4802,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 t0 = tcg_temp_local_new();
                 label1 = gen_new_label();
 
-                tcg_gen_mov_tl(a0, cpu_A0);
+                tcg_gen_mov_tl(a0, s->A0);
                 tcg_gen_mov_tl(t0, cpu_T0);
 
                 gen_set_label(label1);
@@ -4822,7 +4822,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             } else {
                 tcg_gen_neg_tl(cpu_T0, cpu_T0);
                 if (mod != 3) {
-                    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+                    gen_op_st_v(s, ot, cpu_T0, s->A0);
                 } else {
                     gen_op_mov_reg_v(ot, rm, cpu_T0);
                 }
@@ -5001,7 +5001,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         if (mod != 3) {
             gen_lea_modrm(env, s, modrm);
             if (op >= 2 && op != 3 && op != 5)
-                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+                gen_op_ld_v(s, ot, cpu_T0, s->A0);
         } else {
             gen_op_mov_v_reg(ot, cpu_T0, rm);
         }
@@ -5034,9 +5034,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_jr(s, cpu_T0);
             break;
         case 3: /* lcall Ev */
-            gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
+            gen_op_ld_v(s, ot, cpu_T1, s->A0);
             gen_add_A0_im(s, 1 << ot);
-            gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
+            gen_op_ld_v(s, MO_16, cpu_T0, s->A0);
         do_lcall:
             if (s->pe && !s->vm86) {
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
@@ -5061,9 +5061,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_jr(s, cpu_T0);
             break;
         case 5: /* ljmp Ev */
-            gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
+            gen_op_ld_v(s, ot, cpu_T1, s->A0);
             gen_add_A0_im(s, 1 << ot);
-            gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
+            gen_op_ld_v(s, MO_16, cpu_T0, s->A0);
         do_ljmp:
             if (s->pe && !s->vm86) {
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
@@ -5225,13 +5225,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         } else {
             gen_lea_modrm(env, s, modrm);
             if (s->prefix & PREFIX_LOCK) {
-                tcg_gen_atomic_fetch_add_tl(cpu_T1, cpu_A0, cpu_T0,
+                tcg_gen_atomic_fetch_add_tl(cpu_T1, s->A0, cpu_T0,
                                             s->mem_index, ot | MO_LE);
                 tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
             } else {
-                gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
+                gen_op_ld_v(s, ot, cpu_T1, s->A0);
                 tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
-                gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+                gen_op_st_v(s, ot, cpu_T0, s->A0);
             }
             gen_op_mov_reg_v(ot, reg, cpu_T1);
         }
@@ -5258,7 +5258,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                     goto illegal_op;
                 }
                 gen_lea_modrm(env, s, modrm);
-                tcg_gen_atomic_cmpxchg_tl(oldv, cpu_A0, cmpv, newv,
+                tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
                                           s->mem_index, ot | MO_LE);
                 gen_op_mov_reg_v(ot, R_EAX, oldv);
             } else {
@@ -5267,7 +5267,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                     gen_op_mov_v_reg(ot, oldv, rm);
                 } else {
                     gen_lea_modrm(env, s, modrm);
-                    gen_op_ld_v(s, ot, oldv, cpu_A0);
+                    gen_op_ld_v(s, ot, oldv, s->A0);
                     rm = 0; /* avoid warning */
                 }
                 gen_extu(ot, oldv);
@@ -5282,7 +5282,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                        must be before changing accumulator to ensure
                        idempotency if the store faults and the instruction
                        is restarted */
-                    gen_op_st_v(s, ot, newv, cpu_A0);
+                    gen_op_st_v(s, ot, newv, s->A0);
                     gen_op_mov_reg_v(ot, R_EAX, oldv);
                 }
             }
@@ -5306,9 +5306,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 goto illegal_op;
             gen_lea_modrm(env, s, modrm);
             if ((s->prefix & PREFIX_LOCK) && (tb_cflags(s->base.tb) & CF_PARALLEL)) {
-                gen_helper_cmpxchg16b(cpu_env, cpu_A0);
+                gen_helper_cmpxchg16b(cpu_env, s->A0);
             } else {
-                gen_helper_cmpxchg16b_unlocked(cpu_env, cpu_A0);
+                gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0);
             }
         } else
 #endif        
@@ -5317,9 +5317,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 goto illegal_op;
             gen_lea_modrm(env, s, modrm);
             if ((s->prefix & PREFIX_LOCK) && (tb_cflags(s->base.tb) & CF_PARALLEL)) {
-                gen_helper_cmpxchg8b(cpu_env, cpu_A0);
+                gen_helper_cmpxchg8b(cpu_env, s->A0);
             } else {
-                gen_helper_cmpxchg8b_unlocked(cpu_env, cpu_A0);
+                gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0);
             }
         }
         set_cc_op(s, CC_OP_EFLAGS);
@@ -5453,7 +5453,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         val = insn_get(env, s, ot);
         tcg_gen_movi_tl(cpu_T0, val);
         if (mod != 3) {
-            gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+            gen_op_st_v(s, ot, cpu_T0, s->A0);
         } else {
             gen_op_mov_reg_v(ot, (modrm & 7) | REX_B(s), cpu_T0);
         }
@@ -5540,7 +5540,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 gen_op_mov_reg_v(d_ot, reg, cpu_T0);
             } else {
                 gen_lea_modrm(env, s, modrm);
-                gen_op_ld_v(s, s_ot, cpu_T0, cpu_A0);
+                gen_op_ld_v(s, s_ot, cpu_T0, s->A0);
                 gen_op_mov_reg_v(d_ot, reg, cpu_T0);
             }
         }
@@ -5554,9 +5554,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         reg = ((modrm >> 3) & 7) | rex_r;
         {
             AddressParts a = gen_lea_modrm_0(env, s, modrm);
-            TCGv ea = gen_lea_modrm_1(a);
+            TCGv ea = gen_lea_modrm_1(s, a);
             gen_lea_v_seg(s, s->aflag, ea, -1, -1);
-            gen_op_mov_reg_v(dflag, reg, cpu_A0);
+            gen_op_mov_reg_v(dflag, reg, s->A0);
         }
         break;
 
@@ -5578,24 +5578,24 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 offset_addr = insn_get(env, s, s->aflag);
                 break;
             }
-            tcg_gen_movi_tl(cpu_A0, offset_addr);
+            tcg_gen_movi_tl(s->A0, offset_addr);
             gen_add_A0_ds_seg(s);
             if ((b & 2) == 0) {
-                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+                gen_op_ld_v(s, ot, cpu_T0, s->A0);
                 gen_op_mov_reg_v(ot, R_EAX, cpu_T0);
             } else {
                 gen_op_mov_v_reg(ot, cpu_T0, R_EAX);
-                gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+                gen_op_st_v(s, ot, cpu_T0, s->A0);
             }
         }
         break;
     case 0xd7: /* xlat */
-        tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EBX]);
+        tcg_gen_mov_tl(s->A0, cpu_regs[R_EBX]);
         tcg_gen_ext8u_tl(cpu_T0, cpu_regs[R_EAX]);
-        tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_T0);
-        gen_extu(s->aflag, cpu_A0);
+        tcg_gen_add_tl(s->A0, s->A0, cpu_T0);
+        gen_extu(s->aflag, s->A0);
         gen_add_A0_ds_seg(s);
-        gen_op_ld_v(s, MO_8, cpu_T0, cpu_A0);
+        gen_op_ld_v(s, MO_8, cpu_T0, s->A0);
         gen_op_mov_reg_v(MO_8, R_EAX, cpu_T0);
         break;
     case 0xb0 ... 0xb7: /* mov R, Ib */
@@ -5646,7 +5646,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_lea_modrm(env, s, modrm);
             gen_op_mov_v_reg(ot, cpu_T0, reg);
             /* for xchg, lock is implicit */
-            tcg_gen_atomic_xchg_tl(cpu_T1, cpu_A0, cpu_T0,
+            tcg_gen_atomic_xchg_tl(cpu_T1, s->A0, cpu_T0,
                                    s->mem_index, ot | MO_LE);
             gen_op_mov_reg_v(ot, reg, cpu_T1);
         }
@@ -5675,10 +5675,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         if (mod == 3)
             goto illegal_op;
         gen_lea_modrm(env, s, modrm);
-        gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
+        gen_op_ld_v(s, ot, cpu_T1, s->A0);
         gen_add_A0_im(s, 1 << ot);
         /* load the segment first to handle exceptions properly */
-        gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
+        gen_op_ld_v(s, MO_16, cpu_T0, s->A0);
         gen_movl_seg_T0(s, op);
         /* then put the data */
         gen_op_mov_reg_v(ot, reg, cpu_T1);
@@ -5798,23 +5798,23 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
 
                     switch(op >> 4) {
                     case 0:
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
                         gen_helper_flds_FT0(cpu_env, cpu_tmp2_i32);
                         break;
                     case 1:
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
                         gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32);
                         break;
                     case 2:
-                        tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
+                        tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0,
                                             s->mem_index, MO_LEQ);
                         gen_helper_fldl_FT0(cpu_env, cpu_tmp1_i64);
                         break;
                     case 3:
                     default:
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
                                             s->mem_index, MO_LESW);
                         gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32);
                         break;
@@ -5837,23 +5837,23 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 case 0:
                     switch(op >> 4) {
                     case 0:
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
                         gen_helper_flds_ST0(cpu_env, cpu_tmp2_i32);
                         break;
                     case 1:
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
                         gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32);
                         break;
                     case 2:
-                        tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
+                        tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0,
                                             s->mem_index, MO_LEQ);
                         gen_helper_fldl_ST0(cpu_env, cpu_tmp1_i64);
                         break;
                     case 3:
                     default:
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
                                             s->mem_index, MO_LESW);
                         gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32);
                         break;
@@ -5864,18 +5864,18 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                     switch(op >> 4) {
                     case 1:
                         gen_helper_fisttl_ST0(cpu_tmp2_i32, cpu_env);
-                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
+                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
                         break;
                     case 2:
                         gen_helper_fisttll_ST0(cpu_tmp1_i64, cpu_env);
-                        tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
+                        tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->A0,
                                             s->mem_index, MO_LEQ);
                         break;
                     case 3:
                     default:
                         gen_helper_fistt_ST0(cpu_tmp2_i32, cpu_env);
-                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
+                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUW);
                         break;
                     }
@@ -5885,23 +5885,23 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                     switch(op >> 4) {
                     case 0:
                         gen_helper_fsts_ST0(cpu_tmp2_i32, cpu_env);
-                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
+                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
                         break;
                     case 1:
                         gen_helper_fistl_ST0(cpu_tmp2_i32, cpu_env);
-                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
+                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
                         break;
                     case 2:
                         gen_helper_fstl_ST0(cpu_tmp1_i64, cpu_env);
-                        tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
+                        tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->A0,
                                             s->mem_index, MO_LEQ);
                         break;
                     case 3:
                     default:
                         gen_helper_fist_ST0(cpu_tmp2_i32, cpu_env);
-                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
+                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUW);
                         break;
                     }
@@ -5911,53 +5911,53 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 }
                 break;
             case 0x0c: /* fldenv mem */
-                gen_helper_fldenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
+                gen_helper_fldenv(cpu_env, s->A0, tcg_const_i32(dflag - 1));
                 break;
             case 0x0d: /* fldcw mem */
-                tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
                                     s->mem_index, MO_LEUW);
                 gen_helper_fldcw(cpu_env, cpu_tmp2_i32);
                 break;
             case 0x0e: /* fnstenv mem */
-                gen_helper_fstenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
+                gen_helper_fstenv(cpu_env, s->A0, tcg_const_i32(dflag - 1));
                 break;
             case 0x0f: /* fnstcw mem */
                 gen_helper_fnstcw(cpu_tmp2_i32, cpu_env);
-                tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
+                tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
                                     s->mem_index, MO_LEUW);
                 break;
             case 0x1d: /* fldt mem */
-                gen_helper_fldt_ST0(cpu_env, cpu_A0);
+                gen_helper_fldt_ST0(cpu_env, s->A0);
                 break;
             case 0x1f: /* fstpt mem */
-                gen_helper_fstt_ST0(cpu_env, cpu_A0);
+                gen_helper_fstt_ST0(cpu_env, s->A0);
                 gen_helper_fpop(cpu_env);
                 break;
             case 0x2c: /* frstor mem */
-                gen_helper_frstor(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
+                gen_helper_frstor(cpu_env, s->A0, tcg_const_i32(dflag - 1));
                 break;
             case 0x2e: /* fnsave mem */
-                gen_helper_fsave(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
+                gen_helper_fsave(cpu_env, s->A0, tcg_const_i32(dflag - 1));
                 break;
             case 0x2f: /* fnstsw mem */
                 gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
-                tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
+                tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
                                     s->mem_index, MO_LEUW);
                 break;
             case 0x3c: /* fbld */
-                gen_helper_fbld_ST0(cpu_env, cpu_A0);
+                gen_helper_fbld_ST0(cpu_env, s->A0);
                 break;
             case 0x3e: /* fbstp */
-                gen_helper_fbst_ST0(cpu_env, cpu_A0);
+                gen_helper_fbst_ST0(cpu_env, s->A0);
                 gen_helper_fpop(cpu_env);
                 break;
             case 0x3d: /* fildll */
-                tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
+                tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0, s->mem_index, MO_LEQ);
                 gen_helper_fildll_ST0(cpu_env, cpu_tmp1_i64);
                 break;
             case 0x3f: /* fistpll */
                 gen_helper_fistll_ST0(cpu_tmp1_i64, cpu_env);
-                tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
+                tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->A0, s->mem_index, MO_LEQ);
                 gen_helper_fpop(cpu_env);
                 break;
             default:
@@ -6471,13 +6471,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         } else {
             gen_stack_A0(s);
             /* pop offset */
-            gen_op_ld_v(s, dflag, cpu_T0, cpu_A0);
+            gen_op_ld_v(s, dflag, cpu_T0, s->A0);
             /* NOTE: keeping EIP updated is not a problem in case of
                exception */
             gen_op_jmp_v(cpu_T0);
             /* pop selector */
             gen_add_A0_im(s, 1 << dflag);
-            gen_op_ld_v(s, dflag, cpu_T0, cpu_A0);
+            gen_op_ld_v(s, dflag, cpu_T0, s->A0);
             gen_op_movl_seg_T0_vm(R_CS);
             /* add stack offset */
             gen_stack_update(s, val + (2 << dflag));
@@ -6732,7 +6732,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             s->rip_offset = 1;
             gen_lea_modrm(env, s, modrm);
             if (!(s->prefix & PREFIX_LOCK)) {
-                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+                gen_op_ld_v(s, ot, cpu_T0, s->A0);
             }
         } else {
             gen_op_mov_v_reg(ot, cpu_T0, rm);
@@ -6768,10 +6768,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_exts(ot, cpu_T1);
             tcg_gen_sari_tl(cpu_tmp0, cpu_T1, 3 + ot);
             tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, ot);
-            tcg_gen_add_tl(cpu_A0, gen_lea_modrm_1(a), cpu_tmp0);
-            gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override);
+            tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), cpu_tmp0);
+            gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
             if (!(s->prefix & PREFIX_LOCK)) {
-                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+                gen_op_ld_v(s, ot, cpu_T0, s->A0);
             }
         } else {
             gen_op_mov_v_reg(ot, cpu_T0, rm);
@@ -6785,20 +6785,20 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             case 0: /* bt */
                 /* Needs no atomic ops; we surpressed the normal
                    memory load for LOCK above so do it now.  */
-                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+                gen_op_ld_v(s, ot, cpu_T0, s->A0);
                 break;
             case 1: /* bts */
-                tcg_gen_atomic_fetch_or_tl(cpu_T0, cpu_A0, cpu_tmp0,
+                tcg_gen_atomic_fetch_or_tl(cpu_T0, s->A0, cpu_tmp0,
                                            s->mem_index, ot | MO_LE);
                 break;
             case 2: /* btr */
                 tcg_gen_not_tl(cpu_tmp0, cpu_tmp0);
-                tcg_gen_atomic_fetch_and_tl(cpu_T0, cpu_A0, cpu_tmp0,
+                tcg_gen_atomic_fetch_and_tl(cpu_T0, s->A0, cpu_tmp0,
                                             s->mem_index, ot | MO_LE);
                 break;
             default:
             case 3: /* btc */
-                tcg_gen_atomic_fetch_xor_tl(cpu_T0, cpu_A0, cpu_tmp0,
+                tcg_gen_atomic_fetch_xor_tl(cpu_T0, s->A0, cpu_tmp0,
                                             s->mem_index, ot | MO_LE);
                 break;
             }
@@ -6822,7 +6822,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             }
             if (op != 0) {
                 if (mod != 3) {
-                    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+                    gen_op_st_v(s, ot, cpu_T0, s->A0);
                 } else {
                     gen_op_mov_reg_v(ot, rm, cpu_T0);
                 }
@@ -7051,9 +7051,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         gen_lea_modrm(env, s, modrm);
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
         if (ot == MO_16) {
-            gen_helper_boundw(cpu_env, cpu_A0, cpu_tmp2_i32);
+            gen_helper_boundw(cpu_env, s->A0, cpu_tmp2_i32);
         } else {
-            gen_helper_boundl(cpu_env, cpu_A0, cpu_tmp2_i32);
+            gen_helper_boundl(cpu_env, s->A0, cpu_tmp2_i32);
         }
         break;
     case 0x1c8 ... 0x1cf: /* bswap reg */
@@ -7293,13 +7293,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_lea_modrm(env, s, modrm);
             tcg_gen_ld32u_tl(cpu_T0,
                              cpu_env, offsetof(CPUX86State, gdt.limit));
-            gen_op_st_v(s, MO_16, cpu_T0, cpu_A0);
+            gen_op_st_v(s, MO_16, cpu_T0, s->A0);
             gen_add_A0_im(s, 2);
             tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, gdt.base));
             if (dflag == MO_16) {
                 tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
             }
-            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
+            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, s->A0);
             break;
 
         case 0xc8: /* monitor */
@@ -7308,10 +7308,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             }
             gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
-            tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EAX]);
-            gen_extu(s->aflag, cpu_A0);
+            tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]);
+            gen_extu(s->aflag, s->A0);
             gen_add_A0_ds_seg(s);
-            gen_helper_monitor(cpu_env, cpu_A0);
+            gen_helper_monitor(cpu_env, s->A0);
             break;
 
         case 0xc9: /* mwait */
@@ -7348,13 +7348,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
             gen_lea_modrm(env, s, modrm);
             tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.limit));
-            gen_op_st_v(s, MO_16, cpu_T0, cpu_A0);
+            gen_op_st_v(s, MO_16, cpu_T0, s->A0);
             gen_add_A0_im(s, 2);
             tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.base));
             if (dflag == MO_16) {
                 tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
             }
-            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
+            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, s->A0);
             break;
 
         case 0xd0: /* xgetbv */
@@ -7498,9 +7498,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             }
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_WRITE);
             gen_lea_modrm(env, s, modrm);
-            gen_op_ld_v(s, MO_16, cpu_T1, cpu_A0);
+            gen_op_ld_v(s, MO_16, cpu_T1, s->A0);
             gen_add_A0_im(s, 2);
-            gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
+            gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, s->A0);
             if (dflag == MO_16) {
                 tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
             }
@@ -7515,9 +7515,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             }
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_WRITE);
             gen_lea_modrm(env, s, modrm);
-            gen_op_ld_v(s, MO_16, cpu_T1, cpu_A0);
+            gen_op_ld_v(s, MO_16, cpu_T1, s->A0);
             gen_add_A0_im(s, 2);
-            gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
+            gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, s->A0);
             if (dflag == MO_16) {
                 tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
             }
@@ -7573,7 +7573,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
             gen_lea_modrm(env, s, modrm);
-            gen_helper_invlpg(cpu_env, cpu_A0);
+            gen_helper_invlpg(cpu_env, s->A0);
             gen_jmp_im(s->pc - s->cs_base);
             gen_eob(s);
             break;
@@ -7646,7 +7646,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 gen_op_mov_reg_v(d_ot, reg, cpu_T0);
             } else {
                 gen_lea_modrm(env, s, modrm);
-                gen_op_ld_v(s, MO_32 | MO_SIGN, cpu_T0, cpu_A0);
+                gen_op_ld_v(s, MO_32 | MO_SIGN, cpu_T0, s->A0);
                 gen_op_mov_reg_v(d_ot, reg, cpu_T0);
             }
         } else
@@ -7667,9 +7667,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             rm = modrm & 7;
             if (mod != 3) {
                 gen_lea_modrm(env, s, modrm);
-                gen_op_ld_v(s, ot, t0, cpu_A0);
+                gen_op_ld_v(s, ot, t0, s->A0);
                 a0 = tcg_temp_local_new();
-                tcg_gen_mov_tl(a0, cpu_A0);
+                tcg_gen_mov_tl(a0, s->A0);
             } else {
                 gen_op_mov_v_reg(ot, t0, rm);
                 a0 = NULL;
@@ -7785,16 +7785,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 } else {
                     gen_lea_modrm(env, s, modrm);
                     if (CODE64(s)) {
-                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], cpu_A0,
+                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
                                             s->mem_index, MO_LEQ);
-                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 8);
-                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], cpu_A0,
+                        tcg_gen_addi_tl(s->A0, s->A0, 8);
+                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
                                             s->mem_index, MO_LEQ);
                     } else {
-                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], cpu_A0,
+                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
                                             s->mem_index, MO_LEUL);
-                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 4);
-                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], cpu_A0,
+                        tcg_gen_addi_tl(s->A0, s->A0, 4);
+                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
                                             s->mem_index, MO_LEUL);
                     }
                     /* bnd registers are now in-use */
@@ -7810,22 +7810,22 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                     goto illegal_op;
                 }
                 if (a.base >= 0) {
-                    tcg_gen_addi_tl(cpu_A0, cpu_regs[a.base], a.disp);
+                    tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
                 } else {
-                    tcg_gen_movi_tl(cpu_A0, 0);
+                    tcg_gen_movi_tl(s->A0, 0);
                 }
-                gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override);
+                gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
                 if (a.index >= 0) {
                     tcg_gen_mov_tl(cpu_T0, cpu_regs[a.index]);
                 } else {
                     tcg_gen_movi_tl(cpu_T0, 0);
                 }
                 if (CODE64(s)) {
-                    gen_helper_bndldx64(cpu_bndl[reg], cpu_env, cpu_A0, cpu_T0);
+                    gen_helper_bndldx64(cpu_bndl[reg], cpu_env, s->A0, cpu_T0);
                     tcg_gen_ld_i64(cpu_bndu[reg], cpu_env,
                                    offsetof(CPUX86State, mmx_t0.MMX_Q(0)));
                 } else {
-                    gen_helper_bndldx32(cpu_bndu[reg], cpu_env, cpu_A0, cpu_T0);
+                    gen_helper_bndldx32(cpu_bndu[reg], cpu_env, s->A0, cpu_T0);
                     tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndu[reg]);
                     tcg_gen_shri_i64(cpu_bndu[reg], cpu_bndu[reg], 32);
                 }
@@ -7859,11 +7859,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                     /* rip-relative generates #ud */
                     goto illegal_op;
                 }
-                tcg_gen_not_tl(cpu_A0, gen_lea_modrm_1(a));
+                tcg_gen_not_tl(s->A0, gen_lea_modrm_1(s, a));
                 if (!CODE64(s)) {
-                    tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
+                    tcg_gen_ext32u_tl(s->A0, s->A0);
                 }
-                tcg_gen_extu_tl_i64(cpu_bndu[reg], cpu_A0);
+                tcg_gen_extu_tl_i64(cpu_bndu[reg], s->A0);
                 /* bnd registers are now in-use */
                 gen_set_hflag(s, HF_MPX_IU_MASK);
                 break;
@@ -7892,16 +7892,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 } else {
                     gen_lea_modrm(env, s, modrm);
                     if (CODE64(s)) {
-                        tcg_gen_qemu_st_i64(cpu_bndl[reg], cpu_A0,
+                        tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
                                             s->mem_index, MO_LEQ);
-                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 8);
-                        tcg_gen_qemu_st_i64(cpu_bndu[reg], cpu_A0,
+                        tcg_gen_addi_tl(s->A0, s->A0, 8);
+                        tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
                                             s->mem_index, MO_LEQ);
                     } else {
-                        tcg_gen_qemu_st_i64(cpu_bndl[reg], cpu_A0,
+                        tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
                                             s->mem_index, MO_LEUL);
-                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 4);
-                        tcg_gen_qemu_st_i64(cpu_bndu[reg], cpu_A0,
+                        tcg_gen_addi_tl(s->A0, s->A0, 4);
+                        tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
                                             s->mem_index, MO_LEUL);
                     }
                 }
@@ -7915,21 +7915,21 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                     goto illegal_op;
                 }
                 if (a.base >= 0) {
-                    tcg_gen_addi_tl(cpu_A0, cpu_regs[a.base], a.disp);
+                    tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
                 } else {
-                    tcg_gen_movi_tl(cpu_A0, 0);
+                    tcg_gen_movi_tl(s->A0, 0);
                 }
-                gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override);
+                gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
                 if (a.index >= 0) {
                     tcg_gen_mov_tl(cpu_T0, cpu_regs[a.index]);
                 } else {
                     tcg_gen_movi_tl(cpu_T0, 0);
                 }
                 if (CODE64(s)) {
-                    gen_helper_bndstx64(cpu_env, cpu_A0, cpu_T0,
+                    gen_helper_bndstx64(cpu_env, s->A0, cpu_T0,
                                         cpu_bndl[reg], cpu_bndu[reg]);
                 } else {
-                    gen_helper_bndstx32(cpu_env, cpu_A0, cpu_T0,
+                    gen_helper_bndstx32(cpu_env, s->A0, cpu_T0,
                                         cpu_bndl[reg], cpu_bndu[reg]);
                 }
             }
@@ -8069,7 +8069,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 break;
             }
             gen_lea_modrm(env, s, modrm);
-            gen_helper_fxsave(cpu_env, cpu_A0);
+            gen_helper_fxsave(cpu_env, s->A0);
             break;
 
         CASE_MODRM_MEM_OP(1): /* fxrstor */
@@ -8082,7 +8082,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 break;
             }
             gen_lea_modrm(env, s, modrm);
-            gen_helper_fxrstor(cpu_env, cpu_A0);
+            gen_helper_fxrstor(cpu_env, s->A0);
             break;
 
         CASE_MODRM_MEM_OP(2): /* ldmxcsr */
@@ -8094,7 +8094,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 break;
             }
             gen_lea_modrm(env, s, modrm);
-            tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0, s->mem_index, MO_LEUL);
+            tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0, s->mem_index, MO_LEUL);
             gen_helper_ldmxcsr(cpu_env, cpu_tmp2_i32);
             break;
 
@@ -8108,7 +8108,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             }
             gen_lea_modrm(env, s, modrm);
             tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, mxcsr));
-            gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
+            gen_op_st_v(s, MO_32, cpu_T0, s->A0);
             break;
 
         CASE_MODRM_MEM_OP(4): /* xsave */
@@ -8120,7 +8120,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_lea_modrm(env, s, modrm);
             tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
                                   cpu_regs[R_EDX]);
-            gen_helper_xsave(cpu_env, cpu_A0, cpu_tmp1_i64);
+            gen_helper_xsave(cpu_env, s->A0, cpu_tmp1_i64);
             break;
 
         CASE_MODRM_MEM_OP(5): /* xrstor */
@@ -8132,7 +8132,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_lea_modrm(env, s, modrm);
             tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
                                   cpu_regs[R_EDX]);
-            gen_helper_xrstor(cpu_env, cpu_A0, cpu_tmp1_i64);
+            gen_helper_xrstor(cpu_env, s->A0, cpu_tmp1_i64);
             /* XRSTOR is how MPX is enabled, which changes how
                we translate.  Thus we need to end the TB.  */
             gen_update_cc_op(s);
@@ -8160,7 +8160,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 gen_lea_modrm(env, s, modrm);
                 tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
                                       cpu_regs[R_EDX]);
-                gen_helper_xsaveopt(cpu_env, cpu_A0, cpu_tmp1_i64);
+                gen_helper_xsaveopt(cpu_env, s->A0, cpu_tmp1_i64);
             }
             break;
 
@@ -8458,7 +8458,7 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
 
     cpu_T0 = tcg_temp_new();
     cpu_T1 = tcg_temp_new();
-    cpu_A0 = tcg_temp_new();
+    dc->A0 = tcg_temp_new();
 
     cpu_tmp0 = tcg_temp_new();
     cpu_tmp1_i64 = tcg_temp_new_i64();
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [Qemu-devel] [PATCH v3 03/13] target/i386: move cpu_T0 to DisasContext
  2018-09-11 20:28 [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg Emilio G. Cota
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 01/13] target/i386: move cpu_cc_srcT to DisasContext Emilio G. Cota
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 02/13] target/i386: move cpu_A0 " Emilio G. Cota
@ 2018-09-11 20:28 ` Emilio G. Cota
  2018-09-11 20:47   ` Richard Henderson
  2018-09-13 14:25   ` Alex Bennée
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 04/13] target/i386: move cpu_T1 " Emilio G. Cota
                   ` (10 subsequent siblings)
  13 siblings, 2 replies; 32+ messages in thread
From: Emilio G. Cota @ 2018-09-11 20:28 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 target/i386/translate.c | 1174 ++++++++++++++++++++-------------------
 1 file changed, 594 insertions(+), 580 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index c6b1baab9d..73fd7e5b9a 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -79,7 +79,7 @@ static TCGv cpu_seg_base[6];
 static TCGv_i64 cpu_bndl[4];
 static TCGv_i64 cpu_bndu[4];
 /* local temps */
-static TCGv cpu_T0, cpu_T1;
+static TCGv cpu_T1;
 /* local register indexes (only used inside old micro ops) */
 static TCGv cpu_tmp0, cpu_tmp4;
 static TCGv_ptr cpu_ptr0, cpu_ptr1;
@@ -138,6 +138,7 @@ typedef struct DisasContext {
     /* TCG local temps */
     TCGv cc_srcT;
     TCGv A0;
+    TCGv T0;
 
     sigjmp_buf jmpbuf;
 } DisasContext;
@@ -412,9 +413,9 @@ static inline void gen_op_add_reg_im(TCGMemOp size, int reg, int32_t val)
     gen_op_mov_reg_v(size, reg, cpu_tmp0);
 }
 
-static inline void gen_op_add_reg_T0(TCGMemOp size, int reg)
+static inline void gen_op_add_reg_T0(DisasContext *s, TCGMemOp size, int reg)
 {
-    tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T0);
+    tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], s->T0);
     gen_op_mov_reg_v(size, reg, cpu_tmp0);
 }
 
@@ -431,9 +432,9 @@ static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
 static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
 {
     if (d == OR_TMP0) {
-        gen_op_st_v(s, idx, cpu_T0, s->A0);
+        gen_op_st_v(s, idx, s->T0, s->A0);
     } else {
-        gen_op_mov_reg_v(idx, d, cpu_T0);
+        gen_op_mov_reg_v(idx, d, s->T0);
     }
 }
 
@@ -509,10 +510,10 @@ static inline void gen_string_movl_A0_EDI(DisasContext *s)
     gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1);
 }
 
-static inline void gen_op_movl_T0_Dshift(TCGMemOp ot)
+static inline void gen_op_movl_T0_Dshift(DisasContext *s, TCGMemOp ot)
 {
-    tcg_gen_ld32s_tl(cpu_T0, cpu_env, offsetof(CPUX86State, df));
-    tcg_gen_shli_tl(cpu_T0, cpu_T0, ot);
+    tcg_gen_ld32s_tl(s->T0, cpu_env, offsetof(CPUX86State, df));
+    tcg_gen_shli_tl(s->T0, s->T0, ot);
 };
 
 static TCGv gen_ext_tl(TCGv dst, TCGv src, TCGMemOp size, bool sign)
@@ -610,7 +611,7 @@ static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
     target_ulong next_eip;
 
     if (s->pe && (s->cpl > s->iopl || s->vm86)) {
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
         switch (ot) {
         case MO_8:
             gen_helper_check_iob(cpu_env, cpu_tmp2_i32);
@@ -630,7 +631,7 @@ static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
         gen_jmp_im(cur_eip);
         svm_flags |= (1 << (4 + ot));
         next_eip = s->pc - s->cs_base;
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
         gen_helper_svm_check_io(cpu_env, cpu_tmp2_i32,
                                 tcg_const_i32(svm_flags),
                                 tcg_const_i32(next_eip - cur_eip));
@@ -640,41 +641,41 @@ static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
 static inline void gen_movs(DisasContext *s, TCGMemOp ot)
 {
     gen_string_movl_A0_ESI(s);
-    gen_op_ld_v(s, ot, cpu_T0, s->A0);
+    gen_op_ld_v(s, ot, s->T0, s->A0);
     gen_string_movl_A0_EDI(s);
-    gen_op_st_v(s, ot, cpu_T0, s->A0);
-    gen_op_movl_T0_Dshift(ot);
-    gen_op_add_reg_T0(s->aflag, R_ESI);
-    gen_op_add_reg_T0(s->aflag, R_EDI);
+    gen_op_st_v(s, ot, s->T0, s->A0);
+    gen_op_movl_T0_Dshift(s, ot);
+    gen_op_add_reg_T0(s, s->aflag, R_ESI);
+    gen_op_add_reg_T0(s, s->aflag, R_EDI);
 }
 
-static void gen_op_update1_cc(void)
+static void gen_op_update1_cc(DisasContext *s)
 {
-    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
+    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
 }
 
-static void gen_op_update2_cc(void)
+static void gen_op_update2_cc(DisasContext *s)
 {
     tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
-    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
+    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
 }
 
-static void gen_op_update3_cc(TCGv reg)
+static void gen_op_update3_cc(DisasContext *s, TCGv reg)
 {
     tcg_gen_mov_tl(cpu_cc_src2, reg);
     tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
-    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
+    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
 }
 
-static inline void gen_op_testl_T0_T1_cc(void)
+static inline void gen_op_testl_T0_T1_cc(DisasContext *s)
 {
-    tcg_gen_and_tl(cpu_cc_dst, cpu_T0, cpu_T1);
+    tcg_gen_and_tl(cpu_cc_dst, s->T0, cpu_T1);
 }
 
 static void gen_op_update_neg_cc(DisasContext *s)
 {
-    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
-    tcg_gen_neg_tl(cpu_cc_src, cpu_T0);
+    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
+    tcg_gen_neg_tl(cpu_cc_src, s->T0);
     tcg_gen_movi_tl(s->cc_srcT, 0);
 }
 
@@ -1022,11 +1023,11 @@ static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
    value 'b'. In the fast case, T0 is guaranted not to be used. */
 static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
 {
-    CCPrepare cc = gen_prepare_cc(s, b, cpu_T0);
+    CCPrepare cc = gen_prepare_cc(s, b, s->T0);
 
     if (cc.mask != -1) {
-        tcg_gen_andi_tl(cpu_T0, cc.reg, cc.mask);
-        cc.reg = cpu_T0;
+        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
+        cc.reg = s->T0;
     }
     if (cc.use_reg2) {
         tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
@@ -1040,12 +1041,12 @@ static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
    A translation block must end soon.  */
 static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
 {
-    CCPrepare cc = gen_prepare_cc(s, b, cpu_T0);
+    CCPrepare cc = gen_prepare_cc(s, b, s->T0);
 
     gen_update_cc_op(s);
     if (cc.mask != -1) {
-        tcg_gen_andi_tl(cpu_T0, cc.reg, cc.mask);
-        cc.reg = cpu_T0;
+        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
+        cc.reg = s->T0;
     }
     set_cc_op(s, CC_OP_DYNAMIC);
     if (cc.use_reg2) {
@@ -1070,20 +1071,20 @@ static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
 
 static inline void gen_stos(DisasContext *s, TCGMemOp ot)
 {
-    gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
+    gen_op_mov_v_reg(MO_32, s->T0, R_EAX);
     gen_string_movl_A0_EDI(s);
-    gen_op_st_v(s, ot, cpu_T0, s->A0);
-    gen_op_movl_T0_Dshift(ot);
-    gen_op_add_reg_T0(s->aflag, R_EDI);
+    gen_op_st_v(s, ot, s->T0, s->A0);
+    gen_op_movl_T0_Dshift(s, ot);
+    gen_op_add_reg_T0(s, s->aflag, R_EDI);
 }
 
 static inline void gen_lods(DisasContext *s, TCGMemOp ot)
 {
     gen_string_movl_A0_ESI(s);
-    gen_op_ld_v(s, ot, cpu_T0, s->A0);
-    gen_op_mov_reg_v(ot, R_EAX, cpu_T0);
-    gen_op_movl_T0_Dshift(ot);
-    gen_op_add_reg_T0(s->aflag, R_ESI);
+    gen_op_ld_v(s, ot, s->T0, s->A0);
+    gen_op_mov_reg_v(ot, R_EAX, s->T0);
+    gen_op_movl_T0_Dshift(s, ot);
+    gen_op_add_reg_T0(s, s->aflag, R_ESI);
 }
 
 static inline void gen_scas(DisasContext *s, TCGMemOp ot)
@@ -1091,8 +1092,8 @@ static inline void gen_scas(DisasContext *s, TCGMemOp ot)
     gen_string_movl_A0_EDI(s);
     gen_op_ld_v(s, ot, cpu_T1, s->A0);
     gen_op(s, OP_CMPL, ot, R_EAX);
-    gen_op_movl_T0_Dshift(ot);
-    gen_op_add_reg_T0(s->aflag, R_EDI);
+    gen_op_movl_T0_Dshift(s, ot);
+    gen_op_add_reg_T0(s, s->aflag, R_EDI);
 }
 
 static inline void gen_cmps(DisasContext *s, TCGMemOp ot)
@@ -1101,9 +1102,9 @@ static inline void gen_cmps(DisasContext *s, TCGMemOp ot)
     gen_op_ld_v(s, ot, cpu_T1, s->A0);
     gen_string_movl_A0_ESI(s);
     gen_op(s, OP_CMPL, ot, OR_TMP0);
-    gen_op_movl_T0_Dshift(ot);
-    gen_op_add_reg_T0(s->aflag, R_ESI);
-    gen_op_add_reg_T0(s->aflag, R_EDI);
+    gen_op_movl_T0_Dshift(s, ot);
+    gen_op_add_reg_T0(s, s->aflag, R_ESI);
+    gen_op_add_reg_T0(s, s->aflag, R_EDI);
 }
 
 static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
@@ -1127,14 +1128,14 @@ static inline void gen_ins(DisasContext *s, TCGMemOp ot)
     gen_string_movl_A0_EDI(s);
     /* Note: we must do this dummy write first to be restartable in
        case of page fault. */
-    tcg_gen_movi_tl(cpu_T0, 0);
-    gen_op_st_v(s, ot, cpu_T0, s->A0);
+    tcg_gen_movi_tl(s->T0, 0);
+    gen_op_st_v(s, ot, s->T0, s->A0);
     tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
     tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
-    gen_helper_in_func(ot, cpu_T0, cpu_tmp2_i32);
-    gen_op_st_v(s, ot, cpu_T0, s->A0);
-    gen_op_movl_T0_Dshift(ot);
-    gen_op_add_reg_T0(s->aflag, R_EDI);
+    gen_helper_in_func(ot, s->T0, cpu_tmp2_i32);
+    gen_op_st_v(s, ot, s->T0, s->A0);
+    gen_op_movl_T0_Dshift(s, ot);
+    gen_op_add_reg_T0(s, s->aflag, R_EDI);
     gen_bpt_io(s, cpu_tmp2_i32, ot);
     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
         gen_io_end();
@@ -1147,14 +1148,14 @@ static inline void gen_outs(DisasContext *s, TCGMemOp ot)
         gen_io_start();
     }
     gen_string_movl_A0_ESI(s);
-    gen_op_ld_v(s, ot, cpu_T0, s->A0);
+    gen_op_ld_v(s, ot, s->T0, s->A0);
 
     tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
     tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
-    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T0);
+    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T0);
     gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
-    gen_op_movl_T0_Dshift(ot);
-    gen_op_add_reg_T0(s->aflag, R_ESI);
+    gen_op_movl_T0_Dshift(s, ot);
+    gen_op_add_reg_T0(s, s->aflag, R_ESI);
     gen_bpt_io(s, cpu_tmp2_i32, ot);
     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
         gen_io_end();
@@ -1265,103 +1266,103 @@ static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
 static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
 {
     if (d != OR_TMP0) {
-        gen_op_mov_v_reg(ot, cpu_T0, d);
+        gen_op_mov_v_reg(ot, s1->T0, d);
     } else if (!(s1->prefix & PREFIX_LOCK)) {
-        gen_op_ld_v(s1, ot, cpu_T0, s1->A0);
+        gen_op_ld_v(s1, ot, s1->T0, s1->A0);
     }
     switch(op) {
     case OP_ADCL:
         gen_compute_eflags_c(s1, cpu_tmp4);
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_add_tl(cpu_T0, cpu_tmp4, cpu_T1);
-            tcg_gen_atomic_add_fetch_tl(cpu_T0, s1->A0, cpu_T0,
+            tcg_gen_add_tl(s1->T0, cpu_tmp4, cpu_T1);
+            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                         s1->mem_index, ot | MO_LE);
         } else {
-            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
-            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_tmp4);
+            tcg_gen_add_tl(s1->T0, s1->T0, cpu_T1);
+            tcg_gen_add_tl(s1->T0, s1->T0, cpu_tmp4);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
-        gen_op_update3_cc(cpu_tmp4);
+        gen_op_update3_cc(s1, cpu_tmp4);
         set_cc_op(s1, CC_OP_ADCB + ot);
         break;
     case OP_SBBL:
         gen_compute_eflags_c(s1, cpu_tmp4);
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_add_tl(cpu_T0, cpu_T1, cpu_tmp4);
-            tcg_gen_neg_tl(cpu_T0, cpu_T0);
-            tcg_gen_atomic_add_fetch_tl(cpu_T0, s1->A0, cpu_T0,
+            tcg_gen_add_tl(s1->T0, cpu_T1, cpu_tmp4);
+            tcg_gen_neg_tl(s1->T0, s1->T0);
+            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                         s1->mem_index, ot | MO_LE);
         } else {
-            tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
-            tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_tmp4);
+            tcg_gen_sub_tl(s1->T0, s1->T0, cpu_T1);
+            tcg_gen_sub_tl(s1->T0, s1->T0, cpu_tmp4);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
-        gen_op_update3_cc(cpu_tmp4);
+        gen_op_update3_cc(s1, cpu_tmp4);
         set_cc_op(s1, CC_OP_SBBB + ot);
         break;
     case OP_ADDL:
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_atomic_add_fetch_tl(cpu_T0, s1->A0, cpu_T1,
+            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, cpu_T1,
                                         s1->mem_index, ot | MO_LE);
         } else {
-            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
+            tcg_gen_add_tl(s1->T0, s1->T0, cpu_T1);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
-        gen_op_update2_cc();
+        gen_op_update2_cc(s1);
         set_cc_op(s1, CC_OP_ADDB + ot);
         break;
     case OP_SUBL:
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_neg_tl(cpu_T0, cpu_T1);
-            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, cpu_T0,
+            tcg_gen_neg_tl(s1->T0, cpu_T1);
+            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, s1->T0,
                                         s1->mem_index, ot | MO_LE);
-            tcg_gen_sub_tl(cpu_T0, s1->cc_srcT, cpu_T1);
+            tcg_gen_sub_tl(s1->T0, s1->cc_srcT, cpu_T1);
         } else {
-            tcg_gen_mov_tl(s1->cc_srcT, cpu_T0);
-            tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
+            tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
+            tcg_gen_sub_tl(s1->T0, s1->T0, cpu_T1);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
-        gen_op_update2_cc();
+        gen_op_update2_cc(s1);
         set_cc_op(s1, CC_OP_SUBB + ot);
         break;
     default:
     case OP_ANDL:
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_atomic_and_fetch_tl(cpu_T0, s1->A0, cpu_T1,
+            tcg_gen_atomic_and_fetch_tl(s1->T0, s1->A0, cpu_T1,
                                         s1->mem_index, ot | MO_LE);
         } else {
-            tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
+            tcg_gen_and_tl(s1->T0, s1->T0, cpu_T1);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
-        gen_op_update1_cc();
+        gen_op_update1_cc(s1);
         set_cc_op(s1, CC_OP_LOGICB + ot);
         break;
     case OP_ORL:
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_atomic_or_fetch_tl(cpu_T0, s1->A0, cpu_T1,
+            tcg_gen_atomic_or_fetch_tl(s1->T0, s1->A0, cpu_T1,
                                        s1->mem_index, ot | MO_LE);
         } else {
-            tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
+            tcg_gen_or_tl(s1->T0, s1->T0, cpu_T1);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
-        gen_op_update1_cc();
+        gen_op_update1_cc(s1);
         set_cc_op(s1, CC_OP_LOGICB + ot);
         break;
     case OP_XORL:
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_atomic_xor_fetch_tl(cpu_T0, s1->A0, cpu_T1,
+            tcg_gen_atomic_xor_fetch_tl(s1->T0, s1->A0, cpu_T1,
                                         s1->mem_index, ot | MO_LE);
         } else {
-            tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
+            tcg_gen_xor_tl(s1->T0, s1->T0, cpu_T1);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
-        gen_op_update1_cc();
+        gen_op_update1_cc(s1);
         set_cc_op(s1, CC_OP_LOGICB + ot);
         break;
     case OP_CMPL:
         tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
-        tcg_gen_mov_tl(s1->cc_srcT, cpu_T0);
-        tcg_gen_sub_tl(cpu_cc_dst, cpu_T0, cpu_T1);
+        tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
+        tcg_gen_sub_tl(cpu_cc_dst, s1->T0, cpu_T1);
         set_cc_op(s1, CC_OP_SUBB + ot);
         break;
     }
@@ -1371,21 +1372,21 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
 static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c)
 {
     if (s1->prefix & PREFIX_LOCK) {
-        tcg_gen_movi_tl(cpu_T0, c > 0 ? 1 : -1);
-        tcg_gen_atomic_add_fetch_tl(cpu_T0, s1->A0, cpu_T0,
+        tcg_gen_movi_tl(s1->T0, c > 0 ? 1 : -1);
+        tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                     s1->mem_index, ot | MO_LE);
     } else {
         if (d != OR_TMP0) {
-            gen_op_mov_v_reg(ot, cpu_T0, d);
+            gen_op_mov_v_reg(ot, s1->T0, d);
         } else {
-            gen_op_ld_v(s1, ot, cpu_T0, s1->A0);
+            gen_op_ld_v(s1, ot, s1->T0, s1->A0);
         }
-        tcg_gen_addi_tl(cpu_T0, cpu_T0, (c > 0 ? 1 : -1));
+        tcg_gen_addi_tl(s1->T0, s1->T0, (c > 0 ? 1 : -1));
         gen_op_st_rm_T0_A0(s1, ot, d);
     }
 
     gen_compute_eflags_c(s1, cpu_cc_src);
-    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
+    tcg_gen_mov_tl(cpu_cc_dst, s1->T0);
     set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot);
 }
 
@@ -1441,9 +1442,9 @@ static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
 
     /* load */
     if (op1 == OR_TMP0) {
-        gen_op_ld_v(s, ot, cpu_T0, s->A0);
+        gen_op_ld_v(s, ot, s->T0, s->A0);
     } else {
-        gen_op_mov_v_reg(ot, cpu_T0, op1);
+        gen_op_mov_v_reg(ot, s->T0, op1);
     }
 
     tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);
@@ -1451,23 +1452,23 @@ static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
 
     if (is_right) {
         if (is_arith) {
-            gen_exts(ot, cpu_T0);
-            tcg_gen_sar_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
-            tcg_gen_sar_tl(cpu_T0, cpu_T0, cpu_T1);
+            gen_exts(ot, s->T0);
+            tcg_gen_sar_tl(cpu_tmp0, s->T0, cpu_tmp0);
+            tcg_gen_sar_tl(s->T0, s->T0, cpu_T1);
         } else {
-            gen_extu(ot, cpu_T0);
-            tcg_gen_shr_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
-            tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_T1);
+            gen_extu(ot, s->T0);
+            tcg_gen_shr_tl(cpu_tmp0, s->T0, cpu_tmp0);
+            tcg_gen_shr_tl(s->T0, s->T0, cpu_T1);
         }
     } else {
-        tcg_gen_shl_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
-        tcg_gen_shl_tl(cpu_T0, cpu_T0, cpu_T1);
+        tcg_gen_shl_tl(cpu_tmp0, s->T0, cpu_tmp0);
+        tcg_gen_shl_tl(s->T0, s->T0, cpu_T1);
     }
 
     /* store */
     gen_op_st_rm_T0_A0(s, ot, op1);
 
-    gen_shift_flags(s, ot, cpu_T0, cpu_tmp0, cpu_T1, is_right);
+    gen_shift_flags(s, ot, s->T0, cpu_tmp0, cpu_T1, is_right);
 }
 
 static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
@@ -1477,25 +1478,25 @@ static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
 
     /* load */
     if (op1 == OR_TMP0)
-        gen_op_ld_v(s, ot, cpu_T0, s->A0);
+        gen_op_ld_v(s, ot, s->T0, s->A0);
     else
-        gen_op_mov_v_reg(ot, cpu_T0, op1);
+        gen_op_mov_v_reg(ot, s->T0, op1);
 
     op2 &= mask;
     if (op2 != 0) {
         if (is_right) {
             if (is_arith) {
-                gen_exts(ot, cpu_T0);
-                tcg_gen_sari_tl(cpu_tmp4, cpu_T0, op2 - 1);
-                tcg_gen_sari_tl(cpu_T0, cpu_T0, op2);
+                gen_exts(ot, s->T0);
+                tcg_gen_sari_tl(cpu_tmp4, s->T0, op2 - 1);
+                tcg_gen_sari_tl(s->T0, s->T0, op2);
             } else {
-                gen_extu(ot, cpu_T0);
-                tcg_gen_shri_tl(cpu_tmp4, cpu_T0, op2 - 1);
-                tcg_gen_shri_tl(cpu_T0, cpu_T0, op2);
+                gen_extu(ot, s->T0);
+                tcg_gen_shri_tl(cpu_tmp4, s->T0, op2 - 1);
+                tcg_gen_shri_tl(s->T0, s->T0, op2);
             }
         } else {
-            tcg_gen_shli_tl(cpu_tmp4, cpu_T0, op2 - 1);
-            tcg_gen_shli_tl(cpu_T0, cpu_T0, op2);
+            tcg_gen_shli_tl(cpu_tmp4, s->T0, op2 - 1);
+            tcg_gen_shli_tl(s->T0, s->T0, op2);
         }
     }
 
@@ -1505,7 +1506,7 @@ static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
     /* update eflags if non zero shift */
     if (op2 != 0) {
         tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
-        tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
+        tcg_gen_mov_tl(cpu_cc_dst, s->T0);
         set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
     }
 }
@@ -1517,9 +1518,9 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
 
     /* load */
     if (op1 == OR_TMP0) {
-        gen_op_ld_v(s, ot, cpu_T0, s->A0);
+        gen_op_ld_v(s, ot, s->T0, s->A0);
     } else {
-        gen_op_mov_v_reg(ot, cpu_T0, op1);
+        gen_op_mov_v_reg(ot, s->T0, op1);
     }
 
     tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);
@@ -1527,31 +1528,31 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
     switch (ot) {
     case MO_8:
         /* Replicate the 8-bit input so that a 32-bit rotate works.  */
-        tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
-        tcg_gen_muli_tl(cpu_T0, cpu_T0, 0x01010101);
+        tcg_gen_ext8u_tl(s->T0, s->T0);
+        tcg_gen_muli_tl(s->T0, s->T0, 0x01010101);
         goto do_long;
     case MO_16:
         /* Replicate the 16-bit input so that a 32-bit rotate works.  */
-        tcg_gen_deposit_tl(cpu_T0, cpu_T0, cpu_T0, 16, 16);
+        tcg_gen_deposit_tl(s->T0, s->T0, s->T0, 16, 16);
         goto do_long;
     do_long:
 #ifdef TARGET_X86_64
     case MO_32:
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
         tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
         if (is_right) {
             tcg_gen_rotr_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
         } else {
             tcg_gen_rotl_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
         }
-        tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
+        tcg_gen_extu_i32_tl(s->T0, cpu_tmp2_i32);
         break;
 #endif
     default:
         if (is_right) {
-            tcg_gen_rotr_tl(cpu_T0, cpu_T0, cpu_T1);
+            tcg_gen_rotr_tl(s->T0, s->T0, cpu_T1);
         } else {
-            tcg_gen_rotl_tl(cpu_T0, cpu_T0, cpu_T1);
+            tcg_gen_rotl_tl(s->T0, s->T0, cpu_T1);
         }
         break;
     }
@@ -1567,12 +1568,12 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
        since we've computed the flags into CC_SRC, these variables are
        currently dead.  */
     if (is_right) {
-        tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask - 1);
-        tcg_gen_shri_tl(cpu_cc_dst, cpu_T0, mask);
+        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
+        tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
         tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
     } else {
-        tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask);
-        tcg_gen_andi_tl(cpu_cc_dst, cpu_T0, 1);
+        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
+        tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
     }
     tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
     tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
@@ -1603,9 +1604,9 @@ static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
 
     /* load */
     if (op1 == OR_TMP0) {
-        gen_op_ld_v(s, ot, cpu_T0, s->A0);
+        gen_op_ld_v(s, ot, s->T0, s->A0);
     } else {
-        gen_op_mov_v_reg(ot, cpu_T0, op1);
+        gen_op_mov_v_reg(ot, s->T0, op1);
     }
 
     op2 &= mask;
@@ -1613,20 +1614,20 @@ static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
         switch (ot) {
 #ifdef TARGET_X86_64
         case MO_32:
-            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
             if (is_right) {
                 tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
             } else {
                 tcg_gen_rotli_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
             }
-            tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
+            tcg_gen_extu_i32_tl(s->T0, cpu_tmp2_i32);
             break;
 #endif
         default:
             if (is_right) {
-                tcg_gen_rotri_tl(cpu_T0, cpu_T0, op2);
+                tcg_gen_rotri_tl(s->T0, s->T0, op2);
             } else {
-                tcg_gen_rotli_tl(cpu_T0, cpu_T0, op2);
+                tcg_gen_rotli_tl(s->T0, s->T0, op2);
             }
             break;
         case MO_8:
@@ -1639,10 +1640,10 @@ static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
             if (is_right) {
                 shift = mask + 1 - shift;
             }
-            gen_extu(ot, cpu_T0);
-            tcg_gen_shli_tl(cpu_tmp0, cpu_T0, shift);
-            tcg_gen_shri_tl(cpu_T0, cpu_T0, mask + 1 - shift);
-            tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0);
+            gen_extu(ot, s->T0);
+            tcg_gen_shli_tl(cpu_tmp0, s->T0, shift);
+            tcg_gen_shri_tl(s->T0, s->T0, mask + 1 - shift);
+            tcg_gen_or_tl(s->T0, s->T0, cpu_tmp0);
             break;
         }
     }
@@ -1659,12 +1660,12 @@ static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
            since we've computed the flags into CC_SRC, these variables are
            currently dead.  */
         if (is_right) {
-            tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask - 1);
-            tcg_gen_shri_tl(cpu_cc_dst, cpu_T0, mask);
+            tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
+            tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
             tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
         } else {
-            tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask);
-            tcg_gen_andi_tl(cpu_cc_dst, cpu_T0, 1);
+            tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
+            tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
         }
         tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
         tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
@@ -1681,24 +1682,24 @@ static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
 
     /* load */
     if (op1 == OR_TMP0)
-        gen_op_ld_v(s, ot, cpu_T0, s->A0);
+        gen_op_ld_v(s, ot, s->T0, s->A0);
     else
-        gen_op_mov_v_reg(ot, cpu_T0, op1);
+        gen_op_mov_v_reg(ot, s->T0, op1);
     
     if (is_right) {
         switch (ot) {
         case MO_8:
-            gen_helper_rcrb(cpu_T0, cpu_env, cpu_T0, cpu_T1);
+            gen_helper_rcrb(s->T0, cpu_env, s->T0, cpu_T1);
             break;
         case MO_16:
-            gen_helper_rcrw(cpu_T0, cpu_env, cpu_T0, cpu_T1);
+            gen_helper_rcrw(s->T0, cpu_env, s->T0, cpu_T1);
             break;
         case MO_32:
-            gen_helper_rcrl(cpu_T0, cpu_env, cpu_T0, cpu_T1);
+            gen_helper_rcrl(s->T0, cpu_env, s->T0, cpu_T1);
             break;
 #ifdef TARGET_X86_64
         case MO_64:
-            gen_helper_rcrq(cpu_T0, cpu_env, cpu_T0, cpu_T1);
+            gen_helper_rcrq(s->T0, cpu_env, s->T0, cpu_T1);
             break;
 #endif
         default:
@@ -1707,17 +1708,17 @@ static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
     } else {
         switch (ot) {
         case MO_8:
-            gen_helper_rclb(cpu_T0, cpu_env, cpu_T0, cpu_T1);
+            gen_helper_rclb(s->T0, cpu_env, s->T0, cpu_T1);
             break;
         case MO_16:
-            gen_helper_rclw(cpu_T0, cpu_env, cpu_T0, cpu_T1);
+            gen_helper_rclw(s->T0, cpu_env, s->T0, cpu_T1);
             break;
         case MO_32:
-            gen_helper_rcll(cpu_T0, cpu_env, cpu_T0, cpu_T1);
+            gen_helper_rcll(s->T0, cpu_env, s->T0, cpu_T1);
             break;
 #ifdef TARGET_X86_64
         case MO_64:
-            gen_helper_rclq(cpu_T0, cpu_env, cpu_T0, cpu_T1);
+            gen_helper_rclq(s->T0, cpu_env, s->T0, cpu_T1);
             break;
 #endif
         default:
@@ -1737,9 +1738,9 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
 
     /* load */
     if (op1 == OR_TMP0) {
-        gen_op_ld_v(s, ot, cpu_T0, s->A0);
+        gen_op_ld_v(s, ot, s->T0, s->A0);
     } else {
-        gen_op_mov_v_reg(ot, cpu_T0, op1);
+        gen_op_mov_v_reg(ot, s->T0, op1);
     }
 
     count = tcg_temp_new();
@@ -1751,11 +1752,11 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
            This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
            portion by constructing it as a 32-bit value.  */
         if (is_right) {
-            tcg_gen_deposit_tl(cpu_tmp0, cpu_T0, cpu_T1, 16, 16);
-            tcg_gen_mov_tl(cpu_T1, cpu_T0);
-            tcg_gen_mov_tl(cpu_T0, cpu_tmp0);
+            tcg_gen_deposit_tl(cpu_tmp0, s->T0, cpu_T1, 16, 16);
+            tcg_gen_mov_tl(cpu_T1, s->T0);
+            tcg_gen_mov_tl(s->T0, cpu_tmp0);
         } else {
-            tcg_gen_deposit_tl(cpu_T1, cpu_T0, cpu_T1, 16, 16);
+            tcg_gen_deposit_tl(cpu_T1, s->T0, cpu_T1, 16, 16);
         }
         /* FALLTHRU */
 #ifdef TARGET_X86_64
@@ -1763,28 +1764,28 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
         /* Concatenate the two 32-bit values and use a 64-bit shift.  */
         tcg_gen_subi_tl(cpu_tmp0, count, 1);
         if (is_right) {
-            tcg_gen_concat_tl_i64(cpu_T0, cpu_T0, cpu_T1);
-            tcg_gen_shr_i64(cpu_tmp0, cpu_T0, cpu_tmp0);
-            tcg_gen_shr_i64(cpu_T0, cpu_T0, count);
+            tcg_gen_concat_tl_i64(s->T0, s->T0, cpu_T1);
+            tcg_gen_shr_i64(cpu_tmp0, s->T0, cpu_tmp0);
+            tcg_gen_shr_i64(s->T0, s->T0, count);
         } else {
-            tcg_gen_concat_tl_i64(cpu_T0, cpu_T1, cpu_T0);
-            tcg_gen_shl_i64(cpu_tmp0, cpu_T0, cpu_tmp0);
-            tcg_gen_shl_i64(cpu_T0, cpu_T0, count);
+            tcg_gen_concat_tl_i64(s->T0, cpu_T1, s->T0);
+            tcg_gen_shl_i64(cpu_tmp0, s->T0, cpu_tmp0);
+            tcg_gen_shl_i64(s->T0, s->T0, count);
             tcg_gen_shri_i64(cpu_tmp0, cpu_tmp0, 32);
-            tcg_gen_shri_i64(cpu_T0, cpu_T0, 32);
+            tcg_gen_shri_i64(s->T0, s->T0, 32);
         }
         break;
 #endif
     default:
         tcg_gen_subi_tl(cpu_tmp0, count, 1);
         if (is_right) {
-            tcg_gen_shr_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
+            tcg_gen_shr_tl(cpu_tmp0, s->T0, cpu_tmp0);
 
             tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
-            tcg_gen_shr_tl(cpu_T0, cpu_T0, count);
+            tcg_gen_shr_tl(s->T0, s->T0, count);
             tcg_gen_shl_tl(cpu_T1, cpu_T1, cpu_tmp4);
         } else {
-            tcg_gen_shl_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
+            tcg_gen_shl_tl(cpu_tmp0, s->T0, cpu_tmp0);
             if (ot == MO_16) {
                 /* Only needed if count > 16, for Intel behaviour.  */
                 tcg_gen_subfi_tl(cpu_tmp4, 33, count);
@@ -1793,20 +1794,20 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
             }
 
             tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
-            tcg_gen_shl_tl(cpu_T0, cpu_T0, count);
+            tcg_gen_shl_tl(s->T0, s->T0, count);
             tcg_gen_shr_tl(cpu_T1, cpu_T1, cpu_tmp4);
         }
         tcg_gen_movi_tl(cpu_tmp4, 0);
         tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T1, count, cpu_tmp4,
                            cpu_tmp4, cpu_T1);
-        tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
+        tcg_gen_or_tl(s->T0, s->T0, cpu_T1);
         break;
     }
 
     /* store */
     gen_op_st_rm_T0_A0(s, ot, op1);
 
-    gen_shift_flags(s, ot, cpu_T0, cpu_tmp0, count, is_right);
+    gen_shift_flags(s, ot, s->T0, cpu_tmp0, count, is_right);
     tcg_temp_free(count);
 }
 
@@ -2126,23 +2127,23 @@ static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
     if (mod == 3) {
         if (is_store) {
             if (reg != OR_TMP0)
-                gen_op_mov_v_reg(ot, cpu_T0, reg);
-            gen_op_mov_reg_v(ot, rm, cpu_T0);
+                gen_op_mov_v_reg(ot, s->T0, reg);
+            gen_op_mov_reg_v(ot, rm, s->T0);
         } else {
-            gen_op_mov_v_reg(ot, cpu_T0, rm);
+            gen_op_mov_v_reg(ot, s->T0, rm);
             if (reg != OR_TMP0)
-                gen_op_mov_reg_v(ot, reg, cpu_T0);
+                gen_op_mov_reg_v(ot, reg, s->T0);
         }
     } else {
         gen_lea_modrm(env, s, modrm);
         if (is_store) {
             if (reg != OR_TMP0)
-                gen_op_mov_v_reg(ot, cpu_T0, reg);
-            gen_op_st_v(s, ot, cpu_T0, s->A0);
+                gen_op_mov_v_reg(ot, s->T0, reg);
+            gen_op_st_v(s, ot, s->T0, s->A0);
         } else {
-            gen_op_ld_v(s, ot, cpu_T0, s->A0);
+            gen_op_ld_v(s, ot, s->T0, s->A0);
             if (reg != OR_TMP0)
-                gen_op_mov_reg_v(ot, reg, cpu_T0);
+                gen_op_mov_reg_v(ot, reg, s->T0);
         }
     }
 }
@@ -2251,9 +2252,9 @@ static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
         cc.reg2 = tcg_const_tl(cc.imm);
     }
 
-    tcg_gen_movcond_tl(cc.cond, cpu_T0, cc.reg, cc.reg2,
-                       cpu_T0, cpu_regs[reg]);
-    gen_op_mov_reg_v(ot, reg, cpu_T0);
+    tcg_gen_movcond_tl(cc.cond, s->T0, cc.reg, cc.reg2,
+                       s->T0, cpu_regs[reg]);
+    gen_op_mov_reg_v(ot, reg, s->T0);
 
     if (cc.mask != -1) {
         tcg_temp_free(cc.reg);
@@ -2263,18 +2264,18 @@ static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
     }
 }
 
-static inline void gen_op_movl_T0_seg(int seg_reg)
+static inline void gen_op_movl_T0_seg(DisasContext *s, int seg_reg)
 {
-    tcg_gen_ld32u_tl(cpu_T0, cpu_env,
+    tcg_gen_ld32u_tl(s->T0, cpu_env,
                      offsetof(CPUX86State,segs[seg_reg].selector));
 }
 
-static inline void gen_op_movl_seg_T0_vm(int seg_reg)
+static inline void gen_op_movl_seg_T0_vm(DisasContext *s, int seg_reg)
 {
-    tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
-    tcg_gen_st32_tl(cpu_T0, cpu_env,
+    tcg_gen_ext16u_tl(s->T0, s->T0);
+    tcg_gen_st32_tl(s->T0, cpu_env,
                     offsetof(CPUX86State,segs[seg_reg].selector));
-    tcg_gen_shli_tl(cpu_seg_base[seg_reg], cpu_T0, 4);
+    tcg_gen_shli_tl(cpu_seg_base[seg_reg], s->T0, 4);
 }
 
 /* move T0 to seg_reg and compute if the CPU state may change. Never
@@ -2282,7 +2283,7 @@ static inline void gen_op_movl_seg_T0_vm(int seg_reg)
 static void gen_movl_seg_T0(DisasContext *s, int seg_reg)
 {
     if (s->pe && !s->vm86) {
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
         gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), cpu_tmp2_i32);
         /* abort translation because the addseg value may change or
            because ss32 may change. For R_SS, translation must always
@@ -2292,7 +2293,7 @@ static void gen_movl_seg_T0(DisasContext *s, int seg_reg)
             s->base.is_jmp = DISAS_TOO_MANY;
         }
     } else {
-        gen_op_movl_seg_T0_vm(seg_reg);
+        gen_op_movl_seg_T0_vm(s, seg_reg);
         if (seg_reg == R_SS) {
             s->base.is_jmp = DISAS_TOO_MANY;
         }
@@ -2356,7 +2357,7 @@ static TCGMemOp gen_pop_T0(DisasContext *s)
     TCGMemOp d_ot = mo_pushpop(s, s->dflag);
 
     gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
-    gen_op_ld_v(s, d_ot, cpu_T0, s->A0);
+    gen_op_ld_v(s, d_ot, s->T0, s->A0);
 
     return d_ot;
 }
@@ -2401,8 +2402,8 @@ static void gen_popa(DisasContext *s)
         }
         tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], i * size);
         gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
-        gen_op_ld_v(s, d_ot, cpu_T0, s->A0);
-        gen_op_mov_reg_v(d_ot, 7 - i, cpu_T0);
+        gen_op_ld_v(s, d_ot, s->T0, s->A0);
+        gen_op_mov_reg_v(d_ot, 7 - i, s->T0);
     }
 
     gen_stack_update(s, 8 * size);
@@ -2454,11 +2455,11 @@ static void gen_leave(DisasContext *s)
     TCGMemOp a_ot = mo_stacksize(s);
 
     gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
-    gen_op_ld_v(s, d_ot, cpu_T0, s->A0);
+    gen_op_ld_v(s, d_ot, s->T0, s->A0);
 
     tcg_gen_addi_tl(cpu_T1, cpu_regs[R_EBP], 1 << d_ot);
 
-    gen_op_mov_reg_v(d_ot, R_EBP, cpu_T0);
+    gen_op_mov_reg_v(d_ot, R_EBP, s->T0);
     gen_op_mov_reg_v(a_ot, R_ESP, cpu_T1);
 }
 
@@ -3126,23 +3127,24 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 gen_stq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].ZMM_Q(0)));
             } else {
-                tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
+                tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
                     xmm_regs[reg].ZMM_L(0)));
-                gen_op_st_v(s, MO_32, cpu_T0, s->A0);
+                gen_op_st_v(s, MO_32, s->T0, s->A0);
             }
             break;
         case 0x6e: /* movd mm, ea */
 #ifdef TARGET_X86_64
             if (s->dflag == MO_64) {
                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
-                tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
+                tcg_gen_st_tl(s->T0, cpu_env,
+                              offsetof(CPUX86State, fpregs[reg].mmx));
             } else
 #endif
             {
                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
                 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
                                  offsetof(CPUX86State,fpregs[reg].mmx));
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
                 gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
             }
             break;
@@ -3152,14 +3154,14 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
                 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
                                  offsetof(CPUX86State,xmm_regs[reg]));
-                gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T0);
+                gen_helper_movq_mm_T0_xmm(cpu_ptr0, s->T0);
             } else
 #endif
             {
                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
                 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
                                  offsetof(CPUX86State,xmm_regs[reg]));
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
                 gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
             }
             break;
@@ -3193,12 +3195,16 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x210: /* movss xmm, ea */
             if (mod != 3) {
                 gen_lea_modrm(env, s, modrm);
-                gen_op_ld_v(s, MO_32, cpu_T0, s->A0);
-                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
-                tcg_gen_movi_tl(cpu_T0, 0);
-                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
-                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
-                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
+                gen_op_ld_v(s, MO_32, s->T0, s->A0);
+                tcg_gen_st32_tl(s->T0, cpu_env,
+                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
+                tcg_gen_movi_tl(s->T0, 0);
+                tcg_gen_st32_tl(s->T0, cpu_env,
+                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)));
+                tcg_gen_st32_tl(s->T0, cpu_env,
+                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
+                tcg_gen_st32_tl(s->T0, cpu_env,
+                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
                 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
@@ -3210,9 +3216,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 gen_lea_modrm(env, s, modrm);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].ZMM_Q(0)));
-                tcg_gen_movi_tl(cpu_T0, 0);
-                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
-                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
+                tcg_gen_movi_tl(s->T0, 0);
+                tcg_gen_st32_tl(s->T0, cpu_env,
+                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
+                tcg_gen_st32_tl(s->T0, cpu_env,
+                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
                 gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
@@ -3314,13 +3322,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x7e: /* movd ea, mm */
 #ifdef TARGET_X86_64
             if (s->dflag == MO_64) {
-                tcg_gen_ld_i64(cpu_T0, cpu_env,
+                tcg_gen_ld_i64(s->T0, cpu_env,
                                offsetof(CPUX86State,fpregs[reg].mmx));
                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
             } else
 #endif
             {
-                tcg_gen_ld32u_tl(cpu_T0, cpu_env,
+                tcg_gen_ld32u_tl(s->T0, cpu_env,
                                  offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
             }
@@ -3328,13 +3336,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x17e: /* movd ea, xmm */
 #ifdef TARGET_X86_64
             if (s->dflag == MO_64) {
-                tcg_gen_ld_i64(cpu_T0, cpu_env,
+                tcg_gen_ld_i64(s->T0, cpu_env,
                                offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
             } else
 #endif
             {
-                tcg_gen_ld32u_tl(cpu_T0, cpu_env,
+                tcg_gen_ld32u_tl(s->T0, cpu_env,
                                  offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
             }
@@ -3379,8 +3387,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x211: /* movss ea, xmm */
             if (mod != 3) {
                 gen_lea_modrm(env, s, modrm);
-                tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
-                gen_op_st_v(s, MO_32, cpu_T0, s->A0);
+                tcg_gen_ld32u_tl(s->T0, cpu_env,
+                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
+                gen_op_st_v(s, MO_32, s->T0, s->A0);
             } else {
                 rm = (modrm & 7) | REX_B(s);
                 gen_op_movl(offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)),
@@ -3429,16 +3438,20 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             }
             val = x86_ldub_code(env, s);
             if (is_xmm) {
-                tcg_gen_movi_tl(cpu_T0, val);
-                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
-                tcg_gen_movi_tl(cpu_T0, 0);
-                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(1)));
+                tcg_gen_movi_tl(s->T0, val);
+                tcg_gen_st32_tl(s->T0, cpu_env,
+                                offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
+                tcg_gen_movi_tl(s->T0, 0);
+                tcg_gen_st32_tl(s->T0, cpu_env,
+                                offsetof(CPUX86State, xmm_t0.ZMM_L(1)));
                 op1_offset = offsetof(CPUX86State,xmm_t0);
             } else {
-                tcg_gen_movi_tl(cpu_T0, val);
-                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(0)));
-                tcg_gen_movi_tl(cpu_T0, 0);
-                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(1)));
+                tcg_gen_movi_tl(s->T0, val);
+                tcg_gen_st32_tl(s->T0, cpu_env,
+                                offsetof(CPUX86State, mmx_t0.MMX_L(0)));
+                tcg_gen_movi_tl(s->T0, 0);
+                tcg_gen_st32_tl(s->T0, cpu_env,
+                                offsetof(CPUX86State, mmx_t0.MMX_L(1)));
                 op1_offset = offsetof(CPUX86State,mmx_t0);
             }
             sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 +
@@ -3503,12 +3516,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
             if (ot == MO_32) {
                 SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
                 sse_fn_epi(cpu_env, cpu_ptr0, cpu_tmp2_i32);
             } else {
 #ifdef TARGET_X86_64
                 SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
-                sse_fn_epl(cpu_env, cpu_ptr0, cpu_T0);
+                sse_fn_epl(cpu_env, cpu_ptr0, s->T0);
 #else
                 goto illegal_op;
 #endif
@@ -3555,8 +3568,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 if ((b >> 8) & 1) {
                     gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0)));
                 } else {
-                    gen_op_ld_v(s, MO_32, cpu_T0, s->A0);
-                    tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
+                    gen_op_ld_v(s, MO_32, s->T0, s->A0);
+                    tcg_gen_st32_tl(s->T0, cpu_env,
+                                    offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
                 }
                 op2_offset = offsetof(CPUX86State,xmm_t0);
             } else {
@@ -3568,17 +3582,17 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 SSEFunc_i_ep sse_fn_i_ep =
                     sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
                 sse_fn_i_ep(cpu_tmp2_i32, cpu_env, cpu_ptr0);
-                tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
+                tcg_gen_extu_i32_tl(s->T0, cpu_tmp2_i32);
             } else {
 #ifdef TARGET_X86_64
                 SSEFunc_l_ep sse_fn_l_ep =
                     sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
-                sse_fn_l_ep(cpu_T0, cpu_env, cpu_ptr0);
+                sse_fn_l_ep(s->T0, cpu_env, cpu_ptr0);
 #else
                 goto illegal_op;
 #endif
             }
-            gen_op_mov_reg_v(ot, reg, cpu_T0);
+            gen_op_mov_reg_v(ot, reg, s->T0);
             break;
         case 0xc4: /* pinsrw */
         case 0x1c4:
@@ -3587,11 +3601,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             val = x86_ldub_code(env, s);
             if (b1) {
                 val &= 7;
-                tcg_gen_st16_tl(cpu_T0, cpu_env,
+                tcg_gen_st16_tl(s->T0, cpu_env,
                                 offsetof(CPUX86State,xmm_regs[reg].ZMM_W(val)));
             } else {
                 val &= 3;
-                tcg_gen_st16_tl(cpu_T0, cpu_env,
+                tcg_gen_st16_tl(s->T0, cpu_env,
                                 offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
             }
             break;
@@ -3604,16 +3618,16 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             if (b1) {
                 val &= 7;
                 rm = (modrm & 7) | REX_B(s);
-                tcg_gen_ld16u_tl(cpu_T0, cpu_env,
+                tcg_gen_ld16u_tl(s->T0, cpu_env,
                                  offsetof(CPUX86State,xmm_regs[rm].ZMM_W(val)));
             } else {
                 val &= 3;
                 rm = (modrm & 7);
-                tcg_gen_ld16u_tl(cpu_T0, cpu_env,
+                tcg_gen_ld16u_tl(s->T0, cpu_env,
                                 offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
             }
             reg = ((modrm >> 3) & 7) | rex_r;
-            gen_op_mov_reg_v(ot, reg, cpu_T0);
+            gen_op_mov_reg_v(ot, reg, s->T0);
             break;
         case 0x1d6: /* movq ea, xmm */
             if (mod != 3) {
@@ -3760,11 +3774,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
 
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[reg]);
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-                gen_helper_crc32(cpu_T0, cpu_tmp2_i32,
-                                 cpu_T0, tcg_const_i32(8 << ot));
+                gen_helper_crc32(s->T0, cpu_tmp2_i32,
+                                 s->T0, tcg_const_i32(8 << ot));
 
                 ot = mo_64_32(s->dflag);
-                gen_op_mov_reg_v(ot, reg, cpu_T0);
+                gen_op_mov_reg_v(ot, reg, s->T0);
                 break;
 
             case 0x1f0: /* crc32 or movbe */
@@ -3789,9 +3803,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
 
                 gen_lea_modrm(env, s, modrm);
                 if ((b & 1) == 0) {
-                    tcg_gen_qemu_ld_tl(cpu_T0, s->A0,
+                    tcg_gen_qemu_ld_tl(s->T0, s->A0,
                                        s->mem_index, ot | MO_BE);
-                    gen_op_mov_reg_v(ot, reg, cpu_T0);
+                    gen_op_mov_reg_v(ot, reg, s->T0);
                 } else {
                     tcg_gen_qemu_st_tl(cpu_regs[reg], s->A0,
                                        s->mem_index, ot | MO_BE);
@@ -3806,9 +3820,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 }
                 ot = mo_64_32(s->dflag);
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-                tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_regs[s->vex_v]);
-                gen_op_mov_reg_v(ot, reg, cpu_T0);
-                gen_op_update1_cc();
+                tcg_gen_andc_tl(s->T0, s->T0, cpu_regs[s->vex_v]);
+                gen_op_mov_reg_v(ot, reg, s->T0);
+                gen_op_update1_cc(s);
                 set_cc_op(s, CC_OP_LOGICB + ot);
                 break;
 
@@ -3826,12 +3840,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     /* Extract START, and shift the operand.
                        Shifts larger than operand size get zeros.  */
                     tcg_gen_ext8u_tl(s->A0, cpu_regs[s->vex_v]);
-                    tcg_gen_shr_tl(cpu_T0, cpu_T0, s->A0);
+                    tcg_gen_shr_tl(s->T0, s->T0, s->A0);
 
                     bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
                     zero = tcg_const_tl(0);
-                    tcg_gen_movcond_tl(TCG_COND_LEU, cpu_T0, s->A0, bound,
-                                       cpu_T0, zero);
+                    tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound,
+                                       s->T0, zero);
                     tcg_temp_free(zero);
 
                     /* Extract the LEN into a mask.  Lengths larger than
@@ -3843,10 +3857,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     tcg_gen_movi_tl(cpu_T1, 1);
                     tcg_gen_shl_tl(cpu_T1, cpu_T1, s->A0);
                     tcg_gen_subi_tl(cpu_T1, cpu_T1, 1);
-                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
+                    tcg_gen_and_tl(s->T0, s->T0, cpu_T1);
 
-                    gen_op_mov_reg_v(ot, reg, cpu_T0);
-                    gen_op_update1_cc();
+                    gen_op_mov_reg_v(ot, reg, s->T0);
+                    gen_op_update1_cc(s);
                     set_cc_op(s, CC_OP_LOGICB + ot);
                 }
                 break;
@@ -3872,9 +3886,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 }
                 tcg_gen_movi_tl(s->A0, -1);
                 tcg_gen_shl_tl(s->A0, s->A0, cpu_T1);
-                tcg_gen_andc_tl(cpu_T0, cpu_T0, s->A0);
-                gen_op_mov_reg_v(ot, reg, cpu_T0);
-                gen_op_update1_cc();
+                tcg_gen_andc_tl(s->T0, s->T0, s->A0);
+                gen_op_mov_reg_v(ot, reg, s->T0);
+                gen_op_update1_cc(s);
                 set_cc_op(s, CC_OP_BMILGB + ot);
                 break;
 
@@ -3888,7 +3902,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                 switch (ot) {
                 default:
-                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
                     tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EDX]);
                     tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
                                       cpu_tmp2_i32, cpu_tmp3_i32);
@@ -3897,9 +3911,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     break;
 #ifdef TARGET_X86_64
                 case MO_64:
-                    tcg_gen_mulu2_i64(cpu_T0, cpu_T1,
-                                      cpu_T0, cpu_regs[R_EDX]);
-                    tcg_gen_mov_i64(cpu_regs[s->vex_v], cpu_T0);
+                    tcg_gen_mulu2_i64(s->T0, cpu_T1,
+                                      s->T0, cpu_regs[R_EDX]);
+                    tcg_gen_mov_i64(cpu_regs[s->vex_v], s->T0);
                     tcg_gen_mov_i64(cpu_regs[reg], cpu_T1);
                     break;
 #endif
@@ -3921,7 +3935,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 } else {
                     tcg_gen_ext32u_tl(cpu_T1, cpu_regs[s->vex_v]);
                 }
-                gen_helper_pdep(cpu_regs[reg], cpu_T0, cpu_T1);
+                gen_helper_pdep(cpu_regs[reg], s->T0, cpu_T1);
                 break;
 
             case 0x2f5: /* pext Gy, By, Ey */
@@ -3939,7 +3953,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 } else {
                     tcg_gen_ext32u_tl(cpu_T1, cpu_regs[s->vex_v]);
                 }
-                gen_helper_pext(cpu_regs[reg], cpu_T0, cpu_T1);
+                gen_helper_pext(cpu_regs[reg], s->T0, cpu_T1);
                 break;
 
             case 0x1f6: /* adcx Gy, Ey */
@@ -3997,22 +4011,22 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                         /* If we know TL is 64-bit, and we want a 32-bit
                            result, just do everything in 64-bit arithmetic.  */
                         tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
-                        tcg_gen_ext32u_i64(cpu_T0, cpu_T0);
-                        tcg_gen_add_i64(cpu_T0, cpu_T0, cpu_regs[reg]);
-                        tcg_gen_add_i64(cpu_T0, cpu_T0, carry_in);
-                        tcg_gen_ext32u_i64(cpu_regs[reg], cpu_T0);
-                        tcg_gen_shri_i64(carry_out, cpu_T0, 32);
+                        tcg_gen_ext32u_i64(s->T0, s->T0);
+                        tcg_gen_add_i64(s->T0, s->T0, cpu_regs[reg]);
+                        tcg_gen_add_i64(s->T0, s->T0, carry_in);
+                        tcg_gen_ext32u_i64(cpu_regs[reg], s->T0);
+                        tcg_gen_shri_i64(carry_out, s->T0, 32);
                         break;
 #endif
                     default:
                         /* Otherwise compute the carry-out in two steps.  */
                         zero = tcg_const_tl(0);
-                        tcg_gen_add2_tl(cpu_T0, carry_out,
-                                        cpu_T0, zero,
+                        tcg_gen_add2_tl(s->T0, carry_out,
+                                        s->T0, zero,
                                         carry_in, zero);
                         tcg_gen_add2_tl(cpu_regs[reg], carry_out,
                                         cpu_regs[reg], carry_out,
-                                        cpu_T0, zero);
+                                        s->T0, zero);
                         tcg_temp_free(zero);
                         break;
                     }
@@ -4036,19 +4050,19 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     tcg_gen_andi_tl(cpu_T1, cpu_regs[s->vex_v], 31);
                 }
                 if (b == 0x1f7) {
-                    tcg_gen_shl_tl(cpu_T0, cpu_T0, cpu_T1);
+                    tcg_gen_shl_tl(s->T0, s->T0, cpu_T1);
                 } else if (b == 0x2f7) {
                     if (ot != MO_64) {
-                        tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
+                        tcg_gen_ext32s_tl(s->T0, s->T0);
                     }
-                    tcg_gen_sar_tl(cpu_T0, cpu_T0, cpu_T1);
+                    tcg_gen_sar_tl(s->T0, s->T0, cpu_T1);
                 } else {
                     if (ot != MO_64) {
-                        tcg_gen_ext32u_tl(cpu_T0, cpu_T0);
+                        tcg_gen_ext32u_tl(s->T0, s->T0);
                     }
-                    tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_T1);
+                    tcg_gen_shr_tl(s->T0, s->T0, cpu_T1);
                 }
-                gen_op_mov_reg_v(ot, reg, cpu_T0);
+                gen_op_mov_reg_v(ot, reg, s->T0);
                 break;
 
             case 0x0f3:
@@ -4063,25 +4077,25 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 ot = mo_64_32(s->dflag);
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
 
-                tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
+                tcg_gen_mov_tl(cpu_cc_src, s->T0);
                 switch (reg & 7) {
                 case 1: /* blsr By,Ey */
-                    tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
-                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
+                    tcg_gen_subi_tl(cpu_T1, s->T0, 1);
+                    tcg_gen_and_tl(s->T0, s->T0, cpu_T1);
                     break;
                 case 2: /* blsmsk By,Ey */
-                    tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
-                    tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
+                    tcg_gen_subi_tl(cpu_T1, s->T0, 1);
+                    tcg_gen_xor_tl(s->T0, s->T0, cpu_T1);
                     break;
                 case 3: /* blsi By, Ey */
-                    tcg_gen_neg_tl(cpu_T1, cpu_T0);
-                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
+                    tcg_gen_neg_tl(cpu_T1, s->T0);
+                    tcg_gen_and_tl(s->T0, s->T0, cpu_T1);
                     break;
                 default:
                     goto unknown_op;
                 }
-                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
-                gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
+                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
+                gen_op_mov_reg_v(ot, s->vex_v, s->T0);
                 set_cc_op(s, CC_OP_BMILGB + ot);
                 break;
 
@@ -4119,22 +4133,22 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 val = x86_ldub_code(env, s);
                 switch (b) {
                 case 0x14: /* pextrb */
-                    tcg_gen_ld8u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
+                    tcg_gen_ld8u_tl(s->T0, cpu_env, offsetof(CPUX86State,
                                             xmm_regs[reg].ZMM_B(val & 15)));
                     if (mod == 3) {
-                        gen_op_mov_reg_v(ot, rm, cpu_T0);
+                        gen_op_mov_reg_v(ot, rm, s->T0);
                     } else {
-                        tcg_gen_qemu_st_tl(cpu_T0, s->A0,
+                        tcg_gen_qemu_st_tl(s->T0, s->A0,
                                            s->mem_index, MO_UB);
                     }
                     break;
                 case 0x15: /* pextrw */
-                    tcg_gen_ld16u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
+                    tcg_gen_ld16u_tl(s->T0, cpu_env, offsetof(CPUX86State,
                                             xmm_regs[reg].ZMM_W(val & 7)));
                     if (mod == 3) {
-                        gen_op_mov_reg_v(ot, rm, cpu_T0);
+                        gen_op_mov_reg_v(ot, rm, s->T0);
                     } else {
-                        tcg_gen_qemu_st_tl(cpu_T0, s->A0,
+                        tcg_gen_qemu_st_tl(s->T0, s->A0,
                                            s->mem_index, MO_LEUW);
                     }
                     break;
@@ -4166,23 +4180,23 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     }
                     break;
                 case 0x17: /* extractps */
-                    tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
+                    tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
                                             xmm_regs[reg].ZMM_L(val & 3)));
                     if (mod == 3) {
-                        gen_op_mov_reg_v(ot, rm, cpu_T0);
+                        gen_op_mov_reg_v(ot, rm, s->T0);
                     } else {
-                        tcg_gen_qemu_st_tl(cpu_T0, s->A0,
+                        tcg_gen_qemu_st_tl(s->T0, s->A0,
                                            s->mem_index, MO_LEUL);
                     }
                     break;
                 case 0x20: /* pinsrb */
                     if (mod == 3) {
-                        gen_op_mov_v_reg(MO_32, cpu_T0, rm);
+                        gen_op_mov_v_reg(MO_32, s->T0, rm);
                     } else {
-                        tcg_gen_qemu_ld_tl(cpu_T0, s->A0,
+                        tcg_gen_qemu_ld_tl(s->T0, s->A0,
                                            s->mem_index, MO_UB);
                     }
-                    tcg_gen_st8_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
+                    tcg_gen_st8_tl(s->T0, cpu_env, offsetof(CPUX86State,
                                             xmm_regs[reg].ZMM_B(val & 15)));
                     break;
                 case 0x21: /* insertps */
@@ -4297,13 +4311,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                 b = x86_ldub_code(env, s);
                 if (ot == MO_64) {
-                    tcg_gen_rotri_tl(cpu_T0, cpu_T0, b & 63);
+                    tcg_gen_rotri_tl(s->T0, s->T0, b & 63);
                 } else {
-                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
                     tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, b & 31);
-                    tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
+                    tcg_gen_extu_i32_tl(s->T0, cpu_tmp2_i32);
                 }
-                gen_op_mov_reg_v(ot, reg, cpu_T0);
+                gen_op_mov_reg_v(ot, reg, s->T0);
                 break;
 
             default:
@@ -4360,8 +4374,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 switch (sz) {
                 case 2:
                     /* 32 bit access */
-                    gen_op_ld_v(s, MO_32, cpu_T0, s->A0);
-                    tcg_gen_st32_tl(cpu_T0, cpu_env,
+                    gen_op_ld_v(s, MO_32, s->T0, s->A0);
+                    tcg_gen_st32_tl(s->T0, cpu_env,
                                     offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
                     break;
                 case 3:
@@ -4657,8 +4671,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 xor_zero:
                     /* xor reg, reg optimisation */
                     set_cc_op(s, CC_OP_CLR);
-                    tcg_gen_movi_tl(cpu_T0, 0);
-                    gen_op_mov_reg_v(ot, reg, cpu_T0);
+                    tcg_gen_movi_tl(s->T0, 0);
+                    gen_op_mov_reg_v(ot, reg, s->T0);
                     break;
                 } else {
                     opreg = rm;
@@ -4760,17 +4774,17 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             /* For those below that handle locked memory, don't load here.  */
             if (!(s->prefix & PREFIX_LOCK)
                 || op != 2) {
-                gen_op_ld_v(s, ot, cpu_T0, s->A0);
+                gen_op_ld_v(s, ot, s->T0, s->A0);
             }
         } else {
-            gen_op_mov_v_reg(ot, cpu_T0, rm);
+            gen_op_mov_v_reg(ot, s->T0, rm);
         }
 
         switch(op) {
         case 0: /* test */
             val = insn_get(env, s, ot);
             tcg_gen_movi_tl(cpu_T1, val);
-            gen_op_testl_T0_T1_cc();
+            gen_op_testl_T0_T1_cc(s);
             set_cc_op(s, CC_OP_LOGICB + ot);
             break;
         case 2: /* not */
@@ -4778,15 +4792,15 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 if (mod == 3) {
                     goto illegal_op;
                 }
-                tcg_gen_movi_tl(cpu_T0, ~0);
-                tcg_gen_atomic_xor_fetch_tl(cpu_T0, s->A0, cpu_T0,
+                tcg_gen_movi_tl(s->T0, ~0);
+                tcg_gen_atomic_xor_fetch_tl(s->T0, s->A0, s->T0,
                                             s->mem_index, ot | MO_LE);
             } else {
-                tcg_gen_not_tl(cpu_T0, cpu_T0);
+                tcg_gen_not_tl(s->T0, s->T0);
                 if (mod != 3) {
-                    gen_op_st_v(s, ot, cpu_T0, s->A0);
+                    gen_op_st_v(s, ot, s->T0, s->A0);
                 } else {
-                    gen_op_mov_reg_v(ot, rm, cpu_T0);
+                    gen_op_mov_reg_v(ot, rm, s->T0);
                 }
             }
             break;
@@ -4803,7 +4817,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 label1 = gen_new_label();
 
                 tcg_gen_mov_tl(a0, s->A0);
-                tcg_gen_mov_tl(t0, cpu_T0);
+                tcg_gen_mov_tl(t0, s->T0);
 
                 gen_set_label(label1);
                 t1 = tcg_temp_new();
@@ -4817,14 +4831,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
 
                 tcg_temp_free(t2);
                 tcg_temp_free(a0);
-                tcg_gen_mov_tl(cpu_T0, t0);
+                tcg_gen_mov_tl(s->T0, t0);
                 tcg_temp_free(t0);
             } else {
-                tcg_gen_neg_tl(cpu_T0, cpu_T0);
+                tcg_gen_neg_tl(s->T0, s->T0);
                 if (mod != 3) {
-                    gen_op_st_v(s, ot, cpu_T0, s->A0);
+                    gen_op_st_v(s, ot, s->T0, s->A0);
                 } else {
-                    gen_op_mov_reg_v(ot, rm, cpu_T0);
+                    gen_op_mov_reg_v(ot, rm, s->T0);
                 }
             }
             gen_op_update_neg_cc(s);
@@ -4834,31 +4848,31 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             switch(ot) {
             case MO_8:
                 gen_op_mov_v_reg(MO_8, cpu_T1, R_EAX);
-                tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
+                tcg_gen_ext8u_tl(s->T0, s->T0);
                 tcg_gen_ext8u_tl(cpu_T1, cpu_T1);
                 /* XXX: use 32 bit mul which could be faster */
-                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
-                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
-                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
-                tcg_gen_andi_tl(cpu_cc_src, cpu_T0, 0xff00);
+                tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
+                gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
+                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
+                tcg_gen_andi_tl(cpu_cc_src, s->T0, 0xff00);
                 set_cc_op(s, CC_OP_MULB);
                 break;
             case MO_16:
                 gen_op_mov_v_reg(MO_16, cpu_T1, R_EAX);
-                tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
+                tcg_gen_ext16u_tl(s->T0, s->T0);
                 tcg_gen_ext16u_tl(cpu_T1, cpu_T1);
                 /* XXX: use 32 bit mul which could be faster */
-                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
-                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
-                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
-                tcg_gen_shri_tl(cpu_T0, cpu_T0, 16);
-                gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
-                tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
+                tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
+                gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
+                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
+                tcg_gen_shri_tl(s->T0, s->T0, 16);
+                gen_op_mov_reg_v(MO_16, R_EDX, s->T0);
+                tcg_gen_mov_tl(cpu_cc_src, s->T0);
                 set_cc_op(s, CC_OP_MULW);
                 break;
             default:
             case MO_32:
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
                 tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
                 tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
                                   cpu_tmp2_i32, cpu_tmp3_i32);
@@ -4871,7 +4885,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
 #ifdef TARGET_X86_64
             case MO_64:
                 tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
-                                  cpu_T0, cpu_regs[R_EAX]);
+                                  s->T0, cpu_regs[R_EAX]);
                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
                 tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
                 set_cc_op(s, CC_OP_MULQ);
@@ -4883,33 +4897,33 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             switch(ot) {
             case MO_8:
                 gen_op_mov_v_reg(MO_8, cpu_T1, R_EAX);
-                tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
+                tcg_gen_ext8s_tl(s->T0, s->T0);
                 tcg_gen_ext8s_tl(cpu_T1, cpu_T1);
                 /* XXX: use 32 bit mul which could be faster */
-                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
-                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
-                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
-                tcg_gen_ext8s_tl(cpu_tmp0, cpu_T0);
-                tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
+                tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
+                gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
+                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
+                tcg_gen_ext8s_tl(cpu_tmp0, s->T0);
+                tcg_gen_sub_tl(cpu_cc_src, s->T0, cpu_tmp0);
                 set_cc_op(s, CC_OP_MULB);
                 break;
             case MO_16:
                 gen_op_mov_v_reg(MO_16, cpu_T1, R_EAX);
-                tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
+                tcg_gen_ext16s_tl(s->T0, s->T0);
                 tcg_gen_ext16s_tl(cpu_T1, cpu_T1);
                 /* XXX: use 32 bit mul which could be faster */
-                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
-                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
-                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
-                tcg_gen_ext16s_tl(cpu_tmp0, cpu_T0);
-                tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
-                tcg_gen_shri_tl(cpu_T0, cpu_T0, 16);
-                gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
+                tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
+                gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
+                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
+                tcg_gen_ext16s_tl(cpu_tmp0, s->T0);
+                tcg_gen_sub_tl(cpu_cc_src, s->T0, cpu_tmp0);
+                tcg_gen_shri_tl(s->T0, s->T0, 16);
+                gen_op_mov_reg_v(MO_16, R_EDX, s->T0);
                 set_cc_op(s, CC_OP_MULW);
                 break;
             default:
             case MO_32:
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
                 tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
                 tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
                                   cpu_tmp2_i32, cpu_tmp3_i32);
@@ -4924,7 +4938,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
 #ifdef TARGET_X86_64
             case MO_64:
                 tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
-                                  cpu_T0, cpu_regs[R_EAX]);
+                                  s->T0, cpu_regs[R_EAX]);
                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
                 tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
                 tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
@@ -4936,18 +4950,18 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         case 6: /* div */
             switch(ot) {
             case MO_8:
-                gen_helper_divb_AL(cpu_env, cpu_T0);
+                gen_helper_divb_AL(cpu_env, s->T0);
                 break;
             case MO_16:
-                gen_helper_divw_AX(cpu_env, cpu_T0);
+                gen_helper_divw_AX(cpu_env, s->T0);
                 break;
             default:
             case MO_32:
-                gen_helper_divl_EAX(cpu_env, cpu_T0);
+                gen_helper_divl_EAX(cpu_env, s->T0);
                 break;
 #ifdef TARGET_X86_64
             case MO_64:
-                gen_helper_divq_EAX(cpu_env, cpu_T0);
+                gen_helper_divq_EAX(cpu_env, s->T0);
                 break;
 #endif
             }
@@ -4955,18 +4969,18 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         case 7: /* idiv */
             switch(ot) {
             case MO_8:
-                gen_helper_idivb_AL(cpu_env, cpu_T0);
+                gen_helper_idivb_AL(cpu_env, s->T0);
                 break;
             case MO_16:
-                gen_helper_idivw_AX(cpu_env, cpu_T0);
+                gen_helper_idivw_AX(cpu_env, s->T0);
                 break;
             default:
             case MO_32:
-                gen_helper_idivl_EAX(cpu_env, cpu_T0);
+                gen_helper_idivl_EAX(cpu_env, s->T0);
                 break;
 #ifdef TARGET_X86_64
             case MO_64:
-                gen_helper_idivq_EAX(cpu_env, cpu_T0);
+                gen_helper_idivq_EAX(cpu_env, s->T0);
                 break;
 #endif
             }
@@ -5001,9 +5015,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         if (mod != 3) {
             gen_lea_modrm(env, s, modrm);
             if (op >= 2 && op != 3 && op != 5)
-                gen_op_ld_v(s, ot, cpu_T0, s->A0);
+                gen_op_ld_v(s, ot, s->T0, s->A0);
         } else {
-            gen_op_mov_v_reg(ot, cpu_T0, rm);
+            gen_op_mov_v_reg(ot, s->T0, rm);
         }
 
         switch(op) {
@@ -5024,27 +5038,27 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         case 2: /* call Ev */
             /* XXX: optimize if memory (no 'and' is necessary) */
             if (dflag == MO_16) {
-                tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
+                tcg_gen_ext16u_tl(s->T0, s->T0);
             }
             next_eip = s->pc - s->cs_base;
             tcg_gen_movi_tl(cpu_T1, next_eip);
             gen_push_v(s, cpu_T1);
-            gen_op_jmp_v(cpu_T0);
+            gen_op_jmp_v(s->T0);
             gen_bnd_jmp(s);
-            gen_jr(s, cpu_T0);
+            gen_jr(s, s->T0);
             break;
         case 3: /* lcall Ev */
             gen_op_ld_v(s, ot, cpu_T1, s->A0);
             gen_add_A0_im(s, 1 << ot);
-            gen_op_ld_v(s, MO_16, cpu_T0, s->A0);
+            gen_op_ld_v(s, MO_16, s->T0, s->A0);
         do_lcall:
             if (s->pe && !s->vm86) {
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
                 gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, cpu_T1,
                                            tcg_const_i32(dflag - 1),
                                            tcg_const_tl(s->pc - s->cs_base));
             } else {
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
                 gen_helper_lcall_real(cpu_env, cpu_tmp2_i32, cpu_T1,
                                       tcg_const_i32(dflag - 1),
                                       tcg_const_i32(s->pc - s->cs_base));
@@ -5054,30 +5068,30 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             break;
         case 4: /* jmp Ev */
             if (dflag == MO_16) {
-                tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
+                tcg_gen_ext16u_tl(s->T0, s->T0);
             }
-            gen_op_jmp_v(cpu_T0);
+            gen_op_jmp_v(s->T0);
             gen_bnd_jmp(s);
-            gen_jr(s, cpu_T0);
+            gen_jr(s, s->T0);
             break;
         case 5: /* ljmp Ev */
             gen_op_ld_v(s, ot, cpu_T1, s->A0);
             gen_add_A0_im(s, 1 << ot);
-            gen_op_ld_v(s, MO_16, cpu_T0, s->A0);
+            gen_op_ld_v(s, MO_16, s->T0, s->A0);
         do_ljmp:
             if (s->pe && !s->vm86) {
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
                 gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, cpu_T1,
                                           tcg_const_tl(s->pc - s->cs_base));
             } else {
-                gen_op_movl_seg_T0_vm(R_CS);
+                gen_op_movl_seg_T0_vm(s, R_CS);
                 gen_op_jmp_v(cpu_T1);
             }
             tcg_gen_ld_tl(cpu_tmp4, cpu_env, offsetof(CPUX86State, eip));
             gen_jr(s, cpu_tmp4);
             break;
         case 6: /* push Ev */
-            gen_push_v(s, cpu_T0);
+            gen_push_v(s, s->T0);
             break;
         default:
             goto unknown_op;
@@ -5093,7 +5107,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
 
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
         gen_op_mov_v_reg(ot, cpu_T1, reg);
-        gen_op_testl_T0_T1_cc();
+        gen_op_testl_T0_T1_cc(s);
         set_cc_op(s, CC_OP_LOGICB + ot);
         break;
 
@@ -5102,9 +5116,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         ot = mo_b_d(b, dflag);
         val = insn_get(env, s, ot);
 
-        gen_op_mov_v_reg(ot, cpu_T0, OR_EAX);
+        gen_op_mov_v_reg(ot, s->T0, OR_EAX);
         tcg_gen_movi_tl(cpu_T1, val);
-        gen_op_testl_T0_T1_cc();
+        gen_op_testl_T0_T1_cc(s);
         set_cc_op(s, CC_OP_LOGICB + ot);
         break;
 
@@ -5112,20 +5126,20 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         switch (dflag) {
 #ifdef TARGET_X86_64
         case MO_64:
-            gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
-            tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
-            gen_op_mov_reg_v(MO_64, R_EAX, cpu_T0);
+            gen_op_mov_v_reg(MO_32, s->T0, R_EAX);
+            tcg_gen_ext32s_tl(s->T0, s->T0);
+            gen_op_mov_reg_v(MO_64, R_EAX, s->T0);
             break;
 #endif
         case MO_32:
-            gen_op_mov_v_reg(MO_16, cpu_T0, R_EAX);
-            tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
-            gen_op_mov_reg_v(MO_32, R_EAX, cpu_T0);
+            gen_op_mov_v_reg(MO_16, s->T0, R_EAX);
+            tcg_gen_ext16s_tl(s->T0, s->T0);
+            gen_op_mov_reg_v(MO_32, R_EAX, s->T0);
             break;
         case MO_16:
-            gen_op_mov_v_reg(MO_8, cpu_T0, R_EAX);
-            tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
-            gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
+            gen_op_mov_v_reg(MO_8, s->T0, R_EAX);
+            tcg_gen_ext8s_tl(s->T0, s->T0);
+            gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
             break;
         default:
             tcg_abort();
@@ -5135,22 +5149,22 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         switch (dflag) {
 #ifdef TARGET_X86_64
         case MO_64:
-            gen_op_mov_v_reg(MO_64, cpu_T0, R_EAX);
-            tcg_gen_sari_tl(cpu_T0, cpu_T0, 63);
-            gen_op_mov_reg_v(MO_64, R_EDX, cpu_T0);
+            gen_op_mov_v_reg(MO_64, s->T0, R_EAX);
+            tcg_gen_sari_tl(s->T0, s->T0, 63);
+            gen_op_mov_reg_v(MO_64, R_EDX, s->T0);
             break;
 #endif
         case MO_32:
-            gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
-            tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
-            tcg_gen_sari_tl(cpu_T0, cpu_T0, 31);
-            gen_op_mov_reg_v(MO_32, R_EDX, cpu_T0);
+            gen_op_mov_v_reg(MO_32, s->T0, R_EAX);
+            tcg_gen_ext32s_tl(s->T0, s->T0);
+            tcg_gen_sari_tl(s->T0, s->T0, 31);
+            gen_op_mov_reg_v(MO_32, R_EDX, s->T0);
             break;
         case MO_16:
-            gen_op_mov_v_reg(MO_16, cpu_T0, R_EAX);
-            tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
-            tcg_gen_sari_tl(cpu_T0, cpu_T0, 15);
-            gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
+            gen_op_mov_v_reg(MO_16, s->T0, R_EAX);
+            tcg_gen_ext16s_tl(s->T0, s->T0);
+            tcg_gen_sari_tl(s->T0, s->T0, 15);
+            gen_op_mov_reg_v(MO_16, R_EDX, s->T0);
             break;
         default:
             tcg_abort();
@@ -5179,14 +5193,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         switch (ot) {
 #ifdef TARGET_X86_64
         case MO_64:
-            tcg_gen_muls2_i64(cpu_regs[reg], cpu_T1, cpu_T0, cpu_T1);
+            tcg_gen_muls2_i64(cpu_regs[reg], cpu_T1, s->T0, cpu_T1);
             tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
             tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
             tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_T1);
             break;
 #endif
         case MO_32:
-            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
             tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
             tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
                               cpu_tmp2_i32, cpu_tmp3_i32);
@@ -5197,14 +5211,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
             break;
         default:
-            tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
+            tcg_gen_ext16s_tl(s->T0, s->T0);
             tcg_gen_ext16s_tl(cpu_T1, cpu_T1);
             /* XXX: use 32 bit mul which could be faster */
-            tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
-            tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
-            tcg_gen_ext16s_tl(cpu_tmp0, cpu_T0);
-            tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
-            gen_op_mov_reg_v(ot, reg, cpu_T0);
+            tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
+            tcg_gen_mov_tl(cpu_cc_dst, s->T0);
+            tcg_gen_ext16s_tl(cpu_tmp0, s->T0);
+            tcg_gen_sub_tl(cpu_cc_src, s->T0, cpu_tmp0);
+            gen_op_mov_reg_v(ot, reg, s->T0);
             break;
         }
         set_cc_op(s, CC_OP_MULB + ot);
@@ -5215,27 +5229,27 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         modrm = x86_ldub_code(env, s);
         reg = ((modrm >> 3) & 7) | rex_r;
         mod = (modrm >> 6) & 3;
-        gen_op_mov_v_reg(ot, cpu_T0, reg);
+        gen_op_mov_v_reg(ot, s->T0, reg);
         if (mod == 3) {
             rm = (modrm & 7) | REX_B(s);
             gen_op_mov_v_reg(ot, cpu_T1, rm);
-            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
+            tcg_gen_add_tl(s->T0, s->T0, cpu_T1);
             gen_op_mov_reg_v(ot, reg, cpu_T1);
-            gen_op_mov_reg_v(ot, rm, cpu_T0);
+            gen_op_mov_reg_v(ot, rm, s->T0);
         } else {
             gen_lea_modrm(env, s, modrm);
             if (s->prefix & PREFIX_LOCK) {
-                tcg_gen_atomic_fetch_add_tl(cpu_T1, s->A0, cpu_T0,
+                tcg_gen_atomic_fetch_add_tl(cpu_T1, s->A0, s->T0,
                                             s->mem_index, ot | MO_LE);
-                tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
+                tcg_gen_add_tl(s->T0, s->T0, cpu_T1);
             } else {
                 gen_op_ld_v(s, ot, cpu_T1, s->A0);
-                tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
-                gen_op_st_v(s, ot, cpu_T0, s->A0);
+                tcg_gen_add_tl(s->T0, s->T0, cpu_T1);
+                gen_op_st_v(s, ot, s->T0, s->A0);
             }
             gen_op_mov_reg_v(ot, reg, cpu_T1);
         }
-        gen_op_update2_cc();
+        gen_op_update2_cc(s);
         set_cc_op(s, CC_OP_ADDB + ot);
         break;
     case 0x1b0:
@@ -5328,14 +5342,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         /**************************/
         /* push/pop */
     case 0x50 ... 0x57: /* push */
-        gen_op_mov_v_reg(MO_32, cpu_T0, (b & 7) | REX_B(s));
-        gen_push_v(s, cpu_T0);
+        gen_op_mov_v_reg(MO_32, s->T0, (b & 7) | REX_B(s));
+        gen_push_v(s, s->T0);
         break;
     case 0x58 ... 0x5f: /* pop */
         ot = gen_pop_T0(s);
         /* NOTE: order is important for pop %sp */
         gen_pop_update(s, ot);
-        gen_op_mov_reg_v(ot, (b & 7) | REX_B(s), cpu_T0);
+        gen_op_mov_reg_v(ot, (b & 7) | REX_B(s), s->T0);
         break;
     case 0x60: /* pusha */
         if (CODE64(s))
@@ -5354,8 +5368,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             val = insn_get(env, s, ot);
         else
             val = (int8_t)insn_get(env, s, MO_8);
-        tcg_gen_movi_tl(cpu_T0, val);
-        gen_push_v(s, cpu_T0);
+        tcg_gen_movi_tl(s->T0, val);
+        gen_push_v(s, s->T0);
         break;
     case 0x8f: /* pop Ev */
         modrm = x86_ldub_code(env, s);
@@ -5365,7 +5379,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             /* NOTE: order is important for pop %sp */
             gen_pop_update(s, ot);
             rm = (modrm & 7) | REX_B(s);
-            gen_op_mov_reg_v(ot, rm, cpu_T0);
+            gen_op_mov_reg_v(ot, rm, s->T0);
         } else {
             /* NOTE: order is important too for MMU exceptions */
             s->popl_esp_hack = 1 << ot;
@@ -5391,13 +5405,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0x1e: /* push ds */
         if (CODE64(s))
             goto illegal_op;
-        gen_op_movl_T0_seg(b >> 3);
-        gen_push_v(s, cpu_T0);
+        gen_op_movl_T0_seg(s, b >> 3);
+        gen_push_v(s, s->T0);
         break;
     case 0x1a0: /* push fs */
     case 0x1a8: /* push gs */
-        gen_op_movl_T0_seg((b >> 3) & 7);
-        gen_push_v(s, cpu_T0);
+        gen_op_movl_T0_seg(s, (b >> 3) & 7);
+        gen_push_v(s, s->T0);
         break;
     case 0x07: /* pop es */
     case 0x17: /* pop ss */
@@ -5451,11 +5465,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_lea_modrm(env, s, modrm);
         }
         val = insn_get(env, s, ot);
-        tcg_gen_movi_tl(cpu_T0, val);
+        tcg_gen_movi_tl(s->T0, val);
         if (mod != 3) {
-            gen_op_st_v(s, ot, cpu_T0, s->A0);
+            gen_op_st_v(s, ot, s->T0, s->A0);
         } else {
-            gen_op_mov_reg_v(ot, (modrm & 7) | REX_B(s), cpu_T0);
+            gen_op_mov_reg_v(ot, (modrm & 7) | REX_B(s), s->T0);
         }
         break;
     case 0x8a:
@@ -5465,7 +5479,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         reg = ((modrm >> 3) & 7) | rex_r;
 
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-        gen_op_mov_reg_v(ot, reg, cpu_T0);
+        gen_op_mov_reg_v(ot, reg, s->T0);
         break;
     case 0x8e: /* mov seg, Gv */
         modrm = x86_ldub_code(env, s);
@@ -5491,7 +5505,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         mod = (modrm >> 6) & 3;
         if (reg >= 6)
             goto illegal_op;
-        gen_op_movl_T0_seg(reg);
+        gen_op_movl_T0_seg(s, reg);
         ot = mod == 3 ? dflag : MO_16;
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
         break;
@@ -5518,30 +5532,30 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
 
             if (mod == 3) {
                 if (s_ot == MO_SB && byte_reg_is_xH(rm)) {
-                    tcg_gen_sextract_tl(cpu_T0, cpu_regs[rm - 4], 8, 8);
+                    tcg_gen_sextract_tl(s->T0, cpu_regs[rm - 4], 8, 8);
                 } else {
-                    gen_op_mov_v_reg(ot, cpu_T0, rm);
+                    gen_op_mov_v_reg(ot, s->T0, rm);
                     switch (s_ot) {
                     case MO_UB:
-                        tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
+                        tcg_gen_ext8u_tl(s->T0, s->T0);
                         break;
                     case MO_SB:
-                        tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
+                        tcg_gen_ext8s_tl(s->T0, s->T0);
                         break;
                     case MO_UW:
-                        tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
+                        tcg_gen_ext16u_tl(s->T0, s->T0);
                         break;
                     default:
                     case MO_SW:
-                        tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
+                        tcg_gen_ext16s_tl(s->T0, s->T0);
                         break;
                     }
                 }
-                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
+                gen_op_mov_reg_v(d_ot, reg, s->T0);
             } else {
                 gen_lea_modrm(env, s, modrm);
-                gen_op_ld_v(s, s_ot, cpu_T0, s->A0);
-                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
+                gen_op_ld_v(s, s_ot, s->T0, s->A0);
+                gen_op_mov_reg_v(d_ot, reg, s->T0);
             }
         }
         break;
@@ -5581,27 +5595,27 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             tcg_gen_movi_tl(s->A0, offset_addr);
             gen_add_A0_ds_seg(s);
             if ((b & 2) == 0) {
-                gen_op_ld_v(s, ot, cpu_T0, s->A0);
-                gen_op_mov_reg_v(ot, R_EAX, cpu_T0);
+                gen_op_ld_v(s, ot, s->T0, s->A0);
+                gen_op_mov_reg_v(ot, R_EAX, s->T0);
             } else {
-                gen_op_mov_v_reg(ot, cpu_T0, R_EAX);
-                gen_op_st_v(s, ot, cpu_T0, s->A0);
+                gen_op_mov_v_reg(ot, s->T0, R_EAX);
+                gen_op_st_v(s, ot, s->T0, s->A0);
             }
         }
         break;
     case 0xd7: /* xlat */
         tcg_gen_mov_tl(s->A0, cpu_regs[R_EBX]);
-        tcg_gen_ext8u_tl(cpu_T0, cpu_regs[R_EAX]);
-        tcg_gen_add_tl(s->A0, s->A0, cpu_T0);
+        tcg_gen_ext8u_tl(s->T0, cpu_regs[R_EAX]);
+        tcg_gen_add_tl(s->A0, s->A0, s->T0);
         gen_extu(s->aflag, s->A0);
         gen_add_A0_ds_seg(s);
-        gen_op_ld_v(s, MO_8, cpu_T0, s->A0);
-        gen_op_mov_reg_v(MO_8, R_EAX, cpu_T0);
+        gen_op_ld_v(s, MO_8, s->T0, s->A0);
+        gen_op_mov_reg_v(MO_8, R_EAX, s->T0);
         break;
     case 0xb0 ... 0xb7: /* mov R, Ib */
         val = insn_get(env, s, MO_8);
-        tcg_gen_movi_tl(cpu_T0, val);
-        gen_op_mov_reg_v(MO_8, (b & 7) | REX_B(s), cpu_T0);
+        tcg_gen_movi_tl(s->T0, val);
+        gen_op_mov_reg_v(MO_8, (b & 7) | REX_B(s), s->T0);
         break;
     case 0xb8 ... 0xbf: /* mov R, Iv */
 #ifdef TARGET_X86_64
@@ -5610,16 +5624,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             /* 64 bit case */
             tmp = x86_ldq_code(env, s);
             reg = (b & 7) | REX_B(s);
-            tcg_gen_movi_tl(cpu_T0, tmp);
-            gen_op_mov_reg_v(MO_64, reg, cpu_T0);
+            tcg_gen_movi_tl(s->T0, tmp);
+            gen_op_mov_reg_v(MO_64, reg, s->T0);
         } else
 #endif
         {
             ot = dflag;
             val = insn_get(env, s, ot);
             reg = (b & 7) | REX_B(s);
-            tcg_gen_movi_tl(cpu_T0, val);
-            gen_op_mov_reg_v(ot, reg, cpu_T0);
+            tcg_gen_movi_tl(s->T0, val);
+            gen_op_mov_reg_v(ot, reg, s->T0);
         }
         break;
 
@@ -5638,15 +5652,15 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         if (mod == 3) {
             rm = (modrm & 7) | REX_B(s);
         do_xchg_reg:
-            gen_op_mov_v_reg(ot, cpu_T0, reg);
+            gen_op_mov_v_reg(ot, s->T0, reg);
             gen_op_mov_v_reg(ot, cpu_T1, rm);
-            gen_op_mov_reg_v(ot, rm, cpu_T0);
+            gen_op_mov_reg_v(ot, rm, s->T0);
             gen_op_mov_reg_v(ot, reg, cpu_T1);
         } else {
             gen_lea_modrm(env, s, modrm);
-            gen_op_mov_v_reg(ot, cpu_T0, reg);
+            gen_op_mov_v_reg(ot, s->T0, reg);
             /* for xchg, lock is implicit */
-            tcg_gen_atomic_xchg_tl(cpu_T1, s->A0, cpu_T0,
+            tcg_gen_atomic_xchg_tl(cpu_T1, s->A0, s->T0,
                                    s->mem_index, ot | MO_LE);
             gen_op_mov_reg_v(ot, reg, cpu_T1);
         }
@@ -5678,7 +5692,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         gen_op_ld_v(s, ot, cpu_T1, s->A0);
         gen_add_A0_im(s, 1 << ot);
         /* load the segment first to handle exceptions properly */
-        gen_op_ld_v(s, MO_16, cpu_T0, s->A0);
+        gen_op_ld_v(s, MO_16, s->T0, s->A0);
         gen_movl_seg_T0(s, op);
         /* then put the data */
         gen_op_mov_reg_v(ot, reg, cpu_T1);
@@ -6220,8 +6234,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 switch(rm) {
                 case 0:
                     gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
-                    tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
-                    gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
+                    tcg_gen_extu_i32_tl(s->T0, cpu_tmp2_i32);
+                    gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
                     break;
                 default:
                     goto unknown_op;
@@ -6331,7 +6345,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0x6c: /* insS */
     case 0x6d:
         ot = mo_b_d32(b, dflag);
-        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
+        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
         gen_check_io(s, ot, pc_start - s->cs_base, 
                      SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4);
         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
@@ -6346,7 +6360,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0x6e: /* outsS */
     case 0x6f:
         ot = mo_b_d32(b, dflag);
-        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
+        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      svm_is_rep(prefixes) | 4);
         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
@@ -6366,7 +6380,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0xe5:
         ot = mo_b_d32(b, dflag);
         val = x86_ldub_code(env, s);
-        tcg_gen_movi_tl(cpu_T0, val);
+        tcg_gen_movi_tl(s->T0, val);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
@@ -6385,7 +6399,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0xe7:
         ot = mo_b_d32(b, dflag);
         val = x86_ldub_code(env, s);
-        tcg_gen_movi_tl(cpu_T0, val);
+        tcg_gen_movi_tl(s->T0, val);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      svm_is_rep(prefixes));
         gen_op_mov_v_reg(ot, cpu_T1, R_EAX);
@@ -6405,13 +6419,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0xec:
     case 0xed:
         ot = mo_b_d32(b, dflag);
-        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
+        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_start();
 	}
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
         gen_helper_in_func(ot, cpu_T1, cpu_tmp2_i32);
         gen_op_mov_reg_v(ot, R_EAX, cpu_T1);
         gen_bpt_io(s, cpu_tmp2_i32, ot);
@@ -6423,7 +6437,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0xee:
     case 0xef:
         ot = mo_b_d32(b, dflag);
-        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
+        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      svm_is_rep(prefixes));
         gen_op_mov_v_reg(ot, cpu_T1, R_EAX);
@@ -6431,7 +6445,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_start();
 	}
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
         tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
         gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
         gen_bpt_io(s, cpu_tmp2_i32, ot);
@@ -6448,17 +6462,17 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         ot = gen_pop_T0(s);
         gen_stack_update(s, val + (1 << ot));
         /* Note that gen_pop_T0 uses a zero-extending load.  */
-        gen_op_jmp_v(cpu_T0);
+        gen_op_jmp_v(s->T0);
         gen_bnd_jmp(s);
-        gen_jr(s, cpu_T0);
+        gen_jr(s, s->T0);
         break;
     case 0xc3: /* ret */
         ot = gen_pop_T0(s);
         gen_pop_update(s, ot);
         /* Note that gen_pop_T0 uses a zero-extending load.  */
-        gen_op_jmp_v(cpu_T0);
+        gen_op_jmp_v(s->T0);
         gen_bnd_jmp(s);
-        gen_jr(s, cpu_T0);
+        gen_jr(s, s->T0);
         break;
     case 0xca: /* lret im */
         val = x86_ldsw_code(env, s);
@@ -6471,14 +6485,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         } else {
             gen_stack_A0(s);
             /* pop offset */
-            gen_op_ld_v(s, dflag, cpu_T0, s->A0);
+            gen_op_ld_v(s, dflag, s->T0, s->A0);
             /* NOTE: keeping EIP updated is not a problem in case of
                exception */
-            gen_op_jmp_v(cpu_T0);
+            gen_op_jmp_v(s->T0);
             /* pop selector */
             gen_add_A0_im(s, 1 << dflag);
-            gen_op_ld_v(s, dflag, cpu_T0, s->A0);
-            gen_op_movl_seg_T0_vm(R_CS);
+            gen_op_ld_v(s, dflag, s->T0, s->A0);
+            gen_op_movl_seg_T0_vm(s, R_CS);
             /* add stack offset */
             gen_stack_update(s, val + (2 << dflag));
         }
@@ -6521,8 +6535,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             } else if (!CODE64(s)) {
                 tval &= 0xffffffff;
             }
-            tcg_gen_movi_tl(cpu_T0, next_eip);
-            gen_push_v(s, cpu_T0);
+            tcg_gen_movi_tl(s->T0, next_eip);
+            gen_push_v(s, s->T0);
             gen_bnd_jmp(s);
             gen_jmp(s, tval);
         }
@@ -6537,7 +6551,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             offset = insn_get(env, s, ot);
             selector = insn_get(env, s, MO_16);
 
-            tcg_gen_movi_tl(cpu_T0, selector);
+            tcg_gen_movi_tl(s->T0, selector);
             tcg_gen_movi_tl(cpu_T1, offset);
         }
         goto do_lcall;
@@ -6566,7 +6580,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             offset = insn_get(env, s, ot);
             selector = insn_get(env, s, MO_16);
 
-            tcg_gen_movi_tl(cpu_T0, selector);
+            tcg_gen_movi_tl(s->T0, selector);
             tcg_gen_movi_tl(cpu_T1, offset);
         }
         goto do_ljmp;
@@ -6599,7 +6613,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
 
     case 0x190 ... 0x19f: /* setcc Gv */
         modrm = x86_ldub_code(env, s);
-        gen_setcc1(s, b, cpu_T0);
+        gen_setcc1(s, b, s->T0);
         gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
         break;
     case 0x140 ... 0x14f: /* cmov Gv, Ev */
@@ -6620,8 +6634,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
             gen_update_cc_op(s);
-            gen_helper_read_eflags(cpu_T0, cpu_env);
-            gen_push_v(s, cpu_T0);
+            gen_helper_read_eflags(s->T0, cpu_env);
+            gen_push_v(s, s->T0);
         }
         break;
     case 0x9d: /* popf */
@@ -6632,13 +6646,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             ot = gen_pop_T0(s);
             if (s->cpl == 0) {
                 if (dflag != MO_16) {
-                    gen_helper_write_eflags(cpu_env, cpu_T0,
+                    gen_helper_write_eflags(cpu_env, s->T0,
                                             tcg_const_i32((TF_MASK | AC_MASK |
                                                            ID_MASK | NT_MASK |
                                                            IF_MASK |
                                                            IOPL_MASK)));
                 } else {
-                    gen_helper_write_eflags(cpu_env, cpu_T0,
+                    gen_helper_write_eflags(cpu_env, s->T0,
                                             tcg_const_i32((TF_MASK | AC_MASK |
                                                            ID_MASK | NT_MASK |
                                                            IF_MASK | IOPL_MASK)
@@ -6647,14 +6661,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             } else {
                 if (s->cpl <= s->iopl) {
                     if (dflag != MO_16) {
-                        gen_helper_write_eflags(cpu_env, cpu_T0,
+                        gen_helper_write_eflags(cpu_env, s->T0,
                                                 tcg_const_i32((TF_MASK |
                                                                AC_MASK |
                                                                ID_MASK |
                                                                NT_MASK |
                                                                IF_MASK)));
                     } else {
-                        gen_helper_write_eflags(cpu_env, cpu_T0,
+                        gen_helper_write_eflags(cpu_env, s->T0,
                                                 tcg_const_i32((TF_MASK |
                                                                AC_MASK |
                                                                ID_MASK |
@@ -6664,11 +6678,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                     }
                 } else {
                     if (dflag != MO_16) {
-                        gen_helper_write_eflags(cpu_env, cpu_T0,
+                        gen_helper_write_eflags(cpu_env, s->T0,
                                            tcg_const_i32((TF_MASK | AC_MASK |
                                                           ID_MASK | NT_MASK)));
                     } else {
-                        gen_helper_write_eflags(cpu_env, cpu_T0,
+                        gen_helper_write_eflags(cpu_env, s->T0,
                                            tcg_const_i32((TF_MASK | AC_MASK |
                                                           ID_MASK | NT_MASK)
                                                          & 0xffff));
@@ -6685,19 +6699,19 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0x9e: /* sahf */
         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
             goto illegal_op;
-        gen_op_mov_v_reg(MO_8, cpu_T0, R_AH);
+        gen_op_mov_v_reg(MO_8, s->T0, R_AH);
         gen_compute_eflags(s);
         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
-        tcg_gen_andi_tl(cpu_T0, cpu_T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
-        tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T0);
+        tcg_gen_andi_tl(s->T0, s->T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
+        tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, s->T0);
         break;
     case 0x9f: /* lahf */
         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
             goto illegal_op;
         gen_compute_eflags(s);
         /* Note: gen_compute_eflags() only gives the condition codes */
-        tcg_gen_ori_tl(cpu_T0, cpu_cc_src, 0x02);
-        gen_op_mov_reg_v(MO_8, R_AH, cpu_T0);
+        tcg_gen_ori_tl(s->T0, cpu_cc_src, 0x02);
+        gen_op_mov_reg_v(MO_8, R_AH, s->T0);
         break;
     case 0xf5: /* cmc */
         gen_compute_eflags(s);
@@ -6732,10 +6746,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             s->rip_offset = 1;
             gen_lea_modrm(env, s, modrm);
             if (!(s->prefix & PREFIX_LOCK)) {
-                gen_op_ld_v(s, ot, cpu_T0, s->A0);
+                gen_op_ld_v(s, ot, s->T0, s->A0);
             }
         } else {
-            gen_op_mov_v_reg(ot, cpu_T0, rm);
+            gen_op_mov_v_reg(ot, s->T0, rm);
         }
         /* load shift */
         val = x86_ldub_code(env, s);
@@ -6771,10 +6785,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), cpu_tmp0);
             gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
             if (!(s->prefix & PREFIX_LOCK)) {
-                gen_op_ld_v(s, ot, cpu_T0, s->A0);
+                gen_op_ld_v(s, ot, s->T0, s->A0);
             }
         } else {
-            gen_op_mov_v_reg(ot, cpu_T0, rm);
+            gen_op_mov_v_reg(ot, s->T0, rm);
         }
     bt_op:
         tcg_gen_andi_tl(cpu_T1, cpu_T1, (1 << (3 + ot)) - 1);
@@ -6785,46 +6799,46 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             case 0: /* bt */
                 /* Needs no atomic ops; we surpressed the normal
                    memory load for LOCK above so do it now.  */
-                gen_op_ld_v(s, ot, cpu_T0, s->A0);
+                gen_op_ld_v(s, ot, s->T0, s->A0);
                 break;
             case 1: /* bts */
-                tcg_gen_atomic_fetch_or_tl(cpu_T0, s->A0, cpu_tmp0,
+                tcg_gen_atomic_fetch_or_tl(s->T0, s->A0, cpu_tmp0,
                                            s->mem_index, ot | MO_LE);
                 break;
             case 2: /* btr */
                 tcg_gen_not_tl(cpu_tmp0, cpu_tmp0);
-                tcg_gen_atomic_fetch_and_tl(cpu_T0, s->A0, cpu_tmp0,
+                tcg_gen_atomic_fetch_and_tl(s->T0, s->A0, cpu_tmp0,
                                             s->mem_index, ot | MO_LE);
                 break;
             default:
             case 3: /* btc */
-                tcg_gen_atomic_fetch_xor_tl(cpu_T0, s->A0, cpu_tmp0,
+                tcg_gen_atomic_fetch_xor_tl(s->T0, s->A0, cpu_tmp0,
                                             s->mem_index, ot | MO_LE);
                 break;
             }
-            tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1);
+            tcg_gen_shr_tl(cpu_tmp4, s->T0, cpu_T1);
         } else {
-            tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1);
+            tcg_gen_shr_tl(cpu_tmp4, s->T0, cpu_T1);
             switch (op) {
             case 0: /* bt */
                 /* Data already loaded; nothing to do.  */
                 break;
             case 1: /* bts */
-                tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0);
+                tcg_gen_or_tl(s->T0, s->T0, cpu_tmp0);
                 break;
             case 2: /* btr */
-                tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_tmp0);
+                tcg_gen_andc_tl(s->T0, s->T0, cpu_tmp0);
                 break;
             default:
             case 3: /* btc */
-                tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_tmp0);
+                tcg_gen_xor_tl(s->T0, s->T0, cpu_tmp0);
                 break;
             }
             if (op != 0) {
                 if (mod != 3) {
-                    gen_op_st_v(s, ot, cpu_T0, s->A0);
+                    gen_op_st_v(s, ot, s->T0, s->A0);
                 } else {
-                    gen_op_mov_reg_v(ot, rm, cpu_T0);
+                    gen_op_mov_reg_v(ot, rm, s->T0);
                 }
             }
         }
@@ -6865,7 +6879,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         modrm = x86_ldub_code(env, s);
         reg = ((modrm >> 3) & 7) | rex_r;
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-        gen_extu(ot, cpu_T0);
+        gen_extu(ot, s->T0);
 
         /* Note that lzcnt and tzcnt are in different extensions.  */
         if ((prefixes & PREFIX_REPZ)
@@ -6874,23 +6888,23 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
             int size = 8 << ot;
             /* For lzcnt/tzcnt, C bit is defined related to the input. */
-            tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
+            tcg_gen_mov_tl(cpu_cc_src, s->T0);
             if (b & 1) {
                 /* For lzcnt, reduce the target_ulong result by the
                    number of zeros that we expect to find at the top.  */
-                tcg_gen_clzi_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS);
-                tcg_gen_subi_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS - size);
+                tcg_gen_clzi_tl(s->T0, s->T0, TARGET_LONG_BITS);
+                tcg_gen_subi_tl(s->T0, s->T0, TARGET_LONG_BITS - size);
             } else {
                 /* For tzcnt, a zero input must return the operand size.  */
-                tcg_gen_ctzi_tl(cpu_T0, cpu_T0, size);
+                tcg_gen_ctzi_tl(s->T0, s->T0, size);
             }
             /* For lzcnt/tzcnt, Z bit is defined related to the result.  */
-            gen_op_update1_cc();
+            gen_op_update1_cc(s);
             set_cc_op(s, CC_OP_BMILGB + ot);
         } else {
             /* For bsr/bsf, only the Z bit is defined and it is related
                to the input and not the result.  */
-            tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
+            tcg_gen_mov_tl(cpu_cc_dst, s->T0);
             set_cc_op(s, CC_OP_LOGICB + ot);
 
             /* ??? The manual says that the output is undefined when the
@@ -6901,13 +6915,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 /* For bsr, return the bit index of the first 1 bit,
                    not the count of leading zeros.  */
                 tcg_gen_xori_tl(cpu_T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
-                tcg_gen_clz_tl(cpu_T0, cpu_T0, cpu_T1);
-                tcg_gen_xori_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS - 1);
+                tcg_gen_clz_tl(s->T0, s->T0, cpu_T1);
+                tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1);
             } else {
-                tcg_gen_ctz_tl(cpu_T0, cpu_T0, cpu_regs[reg]);
+                tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]);
             }
         }
-        gen_op_mov_reg_v(ot, reg, cpu_T0);
+        gen_op_mov_reg_v(ot, reg, s->T0);
         break;
         /************************/
         /* bcd */
@@ -7047,9 +7061,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
-        gen_op_mov_v_reg(ot, cpu_T0, reg);
+        gen_op_mov_v_reg(ot, s->T0, reg);
         gen_lea_modrm(env, s, modrm);
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
         if (ot == MO_16) {
             gen_helper_boundw(cpu_env, s->A0, cpu_tmp2_i32);
         } else {
@@ -7060,24 +7074,24 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         reg = (b & 7) | REX_B(s);
 #ifdef TARGET_X86_64
         if (dflag == MO_64) {
-            gen_op_mov_v_reg(MO_64, cpu_T0, reg);
-            tcg_gen_bswap64_i64(cpu_T0, cpu_T0);
-            gen_op_mov_reg_v(MO_64, reg, cpu_T0);
+            gen_op_mov_v_reg(MO_64, s->T0, reg);
+            tcg_gen_bswap64_i64(s->T0, s->T0);
+            gen_op_mov_reg_v(MO_64, reg, s->T0);
         } else
 #endif
         {
-            gen_op_mov_v_reg(MO_32, cpu_T0, reg);
-            tcg_gen_ext32u_tl(cpu_T0, cpu_T0);
-            tcg_gen_bswap32_tl(cpu_T0, cpu_T0);
-            gen_op_mov_reg_v(MO_32, reg, cpu_T0);
+            gen_op_mov_v_reg(MO_32, s->T0, reg);
+            tcg_gen_ext32u_tl(s->T0, s->T0);
+            tcg_gen_bswap32_tl(s->T0, s->T0);
+            gen_op_mov_reg_v(MO_32, reg, s->T0);
         }
         break;
     case 0xd6: /* salc */
         if (CODE64(s))
             goto illegal_op;
-        gen_compute_eflags_c(s, cpu_T0);
-        tcg_gen_neg_tl(cpu_T0, cpu_T0);
-        gen_op_mov_reg_v(MO_8, R_EAX, cpu_T0);
+        gen_compute_eflags_c(s, s->T0);
+        tcg_gen_neg_tl(s->T0, s->T0);
+        gen_op_mov_reg_v(MO_8, R_EAX, s->T0);
         break;
     case 0xe0: /* loopnz */
     case 0xe1: /* loopz */
@@ -7229,7 +7243,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             if (!s->pe || s->vm86)
                 goto illegal_op;
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
-            tcg_gen_ld32u_tl(cpu_T0, cpu_env,
+            tcg_gen_ld32u_tl(s->T0, cpu_env,
                              offsetof(CPUX86State, ldt.selector));
             ot = mod == 3 ? dflag : MO_16;
             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
@@ -7242,7 +7256,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
                 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
                 gen_helper_lldt(cpu_env, cpu_tmp2_i32);
             }
             break;
@@ -7250,7 +7264,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             if (!s->pe || s->vm86)
                 goto illegal_op;
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
-            tcg_gen_ld32u_tl(cpu_T0, cpu_env,
+            tcg_gen_ld32u_tl(s->T0, cpu_env,
                              offsetof(CPUX86State, tr.selector));
             ot = mod == 3 ? dflag : MO_16;
             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
@@ -7263,7 +7277,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
                 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
                 gen_helper_ltr(cpu_env, cpu_tmp2_i32);
             }
             break;
@@ -7274,9 +7288,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
             gen_update_cc_op(s);
             if (op == 4) {
-                gen_helper_verr(cpu_env, cpu_T0);
+                gen_helper_verr(cpu_env, s->T0);
             } else {
-                gen_helper_verw(cpu_env, cpu_T0);
+                gen_helper_verw(cpu_env, s->T0);
             }
             set_cc_op(s, CC_OP_EFLAGS);
             break;
@@ -7291,15 +7305,15 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         CASE_MODRM_MEM_OP(0): /* sgdt */
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
             gen_lea_modrm(env, s, modrm);
-            tcg_gen_ld32u_tl(cpu_T0,
+            tcg_gen_ld32u_tl(s->T0,
                              cpu_env, offsetof(CPUX86State, gdt.limit));
-            gen_op_st_v(s, MO_16, cpu_T0, s->A0);
+            gen_op_st_v(s, MO_16, s->T0, s->A0);
             gen_add_A0_im(s, 2);
-            tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, gdt.base));
+            tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
             if (dflag == MO_16) {
-                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
+                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
             }
-            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, s->A0);
+            gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
             break;
 
         case 0xc8: /* monitor */
@@ -7347,14 +7361,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         CASE_MODRM_MEM_OP(1): /* sidt */
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
             gen_lea_modrm(env, s, modrm);
-            tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.limit));
-            gen_op_st_v(s, MO_16, cpu_T0, s->A0);
+            tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.limit));
+            gen_op_st_v(s, MO_16, s->T0, s->A0);
             gen_add_A0_im(s, 2);
-            tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.base));
+            tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
             if (dflag == MO_16) {
-                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
+                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
             }
-            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, s->A0);
+            gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
             break;
 
         case 0xd0: /* xgetbv */
@@ -7500,11 +7514,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_lea_modrm(env, s, modrm);
             gen_op_ld_v(s, MO_16, cpu_T1, s->A0);
             gen_add_A0_im(s, 2);
-            gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, s->A0);
+            gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
             if (dflag == MO_16) {
-                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
+                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
             }
-            tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State, gdt.base));
+            tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
             tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, gdt.limit));
             break;
 
@@ -7517,17 +7531,17 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_lea_modrm(env, s, modrm);
             gen_op_ld_v(s, MO_16, cpu_T1, s->A0);
             gen_add_A0_im(s, 2);
-            gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, s->A0);
+            gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
             if (dflag == MO_16) {
-                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
+                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
             }
-            tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.base));
+            tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
             tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, idt.limit));
             break;
 
         CASE_MODRM_OP(4): /* smsw */
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0);
-            tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, cr[0]));
+            tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, cr[0]));
             if (CODE64(s)) {
                 mod = (modrm >> 6) & 3;
                 ot = (mod != 3 ? MO_16 : s->dflag);
@@ -7560,7 +7574,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             }
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
-            gen_helper_lmsw(cpu_env, cpu_T0);
+            gen_helper_lmsw(cpu_env, s->T0);
             gen_jmp_im(s->pc - s->cs_base);
             gen_eob(s);
             break;
@@ -7584,10 +7598,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 if (s->cpl != 0) {
                     gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                 } else {
-                    tcg_gen_mov_tl(cpu_T0, cpu_seg_base[R_GS]);
+                    tcg_gen_mov_tl(s->T0, cpu_seg_base[R_GS]);
                     tcg_gen_ld_tl(cpu_seg_base[R_GS], cpu_env,
                                   offsetof(CPUX86State, kernelgsbase));
-                    tcg_gen_st_tl(cpu_T0, cpu_env,
+                    tcg_gen_st_tl(s->T0, cpu_env,
                                   offsetof(CPUX86State, kernelgsbase));
                 }
                 break;
@@ -7638,16 +7652,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             rm = (modrm & 7) | REX_B(s);
 
             if (mod == 3) {
-                gen_op_mov_v_reg(MO_32, cpu_T0, rm);
+                gen_op_mov_v_reg(MO_32, s->T0, rm);
                 /* sign extend */
                 if (d_ot == MO_64) {
-                    tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
+                    tcg_gen_ext32s_tl(s->T0, s->T0);
                 }
-                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
+                gen_op_mov_reg_v(d_ot, reg, s->T0);
             } else {
                 gen_lea_modrm(env, s, modrm);
-                gen_op_ld_v(s, MO_32 | MO_SIGN, cpu_T0, s->A0);
-                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
+                gen_op_ld_v(s, MO_32 | MO_SIGN, s->T0, s->A0);
+                gen_op_mov_reg_v(d_ot, reg, s->T0);
             }
         } else
 #endif
@@ -7712,9 +7726,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             t0 = tcg_temp_local_new();
             gen_update_cc_op(s);
             if (b == 0x102) {
-                gen_helper_lar(t0, cpu_env, cpu_T0);
+                gen_helper_lar(t0, cpu_env, s->T0);
             } else {
-                gen_helper_lsl(t0, cpu_env, cpu_T0);
+                gen_helper_lsl(t0, cpu_env, s->T0);
             }
             tcg_gen_andi_tl(cpu_tmp0, cpu_cc_src, CC_Z);
             label1 = gen_new_label();
@@ -7816,16 +7830,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 }
                 gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
                 if (a.index >= 0) {
-                    tcg_gen_mov_tl(cpu_T0, cpu_regs[a.index]);
+                    tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
                 } else {
-                    tcg_gen_movi_tl(cpu_T0, 0);
+                    tcg_gen_movi_tl(s->T0, 0);
                 }
                 if (CODE64(s)) {
-                    gen_helper_bndldx64(cpu_bndl[reg], cpu_env, s->A0, cpu_T0);
+                    gen_helper_bndldx64(cpu_bndl[reg], cpu_env, s->A0, s->T0);
                     tcg_gen_ld_i64(cpu_bndu[reg], cpu_env,
                                    offsetof(CPUX86State, mmx_t0.MMX_Q(0)));
                 } else {
-                    gen_helper_bndldx32(cpu_bndu[reg], cpu_env, s->A0, cpu_T0);
+                    gen_helper_bndldx32(cpu_bndu[reg], cpu_env, s->A0, s->T0);
                     tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndu[reg]);
                     tcg_gen_shri_i64(cpu_bndu[reg], cpu_bndu[reg], 32);
                 }
@@ -7921,15 +7935,15 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 }
                 gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
                 if (a.index >= 0) {
-                    tcg_gen_mov_tl(cpu_T0, cpu_regs[a.index]);
+                    tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
                 } else {
-                    tcg_gen_movi_tl(cpu_T0, 0);
+                    tcg_gen_movi_tl(s->T0, 0);
                 }
                 if (CODE64(s)) {
-                    gen_helper_bndstx64(cpu_env, s->A0, cpu_T0,
+                    gen_helper_bndstx64(cpu_env, s->A0, s->T0,
                                         cpu_bndl[reg], cpu_bndu[reg]);
                 } else {
-                    gen_helper_bndstx32(cpu_env, s->A0, cpu_T0,
+                    gen_helper_bndstx32(cpu_env, s->A0, s->T0,
                                         cpu_bndl[reg], cpu_bndu[reg]);
                 }
             }
@@ -7973,9 +7987,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
                         gen_io_start();
                     }
-                    gen_op_mov_v_reg(ot, cpu_T0, rm);
+                    gen_op_mov_v_reg(ot, s->T0, rm);
                     gen_helper_write_crN(cpu_env, tcg_const_i32(reg),
-                                         cpu_T0);
+                                         s->T0);
                     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
                         gen_io_end();
                     }
@@ -7985,8 +7999,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
                         gen_io_start();
                     }
-                    gen_helper_read_crN(cpu_T0, cpu_env, tcg_const_i32(reg));
-                    gen_op_mov_reg_v(ot, rm, cpu_T0);
+                    gen_helper_read_crN(s->T0, cpu_env, tcg_const_i32(reg));
+                    gen_op_mov_reg_v(ot, rm, s->T0);
                     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
                         gen_io_end();
                     }
@@ -8019,16 +8033,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             }
             if (b & 2) {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg);
-                gen_op_mov_v_reg(ot, cpu_T0, rm);
+                gen_op_mov_v_reg(ot, s->T0, rm);
                 tcg_gen_movi_i32(cpu_tmp2_i32, reg);
-                gen_helper_set_dr(cpu_env, cpu_tmp2_i32, cpu_T0);
+                gen_helper_set_dr(cpu_env, cpu_tmp2_i32, s->T0);
                 gen_jmp_im(s->pc - s->cs_base);
                 gen_eob(s);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
                 tcg_gen_movi_i32(cpu_tmp2_i32, reg);
-                gen_helper_get_dr(cpu_T0, cpu_env, cpu_tmp2_i32);
-                gen_op_mov_reg_v(ot, rm, cpu_T0);
+                gen_helper_get_dr(s->T0, cpu_env, cpu_tmp2_i32);
+                gen_op_mov_reg_v(ot, rm, s->T0);
             }
         }
         break;
@@ -8107,8 +8121,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 break;
             }
             gen_lea_modrm(env, s, modrm);
-            tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, mxcsr));
-            gen_op_st_v(s, MO_32, cpu_T0, s->A0);
+            tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, mxcsr));
+            gen_op_st_v(s, MO_32, s->T0, s->A0);
             break;
 
         CASE_MODRM_MEM_OP(4): /* xsave */
@@ -8287,10 +8301,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         }
 
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-        gen_extu(ot, cpu_T0);
-        tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
-        tcg_gen_ctpop_tl(cpu_T0, cpu_T0);
-        gen_op_mov_reg_v(ot, reg, cpu_T0);
+        gen_extu(ot, s->T0);
+        tcg_gen_mov_tl(cpu_cc_src, s->T0);
+        tcg_gen_ctpop_tl(s->T0, s->T0);
+        gen_op_mov_reg_v(ot, reg, s->T0);
 
         set_cc_op(s, CC_OP_POPCNT);
         break;
@@ -8456,7 +8470,7 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
         printf("ERROR addseg\n");
 #endif
 
-    cpu_T0 = tcg_temp_new();
+    dc->T0 = tcg_temp_new();
     cpu_T1 = tcg_temp_new();
     dc->A0 = tcg_temp_new();
 
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [Qemu-devel] [PATCH v3 04/13] target/i386: move cpu_T1 to DisasContext
  2018-09-11 20:28 [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg Emilio G. Cota
                   ` (2 preceding siblings ...)
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 03/13] target/i386: move cpu_T0 " Emilio G. Cota
@ 2018-09-11 20:28 ` Emilio G. Cota
  2018-09-11 20:48   ` Richard Henderson
  2018-09-13 14:26   ` Alex Bennée
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 05/13] target/i386: move cpu_tmp0 " Emilio G. Cota
                   ` (9 subsequent siblings)
  13 siblings, 2 replies; 32+ messages in thread
From: Emilio G. Cota @ 2018-09-11 20:28 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 target/i386/translate.c | 341 ++++++++++++++++++++--------------------
 1 file changed, 170 insertions(+), 171 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 73fd7e5b9a..bd27e65344 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -78,8 +78,6 @@ static TCGv cpu_regs[CPU_NB_REGS];
 static TCGv cpu_seg_base[6];
 static TCGv_i64 cpu_bndl[4];
 static TCGv_i64 cpu_bndu[4];
-/* local temps */
-static TCGv cpu_T1;
 /* local register indexes (only used inside old micro ops) */
 static TCGv cpu_tmp0, cpu_tmp4;
 static TCGv_ptr cpu_ptr0, cpu_ptr1;
@@ -139,6 +137,7 @@ typedef struct DisasContext {
     TCGv cc_srcT;
     TCGv A0;
     TCGv T0;
+    TCGv T1;
 
     sigjmp_buf jmpbuf;
 } DisasContext;
@@ -656,20 +655,20 @@ static void gen_op_update1_cc(DisasContext *s)
 
 static void gen_op_update2_cc(DisasContext *s)
 {
-    tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
+    tcg_gen_mov_tl(cpu_cc_src, s->T1);
     tcg_gen_mov_tl(cpu_cc_dst, s->T0);
 }
 
 static void gen_op_update3_cc(DisasContext *s, TCGv reg)
 {
     tcg_gen_mov_tl(cpu_cc_src2, reg);
-    tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
+    tcg_gen_mov_tl(cpu_cc_src, s->T1);
     tcg_gen_mov_tl(cpu_cc_dst, s->T0);
 }
 
 static inline void gen_op_testl_T0_T1_cc(DisasContext *s)
 {
-    tcg_gen_and_tl(cpu_cc_dst, s->T0, cpu_T1);
+    tcg_gen_and_tl(cpu_cc_dst, s->T0, s->T1);
 }
 
 static void gen_op_update_neg_cc(DisasContext *s)
@@ -1090,7 +1089,7 @@ static inline void gen_lods(DisasContext *s, TCGMemOp ot)
 static inline void gen_scas(DisasContext *s, TCGMemOp ot)
 {
     gen_string_movl_A0_EDI(s);
-    gen_op_ld_v(s, ot, cpu_T1, s->A0);
+    gen_op_ld_v(s, ot, s->T1, s->A0);
     gen_op(s, OP_CMPL, ot, R_EAX);
     gen_op_movl_T0_Dshift(s, ot);
     gen_op_add_reg_T0(s, s->aflag, R_EDI);
@@ -1099,7 +1098,7 @@ static inline void gen_scas(DisasContext *s, TCGMemOp ot)
 static inline void gen_cmps(DisasContext *s, TCGMemOp ot)
 {
     gen_string_movl_A0_EDI(s);
-    gen_op_ld_v(s, ot, cpu_T1, s->A0);
+    gen_op_ld_v(s, ot, s->T1, s->A0);
     gen_string_movl_A0_ESI(s);
     gen_op(s, OP_CMPL, ot, OR_TMP0);
     gen_op_movl_T0_Dshift(s, ot);
@@ -1274,11 +1273,11 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
     case OP_ADCL:
         gen_compute_eflags_c(s1, cpu_tmp4);
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_add_tl(s1->T0, cpu_tmp4, cpu_T1);
+            tcg_gen_add_tl(s1->T0, cpu_tmp4, s1->T1);
             tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                         s1->mem_index, ot | MO_LE);
         } else {
-            tcg_gen_add_tl(s1->T0, s1->T0, cpu_T1);
+            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
             tcg_gen_add_tl(s1->T0, s1->T0, cpu_tmp4);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
@@ -1288,12 +1287,12 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
     case OP_SBBL:
         gen_compute_eflags_c(s1, cpu_tmp4);
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_add_tl(s1->T0, cpu_T1, cpu_tmp4);
+            tcg_gen_add_tl(s1->T0, s1->T1, cpu_tmp4);
             tcg_gen_neg_tl(s1->T0, s1->T0);
             tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                         s1->mem_index, ot | MO_LE);
         } else {
-            tcg_gen_sub_tl(s1->T0, s1->T0, cpu_T1);
+            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
             tcg_gen_sub_tl(s1->T0, s1->T0, cpu_tmp4);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
@@ -1302,10 +1301,10 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
         break;
     case OP_ADDL:
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, cpu_T1,
+            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T1,
                                         s1->mem_index, ot | MO_LE);
         } else {
-            tcg_gen_add_tl(s1->T0, s1->T0, cpu_T1);
+            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
         gen_op_update2_cc(s1);
@@ -1313,13 +1312,13 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
         break;
     case OP_SUBL:
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_neg_tl(s1->T0, cpu_T1);
+            tcg_gen_neg_tl(s1->T0, s1->T1);
             tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, s1->T0,
                                         s1->mem_index, ot | MO_LE);
-            tcg_gen_sub_tl(s1->T0, s1->cc_srcT, cpu_T1);
+            tcg_gen_sub_tl(s1->T0, s1->cc_srcT, s1->T1);
         } else {
             tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
-            tcg_gen_sub_tl(s1->T0, s1->T0, cpu_T1);
+            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
         gen_op_update2_cc(s1);
@@ -1328,10 +1327,10 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
     default:
     case OP_ANDL:
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_atomic_and_fetch_tl(s1->T0, s1->A0, cpu_T1,
+            tcg_gen_atomic_and_fetch_tl(s1->T0, s1->A0, s1->T1,
                                         s1->mem_index, ot | MO_LE);
         } else {
-            tcg_gen_and_tl(s1->T0, s1->T0, cpu_T1);
+            tcg_gen_and_tl(s1->T0, s1->T0, s1->T1);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
         gen_op_update1_cc(s1);
@@ -1339,10 +1338,10 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
         break;
     case OP_ORL:
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_atomic_or_fetch_tl(s1->T0, s1->A0, cpu_T1,
+            tcg_gen_atomic_or_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
         } else {
-            tcg_gen_or_tl(s1->T0, s1->T0, cpu_T1);
+            tcg_gen_or_tl(s1->T0, s1->T0, s1->T1);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
         gen_op_update1_cc(s1);
@@ -1350,19 +1349,19 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
         break;
     case OP_XORL:
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_atomic_xor_fetch_tl(s1->T0, s1->A0, cpu_T1,
+            tcg_gen_atomic_xor_fetch_tl(s1->T0, s1->A0, s1->T1,
                                         s1->mem_index, ot | MO_LE);
         } else {
-            tcg_gen_xor_tl(s1->T0, s1->T0, cpu_T1);
+            tcg_gen_xor_tl(s1->T0, s1->T0, s1->T1);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
         gen_op_update1_cc(s1);
         set_cc_op(s1, CC_OP_LOGICB + ot);
         break;
     case OP_CMPL:
-        tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
+        tcg_gen_mov_tl(cpu_cc_src, s1->T1);
         tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
-        tcg_gen_sub_tl(cpu_cc_dst, s1->T0, cpu_T1);
+        tcg_gen_sub_tl(cpu_cc_dst, s1->T0, s1->T1);
         set_cc_op(s1, CC_OP_SUBB + ot);
         break;
     }
@@ -1447,28 +1446,28 @@ static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
         gen_op_mov_v_reg(ot, s->T0, op1);
     }
 
-    tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);
-    tcg_gen_subi_tl(cpu_tmp0, cpu_T1, 1);
+    tcg_gen_andi_tl(s->T1, s->T1, mask);
+    tcg_gen_subi_tl(cpu_tmp0, s->T1, 1);
 
     if (is_right) {
         if (is_arith) {
             gen_exts(ot, s->T0);
             tcg_gen_sar_tl(cpu_tmp0, s->T0, cpu_tmp0);
-            tcg_gen_sar_tl(s->T0, s->T0, cpu_T1);
+            tcg_gen_sar_tl(s->T0, s->T0, s->T1);
         } else {
             gen_extu(ot, s->T0);
             tcg_gen_shr_tl(cpu_tmp0, s->T0, cpu_tmp0);
-            tcg_gen_shr_tl(s->T0, s->T0, cpu_T1);
+            tcg_gen_shr_tl(s->T0, s->T0, s->T1);
         }
     } else {
         tcg_gen_shl_tl(cpu_tmp0, s->T0, cpu_tmp0);
-        tcg_gen_shl_tl(s->T0, s->T0, cpu_T1);
+        tcg_gen_shl_tl(s->T0, s->T0, s->T1);
     }
 
     /* store */
     gen_op_st_rm_T0_A0(s, ot, op1);
 
-    gen_shift_flags(s, ot, s->T0, cpu_tmp0, cpu_T1, is_right);
+    gen_shift_flags(s, ot, s->T0, cpu_tmp0, s->T1, is_right);
 }
 
 static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
@@ -1523,7 +1522,7 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
         gen_op_mov_v_reg(ot, s->T0, op1);
     }
 
-    tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);
+    tcg_gen_andi_tl(s->T1, s->T1, mask);
 
     switch (ot) {
     case MO_8:
@@ -1539,7 +1538,7 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
 #ifdef TARGET_X86_64
     case MO_32:
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
+        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T1);
         if (is_right) {
             tcg_gen_rotr_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
         } else {
@@ -1550,9 +1549,9 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
 #endif
     default:
         if (is_right) {
-            tcg_gen_rotr_tl(s->T0, s->T0, cpu_T1);
+            tcg_gen_rotr_tl(s->T0, s->T0, s->T1);
         } else {
-            tcg_gen_rotl_tl(s->T0, s->T0, cpu_T1);
+            tcg_gen_rotl_tl(s->T0, s->T0, s->T1);
         }
         break;
     }
@@ -1584,7 +1583,7 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
        exactly as we computed above.  */
     t0 = tcg_const_i32(0);
     t1 = tcg_temp_new_i32();
-    tcg_gen_trunc_tl_i32(t1, cpu_T1);
+    tcg_gen_trunc_tl_i32(t1, s->T1);
     tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX); 
     tcg_gen_movi_i32(cpu_tmp3_i32, CC_OP_EFLAGS);
     tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
@@ -1689,17 +1688,17 @@ static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
     if (is_right) {
         switch (ot) {
         case MO_8:
-            gen_helper_rcrb(s->T0, cpu_env, s->T0, cpu_T1);
+            gen_helper_rcrb(s->T0, cpu_env, s->T0, s->T1);
             break;
         case MO_16:
-            gen_helper_rcrw(s->T0, cpu_env, s->T0, cpu_T1);
+            gen_helper_rcrw(s->T0, cpu_env, s->T0, s->T1);
             break;
         case MO_32:
-            gen_helper_rcrl(s->T0, cpu_env, s->T0, cpu_T1);
+            gen_helper_rcrl(s->T0, cpu_env, s->T0, s->T1);
             break;
 #ifdef TARGET_X86_64
         case MO_64:
-            gen_helper_rcrq(s->T0, cpu_env, s->T0, cpu_T1);
+            gen_helper_rcrq(s->T0, cpu_env, s->T0, s->T1);
             break;
 #endif
         default:
@@ -1708,17 +1707,17 @@ static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
     } else {
         switch (ot) {
         case MO_8:
-            gen_helper_rclb(s->T0, cpu_env, s->T0, cpu_T1);
+            gen_helper_rclb(s->T0, cpu_env, s->T0, s->T1);
             break;
         case MO_16:
-            gen_helper_rclw(s->T0, cpu_env, s->T0, cpu_T1);
+            gen_helper_rclw(s->T0, cpu_env, s->T0, s->T1);
             break;
         case MO_32:
-            gen_helper_rcll(s->T0, cpu_env, s->T0, cpu_T1);
+            gen_helper_rcll(s->T0, cpu_env, s->T0, s->T1);
             break;
 #ifdef TARGET_X86_64
         case MO_64:
-            gen_helper_rclq(s->T0, cpu_env, s->T0, cpu_T1);
+            gen_helper_rclq(s->T0, cpu_env, s->T0, s->T1);
             break;
 #endif
         default:
@@ -1752,11 +1751,11 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
            This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
            portion by constructing it as a 32-bit value.  */
         if (is_right) {
-            tcg_gen_deposit_tl(cpu_tmp0, s->T0, cpu_T1, 16, 16);
-            tcg_gen_mov_tl(cpu_T1, s->T0);
+            tcg_gen_deposit_tl(cpu_tmp0, s->T0, s->T1, 16, 16);
+            tcg_gen_mov_tl(s->T1, s->T0);
             tcg_gen_mov_tl(s->T0, cpu_tmp0);
         } else {
-            tcg_gen_deposit_tl(cpu_T1, s->T0, cpu_T1, 16, 16);
+            tcg_gen_deposit_tl(s->T1, s->T0, s->T1, 16, 16);
         }
         /* FALLTHRU */
 #ifdef TARGET_X86_64
@@ -1764,11 +1763,11 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
         /* Concatenate the two 32-bit values and use a 64-bit shift.  */
         tcg_gen_subi_tl(cpu_tmp0, count, 1);
         if (is_right) {
-            tcg_gen_concat_tl_i64(s->T0, s->T0, cpu_T1);
+            tcg_gen_concat_tl_i64(s->T0, s->T0, s->T1);
             tcg_gen_shr_i64(cpu_tmp0, s->T0, cpu_tmp0);
             tcg_gen_shr_i64(s->T0, s->T0, count);
         } else {
-            tcg_gen_concat_tl_i64(s->T0, cpu_T1, s->T0);
+            tcg_gen_concat_tl_i64(s->T0, s->T1, s->T0);
             tcg_gen_shl_i64(cpu_tmp0, s->T0, cpu_tmp0);
             tcg_gen_shl_i64(s->T0, s->T0, count);
             tcg_gen_shri_i64(cpu_tmp0, cpu_tmp0, 32);
@@ -1783,24 +1782,24 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
 
             tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
             tcg_gen_shr_tl(s->T0, s->T0, count);
-            tcg_gen_shl_tl(cpu_T1, cpu_T1, cpu_tmp4);
+            tcg_gen_shl_tl(s->T1, s->T1, cpu_tmp4);
         } else {
             tcg_gen_shl_tl(cpu_tmp0, s->T0, cpu_tmp0);
             if (ot == MO_16) {
                 /* Only needed if count > 16, for Intel behaviour.  */
                 tcg_gen_subfi_tl(cpu_tmp4, 33, count);
-                tcg_gen_shr_tl(cpu_tmp4, cpu_T1, cpu_tmp4);
+                tcg_gen_shr_tl(cpu_tmp4, s->T1, cpu_tmp4);
                 tcg_gen_or_tl(cpu_tmp0, cpu_tmp0, cpu_tmp4);
             }
 
             tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
             tcg_gen_shl_tl(s->T0, s->T0, count);
-            tcg_gen_shr_tl(cpu_T1, cpu_T1, cpu_tmp4);
+            tcg_gen_shr_tl(s->T1, s->T1, cpu_tmp4);
         }
         tcg_gen_movi_tl(cpu_tmp4, 0);
-        tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T1, count, cpu_tmp4,
-                           cpu_tmp4, cpu_T1);
-        tcg_gen_or_tl(s->T0, s->T0, cpu_T1);
+        tcg_gen_movcond_tl(TCG_COND_EQ, s->T1, count, cpu_tmp4,
+                           cpu_tmp4, s->T1);
+        tcg_gen_or_tl(s->T0, s->T0, s->T1);
         break;
     }
 
@@ -1814,7 +1813,7 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
 static void gen_shift(DisasContext *s1, int op, TCGMemOp ot, int d, int s)
 {
     if (s != OR_TMP1)
-        gen_op_mov_v_reg(ot, cpu_T1, s);
+        gen_op_mov_v_reg(ot, s1->T1, s);
     switch(op) {
     case OP_ROL:
         gen_rot_rm_T1(s1, ot, d, 0);
@@ -1862,7 +1861,7 @@ static void gen_shifti(DisasContext *s1, int op, TCGMemOp ot, int d, int c)
         break;
     default:
         /* currently not optimized */
-        tcg_gen_movi_tl(cpu_T1, c);
+        tcg_gen_movi_tl(s1->T1, c);
         gen_shift(s1, op, ot, d, OR_TMP1);
         break;
     }
@@ -2242,7 +2241,7 @@ static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
 
     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
 
-    cc = gen_prepare_cc(s, b, cpu_T1);
+    cc = gen_prepare_cc(s, b, s->T1);
     if (cc.mask != -1) {
         TCGv t0 = tcg_temp_new();
         tcg_gen_andi_tl(t0, cc.reg, cc.mask);
@@ -2416,8 +2415,8 @@ static void gen_enter(DisasContext *s, int esp_addend, int level)
     int size = 1 << d_ot;
 
     /* Push BP; compute FrameTemp into T1.  */
-    tcg_gen_subi_tl(cpu_T1, cpu_regs[R_ESP], size);
-    gen_lea_v_seg(s, a_ot, cpu_T1, R_SS, -1);
+    tcg_gen_subi_tl(s->T1, cpu_regs[R_ESP], size);
+    gen_lea_v_seg(s, a_ot, s->T1, R_SS, -1);
     gen_op_st_v(s, d_ot, cpu_regs[R_EBP], s->A0);
 
     level &= 31;
@@ -2430,23 +2429,23 @@ static void gen_enter(DisasContext *s, int esp_addend, int level)
             gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
             gen_op_ld_v(s, d_ot, cpu_tmp0, s->A0);
 
-            tcg_gen_subi_tl(s->A0, cpu_T1, size * i);
+            tcg_gen_subi_tl(s->A0, s->T1, size * i);
             gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
             gen_op_st_v(s, d_ot, cpu_tmp0, s->A0);
         }
 
         /* Push the current FrameTemp as the last level.  */
-        tcg_gen_subi_tl(s->A0, cpu_T1, size * level);
+        tcg_gen_subi_tl(s->A0, s->T1, size * level);
         gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
-        gen_op_st_v(s, d_ot, cpu_T1, s->A0);
+        gen_op_st_v(s, d_ot, s->T1, s->A0);
     }
 
     /* Copy the FrameTemp value to EBP.  */
-    gen_op_mov_reg_v(a_ot, R_EBP, cpu_T1);
+    gen_op_mov_reg_v(a_ot, R_EBP, s->T1);
 
     /* Compute the final value of ESP.  */
-    tcg_gen_subi_tl(cpu_T1, cpu_T1, esp_addend + size * level);
-    gen_op_mov_reg_v(a_ot, R_ESP, cpu_T1);
+    tcg_gen_subi_tl(s->T1, s->T1, esp_addend + size * level);
+    gen_op_mov_reg_v(a_ot, R_ESP, s->T1);
 }
 
 static void gen_leave(DisasContext *s)
@@ -2457,10 +2456,10 @@ static void gen_leave(DisasContext *s)
     gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
     gen_op_ld_v(s, d_ot, s->T0, s->A0);
 
-    tcg_gen_addi_tl(cpu_T1, cpu_regs[R_EBP], 1 << d_ot);
+    tcg_gen_addi_tl(s->T1, cpu_regs[R_EBP], 1 << d_ot);
 
     gen_op_mov_reg_v(d_ot, R_EBP, s->T0);
-    gen_op_mov_reg_v(a_ot, R_ESP, cpu_T1);
+    gen_op_mov_reg_v(a_ot, R_ESP, s->T1);
 }
 
 static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
@@ -3854,10 +3853,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound,
                                        s->A0, bound);
                     tcg_temp_free(bound);
-                    tcg_gen_movi_tl(cpu_T1, 1);
-                    tcg_gen_shl_tl(cpu_T1, cpu_T1, s->A0);
-                    tcg_gen_subi_tl(cpu_T1, cpu_T1, 1);
-                    tcg_gen_and_tl(s->T0, s->T0, cpu_T1);
+                    tcg_gen_movi_tl(s->T1, 1);
+                    tcg_gen_shl_tl(s->T1, s->T1, s->A0);
+                    tcg_gen_subi_tl(s->T1, s->T1, 1);
+                    tcg_gen_and_tl(s->T0, s->T0, s->T1);
 
                     gen_op_mov_reg_v(ot, reg, s->T0);
                     gen_op_update1_cc(s);
@@ -3873,19 +3872,19 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 }
                 ot = mo_64_32(s->dflag);
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-                tcg_gen_ext8u_tl(cpu_T1, cpu_regs[s->vex_v]);
+                tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]);
                 {
                     TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
                     /* Note that since we're using BMILG (in order to get O
                        cleared) we need to store the inverse into C.  */
                     tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src,
-                                       cpu_T1, bound);
-                    tcg_gen_movcond_tl(TCG_COND_GT, cpu_T1, cpu_T1,
-                                       bound, bound, cpu_T1);
+                                       s->T1, bound);
+                    tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1,
+                                       bound, bound, s->T1);
                     tcg_temp_free(bound);
                 }
                 tcg_gen_movi_tl(s->A0, -1);
-                tcg_gen_shl_tl(s->A0, s->A0, cpu_T1);
+                tcg_gen_shl_tl(s->A0, s->A0, s->T1);
                 tcg_gen_andc_tl(s->T0, s->T0, s->A0);
                 gen_op_mov_reg_v(ot, reg, s->T0);
                 gen_op_update1_cc(s);
@@ -3911,10 +3910,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     break;
 #ifdef TARGET_X86_64
                 case MO_64:
-                    tcg_gen_mulu2_i64(s->T0, cpu_T1,
+                    tcg_gen_mulu2_i64(s->T0, s->T1,
                                       s->T0, cpu_regs[R_EDX]);
                     tcg_gen_mov_i64(cpu_regs[s->vex_v], s->T0);
-                    tcg_gen_mov_i64(cpu_regs[reg], cpu_T1);
+                    tcg_gen_mov_i64(cpu_regs[reg], s->T1);
                     break;
 #endif
                 }
@@ -3931,11 +3930,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 /* Note that by zero-extending the mask operand, we
                    automatically handle zero-extending the result.  */
                 if (ot == MO_64) {
-                    tcg_gen_mov_tl(cpu_T1, cpu_regs[s->vex_v]);
+                    tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
                 } else {
-                    tcg_gen_ext32u_tl(cpu_T1, cpu_regs[s->vex_v]);
+                    tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
                 }
-                gen_helper_pdep(cpu_regs[reg], s->T0, cpu_T1);
+                gen_helper_pdep(cpu_regs[reg], s->T0, s->T1);
                 break;
 
             case 0x2f5: /* pext Gy, By, Ey */
@@ -3949,11 +3948,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 /* Note that by zero-extending the mask operand, we
                    automatically handle zero-extending the result.  */
                 if (ot == MO_64) {
-                    tcg_gen_mov_tl(cpu_T1, cpu_regs[s->vex_v]);
+                    tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
                 } else {
-                    tcg_gen_ext32u_tl(cpu_T1, cpu_regs[s->vex_v]);
+                    tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
                 }
-                gen_helper_pext(cpu_regs[reg], s->T0, cpu_T1);
+                gen_helper_pext(cpu_regs[reg], s->T0, s->T1);
                 break;
 
             case 0x1f6: /* adcx Gy, Ey */
@@ -4045,22 +4044,22 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 ot = mo_64_32(s->dflag);
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                 if (ot == MO_64) {
-                    tcg_gen_andi_tl(cpu_T1, cpu_regs[s->vex_v], 63);
+                    tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 63);
                 } else {
-                    tcg_gen_andi_tl(cpu_T1, cpu_regs[s->vex_v], 31);
+                    tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 31);
                 }
                 if (b == 0x1f7) {
-                    tcg_gen_shl_tl(s->T0, s->T0, cpu_T1);
+                    tcg_gen_shl_tl(s->T0, s->T0, s->T1);
                 } else if (b == 0x2f7) {
                     if (ot != MO_64) {
                         tcg_gen_ext32s_tl(s->T0, s->T0);
                     }
-                    tcg_gen_sar_tl(s->T0, s->T0, cpu_T1);
+                    tcg_gen_sar_tl(s->T0, s->T0, s->T1);
                 } else {
                     if (ot != MO_64) {
                         tcg_gen_ext32u_tl(s->T0, s->T0);
                     }
-                    tcg_gen_shr_tl(s->T0, s->T0, cpu_T1);
+                    tcg_gen_shr_tl(s->T0, s->T0, s->T1);
                 }
                 gen_op_mov_reg_v(ot, reg, s->T0);
                 break;
@@ -4080,16 +4079,16 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 tcg_gen_mov_tl(cpu_cc_src, s->T0);
                 switch (reg & 7) {
                 case 1: /* blsr By,Ey */
-                    tcg_gen_subi_tl(cpu_T1, s->T0, 1);
-                    tcg_gen_and_tl(s->T0, s->T0, cpu_T1);
+                    tcg_gen_subi_tl(s->T1, s->T0, 1);
+                    tcg_gen_and_tl(s->T0, s->T0, s->T1);
                     break;
                 case 2: /* blsmsk By,Ey */
-                    tcg_gen_subi_tl(cpu_T1, s->T0, 1);
-                    tcg_gen_xor_tl(s->T0, s->T0, cpu_T1);
+                    tcg_gen_subi_tl(s->T1, s->T0, 1);
+                    tcg_gen_xor_tl(s->T0, s->T0, s->T1);
                     break;
                 case 3: /* blsi By, Ey */
-                    tcg_gen_neg_tl(cpu_T1, s->T0);
-                    tcg_gen_and_tl(s->T0, s->T0, cpu_T1);
+                    tcg_gen_neg_tl(s->T1, s->T0);
+                    tcg_gen_and_tl(s->T0, s->T0, s->T1);
                     break;
                 default:
                     goto unknown_op;
@@ -4677,7 +4676,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 } else {
                     opreg = rm;
                 }
-                gen_op_mov_v_reg(ot, cpu_T1, reg);
+                gen_op_mov_v_reg(ot, s->T1, reg);
                 gen_op(s, op, ot, opreg);
                 break;
             case 1: /* OP Gv, Ev */
@@ -4687,17 +4686,17 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 rm = (modrm & 7) | REX_B(s);
                 if (mod != 3) {
                     gen_lea_modrm(env, s, modrm);
-                    gen_op_ld_v(s, ot, cpu_T1, s->A0);
+                    gen_op_ld_v(s, ot, s->T1, s->A0);
                 } else if (op == OP_XORL && rm == reg) {
                     goto xor_zero;
                 } else {
-                    gen_op_mov_v_reg(ot, cpu_T1, rm);
+                    gen_op_mov_v_reg(ot, s->T1, rm);
                 }
                 gen_op(s, op, ot, reg);
                 break;
             case 2: /* OP A, Iv */
                 val = insn_get(env, s, ot);
-                tcg_gen_movi_tl(cpu_T1, val);
+                tcg_gen_movi_tl(s->T1, val);
                 gen_op(s, op, ot, OR_EAX);
                 break;
             }
@@ -4743,7 +4742,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 val = (int8_t)insn_get(env, s, MO_8);
                 break;
             }
-            tcg_gen_movi_tl(cpu_T1, val);
+            tcg_gen_movi_tl(s->T1, val);
             gen_op(s, op, ot, opreg);
         }
         break;
@@ -4783,7 +4782,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         switch(op) {
         case 0: /* test */
             val = insn_get(env, s, ot);
-            tcg_gen_movi_tl(cpu_T1, val);
+            tcg_gen_movi_tl(s->T1, val);
             gen_op_testl_T0_T1_cc(s);
             set_cc_op(s, CC_OP_LOGICB + ot);
             break;
@@ -4847,22 +4846,22 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         case 4: /* mul */
             switch(ot) {
             case MO_8:
-                gen_op_mov_v_reg(MO_8, cpu_T1, R_EAX);
+                gen_op_mov_v_reg(MO_8, s->T1, R_EAX);
                 tcg_gen_ext8u_tl(s->T0, s->T0);
-                tcg_gen_ext8u_tl(cpu_T1, cpu_T1);
+                tcg_gen_ext8u_tl(s->T1, s->T1);
                 /* XXX: use 32 bit mul which could be faster */
-                tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
+                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
                 gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
                 tcg_gen_andi_tl(cpu_cc_src, s->T0, 0xff00);
                 set_cc_op(s, CC_OP_MULB);
                 break;
             case MO_16:
-                gen_op_mov_v_reg(MO_16, cpu_T1, R_EAX);
+                gen_op_mov_v_reg(MO_16, s->T1, R_EAX);
                 tcg_gen_ext16u_tl(s->T0, s->T0);
-                tcg_gen_ext16u_tl(cpu_T1, cpu_T1);
+                tcg_gen_ext16u_tl(s->T1, s->T1);
                 /* XXX: use 32 bit mul which could be faster */
-                tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
+                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
                 gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
                 tcg_gen_shri_tl(s->T0, s->T0, 16);
@@ -4896,11 +4895,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         case 5: /* imul */
             switch(ot) {
             case MO_8:
-                gen_op_mov_v_reg(MO_8, cpu_T1, R_EAX);
+                gen_op_mov_v_reg(MO_8, s->T1, R_EAX);
                 tcg_gen_ext8s_tl(s->T0, s->T0);
-                tcg_gen_ext8s_tl(cpu_T1, cpu_T1);
+                tcg_gen_ext8s_tl(s->T1, s->T1);
                 /* XXX: use 32 bit mul which could be faster */
-                tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
+                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
                 gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
                 tcg_gen_ext8s_tl(cpu_tmp0, s->T0);
@@ -4908,11 +4907,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 set_cc_op(s, CC_OP_MULB);
                 break;
             case MO_16:
-                gen_op_mov_v_reg(MO_16, cpu_T1, R_EAX);
+                gen_op_mov_v_reg(MO_16, s->T1, R_EAX);
                 tcg_gen_ext16s_tl(s->T0, s->T0);
-                tcg_gen_ext16s_tl(cpu_T1, cpu_T1);
+                tcg_gen_ext16s_tl(s->T1, s->T1);
                 /* XXX: use 32 bit mul which could be faster */
-                tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
+                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
                 gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
                 tcg_gen_ext16s_tl(cpu_tmp0, s->T0);
@@ -5041,25 +5040,25 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 tcg_gen_ext16u_tl(s->T0, s->T0);
             }
             next_eip = s->pc - s->cs_base;
-            tcg_gen_movi_tl(cpu_T1, next_eip);
-            gen_push_v(s, cpu_T1);
+            tcg_gen_movi_tl(s->T1, next_eip);
+            gen_push_v(s, s->T1);
             gen_op_jmp_v(s->T0);
             gen_bnd_jmp(s);
             gen_jr(s, s->T0);
             break;
         case 3: /* lcall Ev */
-            gen_op_ld_v(s, ot, cpu_T1, s->A0);
+            gen_op_ld_v(s, ot, s->T1, s->A0);
             gen_add_A0_im(s, 1 << ot);
             gen_op_ld_v(s, MO_16, s->T0, s->A0);
         do_lcall:
             if (s->pe && !s->vm86) {
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-                gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, cpu_T1,
+                gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, s->T1,
                                            tcg_const_i32(dflag - 1),
                                            tcg_const_tl(s->pc - s->cs_base));
             } else {
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-                gen_helper_lcall_real(cpu_env, cpu_tmp2_i32, cpu_T1,
+                gen_helper_lcall_real(cpu_env, cpu_tmp2_i32, s->T1,
                                       tcg_const_i32(dflag - 1),
                                       tcg_const_i32(s->pc - s->cs_base));
             }
@@ -5075,17 +5074,17 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_jr(s, s->T0);
             break;
         case 5: /* ljmp Ev */
-            gen_op_ld_v(s, ot, cpu_T1, s->A0);
+            gen_op_ld_v(s, ot, s->T1, s->A0);
             gen_add_A0_im(s, 1 << ot);
             gen_op_ld_v(s, MO_16, s->T0, s->A0);
         do_ljmp:
             if (s->pe && !s->vm86) {
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-                gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, cpu_T1,
+                gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, s->T1,
                                           tcg_const_tl(s->pc - s->cs_base));
             } else {
                 gen_op_movl_seg_T0_vm(s, R_CS);
-                gen_op_jmp_v(cpu_T1);
+                gen_op_jmp_v(s->T1);
             }
             tcg_gen_ld_tl(cpu_tmp4, cpu_env, offsetof(CPUX86State, eip));
             gen_jr(s, cpu_tmp4);
@@ -5106,7 +5105,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         reg = ((modrm >> 3) & 7) | rex_r;
 
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-        gen_op_mov_v_reg(ot, cpu_T1, reg);
+        gen_op_mov_v_reg(ot, s->T1, reg);
         gen_op_testl_T0_T1_cc(s);
         set_cc_op(s, CC_OP_LOGICB + ot);
         break;
@@ -5117,7 +5116,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         val = insn_get(env, s, ot);
 
         gen_op_mov_v_reg(ot, s->T0, OR_EAX);
-        tcg_gen_movi_tl(cpu_T1, val);
+        tcg_gen_movi_tl(s->T1, val);
         gen_op_testl_T0_T1_cc(s);
         set_cc_op(s, CC_OP_LOGICB + ot);
         break;
@@ -5183,25 +5182,25 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
         if (b == 0x69) {
             val = insn_get(env, s, ot);
-            tcg_gen_movi_tl(cpu_T1, val);
+            tcg_gen_movi_tl(s->T1, val);
         } else if (b == 0x6b) {
             val = (int8_t)insn_get(env, s, MO_8);
-            tcg_gen_movi_tl(cpu_T1, val);
+            tcg_gen_movi_tl(s->T1, val);
         } else {
-            gen_op_mov_v_reg(ot, cpu_T1, reg);
+            gen_op_mov_v_reg(ot, s->T1, reg);
         }
         switch (ot) {
 #ifdef TARGET_X86_64
         case MO_64:
-            tcg_gen_muls2_i64(cpu_regs[reg], cpu_T1, s->T0, cpu_T1);
+            tcg_gen_muls2_i64(cpu_regs[reg], s->T1, s->T0, s->T1);
             tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
             tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
-            tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_T1);
+            tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, s->T1);
             break;
 #endif
         case MO_32:
             tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-            tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
+            tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T1);
             tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
                               cpu_tmp2_i32, cpu_tmp3_i32);
             tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
@@ -5212,9 +5211,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             break;
         default:
             tcg_gen_ext16s_tl(s->T0, s->T0);
-            tcg_gen_ext16s_tl(cpu_T1, cpu_T1);
+            tcg_gen_ext16s_tl(s->T1, s->T1);
             /* XXX: use 32 bit mul which could be faster */
-            tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
+            tcg_gen_mul_tl(s->T0, s->T0, s->T1);
             tcg_gen_mov_tl(cpu_cc_dst, s->T0);
             tcg_gen_ext16s_tl(cpu_tmp0, s->T0);
             tcg_gen_sub_tl(cpu_cc_src, s->T0, cpu_tmp0);
@@ -5232,22 +5231,22 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         gen_op_mov_v_reg(ot, s->T0, reg);
         if (mod == 3) {
             rm = (modrm & 7) | REX_B(s);
-            gen_op_mov_v_reg(ot, cpu_T1, rm);
-            tcg_gen_add_tl(s->T0, s->T0, cpu_T1);
-            gen_op_mov_reg_v(ot, reg, cpu_T1);
+            gen_op_mov_v_reg(ot, s->T1, rm);
+            tcg_gen_add_tl(s->T0, s->T0, s->T1);
+            gen_op_mov_reg_v(ot, reg, s->T1);
             gen_op_mov_reg_v(ot, rm, s->T0);
         } else {
             gen_lea_modrm(env, s, modrm);
             if (s->prefix & PREFIX_LOCK) {
-                tcg_gen_atomic_fetch_add_tl(cpu_T1, s->A0, s->T0,
+                tcg_gen_atomic_fetch_add_tl(s->T1, s->A0, s->T0,
                                             s->mem_index, ot | MO_LE);
-                tcg_gen_add_tl(s->T0, s->T0, cpu_T1);
+                tcg_gen_add_tl(s->T0, s->T0, s->T1);
             } else {
-                gen_op_ld_v(s, ot, cpu_T1, s->A0);
-                tcg_gen_add_tl(s->T0, s->T0, cpu_T1);
+                gen_op_ld_v(s, ot, s->T1, s->A0);
+                tcg_gen_add_tl(s->T0, s->T0, s->T1);
                 gen_op_st_v(s, ot, s->T0, s->A0);
             }
-            gen_op_mov_reg_v(ot, reg, cpu_T1);
+            gen_op_mov_reg_v(ot, reg, s->T1);
         }
         gen_op_update2_cc(s);
         set_cc_op(s, CC_OP_ADDB + ot);
@@ -5653,16 +5652,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             rm = (modrm & 7) | REX_B(s);
         do_xchg_reg:
             gen_op_mov_v_reg(ot, s->T0, reg);
-            gen_op_mov_v_reg(ot, cpu_T1, rm);
+            gen_op_mov_v_reg(ot, s->T1, rm);
             gen_op_mov_reg_v(ot, rm, s->T0);
-            gen_op_mov_reg_v(ot, reg, cpu_T1);
+            gen_op_mov_reg_v(ot, reg, s->T1);
         } else {
             gen_lea_modrm(env, s, modrm);
             gen_op_mov_v_reg(ot, s->T0, reg);
             /* for xchg, lock is implicit */
-            tcg_gen_atomic_xchg_tl(cpu_T1, s->A0, s->T0,
+            tcg_gen_atomic_xchg_tl(s->T1, s->A0, s->T0,
                                    s->mem_index, ot | MO_LE);
-            gen_op_mov_reg_v(ot, reg, cpu_T1);
+            gen_op_mov_reg_v(ot, reg, s->T1);
         }
         break;
     case 0xc4: /* les Gv */
@@ -5689,13 +5688,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         if (mod == 3)
             goto illegal_op;
         gen_lea_modrm(env, s, modrm);
-        gen_op_ld_v(s, ot, cpu_T1, s->A0);
+        gen_op_ld_v(s, ot, s->T1, s->A0);
         gen_add_A0_im(s, 1 << ot);
         /* load the segment first to handle exceptions properly */
         gen_op_ld_v(s, MO_16, s->T0, s->A0);
         gen_movl_seg_T0(s, op);
         /* then put the data */
-        gen_op_mov_reg_v(ot, reg, cpu_T1);
+        gen_op_mov_reg_v(ot, reg, s->T1);
         if (s->base.is_jmp) {
             gen_jmp_im(s->pc - s->cs_base);
             gen_eob(s);
@@ -5774,7 +5773,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         } else {
             opreg = rm;
         }
-        gen_op_mov_v_reg(ot, cpu_T1, reg);
+        gen_op_mov_v_reg(ot, s->T1, reg);
 
         if (shift) {
             TCGv imm = tcg_const_tl(x86_ldub_code(env, s));
@@ -6387,8 +6386,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_io_start();
 	}
         tcg_gen_movi_i32(cpu_tmp2_i32, val);
-        gen_helper_in_func(ot, cpu_T1, cpu_tmp2_i32);
-        gen_op_mov_reg_v(ot, R_EAX, cpu_T1);
+        gen_helper_in_func(ot, s->T1, cpu_tmp2_i32);
+        gen_op_mov_reg_v(ot, R_EAX, s->T1);
         gen_bpt_io(s, cpu_tmp2_i32, ot);
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_end();
@@ -6402,13 +6401,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         tcg_gen_movi_tl(s->T0, val);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      svm_is_rep(prefixes));
-        gen_op_mov_v_reg(ot, cpu_T1, R_EAX);
+        gen_op_mov_v_reg(ot, s->T1, R_EAX);
 
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_start();
 	}
         tcg_gen_movi_i32(cpu_tmp2_i32, val);
-        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
+        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T1);
         gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
         gen_bpt_io(s, cpu_tmp2_i32, ot);
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
@@ -6426,8 +6425,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_io_start();
 	}
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-        gen_helper_in_func(ot, cpu_T1, cpu_tmp2_i32);
-        gen_op_mov_reg_v(ot, R_EAX, cpu_T1);
+        gen_helper_in_func(ot, s->T1, cpu_tmp2_i32);
+        gen_op_mov_reg_v(ot, R_EAX, s->T1);
         gen_bpt_io(s, cpu_tmp2_i32, ot);
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_end();
@@ -6440,13 +6439,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      svm_is_rep(prefixes));
-        gen_op_mov_v_reg(ot, cpu_T1, R_EAX);
+        gen_op_mov_v_reg(ot, s->T1, R_EAX);
 
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_start();
 	}
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
+        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T1);
         gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
         gen_bpt_io(s, cpu_tmp2_i32, ot);
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
@@ -6552,7 +6551,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             selector = insn_get(env, s, MO_16);
 
             tcg_gen_movi_tl(s->T0, selector);
-            tcg_gen_movi_tl(cpu_T1, offset);
+            tcg_gen_movi_tl(s->T1, offset);
         }
         goto do_lcall;
     case 0xe9: /* jmp im */
@@ -6581,7 +6580,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             selector = insn_get(env, s, MO_16);
 
             tcg_gen_movi_tl(s->T0, selector);
-            tcg_gen_movi_tl(cpu_T1, offset);
+            tcg_gen_movi_tl(s->T1, offset);
         }
         goto do_ljmp;
     case 0xeb: /* jmp Jb */
@@ -6753,7 +6752,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         }
         /* load shift */
         val = x86_ldub_code(env, s);
-        tcg_gen_movi_tl(cpu_T1, val);
+        tcg_gen_movi_tl(s->T1, val);
         if (op < 4)
             goto unknown_op;
         op -= 4;
@@ -6775,12 +6774,12 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         reg = ((modrm >> 3) & 7) | rex_r;
         mod = (modrm >> 6) & 3;
         rm = (modrm & 7) | REX_B(s);
-        gen_op_mov_v_reg(MO_32, cpu_T1, reg);
+        gen_op_mov_v_reg(MO_32, s->T1, reg);
         if (mod != 3) {
             AddressParts a = gen_lea_modrm_0(env, s, modrm);
             /* specific case: we need to add a displacement */
-            gen_exts(ot, cpu_T1);
-            tcg_gen_sari_tl(cpu_tmp0, cpu_T1, 3 + ot);
+            gen_exts(ot, s->T1);
+            tcg_gen_sari_tl(cpu_tmp0, s->T1, 3 + ot);
             tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, ot);
             tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), cpu_tmp0);
             gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
@@ -6791,9 +6790,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_op_mov_v_reg(ot, s->T0, rm);
         }
     bt_op:
-        tcg_gen_andi_tl(cpu_T1, cpu_T1, (1 << (3 + ot)) - 1);
+        tcg_gen_andi_tl(s->T1, s->T1, (1 << (3 + ot)) - 1);
         tcg_gen_movi_tl(cpu_tmp0, 1);
-        tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1);
+        tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, s->T1);
         if (s->prefix & PREFIX_LOCK) {
             switch (op) {
             case 0: /* bt */
@@ -6816,9 +6815,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                                             s->mem_index, ot | MO_LE);
                 break;
             }
-            tcg_gen_shr_tl(cpu_tmp4, s->T0, cpu_T1);
+            tcg_gen_shr_tl(cpu_tmp4, s->T0, s->T1);
         } else {
-            tcg_gen_shr_tl(cpu_tmp4, s->T0, cpu_T1);
+            tcg_gen_shr_tl(cpu_tmp4, s->T0, s->T1);
             switch (op) {
             case 0: /* bt */
                 /* Data already loaded; nothing to do.  */
@@ -6914,8 +6913,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             if (b & 1) {
                 /* For bsr, return the bit index of the first 1 bit,
                    not the count of leading zeros.  */
-                tcg_gen_xori_tl(cpu_T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
-                tcg_gen_clz_tl(s->T0, s->T0, cpu_T1);
+                tcg_gen_xori_tl(s->T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
+                tcg_gen_clz_tl(s->T0, s->T0, s->T1);
                 tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1);
             } else {
                 tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]);
@@ -7512,14 +7511,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             }
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_WRITE);
             gen_lea_modrm(env, s, modrm);
-            gen_op_ld_v(s, MO_16, cpu_T1, s->A0);
+            gen_op_ld_v(s, MO_16, s->T1, s->A0);
             gen_add_A0_im(s, 2);
             gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
             if (dflag == MO_16) {
                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
             }
             tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
-            tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, gdt.limit));
+            tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, gdt.limit));
             break;
 
         CASE_MODRM_MEM_OP(3): /* lidt */
@@ -7529,14 +7528,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             }
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_WRITE);
             gen_lea_modrm(env, s, modrm);
-            gen_op_ld_v(s, MO_16, cpu_T1, s->A0);
+            gen_op_ld_v(s, MO_16, s->T1, s->A0);
             gen_add_A0_im(s, 2);
             gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
             if (dflag == MO_16) {
                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
             }
             tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
-            tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, idt.limit));
+            tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, idt.limit));
             break;
 
         CASE_MODRM_OP(4): /* smsw */
@@ -8471,7 +8470,7 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
 #endif
 
     dc->T0 = tcg_temp_new();
-    cpu_T1 = tcg_temp_new();
+    dc->T1 = tcg_temp_new();
     dc->A0 = tcg_temp_new();
 
     cpu_tmp0 = tcg_temp_new();
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [Qemu-devel] [PATCH v3 05/13] target/i386: move cpu_tmp0 to DisasContext
  2018-09-11 20:28 [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg Emilio G. Cota
                   ` (3 preceding siblings ...)
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 04/13] target/i386: move cpu_T1 " Emilio G. Cota
@ 2018-09-11 20:28 ` Emilio G. Cota
  2018-09-11 20:51   ` Richard Henderson
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 06/13] target/i386: move cpu_tmp4 " Emilio G. Cota
                   ` (8 subsequent siblings)
  13 siblings, 1 reply; 32+ messages in thread
From: Emilio G. Cota @ 2018-09-11 20:28 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 target/i386/translate.c | 282 ++++++++++++++++++++--------------------
 1 file changed, 144 insertions(+), 138 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index bd27e65344..873231fb44 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -78,8 +78,8 @@ static TCGv cpu_regs[CPU_NB_REGS];
 static TCGv cpu_seg_base[6];
 static TCGv_i64 cpu_bndl[4];
 static TCGv_i64 cpu_bndu[4];
-/* local register indexes (only used inside old micro ops) */
-static TCGv cpu_tmp0, cpu_tmp4;
+
+static TCGv cpu_tmp4;
 static TCGv_ptr cpu_ptr0, cpu_ptr1;
 static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
 static TCGv_i64 cpu_tmp1_i64;
@@ -139,6 +139,9 @@ typedef struct DisasContext {
     TCGv T0;
     TCGv T1;
 
+    /* TCG local register indexes (only used inside old micro ops) */
+    TCGv tmp0;
+
     sigjmp_buf jmpbuf;
 } DisasContext;
 
@@ -406,16 +409,17 @@ static inline void gen_op_jmp_v(TCGv dest)
     tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip));
 }
 
-static inline void gen_op_add_reg_im(TCGMemOp size, int reg, int32_t val)
+static inline
+void gen_op_add_reg_im(DisasContext *s, TCGMemOp size, int reg, int32_t val)
 {
-    tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val);
-    gen_op_mov_reg_v(size, reg, cpu_tmp0);
+    tcg_gen_addi_tl(s->tmp0, cpu_regs[reg], val);
+    gen_op_mov_reg_v(size, reg, s->tmp0);
 }
 
 static inline void gen_op_add_reg_T0(DisasContext *s, TCGMemOp size, int reg)
 {
-    tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], s->T0);
-    gen_op_mov_reg_v(size, reg, cpu_tmp0);
+    tcg_gen_add_tl(s->tmp0, cpu_regs[reg], s->T0);
+    gen_op_mov_reg_v(size, reg, s->tmp0);
 }
 
 static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
@@ -437,10 +441,10 @@ static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
     }
 }
 
-static inline void gen_jmp_im(target_ulong pc)
+static inline void gen_jmp_im(DisasContext *s, target_ulong pc)
 {
-    tcg_gen_movi_tl(cpu_tmp0, pc);
-    gen_op_jmp_v(cpu_tmp0);
+    tcg_gen_movi_tl(s->tmp0, pc);
+    gen_op_jmp_v(s->tmp0);
 }
 
 /* Compute SEG:REG into A0.  SEG is selected from the override segment
@@ -556,18 +560,20 @@ static void gen_exts(TCGMemOp ot, TCGv reg)
     gen_ext_tl(reg, reg, ot, true);
 }
 
-static inline void gen_op_jnz_ecx(TCGMemOp size, TCGLabel *label1)
+static inline
+void gen_op_jnz_ecx(DisasContext *s, TCGMemOp size, TCGLabel *label1)
 {
-    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
-    gen_extu(size, cpu_tmp0);
-    tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, label1);
+    tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
+    gen_extu(size, s->tmp0);
+    tcg_gen_brcondi_tl(TCG_COND_NE, s->tmp0, 0, label1);
 }
 
-static inline void gen_op_jz_ecx(TCGMemOp size, TCGLabel *label1)
+static inline
+void gen_op_jz_ecx(DisasContext *s, TCGMemOp size, TCGLabel *label1)
 {
-    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
-    gen_extu(size, cpu_tmp0);
-    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
+    tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
+    gen_extu(size, s->tmp0);
+    tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
 }
 
 static void gen_helper_in_func(TCGMemOp ot, TCGv v, TCGv_i32 n)
@@ -627,7 +633,7 @@ static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
     }
     if(s->flags & HF_SVMI_MASK) {
         gen_update_cc_op(s);
-        gen_jmp_im(cur_eip);
+        gen_jmp_im(s, cur_eip);
         svm_flags |= (1 << (4 + ot));
         next_eip = s->pc - s->cs_base;
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
@@ -743,9 +749,9 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
     case CC_OP_SUBB ... CC_OP_SUBQ:
         /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
         size = s->cc_op - CC_OP_SUBB;
-        t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
+        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
         /* If no temporary was used, be careful not to alias t1 and t0.  */
-        t0 = t1 == cpu_cc_src ? cpu_tmp0 : reg;
+        t0 = t1 == cpu_cc_src ? s->tmp0 : reg;
         tcg_gen_mov_tl(t0, s->cc_srcT);
         gen_extu(size, t0);
         goto add_sub;
@@ -753,7 +759,7 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
     case CC_OP_ADDB ... CC_OP_ADDQ:
         /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
         size = s->cc_op - CC_OP_ADDB;
-        t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
+        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
         t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
     add_sub:
         return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
@@ -905,7 +911,7 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
         case JCC_BE:
             tcg_gen_mov_tl(cpu_tmp4, s->cc_srcT);
             gen_extu(size, cpu_tmp4);
-            t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
+            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
             cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = cpu_tmp4,
                                .reg2 = t0, .mask = -1, .use_reg2 = true };
             break;
@@ -918,7 +924,7 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
         fast_jcc_l:
             tcg_gen_mov_tl(cpu_tmp4, s->cc_srcT);
             gen_exts(size, cpu_tmp4);
-            t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true);
+            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, true);
             cc = (CCPrepare) { .cond = cond, .reg = cpu_tmp4,
                                .reg2 = t0, .mask = -1, .use_reg2 = true };
             break;
@@ -955,7 +961,7 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
         case JCC_L:
             gen_compute_eflags(s);
             if (reg == cpu_cc_src) {
-                reg = cpu_tmp0;
+                reg = s->tmp0;
             }
             tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
             tcg_gen_xor_tl(reg, reg, cpu_cc_src);
@@ -966,7 +972,7 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
         case JCC_LE:
             gen_compute_eflags(s);
             if (reg == cpu_cc_src) {
-                reg = cpu_tmp0;
+                reg = s->tmp0;
             }
             tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
             tcg_gen_xor_tl(reg, reg, cpu_cc_src);
@@ -1061,7 +1067,7 @@ static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
 {
     TCGLabel *l1 = gen_new_label();
     TCGLabel *l2 = gen_new_label();
-    gen_op_jnz_ecx(s->aflag, l1);
+    gen_op_jnz_ecx(s, s->aflag, l1);
     gen_set_label(l2);
     gen_jmp_tb(s, next_eip, 1);
     gen_set_label(l1);
@@ -1171,11 +1177,11 @@ static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot,              \
     gen_update_cc_op(s);                                                      \
     l2 = gen_jz_ecx_string(s, next_eip);                                      \
     gen_ ## op(s, ot);                                                        \
-    gen_op_add_reg_im(s->aflag, R_ECX, -1);                                   \
+    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
     /* a loop would cause two single step exceptions if ECX = 1               \
        before rep string_insn */                                              \
     if (s->repz_opt)                                                          \
-        gen_op_jz_ecx(s->aflag, l2);                                          \
+        gen_op_jz_ecx(s, s->aflag, l2);                                       \
     gen_jmp(s, cur_eip);                                                      \
 }
 
@@ -1189,11 +1195,11 @@ static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot,              \
     gen_update_cc_op(s);                                                      \
     l2 = gen_jz_ecx_string(s, next_eip);                                      \
     gen_ ## op(s, ot);                                                        \
-    gen_op_add_reg_im(s->aflag, R_ECX, -1);                                   \
+    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
     gen_update_cc_op(s);                                                      \
     gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2);                                 \
     if (s->repz_opt)                                                          \
-        gen_op_jz_ecx(s->aflag, l2);                                          \
+        gen_op_jz_ecx(s, s->aflag, l2);                                       \
     gen_jmp(s, cur_eip);                                                      \
 }
 
@@ -1447,27 +1453,27 @@ static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
     }
 
     tcg_gen_andi_tl(s->T1, s->T1, mask);
-    tcg_gen_subi_tl(cpu_tmp0, s->T1, 1);
+    tcg_gen_subi_tl(s->tmp0, s->T1, 1);
 
     if (is_right) {
         if (is_arith) {
             gen_exts(ot, s->T0);
-            tcg_gen_sar_tl(cpu_tmp0, s->T0, cpu_tmp0);
+            tcg_gen_sar_tl(s->tmp0, s->T0, s->tmp0);
             tcg_gen_sar_tl(s->T0, s->T0, s->T1);
         } else {
             gen_extu(ot, s->T0);
-            tcg_gen_shr_tl(cpu_tmp0, s->T0, cpu_tmp0);
+            tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
             tcg_gen_shr_tl(s->T0, s->T0, s->T1);
         }
     } else {
-        tcg_gen_shl_tl(cpu_tmp0, s->T0, cpu_tmp0);
+        tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
         tcg_gen_shl_tl(s->T0, s->T0, s->T1);
     }
 
     /* store */
     gen_op_st_rm_T0_A0(s, ot, op1);
 
-    gen_shift_flags(s, ot, s->T0, cpu_tmp0, s->T1, is_right);
+    gen_shift_flags(s, ot, s->T0, s->tmp0, s->T1, is_right);
 }
 
 static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
@@ -1640,9 +1646,9 @@ static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
                 shift = mask + 1 - shift;
             }
             gen_extu(ot, s->T0);
-            tcg_gen_shli_tl(cpu_tmp0, s->T0, shift);
+            tcg_gen_shli_tl(s->tmp0, s->T0, shift);
             tcg_gen_shri_tl(s->T0, s->T0, mask + 1 - shift);
-            tcg_gen_or_tl(s->T0, s->T0, cpu_tmp0);
+            tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
             break;
         }
     }
@@ -1751,9 +1757,9 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
            This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
            portion by constructing it as a 32-bit value.  */
         if (is_right) {
-            tcg_gen_deposit_tl(cpu_tmp0, s->T0, s->T1, 16, 16);
+            tcg_gen_deposit_tl(s->tmp0, s->T0, s->T1, 16, 16);
             tcg_gen_mov_tl(s->T1, s->T0);
-            tcg_gen_mov_tl(s->T0, cpu_tmp0);
+            tcg_gen_mov_tl(s->T0, s->tmp0);
         } else {
             tcg_gen_deposit_tl(s->T1, s->T0, s->T1, 16, 16);
         }
@@ -1761,35 +1767,35 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
 #ifdef TARGET_X86_64
     case MO_32:
         /* Concatenate the two 32-bit values and use a 64-bit shift.  */
-        tcg_gen_subi_tl(cpu_tmp0, count, 1);
+        tcg_gen_subi_tl(s->tmp0, count, 1);
         if (is_right) {
             tcg_gen_concat_tl_i64(s->T0, s->T0, s->T1);
-            tcg_gen_shr_i64(cpu_tmp0, s->T0, cpu_tmp0);
+            tcg_gen_shr_i64(s->tmp0, s->T0, s->tmp0);
             tcg_gen_shr_i64(s->T0, s->T0, count);
         } else {
             tcg_gen_concat_tl_i64(s->T0, s->T1, s->T0);
-            tcg_gen_shl_i64(cpu_tmp0, s->T0, cpu_tmp0);
+            tcg_gen_shl_i64(s->tmp0, s->T0, s->tmp0);
             tcg_gen_shl_i64(s->T0, s->T0, count);
-            tcg_gen_shri_i64(cpu_tmp0, cpu_tmp0, 32);
+            tcg_gen_shri_i64(s->tmp0, s->tmp0, 32);
             tcg_gen_shri_i64(s->T0, s->T0, 32);
         }
         break;
 #endif
     default:
-        tcg_gen_subi_tl(cpu_tmp0, count, 1);
+        tcg_gen_subi_tl(s->tmp0, count, 1);
         if (is_right) {
-            tcg_gen_shr_tl(cpu_tmp0, s->T0, cpu_tmp0);
+            tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
 
             tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
             tcg_gen_shr_tl(s->T0, s->T0, count);
             tcg_gen_shl_tl(s->T1, s->T1, cpu_tmp4);
         } else {
-            tcg_gen_shl_tl(cpu_tmp0, s->T0, cpu_tmp0);
+            tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
             if (ot == MO_16) {
                 /* Only needed if count > 16, for Intel behaviour.  */
                 tcg_gen_subfi_tl(cpu_tmp4, 33, count);
                 tcg_gen_shr_tl(cpu_tmp4, s->T1, cpu_tmp4);
-                tcg_gen_or_tl(cpu_tmp0, cpu_tmp0, cpu_tmp4);
+                tcg_gen_or_tl(s->tmp0, s->tmp0, cpu_tmp4);
             }
 
             tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
@@ -1806,7 +1812,7 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
     /* store */
     gen_op_st_rm_T0_A0(s, ot, op1);
 
-    gen_shift_flags(s, ot, s->T0, cpu_tmp0, count, is_right);
+    gen_shift_flags(s, ot, s->T0, s->tmp0, count, is_right);
     tcg_temp_free(count);
 }
 
@@ -2196,13 +2202,13 @@ static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
     if (use_goto_tb(s, pc))  {
         /* jump to same page: we can use a direct jump */
         tcg_gen_goto_tb(tb_num);
-        gen_jmp_im(eip);
+        gen_jmp_im(s, eip);
         tcg_gen_exit_tb(s->base.tb, tb_num);
         s->base.is_jmp = DISAS_NORETURN;
     } else {
         /* jump to another page */
-        gen_jmp_im(eip);
-        gen_jr(s, cpu_tmp0);
+        gen_jmp_im(s, eip);
+        gen_jr(s, s->tmp0);
     }
 }
 
@@ -2224,11 +2230,11 @@ static inline void gen_jcc(DisasContext *s, int b,
         l2 = gen_new_label();
         gen_jcc1(s, b, l1);
 
-        gen_jmp_im(next_eip);
+        gen_jmp_im(s, next_eip);
         tcg_gen_br(l2);
 
         gen_set_label(l1);
-        gen_jmp_im(val);
+        gen_jmp_im(s, val);
         gen_set_label(l2);
         gen_eob(s);
     }
@@ -2312,7 +2318,7 @@ gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start,
     if (likely(!(s->flags & HF_SVMI_MASK)))
         return;
     gen_update_cc_op(s);
-    gen_jmp_im(pc_start - s->cs_base);
+    gen_jmp_im(s, pc_start - s->cs_base);
     gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type),
                                          tcg_const_i64(param));
 }
@@ -2325,7 +2331,7 @@ gen_svm_check_intercept(DisasContext *s, target_ulong pc_start, uint64_t type)
 
 static inline void gen_stack_update(DisasContext *s, int addend)
 {
-    gen_op_add_reg_im(mo_stacksize(s), R_ESP, addend);
+    gen_op_add_reg_im(s, mo_stacksize(s), R_ESP, addend);
 }
 
 /* Generate a push. It depends on ss32, addseg and dflag.  */
@@ -2427,11 +2433,11 @@ static void gen_enter(DisasContext *s, int esp_addend, int level)
         for (i = 1; i < level; ++i) {
             tcg_gen_subi_tl(s->A0, cpu_regs[R_EBP], size * i);
             gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
-            gen_op_ld_v(s, d_ot, cpu_tmp0, s->A0);
+            gen_op_ld_v(s, d_ot, s->tmp0, s->A0);
 
             tcg_gen_subi_tl(s->A0, s->T1, size * i);
             gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
-            gen_op_st_v(s, d_ot, cpu_tmp0, s->A0);
+            gen_op_st_v(s, d_ot, s->tmp0, s->A0);
         }
 
         /* Push the current FrameTemp as the last level.  */
@@ -2465,7 +2471,7 @@ static void gen_leave(DisasContext *s)
 static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
 {
     gen_update_cc_op(s);
-    gen_jmp_im(cur_eip);
+    gen_jmp_im(s, cur_eip);
     gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
     s->base.is_jmp = DISAS_NORETURN;
 }
@@ -2502,7 +2508,7 @@ static void gen_interrupt(DisasContext *s, int intno,
                           target_ulong cur_eip, target_ulong next_eip)
 {
     gen_update_cc_op(s);
-    gen_jmp_im(cur_eip);
+    gen_jmp_im(s, cur_eip);
     gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
                                tcg_const_i32(next_eip - cur_eip));
     s->base.is_jmp = DISAS_NORETURN;
@@ -2511,7 +2517,7 @@ static void gen_interrupt(DisasContext *s, int intno,
 static void gen_debug(DisasContext *s, target_ulong cur_eip)
 {
     gen_update_cc_op(s);
-    gen_jmp_im(cur_eip);
+    gen_jmp_im(s, cur_eip);
     gen_helper_debug(cpu_env);
     s->base.is_jmp = DISAS_NORETURN;
 }
@@ -2621,7 +2627,7 @@ static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
     if (s->jmp_opt) {
         gen_goto_tb(s, tb_num, eip);
     } else {
-        gen_jmp_im(eip);
+        gen_jmp_im(s, eip);
         gen_eob(s);
     }
 }
@@ -2648,8 +2654,8 @@ static inline void gen_ldo_env_A0(DisasContext *s, int offset)
     int mem_index = s->mem_index;
     tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0, mem_index, MO_LEQ);
     tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
-    tcg_gen_addi_tl(cpu_tmp0, s->A0, 8);
-    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
+    tcg_gen_addi_tl(s->tmp0, s->A0, 8);
+    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->tmp0, mem_index, MO_LEQ);
     tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
 }
 
@@ -2658,9 +2664,9 @@ static inline void gen_sto_env_A0(DisasContext *s, int offset)
     int mem_index = s->mem_index;
     tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
     tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->A0, mem_index, MO_LEQ);
-    tcg_gen_addi_tl(cpu_tmp0, s->A0, 8);
+    tcg_gen_addi_tl(s->tmp0, s->A0, 8);
     tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
-    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
+    tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->tmp0, mem_index, MO_LEQ);
 }
 
 static inline void gen_op_movo(int d_offset, int s_offset)
@@ -3713,9 +3719,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                                         offsetof(ZMMReg, ZMM_L(0)));
                         break;
                     case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
-                        tcg_gen_qemu_ld_tl(cpu_tmp0, s->A0,
+                        tcg_gen_qemu_ld_tl(s->tmp0, s->A0,
                                            s->mem_index, MO_LEUW);
-                        tcg_gen_st16_tl(cpu_tmp0, cpu_env, op2_offset +
+                        tcg_gen_st16_tl(s->tmp0, cpu_env, op2_offset +
                                         offsetof(ZMMReg, ZMM_W(0)));
                         break;
                     case 0x2a:            /* movntqda */
@@ -3999,7 +4005,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                         if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
                             gen_compute_eflags(s);
                         }
-                        carry_in = cpu_tmp0;
+                        carry_in = s->tmp0;
                         tcg_gen_extract_tl(carry_in, cpu_cc_src,
                                            ctz32(b == 0x1f6 ? CC_C : CC_O), 1);
                     }
@@ -4902,8 +4908,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
                 gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
-                tcg_gen_ext8s_tl(cpu_tmp0, s->T0);
-                tcg_gen_sub_tl(cpu_cc_src, s->T0, cpu_tmp0);
+                tcg_gen_ext8s_tl(s->tmp0, s->T0);
+                tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
                 set_cc_op(s, CC_OP_MULB);
                 break;
             case MO_16:
@@ -4914,8 +4920,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
                 gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
-                tcg_gen_ext16s_tl(cpu_tmp0, s->T0);
-                tcg_gen_sub_tl(cpu_cc_src, s->T0, cpu_tmp0);
+                tcg_gen_ext16s_tl(s->tmp0, s->T0);
+                tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
                 tcg_gen_shri_tl(s->T0, s->T0, 16);
                 gen_op_mov_reg_v(MO_16, R_EDX, s->T0);
                 set_cc_op(s, CC_OP_MULW);
@@ -5215,8 +5221,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             /* XXX: use 32 bit mul which could be faster */
             tcg_gen_mul_tl(s->T0, s->T0, s->T1);
             tcg_gen_mov_tl(cpu_cc_dst, s->T0);
-            tcg_gen_ext16s_tl(cpu_tmp0, s->T0);
-            tcg_gen_sub_tl(cpu_cc_src, s->T0, cpu_tmp0);
+            tcg_gen_ext16s_tl(s->tmp0, s->T0);
+            tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
             gen_op_mov_reg_v(ot, reg, s->T0);
             break;
         }
@@ -5423,7 +5429,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         gen_pop_update(s, ot);
         /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
         if (s->base.is_jmp) {
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             if (reg == R_SS) {
                 s->tf = 0;
                 gen_eob_inhibit_irq(s, true);
@@ -5438,7 +5444,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         gen_movl_seg_T0(s, (b >> 3) & 7);
         gen_pop_update(s, ot);
         if (s->base.is_jmp) {
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob(s);
         }
         break;
@@ -5489,7 +5495,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         gen_movl_seg_T0(s, reg);
         /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
         if (s->base.is_jmp) {
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             if (reg == R_SS) {
                 s->tf = 0;
                 gen_eob_inhibit_irq(s, true);
@@ -5696,7 +5702,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         /* then put the data */
         gen_op_mov_reg_v(ot, reg, s->T1);
         if (s->base.is_jmp) {
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob(s);
         }
         break;
@@ -6478,7 +6484,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     do_lret:
         if (s->pe && !s->vm86) {
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1),
                                       tcg_const_i32(val));
         } else {
@@ -6691,7 +6697,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_pop_update(s, ot);
             set_cc_op(s, CC_OP_EFLAGS);
             /* abort translation because TF/AC flag may change */
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob(s);
         }
         break;
@@ -6779,9 +6785,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             AddressParts a = gen_lea_modrm_0(env, s, modrm);
             /* specific case: we need to add a displacement */
             gen_exts(ot, s->T1);
-            tcg_gen_sari_tl(cpu_tmp0, s->T1, 3 + ot);
-            tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, ot);
-            tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), cpu_tmp0);
+            tcg_gen_sari_tl(s->tmp0, s->T1, 3 + ot);
+            tcg_gen_shli_tl(s->tmp0, s->tmp0, ot);
+            tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), s->tmp0);
             gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
             if (!(s->prefix & PREFIX_LOCK)) {
                 gen_op_ld_v(s, ot, s->T0, s->A0);
@@ -6791,8 +6797,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         }
     bt_op:
         tcg_gen_andi_tl(s->T1, s->T1, (1 << (3 + ot)) - 1);
-        tcg_gen_movi_tl(cpu_tmp0, 1);
-        tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, s->T1);
+        tcg_gen_movi_tl(s->tmp0, 1);
+        tcg_gen_shl_tl(s->tmp0, s->tmp0, s->T1);
         if (s->prefix & PREFIX_LOCK) {
             switch (op) {
             case 0: /* bt */
@@ -6801,17 +6807,17 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 gen_op_ld_v(s, ot, s->T0, s->A0);
                 break;
             case 1: /* bts */
-                tcg_gen_atomic_fetch_or_tl(s->T0, s->A0, cpu_tmp0,
+                tcg_gen_atomic_fetch_or_tl(s->T0, s->A0, s->tmp0,
                                            s->mem_index, ot | MO_LE);
                 break;
             case 2: /* btr */
-                tcg_gen_not_tl(cpu_tmp0, cpu_tmp0);
-                tcg_gen_atomic_fetch_and_tl(s->T0, s->A0, cpu_tmp0,
+                tcg_gen_not_tl(s->tmp0, s->tmp0);
+                tcg_gen_atomic_fetch_and_tl(s->T0, s->A0, s->tmp0,
                                             s->mem_index, ot | MO_LE);
                 break;
             default:
             case 3: /* btc */
-                tcg_gen_atomic_fetch_xor_tl(s->T0, s->A0, cpu_tmp0,
+                tcg_gen_atomic_fetch_xor_tl(s->T0, s->A0, s->tmp0,
                                             s->mem_index, ot | MO_LE);
                 break;
             }
@@ -6823,14 +6829,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 /* Data already loaded; nothing to do.  */
                 break;
             case 1: /* bts */
-                tcg_gen_or_tl(s->T0, s->T0, cpu_tmp0);
+                tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
                 break;
             case 2: /* btr */
-                tcg_gen_andc_tl(s->T0, s->T0, cpu_tmp0);
+                tcg_gen_andc_tl(s->T0, s->T0, s->tmp0);
                 break;
             default:
             case 3: /* btc */
-                tcg_gen_xor_tl(s->T0, s->T0, cpu_tmp0);
+                tcg_gen_xor_tl(s->T0, s->T0, s->tmp0);
                 break;
             }
             if (op != 0) {
@@ -6983,7 +6989,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         }
         if (prefixes & PREFIX_REPZ) {
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             gen_helper_pause(cpu_env, tcg_const_i32(s->pc - pc_start));
             s->base.is_jmp = DISAS_NORETURN;
         }
@@ -7011,7 +7017,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         if (CODE64(s))
             goto illegal_op;
         gen_update_cc_op(s);
-        gen_jmp_im(pc_start - s->cs_base);
+        gen_jmp_im(s, pc_start - s->cs_base);
         gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
         break;
 #ifdef WANT_ICEBP
@@ -7045,7 +7051,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         if (s->vm86 ? s->iopl == 3 : s->cpl <= s->iopl) {
             gen_helper_sti(cpu_env);
             /* interruptions are enabled only the first insn after sti */
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob_inhibit_irq(s, true);
         } else {
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
@@ -7113,26 +7119,26 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             switch(b) {
             case 0: /* loopnz */
             case 1: /* loopz */
-                gen_op_add_reg_im(s->aflag, R_ECX, -1);
-                gen_op_jz_ecx(s->aflag, l3);
+                gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
+                gen_op_jz_ecx(s, s->aflag, l3);
                 gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1);
                 break;
             case 2: /* loop */
-                gen_op_add_reg_im(s->aflag, R_ECX, -1);
-                gen_op_jnz_ecx(s->aflag, l1);
+                gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
+                gen_op_jnz_ecx(s, s->aflag, l1);
                 break;
             default:
             case 3: /* jcxz */
-                gen_op_jz_ecx(s->aflag, l1);
+                gen_op_jz_ecx(s, s->aflag, l1);
                 break;
             }
 
             gen_set_label(l3);
-            gen_jmp_im(next_eip);
+            gen_jmp_im(s, next_eip);
             tcg_gen_br(l2);
 
             gen_set_label(l1);
-            gen_jmp_im(tval);
+            gen_jmp_im(s, tval);
             gen_set_label(l2);
             gen_eob(s);
         }
@@ -7143,7 +7149,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             if (b & 2) {
                 gen_helper_rdmsr(cpu_env);
             } else {
@@ -7153,7 +7159,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         break;
     case 0x131: /* rdtsc */
         gen_update_cc_op(s);
-        gen_jmp_im(pc_start - s->cs_base);
+        gen_jmp_im(s, pc_start - s->cs_base);
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_start();
 	}
@@ -7165,7 +7171,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         break;
     case 0x133: /* rdpmc */
         gen_update_cc_op(s);
-        gen_jmp_im(pc_start - s->cs_base);
+        gen_jmp_im(s, pc_start - s->cs_base);
         gen_helper_rdpmc(cpu_env);
         break;
     case 0x134: /* sysenter */
@@ -7194,7 +7200,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0x105: /* syscall */
         /* XXX: is it usable in real mode ? */
         gen_update_cc_op(s);
-        gen_jmp_im(pc_start - s->cs_base);
+        gen_jmp_im(s, pc_start - s->cs_base);
         gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
         /* TF handling for the syscall insn is different. The TF bit is  checked
            after the syscall insn completes. This allows #DB to not be
@@ -7220,7 +7226,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
 #endif
     case 0x1a2: /* cpuid */
         gen_update_cc_op(s);
-        gen_jmp_im(pc_start - s->cs_base);
+        gen_jmp_im(s, pc_start - s->cs_base);
         gen_helper_cpuid(cpu_env);
         break;
     case 0xf4: /* hlt */
@@ -7228,7 +7234,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
             s->base.is_jmp = DISAS_NORETURN;
         }
@@ -7320,7 +7326,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 goto illegal_op;
             }
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]);
             gen_extu(s->aflag, s->A0);
             gen_add_A0_ds_seg(s);
@@ -7332,7 +7338,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 goto illegal_op;
             }
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
             gen_eob(s);
             break;
@@ -7343,7 +7349,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 goto illegal_op;
             }
             gen_helper_clac(cpu_env);
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob(s);
             break;
 
@@ -7353,7 +7359,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 goto illegal_op;
             }
             gen_helper_stac(cpu_env);
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob(s);
             break;
 
@@ -7396,7 +7402,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
             gen_helper_xsetbv(cpu_env, cpu_tmp2_i32, cpu_tmp1_i64);
             /* End TB because translation flags may change.  */
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob(s);
             break;
 
@@ -7409,7 +7415,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 break;
             }
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag - 1),
                              tcg_const_i32(s->pc - pc_start));
             tcg_gen_exit_tb(NULL, 0);
@@ -7421,7 +7427,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 goto illegal_op;
             }
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             gen_helper_vmmcall(cpu_env);
             break;
 
@@ -7434,7 +7440,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 break;
             }
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag - 1));
             break;
 
@@ -7447,7 +7453,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 break;
             }
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag - 1));
             break;
 
@@ -7463,7 +7469,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             }
             gen_update_cc_op(s);
             gen_helper_stgi(cpu_env);
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob(s);
             break;
 
@@ -7476,7 +7482,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 break;
             }
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             gen_helper_clgi(cpu_env);
             break;
 
@@ -7487,7 +7493,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 goto illegal_op;
             }
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             gen_helper_skinit(cpu_env);
             break;
 
@@ -7500,7 +7506,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 break;
             }
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             gen_helper_invlpga(cpu_env, tcg_const_i32(s->aflag - 1));
             break;
 
@@ -7574,7 +7580,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
             gen_helper_lmsw(cpu_env, s->T0);
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob(s);
             break;
 
@@ -7584,10 +7590,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 break;
             }
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             gen_lea_modrm(env, s, modrm);
             gen_helper_invlpg(cpu_env, s->A0);
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob(s);
             break;
 
@@ -7613,7 +7619,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 goto illegal_op;
             }
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
                 gen_io_start();
             }
@@ -7688,11 +7694,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 a0 = NULL;
             }
             gen_op_mov_v_reg(ot, t1, reg);
-            tcg_gen_andi_tl(cpu_tmp0, t0, 3);
+            tcg_gen_andi_tl(s->tmp0, t0, 3);
             tcg_gen_andi_tl(t1, t1, 3);
             tcg_gen_movi_tl(t2, 0);
             label1 = gen_new_label();
-            tcg_gen_brcond_tl(TCG_COND_GE, cpu_tmp0, t1, label1);
+            tcg_gen_brcond_tl(TCG_COND_GE, s->tmp0, t1, label1);
             tcg_gen_andi_tl(t0, t0, ~3);
             tcg_gen_or_tl(t0, t0, t1);
             tcg_gen_movi_tl(t2, CC_Z);
@@ -7729,9 +7735,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             } else {
                 gen_helper_lsl(t0, cpu_env, s->T0);
             }
-            tcg_gen_andi_tl(cpu_tmp0, cpu_cc_src, CC_Z);
+            tcg_gen_andi_tl(s->tmp0, cpu_cc_src, CC_Z);
             label1 = gen_new_label();
-            tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
+            tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
             gen_op_mov_reg_v(ot, reg, t0);
             gen_set_label(label1);
             set_cc_op(s, CC_OP_EFLAGS);
@@ -7981,7 +7987,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             case 4:
             case 8:
                 gen_update_cc_op(s);
-                gen_jmp_im(pc_start - s->cs_base);
+                gen_jmp_im(s, pc_start - s->cs_base);
                 if (b & 2) {
                     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
                         gen_io_start();
@@ -7992,7 +7998,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
                         gen_io_end();
                     }
-                    gen_jmp_im(s->pc - s->cs_base);
+                    gen_jmp_im(s, s->pc - s->cs_base);
                     gen_eob(s);
                 } else {
                     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
@@ -8035,7 +8041,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 gen_op_mov_v_reg(ot, s->T0, rm);
                 tcg_gen_movi_i32(cpu_tmp2_i32, reg);
                 gen_helper_set_dr(cpu_env, cpu_tmp2_i32, s->T0);
-                gen_jmp_im(s->pc - s->cs_base);
+                gen_jmp_im(s, s->pc - s->cs_base);
                 gen_eob(s);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
@@ -8052,7 +8058,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
             gen_helper_clts(cpu_env);
             /* abort block because static cpu state changed */
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob(s);
         }
         break;
@@ -8149,7 +8155,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             /* XRSTOR is how MPX is enabled, which changes how
                we translate.  Thus we need to end the TB.  */
             gen_update_cc_op(s);
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob(s);
             break;
 
@@ -8279,7 +8285,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         if (!(s->flags & HF_SMM_MASK))
             goto illegal_op;
         gen_update_cc_op(s);
-        gen_jmp_im(s->pc - s->cs_base);
+        gen_jmp_im(s, s->pc - s->cs_base);
         gen_helper_rsm(cpu_env);
         gen_eob(s);
         break;
@@ -8473,7 +8479,7 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
     dc->T1 = tcg_temp_new();
     dc->A0 = tcg_temp_new();
 
-    cpu_tmp0 = tcg_temp_new();
+    dc->tmp0 = tcg_temp_new();
     cpu_tmp1_i64 = tcg_temp_new_i64();
     cpu_tmp2_i32 = tcg_temp_new_i32();
     cpu_tmp3_i32 = tcg_temp_new_i32();
@@ -8550,7 +8556,7 @@ static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
     DisasContext *dc = container_of(dcbase, DisasContext, base);
 
     if (dc->base.is_jmp == DISAS_TOO_MANY) {
-        gen_jmp_im(dc->base.pc_next - dc->cs_base);
+        gen_jmp_im(dc, dc->base.pc_next - dc->cs_base);
         gen_eob(dc);
     }
 }
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [Qemu-devel] [PATCH v3 06/13] target/i386: move cpu_tmp4 to DisasContext
  2018-09-11 20:28 [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg Emilio G. Cota
                   ` (4 preceding siblings ...)
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 05/13] target/i386: move cpu_tmp0 " Emilio G. Cota
@ 2018-09-11 20:28 ` Emilio G. Cota
  2018-09-11 20:52   ` Richard Henderson
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 07/13] target/i386: move cpu_ptr0 " Emilio G. Cota
                   ` (7 subsequent siblings)
  13 siblings, 1 reply; 32+ messages in thread
From: Emilio G. Cota @ 2018-09-11 20:28 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 target/i386/translate.c | 78 ++++++++++++++++++++---------------------
 1 file changed, 39 insertions(+), 39 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 873231fb44..0ad6ffc4af 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -79,7 +79,6 @@ static TCGv cpu_seg_base[6];
 static TCGv_i64 cpu_bndl[4];
 static TCGv_i64 cpu_bndu[4];
 
-static TCGv cpu_tmp4;
 static TCGv_ptr cpu_ptr0, cpu_ptr1;
 static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
 static TCGv_i64 cpu_tmp1_i64;
@@ -141,6 +140,7 @@ typedef struct DisasContext {
 
     /* TCG local register indexes (only used inside old micro ops) */
     TCGv tmp0;
+    TCGv tmp4;
 
     sigjmp_buf jmpbuf;
 } DisasContext;
@@ -909,10 +909,10 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
         size = s->cc_op - CC_OP_SUBB;
         switch (jcc_op) {
         case JCC_BE:
-            tcg_gen_mov_tl(cpu_tmp4, s->cc_srcT);
-            gen_extu(size, cpu_tmp4);
+            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
+            gen_extu(size, s->tmp4);
             t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
-            cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = cpu_tmp4,
+            cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->tmp4,
                                .reg2 = t0, .mask = -1, .use_reg2 = true };
             break;
 
@@ -922,10 +922,10 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
         case JCC_LE:
             cond = TCG_COND_LE;
         fast_jcc_l:
-            tcg_gen_mov_tl(cpu_tmp4, s->cc_srcT);
-            gen_exts(size, cpu_tmp4);
+            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
+            gen_exts(size, s->tmp4);
             t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, true);
-            cc = (CCPrepare) { .cond = cond, .reg = cpu_tmp4,
+            cc = (CCPrepare) { .cond = cond, .reg = s->tmp4,
                                .reg2 = t0, .mask = -1, .use_reg2 = true };
             break;
 
@@ -1277,32 +1277,32 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
     }
     switch(op) {
     case OP_ADCL:
-        gen_compute_eflags_c(s1, cpu_tmp4);
+        gen_compute_eflags_c(s1, s1->tmp4);
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_add_tl(s1->T0, cpu_tmp4, s1->T1);
+            tcg_gen_add_tl(s1->T0, s1->tmp4, s1->T1);
             tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                         s1->mem_index, ot | MO_LE);
         } else {
             tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
-            tcg_gen_add_tl(s1->T0, s1->T0, cpu_tmp4);
+            tcg_gen_add_tl(s1->T0, s1->T0, s1->tmp4);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
-        gen_op_update3_cc(s1, cpu_tmp4);
+        gen_op_update3_cc(s1, s1->tmp4);
         set_cc_op(s1, CC_OP_ADCB + ot);
         break;
     case OP_SBBL:
-        gen_compute_eflags_c(s1, cpu_tmp4);
+        gen_compute_eflags_c(s1, s1->tmp4);
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_add_tl(s1->T0, s1->T1, cpu_tmp4);
+            tcg_gen_add_tl(s1->T0, s1->T1, s1->tmp4);
             tcg_gen_neg_tl(s1->T0, s1->T0);
             tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                         s1->mem_index, ot | MO_LE);
         } else {
             tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
-            tcg_gen_sub_tl(s1->T0, s1->T0, cpu_tmp4);
+            tcg_gen_sub_tl(s1->T0, s1->T0, s1->tmp4);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
-        gen_op_update3_cc(s1, cpu_tmp4);
+        gen_op_update3_cc(s1, s1->tmp4);
         set_cc_op(s1, CC_OP_SBBB + ot);
         break;
     case OP_ADDL:
@@ -1492,15 +1492,15 @@ static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
         if (is_right) {
             if (is_arith) {
                 gen_exts(ot, s->T0);
-                tcg_gen_sari_tl(cpu_tmp4, s->T0, op2 - 1);
+                tcg_gen_sari_tl(s->tmp4, s->T0, op2 - 1);
                 tcg_gen_sari_tl(s->T0, s->T0, op2);
             } else {
                 gen_extu(ot, s->T0);
-                tcg_gen_shri_tl(cpu_tmp4, s->T0, op2 - 1);
+                tcg_gen_shri_tl(s->tmp4, s->T0, op2 - 1);
                 tcg_gen_shri_tl(s->T0, s->T0, op2);
             }
         } else {
-            tcg_gen_shli_tl(cpu_tmp4, s->T0, op2 - 1);
+            tcg_gen_shli_tl(s->tmp4, s->T0, op2 - 1);
             tcg_gen_shli_tl(s->T0, s->T0, op2);
         }
     }
@@ -1510,7 +1510,7 @@ static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
 
     /* update eflags if non zero shift */
     if (op2 != 0) {
-        tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
+        tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
         tcg_gen_mov_tl(cpu_cc_dst, s->T0);
         set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
     }
@@ -1786,25 +1786,25 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
         if (is_right) {
             tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
 
-            tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
+            tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
             tcg_gen_shr_tl(s->T0, s->T0, count);
-            tcg_gen_shl_tl(s->T1, s->T1, cpu_tmp4);
+            tcg_gen_shl_tl(s->T1, s->T1, s->tmp4);
         } else {
             tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
             if (ot == MO_16) {
                 /* Only needed if count > 16, for Intel behaviour.  */
-                tcg_gen_subfi_tl(cpu_tmp4, 33, count);
-                tcg_gen_shr_tl(cpu_tmp4, s->T1, cpu_tmp4);
-                tcg_gen_or_tl(s->tmp0, s->tmp0, cpu_tmp4);
+                tcg_gen_subfi_tl(s->tmp4, 33, count);
+                tcg_gen_shr_tl(s->tmp4, s->T1, s->tmp4);
+                tcg_gen_or_tl(s->tmp0, s->tmp0, s->tmp4);
             }
 
-            tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
+            tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
             tcg_gen_shl_tl(s->T0, s->T0, count);
-            tcg_gen_shr_tl(s->T1, s->T1, cpu_tmp4);
+            tcg_gen_shr_tl(s->T1, s->T1, s->tmp4);
         }
-        tcg_gen_movi_tl(cpu_tmp4, 0);
-        tcg_gen_movcond_tl(TCG_COND_EQ, s->T1, count, cpu_tmp4,
-                           cpu_tmp4, s->T1);
+        tcg_gen_movi_tl(s->tmp4, 0);
+        tcg_gen_movcond_tl(TCG_COND_EQ, s->T1, count, s->tmp4,
+                           s->tmp4, s->T1);
         tcg_gen_or_tl(s->T0, s->T0, s->T1);
         break;
     }
@@ -2346,7 +2346,7 @@ static void gen_push_v(DisasContext *s, TCGv val)
 
     if (!CODE64(s)) {
         if (s->addseg) {
-            new_esp = cpu_tmp4;
+            new_esp = s->tmp4;
             tcg_gen_mov_tl(new_esp, s->A0);
         }
         gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
@@ -5068,8 +5068,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                                       tcg_const_i32(dflag - 1),
                                       tcg_const_i32(s->pc - s->cs_base));
             }
-            tcg_gen_ld_tl(cpu_tmp4, cpu_env, offsetof(CPUX86State, eip));
-            gen_jr(s, cpu_tmp4);
+            tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
+            gen_jr(s, s->tmp4);
             break;
         case 4: /* jmp Ev */
             if (dflag == MO_16) {
@@ -5092,8 +5092,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 gen_op_movl_seg_T0_vm(s, R_CS);
                 gen_op_jmp_v(s->T1);
             }
-            tcg_gen_ld_tl(cpu_tmp4, cpu_env, offsetof(CPUX86State, eip));
-            gen_jr(s, cpu_tmp4);
+            tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
+            gen_jr(s, s->tmp4);
             break;
         case 6: /* push Ev */
             gen_push_v(s, s->T0);
@@ -6821,9 +6821,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                                             s->mem_index, ot | MO_LE);
                 break;
             }
-            tcg_gen_shr_tl(cpu_tmp4, s->T0, s->T1);
+            tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
         } else {
-            tcg_gen_shr_tl(cpu_tmp4, s->T0, s->T1);
+            tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
             switch (op) {
             case 0: /* bt */
                 /* Data already loaded; nothing to do.  */
@@ -6867,13 +6867,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                We can get that same Z value (and the new C value) by leaving
                CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
                same width.  */
-            tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
+            tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
             set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB);
             break;
         default:
             /* Otherwise, generate EFLAGS and replace the C bit.  */
             gen_compute_eflags(s);
-            tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, cpu_tmp4,
+            tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, s->tmp4,
                                ctz32(CC_C), 1);
             break;
         }
@@ -8483,7 +8483,7 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
     cpu_tmp1_i64 = tcg_temp_new_i64();
     cpu_tmp2_i32 = tcg_temp_new_i32();
     cpu_tmp3_i32 = tcg_temp_new_i32();
-    cpu_tmp4 = tcg_temp_new();
+    dc->tmp4 = tcg_temp_new();
     cpu_ptr0 = tcg_temp_new_ptr();
     cpu_ptr1 = tcg_temp_new_ptr();
     dc->cc_srcT = tcg_temp_local_new();
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [Qemu-devel] [PATCH v3 07/13] target/i386: move cpu_ptr0 to DisasContext
  2018-09-11 20:28 [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg Emilio G. Cota
                   ` (5 preceding siblings ...)
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 06/13] target/i386: move cpu_tmp4 " Emilio G. Cota
@ 2018-09-11 20:28 ` Emilio G. Cota
  2018-09-11 20:53   ` Richard Henderson
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 08/13] target/i386: move cpu_ptr1 " Emilio G. Cota
                   ` (6 subsequent siblings)
  13 siblings, 1 reply; 32+ messages in thread
From: Emilio G. Cota @ 2018-09-11 20:28 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 target/i386/translate.c | 101 +++++++++++++++++++++-------------------
 1 file changed, 52 insertions(+), 49 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 0ad6ffc4af..9531dafebe 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -79,7 +79,7 @@ static TCGv cpu_seg_base[6];
 static TCGv_i64 cpu_bndl[4];
 static TCGv_i64 cpu_bndu[4];
 
-static TCGv_ptr cpu_ptr0, cpu_ptr1;
+static TCGv_ptr cpu_ptr1;
 static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
 static TCGv_i64 cpu_tmp1_i64;
 
@@ -141,6 +141,7 @@ typedef struct DisasContext {
     /* TCG local register indexes (only used inside old micro ops) */
     TCGv tmp0;
     TCGv tmp4;
+    TCGv_ptr ptr0;
 
     sigjmp_buf jmpbuf;
 } DisasContext;
@@ -3147,27 +3148,27 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
 #endif
             {
                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
+                tcg_gen_addi_ptr(s->ptr0, cpu_env,
                                  offsetof(CPUX86State,fpregs[reg].mmx));
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-                gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
+                gen_helper_movl_mm_T0_mmx(s->ptr0, cpu_tmp2_i32);
             }
             break;
         case 0x16e: /* movd xmm, ea */
 #ifdef TARGET_X86_64
             if (s->dflag == MO_64) {
                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
+                tcg_gen_addi_ptr(s->ptr0, cpu_env,
                                  offsetof(CPUX86State,xmm_regs[reg]));
-                gen_helper_movq_mm_T0_xmm(cpu_ptr0, s->T0);
+                gen_helper_movq_mm_T0_xmm(s->ptr0, s->T0);
             } else
 #endif
             {
                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
+                tcg_gen_addi_ptr(s->ptr0, cpu_env,
                                  offsetof(CPUX86State,xmm_regs[reg]));
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-                gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
+                gen_helper_movl_mm_T0_xmm(s->ptr0, cpu_tmp2_i32);
             }
             break;
         case 0x6f: /* movq mm, ea */
@@ -3312,14 +3313,14 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     goto illegal_op;
                 field_length = x86_ldub_code(env, s) & 0x3F;
                 bit_index = x86_ldub_code(env, s) & 0x3F;
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+                tcg_gen_addi_ptr(s->ptr0, cpu_env,
                     offsetof(CPUX86State,xmm_regs[reg]));
                 if (b1 == 1)
-                    gen_helper_extrq_i(cpu_env, cpu_ptr0,
+                    gen_helper_extrq_i(cpu_env, s->ptr0,
                                        tcg_const_i32(bit_index),
                                        tcg_const_i32(field_length));
                 else
-                    gen_helper_insertq_i(cpu_env, cpu_ptr0,
+                    gen_helper_insertq_i(cpu_env, s->ptr0,
                                          tcg_const_i32(bit_index),
                                          tcg_const_i32(field_length));
             }
@@ -3471,22 +3472,22 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 rm = (modrm & 7);
                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
             }
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
+            tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op1_offset);
-            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
+            sse_fn_epp(cpu_env, s->ptr0, cpu_ptr1);
             break;
         case 0x050: /* movmskps */
             rm = (modrm & 7) | REX_B(s);
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
+            tcg_gen_addi_ptr(s->ptr0, cpu_env,
                              offsetof(CPUX86State,xmm_regs[rm]));
-            gen_helper_movmskps(cpu_tmp2_i32, cpu_env, cpu_ptr0);
+            gen_helper_movmskps(cpu_tmp2_i32, cpu_env, s->ptr0);
             tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
             break;
         case 0x150: /* movmskpd */
             rm = (modrm & 7) | REX_B(s);
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
+            tcg_gen_addi_ptr(s->ptr0, cpu_env,
                              offsetof(CPUX86State,xmm_regs[rm]));
-            gen_helper_movmskpd(cpu_tmp2_i32, cpu_env, cpu_ptr0);
+            gen_helper_movmskpd(cpu_tmp2_i32, cpu_env, s->ptr0);
             tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
             break;
         case 0x02a: /* cvtpi2ps */
@@ -3501,15 +3502,15 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
             }
             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
             switch(b >> 8) {
             case 0x0:
-                gen_helper_cvtpi2ps(cpu_env, cpu_ptr0, cpu_ptr1);
+                gen_helper_cvtpi2ps(cpu_env, s->ptr0, cpu_ptr1);
                 break;
             default:
             case 0x1:
-                gen_helper_cvtpi2pd(cpu_env, cpu_ptr0, cpu_ptr1);
+                gen_helper_cvtpi2pd(cpu_env, s->ptr0, cpu_ptr1);
                 break;
             }
             break;
@@ -3518,15 +3519,15 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             ot = mo_64_32(s->dflag);
             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
             if (ot == MO_32) {
                 SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-                sse_fn_epi(cpu_env, cpu_ptr0, cpu_tmp2_i32);
+                sse_fn_epi(cpu_env, s->ptr0, cpu_tmp2_i32);
             } else {
 #ifdef TARGET_X86_64
                 SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
-                sse_fn_epl(cpu_env, cpu_ptr0, s->T0);
+                sse_fn_epl(cpu_env, s->ptr0, s->T0);
 #else
                 goto illegal_op;
 #endif
@@ -3546,20 +3547,20 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
             }
             op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
             switch(b) {
             case 0x02c:
-                gen_helper_cvttps2pi(cpu_env, cpu_ptr0, cpu_ptr1);
+                gen_helper_cvttps2pi(cpu_env, s->ptr0, cpu_ptr1);
                 break;
             case 0x12c:
-                gen_helper_cvttpd2pi(cpu_env, cpu_ptr0, cpu_ptr1);
+                gen_helper_cvttpd2pi(cpu_env, s->ptr0, cpu_ptr1);
                 break;
             case 0x02d:
-                gen_helper_cvtps2pi(cpu_env, cpu_ptr0, cpu_ptr1);
+                gen_helper_cvtps2pi(cpu_env, s->ptr0, cpu_ptr1);
                 break;
             case 0x12d:
-                gen_helper_cvtpd2pi(cpu_env, cpu_ptr0, cpu_ptr1);
+                gen_helper_cvtpd2pi(cpu_env, s->ptr0, cpu_ptr1);
                 break;
             }
             break;
@@ -3582,17 +3583,17 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 rm = (modrm & 7) | REX_B(s);
                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
             }
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
+            tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
             if (ot == MO_32) {
                 SSEFunc_i_ep sse_fn_i_ep =
                     sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
-                sse_fn_i_ep(cpu_tmp2_i32, cpu_env, cpu_ptr0);
+                sse_fn_i_ep(cpu_tmp2_i32, cpu_env, s->ptr0);
                 tcg_gen_extu_i32_tl(s->T0, cpu_tmp2_i32);
             } else {
 #ifdef TARGET_X86_64
                 SSEFunc_l_ep sse_fn_l_ep =
                     sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
-                sse_fn_l_ep(s->T0, cpu_env, cpu_ptr0);
+                sse_fn_l_ep(s->T0, cpu_env, s->ptr0);
 #else
                 goto illegal_op;
 #endif
@@ -3665,12 +3666,14 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 goto illegal_op;
             if (b1) {
                 rm = (modrm & 7) | REX_B(s);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,xmm_regs[rm]));
-                gen_helper_pmovmskb_xmm(cpu_tmp2_i32, cpu_env, cpu_ptr0);
+                tcg_gen_addi_ptr(s->ptr0, cpu_env,
+                                 offsetof(CPUX86State, xmm_regs[rm]));
+                gen_helper_pmovmskb_xmm(cpu_tmp2_i32, cpu_env, s->ptr0);
             } else {
                 rm = (modrm & 7);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,fpregs[rm].mmx));
-                gen_helper_pmovmskb_mmx(cpu_tmp2_i32, cpu_env, cpu_ptr0);
+                tcg_gen_addi_ptr(s->ptr0, cpu_env,
+                                 offsetof(CPUX86State, fpregs[rm].mmx));
+                gen_helper_pmovmskb_mmx(cpu_tmp2_i32, cpu_env, s->ptr0);
             }
             reg = ((modrm >> 3) & 7) | rex_r;
             tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
@@ -3745,9 +3748,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 goto unknown_op;
             }
 
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
-            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
+            sse_fn_epp(cpu_env, s->ptr0, cpu_ptr1);
 
             if (b == 0x17) {
                 set_cc_op(s, CC_OP_EFLAGS);
@@ -4294,9 +4297,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 }
             }
 
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
-            sse_fn_eppi(cpu_env, cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
+            sse_fn_eppi(cpu_env, s->ptr0, cpu_ptr1, tcg_const_i32(val));
             break;
 
         case 0x33a:
@@ -4417,18 +4420,18 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
                 goto illegal_op;
             }
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
-            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
+            sse_fn_epp(cpu_env, s->ptr0, cpu_ptr1);
             break;
         case 0x70: /* pshufx insn */
         case 0xc6: /* pshufx insn */
             val = x86_ldub_code(env, s);
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
             /* XXX: introduce a new table? */
             sse_fn_ppi = (SSEFunc_0_ppi)sse_fn_epp;
-            sse_fn_ppi(cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
+            sse_fn_ppi(s->ptr0, cpu_ptr1, tcg_const_i32(val));
             break;
         case 0xc2:
             /* compare insns */
@@ -4437,9 +4440,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 goto unknown_op;
             sse_fn_epp = sse_op_table4[val][b1];
 
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
-            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
+            sse_fn_epp(cpu_env, s->ptr0, cpu_ptr1);
             break;
         case 0xf7:
             /* maskmov : we must prepare A0 */
@@ -4449,16 +4452,16 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             gen_extu(s->aflag, s->A0);
             gen_add_A0_ds_seg(s);
 
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
             /* XXX: introduce a new table? */
             sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
-            sse_fn_eppt(cpu_env, cpu_ptr0, cpu_ptr1, s->A0);
+            sse_fn_eppt(cpu_env, s->ptr0, cpu_ptr1, s->A0);
             break;
         default:
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
-            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
+            sse_fn_epp(cpu_env, s->ptr0, cpu_ptr1);
             break;
         }
         if (b == 0x2e || b == 0x2f) {
@@ -8484,7 +8487,7 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
     cpu_tmp2_i32 = tcg_temp_new_i32();
     cpu_tmp3_i32 = tcg_temp_new_i32();
     dc->tmp4 = tcg_temp_new();
-    cpu_ptr0 = tcg_temp_new_ptr();
+    dc->ptr0 = tcg_temp_new_ptr();
     cpu_ptr1 = tcg_temp_new_ptr();
     dc->cc_srcT = tcg_temp_local_new();
 }
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [Qemu-devel] [PATCH v3 08/13] target/i386: move cpu_ptr1 to DisasContext
  2018-09-11 20:28 [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg Emilio G. Cota
                   ` (6 preceding siblings ...)
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 07/13] target/i386: move cpu_ptr0 " Emilio G. Cota
@ 2018-09-11 20:28 ` Emilio G. Cota
  2018-09-11 20:54   ` Richard Henderson
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 09/13] target/i386: move cpu_tmp2_i32 " Emilio G. Cota
                   ` (5 subsequent siblings)
  13 siblings, 1 reply; 32+ messages in thread
From: Emilio G. Cota @ 2018-09-11 20:28 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 target/i386/translate.c | 52 ++++++++++++++++++++---------------------
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 9531dafebe..c51f61ca2c 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -79,7 +79,6 @@ static TCGv cpu_seg_base[6];
 static TCGv_i64 cpu_bndl[4];
 static TCGv_i64 cpu_bndu[4];
 
-static TCGv_ptr cpu_ptr1;
 static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
 static TCGv_i64 cpu_tmp1_i64;
 
@@ -142,6 +141,7 @@ typedef struct DisasContext {
     TCGv tmp0;
     TCGv tmp4;
     TCGv_ptr ptr0;
+    TCGv_ptr ptr1;
 
     sigjmp_buf jmpbuf;
 } DisasContext;
@@ -3473,8 +3473,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
             }
             tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
-            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op1_offset);
-            sse_fn_epp(cpu_env, s->ptr0, cpu_ptr1);
+            tcg_gen_addi_ptr(s->ptr1, cpu_env, op1_offset);
+            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
             break;
         case 0x050: /* movmskps */
             rm = (modrm & 7) | REX_B(s);
@@ -3503,14 +3503,14 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             }
             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
-            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
             switch(b >> 8) {
             case 0x0:
-                gen_helper_cvtpi2ps(cpu_env, s->ptr0, cpu_ptr1);
+                gen_helper_cvtpi2ps(cpu_env, s->ptr0, s->ptr1);
                 break;
             default:
             case 0x1:
-                gen_helper_cvtpi2pd(cpu_env, s->ptr0, cpu_ptr1);
+                gen_helper_cvtpi2pd(cpu_env, s->ptr0, s->ptr1);
                 break;
             }
             break;
@@ -3548,19 +3548,19 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             }
             op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
-            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
             switch(b) {
             case 0x02c:
-                gen_helper_cvttps2pi(cpu_env, s->ptr0, cpu_ptr1);
+                gen_helper_cvttps2pi(cpu_env, s->ptr0, s->ptr1);
                 break;
             case 0x12c:
-                gen_helper_cvttpd2pi(cpu_env, s->ptr0, cpu_ptr1);
+                gen_helper_cvttpd2pi(cpu_env, s->ptr0, s->ptr1);
                 break;
             case 0x02d:
-                gen_helper_cvtps2pi(cpu_env, s->ptr0, cpu_ptr1);
+                gen_helper_cvtps2pi(cpu_env, s->ptr0, s->ptr1);
                 break;
             case 0x12d:
-                gen_helper_cvtpd2pi(cpu_env, s->ptr0, cpu_ptr1);
+                gen_helper_cvtpd2pi(cpu_env, s->ptr0, s->ptr1);
                 break;
             }
             break;
@@ -3749,8 +3749,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             }
 
             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
-            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
-            sse_fn_epp(cpu_env, s->ptr0, cpu_ptr1);
+            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
+            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
 
             if (b == 0x17) {
                 set_cc_op(s, CC_OP_EFLAGS);
@@ -4298,8 +4298,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             }
 
             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
-            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
-            sse_fn_eppi(cpu_env, s->ptr0, cpu_ptr1, tcg_const_i32(val));
+            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
+            sse_fn_eppi(cpu_env, s->ptr0, s->ptr1, tcg_const_i32(val));
             break;
 
         case 0x33a:
@@ -4421,17 +4421,17 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 goto illegal_op;
             }
             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
-            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
-            sse_fn_epp(cpu_env, s->ptr0, cpu_ptr1);
+            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
+            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
             break;
         case 0x70: /* pshufx insn */
         case 0xc6: /* pshufx insn */
             val = x86_ldub_code(env, s);
             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
-            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
             /* XXX: introduce a new table? */
             sse_fn_ppi = (SSEFunc_0_ppi)sse_fn_epp;
-            sse_fn_ppi(s->ptr0, cpu_ptr1, tcg_const_i32(val));
+            sse_fn_ppi(s->ptr0, s->ptr1, tcg_const_i32(val));
             break;
         case 0xc2:
             /* compare insns */
@@ -4441,8 +4441,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             sse_fn_epp = sse_op_table4[val][b1];
 
             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
-            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
-            sse_fn_epp(cpu_env, s->ptr0, cpu_ptr1);
+            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
+            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
             break;
         case 0xf7:
             /* maskmov : we must prepare A0 */
@@ -4453,15 +4453,15 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             gen_add_A0_ds_seg(s);
 
             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
-            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
             /* XXX: introduce a new table? */
             sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
-            sse_fn_eppt(cpu_env, s->ptr0, cpu_ptr1, s->A0);
+            sse_fn_eppt(cpu_env, s->ptr0, s->ptr1, s->A0);
             break;
         default:
             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
-            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
-            sse_fn_epp(cpu_env, s->ptr0, cpu_ptr1);
+            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
+            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
             break;
         }
         if (b == 0x2e || b == 0x2f) {
@@ -8488,7 +8488,7 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
     cpu_tmp3_i32 = tcg_temp_new_i32();
     dc->tmp4 = tcg_temp_new();
     dc->ptr0 = tcg_temp_new_ptr();
-    cpu_ptr1 = tcg_temp_new_ptr();
+    dc->ptr1 = tcg_temp_new_ptr();
     dc->cc_srcT = tcg_temp_local_new();
 }
 
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [Qemu-devel] [PATCH v3 09/13] target/i386: move cpu_tmp2_i32 to DisasContext
  2018-09-11 20:28 [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg Emilio G. Cota
                   ` (7 preceding siblings ...)
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 08/13] target/i386: move cpu_ptr1 " Emilio G. Cota
@ 2018-09-11 20:28 ` Emilio G. Cota
  2018-09-11 20:55   ` Richard Henderson
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 10/13] target/i386: move cpu_tmp3_i32 " Emilio G. Cota
                   ` (4 subsequent siblings)
  13 siblings, 1 reply; 32+ messages in thread
From: Emilio G. Cota @ 2018-09-11 20:28 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 target/i386/translate.c | 347 ++++++++++++++++++++--------------------
 1 file changed, 174 insertions(+), 173 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index c51f61ca2c..ec68f7dba1 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -79,7 +79,7 @@ static TCGv cpu_seg_base[6];
 static TCGv_i64 cpu_bndl[4];
 static TCGv_i64 cpu_bndu[4];
 
-static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
+static TCGv_i32 cpu_tmp3_i32;
 static TCGv_i64 cpu_tmp1_i64;
 
 #include "exec/gen-icount.h"
@@ -142,6 +142,7 @@ typedef struct DisasContext {
     TCGv tmp4;
     TCGv_ptr ptr0;
     TCGv_ptr ptr1;
+    TCGv_i32 tmp2_i32;
 
     sigjmp_buf jmpbuf;
 } DisasContext;
@@ -617,16 +618,16 @@ static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
     target_ulong next_eip;
 
     if (s->pe && (s->cpl > s->iopl || s->vm86)) {
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
+        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
         switch (ot) {
         case MO_8:
-            gen_helper_check_iob(cpu_env, cpu_tmp2_i32);
+            gen_helper_check_iob(cpu_env, s->tmp2_i32);
             break;
         case MO_16:
-            gen_helper_check_iow(cpu_env, cpu_tmp2_i32);
+            gen_helper_check_iow(cpu_env, s->tmp2_i32);
             break;
         case MO_32:
-            gen_helper_check_iol(cpu_env, cpu_tmp2_i32);
+            gen_helper_check_iol(cpu_env, s->tmp2_i32);
             break;
         default:
             tcg_abort();
@@ -637,8 +638,8 @@ static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
         gen_jmp_im(s, cur_eip);
         svm_flags |= (1 << (4 + ot));
         next_eip = s->pc - s->cs_base;
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-        gen_helper_svm_check_io(cpu_env, cpu_tmp2_i32,
+        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+        gen_helper_svm_check_io(cpu_env, s->tmp2_i32,
                                 tcg_const_i32(svm_flags),
                                 tcg_const_i32(next_eip - cur_eip));
     }
@@ -1136,13 +1137,13 @@ static inline void gen_ins(DisasContext *s, TCGMemOp ot)
        case of page fault. */
     tcg_gen_movi_tl(s->T0, 0);
     gen_op_st_v(s, ot, s->T0, s->A0);
-    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
-    tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
-    gen_helper_in_func(ot, s->T0, cpu_tmp2_i32);
+    tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
+    tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
+    gen_helper_in_func(ot, s->T0, s->tmp2_i32);
     gen_op_st_v(s, ot, s->T0, s->A0);
     gen_op_movl_T0_Dshift(s, ot);
     gen_op_add_reg_T0(s, s->aflag, R_EDI);
-    gen_bpt_io(s, cpu_tmp2_i32, ot);
+    gen_bpt_io(s, s->tmp2_i32, ot);
     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
         gen_io_end();
     }
@@ -1156,13 +1157,13 @@ static inline void gen_outs(DisasContext *s, TCGMemOp ot)
     gen_string_movl_A0_ESI(s);
     gen_op_ld_v(s, ot, s->T0, s->A0);
 
-    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
-    tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
+    tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
+    tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
     tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T0);
-    gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
+    gen_helper_out_func(ot, s->tmp2_i32, cpu_tmp3_i32);
     gen_op_movl_T0_Dshift(s, ot);
     gen_op_add_reg_T0(s, s->aflag, R_ESI);
-    gen_bpt_io(s, cpu_tmp2_i32, ot);
+    gen_bpt_io(s, s->tmp2_i32, ot);
     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
         gen_io_end();
     }
@@ -1421,7 +1422,7 @@ static void gen_shift_flags(DisasContext *s, TCGMemOp ot, TCGv result,
     tcg_temp_free(z_tl);
 
     /* Get the two potential CC_OP values into temporaries.  */
-    tcg_gen_movi_i32(cpu_tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
+    tcg_gen_movi_i32(s->tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
     if (s->cc_op == CC_OP_DYNAMIC) {
         oldop = cpu_cc_op;
     } else {
@@ -1433,7 +1434,7 @@ static void gen_shift_flags(DisasContext *s, TCGMemOp ot, TCGv result,
     z32 = tcg_const_i32(0);
     s32 = tcg_temp_new_i32();
     tcg_gen_trunc_tl_i32(s32, count);
-    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, cpu_tmp2_i32, oldop);
+    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, s->tmp2_i32, oldop);
     tcg_temp_free_i32(z32);
     tcg_temp_free_i32(s32);
 
@@ -1544,14 +1545,14 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
     do_long:
 #ifdef TARGET_X86_64
     case MO_32:
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
+        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
         tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T1);
         if (is_right) {
-            tcg_gen_rotr_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
+            tcg_gen_rotr_i32(s->tmp2_i32, s->tmp2_i32, cpu_tmp3_i32);
         } else {
-            tcg_gen_rotl_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
+            tcg_gen_rotl_i32(s->tmp2_i32, s->tmp2_i32, cpu_tmp3_i32);
         }
-        tcg_gen_extu_i32_tl(s->T0, cpu_tmp2_i32);
+        tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
         break;
 #endif
     default:
@@ -1591,10 +1592,10 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
     t0 = tcg_const_i32(0);
     t1 = tcg_temp_new_i32();
     tcg_gen_trunc_tl_i32(t1, s->T1);
-    tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX); 
+    tcg_gen_movi_i32(s->tmp2_i32, CC_OP_ADCOX);
     tcg_gen_movi_i32(cpu_tmp3_i32, CC_OP_EFLAGS);
     tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
-                        cpu_tmp2_i32, cpu_tmp3_i32);
+                        s->tmp2_i32, cpu_tmp3_i32);
     tcg_temp_free_i32(t0);
     tcg_temp_free_i32(t1);
 
@@ -1620,13 +1621,13 @@ static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
         switch (ot) {
 #ifdef TARGET_X86_64
         case MO_32:
-            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
+            tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
             if (is_right) {
-                tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
+                tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, op2);
             } else {
-                tcg_gen_rotli_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
+                tcg_gen_rotli_i32(s->tmp2_i32, s->tmp2_i32, op2);
             }
-            tcg_gen_extu_i32_tl(s->T0, cpu_tmp2_i32);
+            tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
             break;
 #endif
         default:
@@ -2111,8 +2112,8 @@ static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
         tcg_gen_ext32u_i64(cpu_tmp1_i64, cpu_tmp1_i64);
     }
     tcg_gen_setcond_i64(cond, cpu_tmp1_i64, cpu_tmp1_i64, bndv);
-    tcg_gen_extrl_i64_i32(cpu_tmp2_i32, cpu_tmp1_i64);
-    gen_helper_bndck(cpu_env, cpu_tmp2_i32);
+    tcg_gen_extrl_i64_i32(s->tmp2_i32, cpu_tmp1_i64);
+    gen_helper_bndck(cpu_env, s->tmp2_i32);
 }
 
 /* used for LEA and MOV AX, mem */
@@ -2289,8 +2290,8 @@ static inline void gen_op_movl_seg_T0_vm(DisasContext *s, int seg_reg)
 static void gen_movl_seg_T0(DisasContext *s, int seg_reg)
 {
     if (s->pe && !s->vm86) {
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-        gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), cpu_tmp2_i32);
+        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+        gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), s->tmp2_i32);
         /* abort translation because the addseg value may change or
            because ss32 may change. For R_SS, translation must always
            stop as a special handling must be done to disable hardware
@@ -2684,10 +2685,10 @@ static inline void gen_op_movq(int d_offset, int s_offset)
     tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
 }
 
-static inline void gen_op_movl(int d_offset, int s_offset)
+static inline void gen_op_movl(DisasContext *s, int d_offset, int s_offset)
 {
-    tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env, s_offset);
-    tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, d_offset);
+    tcg_gen_ld_i32(s->tmp2_i32, cpu_env, s_offset);
+    tcg_gen_st_i32(s->tmp2_i32, cpu_env, d_offset);
 }
 
 static inline void gen_op_movq_env_0(int d_offset)
@@ -3150,8 +3151,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
                                  offsetof(CPUX86State,fpregs[reg].mmx));
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-                gen_helper_movl_mm_T0_mmx(s->ptr0, cpu_tmp2_i32);
+                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+                gen_helper_movl_mm_T0_mmx(s->ptr0, s->tmp2_i32);
             }
             break;
         case 0x16e: /* movd xmm, ea */
@@ -3167,8 +3168,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
                                  offsetof(CPUX86State,xmm_regs[reg]));
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-                gen_helper_movl_mm_T0_xmm(s->ptr0, cpu_tmp2_i32);
+                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+                gen_helper_movl_mm_T0_xmm(s->ptr0, s->tmp2_i32);
             }
             break;
         case 0x6f: /* movq mm, ea */
@@ -3213,7 +3214,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
+                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
             }
             break;
@@ -3252,14 +3253,14 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
+                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
-                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)),
+                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(2)));
             }
-            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)),
+            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
-            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)),
+            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
             break;
         case 0x312: /* movddup */
@@ -3294,14 +3295,14 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)),
+                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(1)));
-                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)),
+                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(3)));
             }
-            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
+            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
-            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)),
+            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
             break;
         case 0x178:
@@ -3398,7 +3399,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 gen_op_st_v(s, MO_32, s->T0, s->A0);
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movl(offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)),
+                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_L(0)),
                             offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
             }
             break;
@@ -3480,15 +3481,15 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             rm = (modrm & 7) | REX_B(s);
             tcg_gen_addi_ptr(s->ptr0, cpu_env,
                              offsetof(CPUX86State,xmm_regs[rm]));
-            gen_helper_movmskps(cpu_tmp2_i32, cpu_env, s->ptr0);
-            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
+            gen_helper_movmskps(s->tmp2_i32, cpu_env, s->ptr0);
+            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
             break;
         case 0x150: /* movmskpd */
             rm = (modrm & 7) | REX_B(s);
             tcg_gen_addi_ptr(s->ptr0, cpu_env,
                              offsetof(CPUX86State,xmm_regs[rm]));
-            gen_helper_movmskpd(cpu_tmp2_i32, cpu_env, s->ptr0);
-            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
+            gen_helper_movmskpd(s->tmp2_i32, cpu_env, s->ptr0);
+            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
             break;
         case 0x02a: /* cvtpi2ps */
         case 0x12a: /* cvtpi2pd */
@@ -3522,8 +3523,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
             if (ot == MO_32) {
                 SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-                sse_fn_epi(cpu_env, s->ptr0, cpu_tmp2_i32);
+                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+                sse_fn_epi(cpu_env, s->ptr0, s->tmp2_i32);
             } else {
 #ifdef TARGET_X86_64
                 SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
@@ -3587,8 +3588,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             if (ot == MO_32) {
                 SSEFunc_i_ep sse_fn_i_ep =
                     sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
-                sse_fn_i_ep(cpu_tmp2_i32, cpu_env, s->ptr0);
-                tcg_gen_extu_i32_tl(s->T0, cpu_tmp2_i32);
+                sse_fn_i_ep(s->tmp2_i32, cpu_env, s->ptr0);
+                tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
             } else {
 #ifdef TARGET_X86_64
                 SSEFunc_l_ep sse_fn_l_ep =
@@ -3668,15 +3669,15 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 rm = (modrm & 7) | REX_B(s);
                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
                                  offsetof(CPUX86State, xmm_regs[rm]));
-                gen_helper_pmovmskb_xmm(cpu_tmp2_i32, cpu_env, s->ptr0);
+                gen_helper_pmovmskb_xmm(s->tmp2_i32, cpu_env, s->ptr0);
             } else {
                 rm = (modrm & 7);
                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
                                  offsetof(CPUX86State, fpregs[rm].mmx));
-                gen_helper_pmovmskb_mmx(cpu_tmp2_i32, cpu_env, s->ptr0);
+                gen_helper_pmovmskb_mmx(s->tmp2_i32, cpu_env, s->ptr0);
             }
             reg = ((modrm >> 3) & 7) | rex_r;
-            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
+            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
             break;
 
         case 0x138:
@@ -3716,9 +3717,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                         break;
                     case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
                     case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
+                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
-                        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, op2_offset +
+                        tcg_gen_st_i32(s->tmp2_i32, cpu_env, op2_offset +
                                         offsetof(ZMMReg, ZMM_L(0)));
                         break;
                     case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
@@ -3780,9 +3781,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     ot = MO_64;
                 }
 
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[reg]);
+                tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[reg]);
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-                gen_helper_crc32(s->T0, cpu_tmp2_i32,
+                gen_helper_crc32(s->T0, s->tmp2_i32,
                                  s->T0, tcg_const_i32(8 << ot));
 
                 ot = mo_64_32(s->dflag);
@@ -3910,11 +3911,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                 switch (ot) {
                 default:
-                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
+                    tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
                     tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EDX]);
-                    tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
-                                      cpu_tmp2_i32, cpu_tmp3_i32);
-                    tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], cpu_tmp2_i32);
+                    tcg_gen_mulu2_i32(s->tmp2_i32, cpu_tmp3_i32,
+                                      s->tmp2_i32, cpu_tmp3_i32);
+                    tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32);
                     tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp3_i32);
                     break;
 #ifdef TARGET_X86_64
@@ -4162,13 +4163,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     break;
                 case 0x16:
                     if (ot == MO_32) { /* pextrd */
-                        tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
+                        tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
                                         offsetof(CPUX86State,
                                                 xmm_regs[reg].ZMM_L(val & 3)));
                         if (mod == 3) {
-                            tcg_gen_extu_i32_tl(cpu_regs[rm], cpu_tmp2_i32);
+                            tcg_gen_extu_i32_tl(cpu_regs[rm], s->tmp2_i32);
                         } else {
-                            tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
+                            tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                                 s->mem_index, MO_LEUL);
                         }
                     } else { /* pextrq */
@@ -4209,14 +4210,14 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     break;
                 case 0x21: /* insertps */
                     if (mod == 3) {
-                        tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
+                        tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
                                         offsetof(CPUX86State,xmm_regs[rm]
                                                 .ZMM_L((val >> 6) & 3)));
                     } else {
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
+                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
                     }
-                    tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
+                    tcg_gen_st_i32(s->tmp2_i32, cpu_env,
                                     offsetof(CPUX86State,xmm_regs[reg]
                                             .ZMM_L((val >> 4) & 3)));
                     if ((val >> 0) & 1)
@@ -4239,12 +4240,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 case 0x22:
                     if (ot == MO_32) { /* pinsrd */
                         if (mod == 3) {
-                            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[rm]);
+                            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[rm]);
                         } else {
-                            tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
+                            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                                 s->mem_index, MO_LEUL);
                         }
-                        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
+                        tcg_gen_st_i32(s->tmp2_i32, cpu_env,
                                         offsetof(CPUX86State,
                                                 xmm_regs[reg].ZMM_L(val & 3)));
                     } else { /* pinsrq */
@@ -4321,9 +4322,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 if (ot == MO_64) {
                     tcg_gen_rotri_tl(s->T0, s->T0, b & 63);
                 } else {
-                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-                    tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, b & 31);
-                    tcg_gen_extu_i32_tl(s->T0, cpu_tmp2_i32);
+                    tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+                    tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b & 31);
+                    tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
                 }
                 gen_op_mov_reg_v(ot, reg, s->T0);
                 break;
@@ -4880,11 +4881,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 break;
             default:
             case MO_32:
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
+                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
                 tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
-                tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
-                                  cpu_tmp2_i32, cpu_tmp3_i32);
-                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], cpu_tmp2_i32);
+                tcg_gen_mulu2_i32(s->tmp2_i32, cpu_tmp3_i32,
+                                  s->tmp2_i32, cpu_tmp3_i32);
+                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
                 tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32);
                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
                 tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
@@ -4931,16 +4932,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 break;
             default:
             case MO_32:
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
+                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
                 tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
-                tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
-                                  cpu_tmp2_i32, cpu_tmp3_i32);
-                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], cpu_tmp2_i32);
+                tcg_gen_muls2_i32(s->tmp2_i32, cpu_tmp3_i32,
+                                  s->tmp2_i32, cpu_tmp3_i32);
+                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
                 tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32);
-                tcg_gen_sari_i32(cpu_tmp2_i32, cpu_tmp2_i32, 31);
+                tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
-                tcg_gen_sub_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
-                tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
+                tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, cpu_tmp3_i32);
+                tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
                 set_cc_op(s, CC_OP_MULL);
                 break;
 #ifdef TARGET_X86_64
@@ -5061,13 +5062,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_op_ld_v(s, MO_16, s->T0, s->A0);
         do_lcall:
             if (s->pe && !s->vm86) {
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-                gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, s->T1,
+                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+                gen_helper_lcall_protected(cpu_env, s->tmp2_i32, s->T1,
                                            tcg_const_i32(dflag - 1),
                                            tcg_const_tl(s->pc - s->cs_base));
             } else {
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-                gen_helper_lcall_real(cpu_env, cpu_tmp2_i32, s->T1,
+                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+                gen_helper_lcall_real(cpu_env, s->tmp2_i32, s->T1,
                                       tcg_const_i32(dflag - 1),
                                       tcg_const_i32(s->pc - s->cs_base));
             }
@@ -5088,8 +5089,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_op_ld_v(s, MO_16, s->T0, s->A0);
         do_ljmp:
             if (s->pe && !s->vm86) {
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-                gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, s->T1,
+                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+                gen_helper_ljmp_protected(cpu_env, s->tmp2_i32, s->T1,
                                           tcg_const_tl(s->pc - s->cs_base));
             } else {
                 gen_op_movl_seg_T0_vm(s, R_CS);
@@ -5208,15 +5209,15 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             break;
 #endif
         case MO_32:
-            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
+            tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
             tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T1);
-            tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
-                              cpu_tmp2_i32, cpu_tmp3_i32);
-            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
-            tcg_gen_sari_i32(cpu_tmp2_i32, cpu_tmp2_i32, 31);
+            tcg_gen_muls2_i32(s->tmp2_i32, cpu_tmp3_i32,
+                              s->tmp2_i32, cpu_tmp3_i32);
+            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
+            tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
             tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
-            tcg_gen_sub_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
-            tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
+            tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, cpu_tmp3_i32);
+            tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
             break;
         default:
             tcg_gen_ext16s_tl(s->T0, s->T0);
@@ -5820,14 +5821,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
 
                     switch(op >> 4) {
                     case 0:
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
+                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
-                        gen_helper_flds_FT0(cpu_env, cpu_tmp2_i32);
+                        gen_helper_flds_FT0(cpu_env, s->tmp2_i32);
                         break;
                     case 1:
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
+                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
-                        gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32);
+                        gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
                         break;
                     case 2:
                         tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0,
@@ -5836,9 +5837,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                         break;
                     case 3:
                     default:
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
+                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                             s->mem_index, MO_LESW);
-                        gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32);
+                        gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
                         break;
                     }
 
@@ -5859,14 +5860,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 case 0:
                     switch(op >> 4) {
                     case 0:
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
+                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
-                        gen_helper_flds_ST0(cpu_env, cpu_tmp2_i32);
+                        gen_helper_flds_ST0(cpu_env, s->tmp2_i32);
                         break;
                     case 1:
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
+                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
-                        gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32);
+                        gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
                         break;
                     case 2:
                         tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0,
@@ -5875,9 +5876,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                         break;
                     case 3:
                     default:
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
+                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                             s->mem_index, MO_LESW);
-                        gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32);
+                        gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
                         break;
                     }
                     break;
@@ -5885,8 +5886,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                     /* XXX: the corresponding CPUID bit must be tested ! */
                     switch(op >> 4) {
                     case 1:
-                        gen_helper_fisttl_ST0(cpu_tmp2_i32, cpu_env);
-                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
+                        gen_helper_fisttl_ST0(s->tmp2_i32, cpu_env);
+                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
                         break;
                     case 2:
@@ -5896,8 +5897,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                         break;
                     case 3:
                     default:
-                        gen_helper_fistt_ST0(cpu_tmp2_i32, cpu_env);
-                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
+                        gen_helper_fistt_ST0(s->tmp2_i32, cpu_env);
+                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUW);
                         break;
                     }
@@ -5906,13 +5907,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 default:
                     switch(op >> 4) {
                     case 0:
-                        gen_helper_fsts_ST0(cpu_tmp2_i32, cpu_env);
-                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
+                        gen_helper_fsts_ST0(s->tmp2_i32, cpu_env);
+                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
                         break;
                     case 1:
-                        gen_helper_fistl_ST0(cpu_tmp2_i32, cpu_env);
-                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
+                        gen_helper_fistl_ST0(s->tmp2_i32, cpu_env);
+                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
                         break;
                     case 2:
@@ -5922,8 +5923,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                         break;
                     case 3:
                     default:
-                        gen_helper_fist_ST0(cpu_tmp2_i32, cpu_env);
-                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
+                        gen_helper_fist_ST0(s->tmp2_i32, cpu_env);
+                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUW);
                         break;
                     }
@@ -5936,16 +5937,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 gen_helper_fldenv(cpu_env, s->A0, tcg_const_i32(dflag - 1));
                 break;
             case 0x0d: /* fldcw mem */
-                tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
+                tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                     s->mem_index, MO_LEUW);
-                gen_helper_fldcw(cpu_env, cpu_tmp2_i32);
+                gen_helper_fldcw(cpu_env, s->tmp2_i32);
                 break;
             case 0x0e: /* fnstenv mem */
                 gen_helper_fstenv(cpu_env, s->A0, tcg_const_i32(dflag - 1));
                 break;
             case 0x0f: /* fnstcw mem */
-                gen_helper_fnstcw(cpu_tmp2_i32, cpu_env);
-                tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
+                gen_helper_fnstcw(s->tmp2_i32, cpu_env);
+                tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                     s->mem_index, MO_LEUW);
                 break;
             case 0x1d: /* fldt mem */
@@ -5962,8 +5963,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 gen_helper_fsave(cpu_env, s->A0, tcg_const_i32(dflag - 1));
                 break;
             case 0x2f: /* fnstsw mem */
-                gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
-                tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
+                gen_helper_fnstsw(s->tmp2_i32, cpu_env);
+                tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                     s->mem_index, MO_LEUW);
                 break;
             case 0x3c: /* fbld */
@@ -6241,8 +6242,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             case 0x3c: /* df/4 */
                 switch(rm) {
                 case 0:
-                    gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
-                    tcg_gen_extu_i32_tl(s->T0, cpu_tmp2_i32);
+                    gen_helper_fnstsw(s->tmp2_i32, cpu_env);
+                    tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
                     gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
                     break;
                 default:
@@ -6394,10 +6395,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_start();
 	}
-        tcg_gen_movi_i32(cpu_tmp2_i32, val);
-        gen_helper_in_func(ot, s->T1, cpu_tmp2_i32);
+        tcg_gen_movi_i32(s->tmp2_i32, val);
+        gen_helper_in_func(ot, s->T1, s->tmp2_i32);
         gen_op_mov_reg_v(ot, R_EAX, s->T1);
-        gen_bpt_io(s, cpu_tmp2_i32, ot);
+        gen_bpt_io(s, s->tmp2_i32, ot);
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_end();
             gen_jmp(s, s->pc - s->cs_base);
@@ -6415,10 +6416,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_start();
 	}
-        tcg_gen_movi_i32(cpu_tmp2_i32, val);
+        tcg_gen_movi_i32(s->tmp2_i32, val);
         tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T1);
-        gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
-        gen_bpt_io(s, cpu_tmp2_i32, ot);
+        gen_helper_out_func(ot, s->tmp2_i32, cpu_tmp3_i32);
+        gen_bpt_io(s, s->tmp2_i32, ot);
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_end();
             gen_jmp(s, s->pc - s->cs_base);
@@ -6433,10 +6434,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_start();
 	}
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-        gen_helper_in_func(ot, s->T1, cpu_tmp2_i32);
+        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+        gen_helper_in_func(ot, s->T1, s->tmp2_i32);
         gen_op_mov_reg_v(ot, R_EAX, s->T1);
-        gen_bpt_io(s, cpu_tmp2_i32, ot);
+        gen_bpt_io(s, s->tmp2_i32, ot);
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_end();
             gen_jmp(s, s->pc - s->cs_base);
@@ -6453,10 +6454,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_start();
 	}
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
+        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
         tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T1);
-        gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
-        gen_bpt_io(s, cpu_tmp2_i32, ot);
+        gen_helper_out_func(ot, s->tmp2_i32, cpu_tmp3_i32);
+        gen_bpt_io(s, s->tmp2_i32, ot);
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_end();
             gen_jmp(s, s->pc - s->cs_base);
@@ -6734,12 +6735,12 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
         break;
     case 0xfc: /* cld */
-        tcg_gen_movi_i32(cpu_tmp2_i32, 1);
-        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUX86State, df));
+        tcg_gen_movi_i32(s->tmp2_i32, 1);
+        tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
         break;
     case 0xfd: /* std */
-        tcg_gen_movi_i32(cpu_tmp2_i32, -1);
-        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUX86State, df));
+        tcg_gen_movi_i32(s->tmp2_i32, -1);
+        tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
         break;
 
         /************************/
@@ -7071,11 +7072,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             goto illegal_op;
         gen_op_mov_v_reg(ot, s->T0, reg);
         gen_lea_modrm(env, s, modrm);
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
+        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
         if (ot == MO_16) {
-            gen_helper_boundw(cpu_env, s->A0, cpu_tmp2_i32);
+            gen_helper_boundw(cpu_env, s->A0, s->tmp2_i32);
         } else {
-            gen_helper_boundl(cpu_env, s->A0, cpu_tmp2_i32);
+            gen_helper_boundl(cpu_env, s->A0, s->tmp2_i32);
         }
         break;
     case 0x1c8 ... 0x1cf: /* bswap reg */
@@ -7264,8 +7265,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
                 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-                gen_helper_lldt(cpu_env, cpu_tmp2_i32);
+                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+                gen_helper_lldt(cpu_env, s->tmp2_i32);
             }
             break;
         case 1: /* str */
@@ -7285,8 +7286,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
                 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-                gen_helper_ltr(cpu_env, cpu_tmp2_i32);
+                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+                gen_helper_ltr(cpu_env, s->tmp2_i32);
             }
             break;
         case 4: /* verr */
@@ -7385,8 +7386,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                                  | PREFIX_REPZ | PREFIX_REPNZ))) {
                 goto illegal_op;
             }
-            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
-            gen_helper_xgetbv(cpu_tmp1_i64, cpu_env, cpu_tmp2_i32);
+            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
+            gen_helper_xgetbv(cpu_tmp1_i64, cpu_env, s->tmp2_i32);
             tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], cpu_tmp1_i64);
             break;
 
@@ -7402,8 +7403,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             }
             tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
                                   cpu_regs[R_EDX]);
-            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
-            gen_helper_xsetbv(cpu_env, cpu_tmp2_i32, cpu_tmp1_i64);
+            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
+            gen_helper_xsetbv(cpu_env, s->tmp2_i32, cpu_tmp1_i64);
             /* End TB because translation flags may change.  */
             gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob(s);
@@ -7562,8 +7563,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             if (prefixes & PREFIX_LOCK) {
                 goto illegal_op;
             }
-            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
-            gen_helper_rdpkru(cpu_tmp1_i64, cpu_env, cpu_tmp2_i32);
+            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
+            gen_helper_rdpkru(cpu_tmp1_i64, cpu_env, s->tmp2_i32);
             tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], cpu_tmp1_i64);
             break;
         case 0xef: /* wrpkru */
@@ -7572,8 +7573,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             }
             tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
                                   cpu_regs[R_EDX]);
-            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
-            gen_helper_wrpkru(cpu_env, cpu_tmp2_i32, cpu_tmp1_i64);
+            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
+            gen_helper_wrpkru(cpu_env, s->tmp2_i32, cpu_tmp1_i64);
             break;
         CASE_MODRM_OP(6): /* lmsw */
             if (s->cpl != 0) {
@@ -8042,14 +8043,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             if (b & 2) {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg);
                 gen_op_mov_v_reg(ot, s->T0, rm);
-                tcg_gen_movi_i32(cpu_tmp2_i32, reg);
-                gen_helper_set_dr(cpu_env, cpu_tmp2_i32, s->T0);
+                tcg_gen_movi_i32(s->tmp2_i32, reg);
+                gen_helper_set_dr(cpu_env, s->tmp2_i32, s->T0);
                 gen_jmp_im(s, s->pc - s->cs_base);
                 gen_eob(s);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
-                tcg_gen_movi_i32(cpu_tmp2_i32, reg);
-                gen_helper_get_dr(s->T0, cpu_env, cpu_tmp2_i32);
+                tcg_gen_movi_i32(s->tmp2_i32, reg);
+                gen_helper_get_dr(s->T0, cpu_env, s->tmp2_i32);
                 gen_op_mov_reg_v(ot, rm, s->T0);
             }
         }
@@ -8116,8 +8117,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 break;
             }
             gen_lea_modrm(env, s, modrm);
-            tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0, s->mem_index, MO_LEUL);
-            gen_helper_ldmxcsr(cpu_env, cpu_tmp2_i32);
+            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
+            gen_helper_ldmxcsr(cpu_env, s->tmp2_i32);
             break;
 
         CASE_MODRM_MEM_OP(3): /* stmxcsr */
@@ -8216,8 +8217,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 TCGv base, treg, src, dst;
 
                 /* Preserve hflags bits by testing CR4 at runtime.  */
-                tcg_gen_movi_i32(cpu_tmp2_i32, CR4_FSGSBASE_MASK);
-                gen_helper_cr4_testbit(cpu_env, cpu_tmp2_i32);
+                tcg_gen_movi_i32(s->tmp2_i32, CR4_FSGSBASE_MASK);
+                gen_helper_cr4_testbit(cpu_env, s->tmp2_i32);
 
                 base = cpu_seg_base[modrm & 8 ? R_GS : R_FS];
                 treg = cpu_regs[(modrm & 7) | REX_B(s)];
@@ -8484,7 +8485,7 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
 
     dc->tmp0 = tcg_temp_new();
     cpu_tmp1_i64 = tcg_temp_new_i64();
-    cpu_tmp2_i32 = tcg_temp_new_i32();
+    dc->tmp2_i32 = tcg_temp_new_i32();
     cpu_tmp3_i32 = tcg_temp_new_i32();
     dc->tmp4 = tcg_temp_new();
     dc->ptr0 = tcg_temp_new_ptr();
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [Qemu-devel] [PATCH v3 10/13] target/i386: move cpu_tmp3_i32 to DisasContext
  2018-09-11 20:28 [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg Emilio G. Cota
                   ` (8 preceding siblings ...)
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 09/13] target/i386: move cpu_tmp2_i32 " Emilio G. Cota
@ 2018-09-11 20:28 ` Emilio G. Cota
  2018-09-11 20:56   ` Richard Henderson
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 11/13] target/i386: move cpu_tmp1_i64 " Emilio G. Cota
                   ` (3 subsequent siblings)
  13 siblings, 1 reply; 32+ messages in thread
From: Emilio G. Cota @ 2018-09-11 20:28 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 target/i386/translate.c | 64 ++++++++++++++++++++---------------------
 1 file changed, 32 insertions(+), 32 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index ec68f7dba1..cd880cc2a8 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -79,7 +79,6 @@ static TCGv cpu_seg_base[6];
 static TCGv_i64 cpu_bndl[4];
 static TCGv_i64 cpu_bndu[4];
 
-static TCGv_i32 cpu_tmp3_i32;
 static TCGv_i64 cpu_tmp1_i64;
 
 #include "exec/gen-icount.h"
@@ -143,6 +142,7 @@ typedef struct DisasContext {
     TCGv_ptr ptr0;
     TCGv_ptr ptr1;
     TCGv_i32 tmp2_i32;
+    TCGv_i32 tmp3_i32;
 
     sigjmp_buf jmpbuf;
 } DisasContext;
@@ -1159,8 +1159,8 @@ static inline void gen_outs(DisasContext *s, TCGMemOp ot)
 
     tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
     tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
-    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T0);
-    gen_helper_out_func(ot, s->tmp2_i32, cpu_tmp3_i32);
+    tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T0);
+    gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
     gen_op_movl_T0_Dshift(s, ot);
     gen_op_add_reg_T0(s, s->aflag, R_ESI);
     gen_bpt_io(s, s->tmp2_i32, ot);
@@ -1426,8 +1426,8 @@ static void gen_shift_flags(DisasContext *s, TCGMemOp ot, TCGv result,
     if (s->cc_op == CC_OP_DYNAMIC) {
         oldop = cpu_cc_op;
     } else {
-        tcg_gen_movi_i32(cpu_tmp3_i32, s->cc_op);
-        oldop = cpu_tmp3_i32;
+        tcg_gen_movi_i32(s->tmp3_i32, s->cc_op);
+        oldop = s->tmp3_i32;
     }
 
     /* Conditionally store the CC_OP value.  */
@@ -1546,11 +1546,11 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
 #ifdef TARGET_X86_64
     case MO_32:
         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
-        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T1);
+        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
         if (is_right) {
-            tcg_gen_rotr_i32(s->tmp2_i32, s->tmp2_i32, cpu_tmp3_i32);
+            tcg_gen_rotr_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
         } else {
-            tcg_gen_rotl_i32(s->tmp2_i32, s->tmp2_i32, cpu_tmp3_i32);
+            tcg_gen_rotl_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
         }
         tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
         break;
@@ -1593,9 +1593,9 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
     t1 = tcg_temp_new_i32();
     tcg_gen_trunc_tl_i32(t1, s->T1);
     tcg_gen_movi_i32(s->tmp2_i32, CC_OP_ADCOX);
-    tcg_gen_movi_i32(cpu_tmp3_i32, CC_OP_EFLAGS);
+    tcg_gen_movi_i32(s->tmp3_i32, CC_OP_EFLAGS);
     tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
-                        s->tmp2_i32, cpu_tmp3_i32);
+                        s->tmp2_i32, s->tmp3_i32);
     tcg_temp_free_i32(t0);
     tcg_temp_free_i32(t1);
 
@@ -3912,11 +3912,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 switch (ot) {
                 default:
                     tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
-                    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EDX]);
-                    tcg_gen_mulu2_i32(s->tmp2_i32, cpu_tmp3_i32,
-                                      s->tmp2_i32, cpu_tmp3_i32);
+                    tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EDX]);
+                    tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
+                                      s->tmp2_i32, s->tmp3_i32);
                     tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32);
-                    tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp3_i32);
+                    tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp3_i32);
                     break;
 #ifdef TARGET_X86_64
                 case MO_64:
@@ -4882,11 +4882,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             default:
             case MO_32:
                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
-                tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
-                tcg_gen_mulu2_i32(s->tmp2_i32, cpu_tmp3_i32,
-                                  s->tmp2_i32, cpu_tmp3_i32);
+                tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
+                tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
+                                  s->tmp2_i32, s->tmp3_i32);
                 tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
-                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32);
+                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
                 tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
                 set_cc_op(s, CC_OP_MULL);
@@ -4933,14 +4933,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             default:
             case MO_32:
                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
-                tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
-                tcg_gen_muls2_i32(s->tmp2_i32, cpu_tmp3_i32,
-                                  s->tmp2_i32, cpu_tmp3_i32);
+                tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
+                tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
+                                  s->tmp2_i32, s->tmp3_i32);
                 tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
-                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32);
+                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
                 tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
-                tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, cpu_tmp3_i32);
+                tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
                 tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
                 set_cc_op(s, CC_OP_MULL);
                 break;
@@ -5210,13 +5210,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
 #endif
         case MO_32:
             tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
-            tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T1);
-            tcg_gen_muls2_i32(s->tmp2_i32, cpu_tmp3_i32,
-                              s->tmp2_i32, cpu_tmp3_i32);
+            tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
+            tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
+                              s->tmp2_i32, s->tmp3_i32);
             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
             tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
             tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
-            tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, cpu_tmp3_i32);
+            tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
             tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
             break;
         default:
@@ -6417,8 +6417,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_io_start();
 	}
         tcg_gen_movi_i32(s->tmp2_i32, val);
-        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T1);
-        gen_helper_out_func(ot, s->tmp2_i32, cpu_tmp3_i32);
+        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
+        gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
         gen_bpt_io(s, s->tmp2_i32, ot);
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_end();
@@ -6455,8 +6455,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_io_start();
 	}
         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
-        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T1);
-        gen_helper_out_func(ot, s->tmp2_i32, cpu_tmp3_i32);
+        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
+        gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
         gen_bpt_io(s, s->tmp2_i32, ot);
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_end();
@@ -8486,7 +8486,7 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
     dc->tmp0 = tcg_temp_new();
     cpu_tmp1_i64 = tcg_temp_new_i64();
     dc->tmp2_i32 = tcg_temp_new_i32();
-    cpu_tmp3_i32 = tcg_temp_new_i32();
+    dc->tmp3_i32 = tcg_temp_new_i32();
     dc->tmp4 = tcg_temp_new();
     dc->ptr0 = tcg_temp_new_ptr();
     dc->ptr1 = tcg_temp_new_ptr();
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [Qemu-devel] [PATCH v3 11/13] target/i386: move cpu_tmp1_i64 to DisasContext
  2018-09-11 20:28 [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg Emilio G. Cota
                   ` (9 preceding siblings ...)
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 10/13] target/i386: move cpu_tmp3_i32 " Emilio G. Cota
@ 2018-09-11 20:28 ` Emilio G. Cota
  2018-09-11 20:57   ` Richard Henderson
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 12/13] target/i386: move x86_64_hregs " Emilio G. Cota
                   ` (2 subsequent siblings)
  13 siblings, 1 reply; 32+ messages in thread
From: Emilio G. Cota @ 2018-09-11 20:28 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 target/i386/translate.c | 160 ++++++++++++++++++++--------------------
 1 file changed, 80 insertions(+), 80 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index cd880cc2a8..61a98ef872 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -79,8 +79,6 @@ static TCGv cpu_seg_base[6];
 static TCGv_i64 cpu_bndl[4];
 static TCGv_i64 cpu_bndu[4];
 
-static TCGv_i64 cpu_tmp1_i64;
-
 #include "exec/gen-icount.h"
 
 #ifdef TARGET_X86_64
@@ -143,6 +141,7 @@ typedef struct DisasContext {
     TCGv_ptr ptr1;
     TCGv_i32 tmp2_i32;
     TCGv_i32 tmp3_i32;
+    TCGv_i64 tmp1_i64;
 
     sigjmp_buf jmpbuf;
 } DisasContext;
@@ -2107,12 +2106,12 @@ static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
 {
     TCGv ea = gen_lea_modrm_1(s, gen_lea_modrm_0(env, s, modrm));
 
-    tcg_gen_extu_tl_i64(cpu_tmp1_i64, ea);
+    tcg_gen_extu_tl_i64(s->tmp1_i64, ea);
     if (!CODE64(s)) {
-        tcg_gen_ext32u_i64(cpu_tmp1_i64, cpu_tmp1_i64);
+        tcg_gen_ext32u_i64(s->tmp1_i64, s->tmp1_i64);
     }
-    tcg_gen_setcond_i64(cond, cpu_tmp1_i64, cpu_tmp1_i64, bndv);
-    tcg_gen_extrl_i64_i32(s->tmp2_i32, cpu_tmp1_i64);
+    tcg_gen_setcond_i64(cond, s->tmp1_i64, s->tmp1_i64, bndv);
+    tcg_gen_extrl_i64_i32(s->tmp2_i32, s->tmp1_i64);
     gen_helper_bndck(cpu_env, s->tmp2_i32);
 }
 
@@ -2641,48 +2640,48 @@ static void gen_jmp(DisasContext *s, target_ulong eip)
 
 static inline void gen_ldq_env_A0(DisasContext *s, int offset)
 {
-    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0, s->mem_index, MO_LEQ);
-    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset);
+    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
+    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset);
 }
 
 static inline void gen_stq_env_A0(DisasContext *s, int offset)
 {
-    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset);
-    tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->A0, s->mem_index, MO_LEQ);
+    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset);
+    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
 }
 
 static inline void gen_ldo_env_A0(DisasContext *s, int offset)
 {
     int mem_index = s->mem_index;
-    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0, mem_index, MO_LEQ);
-    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
+    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
+    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
     tcg_gen_addi_tl(s->tmp0, s->A0, 8);
-    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->tmp0, mem_index, MO_LEQ);
-    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
+    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
+    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
 }
 
 static inline void gen_sto_env_A0(DisasContext *s, int offset)
 {
     int mem_index = s->mem_index;
-    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
-    tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->A0, mem_index, MO_LEQ);
+    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
+    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
     tcg_gen_addi_tl(s->tmp0, s->A0, 8);
-    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
-    tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->tmp0, mem_index, MO_LEQ);
+    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
+    tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
 }
 
-static inline void gen_op_movo(int d_offset, int s_offset)
+static inline void gen_op_movo(DisasContext *s, int d_offset, int s_offset)
 {
-    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
-    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
-    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
-    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
+    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
+    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
+    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
+    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
 }
 
-static inline void gen_op_movq(int d_offset, int s_offset)
+static inline void gen_op_movq(DisasContext *s, int d_offset, int s_offset)
 {
-    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset);
-    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
+    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset);
+    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
 }
 
 static inline void gen_op_movl(DisasContext *s, int d_offset, int s_offset)
@@ -2691,10 +2690,10 @@ static inline void gen_op_movl(DisasContext *s, int d_offset, int s_offset)
     tcg_gen_st_i32(s->tmp2_i32, cpu_env, d_offset);
 }
 
-static inline void gen_op_movq_env_0(int d_offset)
+static inline void gen_op_movq_env_0(DisasContext *s, int d_offset)
 {
-    tcg_gen_movi_i64(cpu_tmp1_i64, 0);
-    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
+    tcg_gen_movi_i64(s->tmp1_i64, 0);
+    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
 }
 
 typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
@@ -3178,9 +3177,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
             } else {
                 rm = (modrm & 7);
-                tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
+                tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
                                offsetof(CPUX86State,fpregs[rm].mmx));
-                tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
+                tcg_gen_st_i64(s->tmp1_i64, cpu_env,
                                offsetof(CPUX86State,fpregs[reg].mmx));
             }
             break;
@@ -3195,7 +3194,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movo(offsetof(CPUX86State,xmm_regs[reg]),
+                gen_op_movo(s, offsetof(CPUX86State, xmm_regs[reg]),
                             offsetof(CPUX86State,xmm_regs[rm]));
             }
             break;
@@ -3230,7 +3229,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
+                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
             }
             break;
@@ -3243,7 +3242,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             } else {
                 /* movhlps */
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
+                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
             }
             break;
@@ -3270,10 +3269,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                                            xmm_regs[reg].ZMM_Q(0)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
+                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
             }
-            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)),
+            gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
                         offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
             break;
         case 0x016: /* movhps */
@@ -3285,7 +3284,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             } else {
                 /* movlhps */
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)),
+                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
             }
             break;
@@ -3361,10 +3360,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                                            xmm_regs[reg].ZMM_Q(0)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
+                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
             }
-            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)));
+            gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
             break;
         case 0x7f: /* movq ea, mm */
             if (mod != 3) {
@@ -3372,7 +3371,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
             } else {
                 rm = (modrm & 7);
-                gen_op_movq(offsetof(CPUX86State,fpregs[rm].mmx),
+                gen_op_movq(s, offsetof(CPUX86State, fpregs[rm].mmx),
                             offsetof(CPUX86State,fpregs[reg].mmx));
             }
             break;
@@ -3387,7 +3386,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movo(offsetof(CPUX86State,xmm_regs[rm]),
+                gen_op_movo(s, offsetof(CPUX86State, xmm_regs[rm]),
                             offsetof(CPUX86State,xmm_regs[reg]));
             }
             break;
@@ -3410,7 +3409,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                                            xmm_regs[reg].ZMM_Q(0)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)),
+                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
                             offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
             }
             break;
@@ -3643,22 +3642,23 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                                            xmm_regs[reg].ZMM_Q(0)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)),
+                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
                             offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
-                gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
+                gen_op_movq_env_0(s,
+                                  offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(1)));
             }
             break;
         case 0x2d6: /* movq2dq */
             gen_helper_enter_mmx(cpu_env);
             rm = (modrm & 7);
-            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
+            gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
                         offsetof(CPUX86State,fpregs[rm].mmx));
-            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)));
+            gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
             break;
         case 0x3d6: /* movdq2q */
             gen_helper_enter_mmx(cpu_env);
             rm = (modrm & 7) | REX_B(s);
-            gen_op_movq(offsetof(CPUX86State,fpregs[reg & 7].mmx),
+            gen_op_movq(s, offsetof(CPUX86State, fpregs[reg & 7].mmx),
                         offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
             break;
         case 0xd7: /* pmovmskb */
@@ -4174,13 +4174,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                         }
                     } else { /* pextrq */
 #ifdef TARGET_X86_64
-                        tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
+                        tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
                                         offsetof(CPUX86State,
                                                 xmm_regs[reg].ZMM_Q(val & 1)));
                         if (mod == 3) {
-                            tcg_gen_mov_i64(cpu_regs[rm], cpu_tmp1_i64);
+                            tcg_gen_mov_i64(cpu_regs[rm], s->tmp1_i64);
                         } else {
-                            tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->A0,
+                            tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
                                                 s->mem_index, MO_LEQ);
                         }
 #else
@@ -4251,12 +4251,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     } else { /* pinsrq */
 #ifdef TARGET_X86_64
                         if (mod == 3) {
-                            gen_op_mov_v_reg(ot, cpu_tmp1_i64, rm);
+                            gen_op_mov_v_reg(ot, s->tmp1_i64, rm);
                         } else {
-                            tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0,
+                            tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
                                                 s->mem_index, MO_LEQ);
                         }
-                        tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
+                        tcg_gen_st_i64(s->tmp1_i64, cpu_env,
                                         offsetof(CPUX86State,
                                                 xmm_regs[reg].ZMM_Q(val & 1)));
 #else
@@ -5831,9 +5831,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                         gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
                         break;
                     case 2:
-                        tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0,
+                        tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
                                             s->mem_index, MO_LEQ);
-                        gen_helper_fldl_FT0(cpu_env, cpu_tmp1_i64);
+                        gen_helper_fldl_FT0(cpu_env, s->tmp1_i64);
                         break;
                     case 3:
                     default:
@@ -5870,9 +5870,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                         gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
                         break;
                     case 2:
-                        tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0,
+                        tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
                                             s->mem_index, MO_LEQ);
-                        gen_helper_fldl_ST0(cpu_env, cpu_tmp1_i64);
+                        gen_helper_fldl_ST0(cpu_env, s->tmp1_i64);
                         break;
                     case 3:
                     default:
@@ -5891,8 +5891,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                                             s->mem_index, MO_LEUL);
                         break;
                     case 2:
-                        gen_helper_fisttll_ST0(cpu_tmp1_i64, cpu_env);
-                        tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->A0,
+                        gen_helper_fisttll_ST0(s->tmp1_i64, cpu_env);
+                        tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
                                             s->mem_index, MO_LEQ);
                         break;
                     case 3:
@@ -5917,8 +5917,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                                             s->mem_index, MO_LEUL);
                         break;
                     case 2:
-                        gen_helper_fstl_ST0(cpu_tmp1_i64, cpu_env);
-                        tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->A0,
+                        gen_helper_fstl_ST0(s->tmp1_i64, cpu_env);
+                        tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
                                             s->mem_index, MO_LEQ);
                         break;
                     case 3:
@@ -5975,12 +5975,12 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 gen_helper_fpop(cpu_env);
                 break;
             case 0x3d: /* fildll */
-                tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0, s->mem_index, MO_LEQ);
-                gen_helper_fildll_ST0(cpu_env, cpu_tmp1_i64);
+                tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
+                gen_helper_fildll_ST0(cpu_env, s->tmp1_i64);
                 break;
             case 0x3f: /* fistpll */
-                gen_helper_fistll_ST0(cpu_tmp1_i64, cpu_env);
-                tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->A0, s->mem_index, MO_LEQ);
+                gen_helper_fistll_ST0(s->tmp1_i64, cpu_env);
+                tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
                 gen_helper_fpop(cpu_env);
                 break;
             default:
@@ -7387,8 +7387,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 goto illegal_op;
             }
             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
-            gen_helper_xgetbv(cpu_tmp1_i64, cpu_env, s->tmp2_i32);
-            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], cpu_tmp1_i64);
+            gen_helper_xgetbv(s->tmp1_i64, cpu_env, s->tmp2_i32);
+            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
             break;
 
         case 0xd1: /* xsetbv */
@@ -7401,10 +7401,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                 break;
             }
-            tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
+            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
                                   cpu_regs[R_EDX]);
             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
-            gen_helper_xsetbv(cpu_env, s->tmp2_i32, cpu_tmp1_i64);
+            gen_helper_xsetbv(cpu_env, s->tmp2_i32, s->tmp1_i64);
             /* End TB because translation flags may change.  */
             gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob(s);
@@ -7564,17 +7564,17 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 goto illegal_op;
             }
             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
-            gen_helper_rdpkru(cpu_tmp1_i64, cpu_env, s->tmp2_i32);
-            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], cpu_tmp1_i64);
+            gen_helper_rdpkru(s->tmp1_i64, cpu_env, s->tmp2_i32);
+            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
             break;
         case 0xef: /* wrpkru */
             if (prefixes & PREFIX_LOCK) {
                 goto illegal_op;
             }
-            tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
+            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
                                   cpu_regs[R_EDX]);
             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
-            gen_helper_wrpkru(cpu_env, s->tmp2_i32, cpu_tmp1_i64);
+            gen_helper_wrpkru(cpu_env, s->tmp2_i32, s->tmp1_i64);
             break;
         CASE_MODRM_OP(6): /* lmsw */
             if (s->cpl != 0) {
@@ -8141,9 +8141,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 goto illegal_op;
             }
             gen_lea_modrm(env, s, modrm);
-            tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
+            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
                                   cpu_regs[R_EDX]);
-            gen_helper_xsave(cpu_env, s->A0, cpu_tmp1_i64);
+            gen_helper_xsave(cpu_env, s->A0, s->tmp1_i64);
             break;
 
         CASE_MODRM_MEM_OP(5): /* xrstor */
@@ -8153,9 +8153,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 goto illegal_op;
             }
             gen_lea_modrm(env, s, modrm);
-            tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
+            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
                                   cpu_regs[R_EDX]);
-            gen_helper_xrstor(cpu_env, s->A0, cpu_tmp1_i64);
+            gen_helper_xrstor(cpu_env, s->A0, s->tmp1_i64);
             /* XRSTOR is how MPX is enabled, which changes how
                we translate.  Thus we need to end the TB.  */
             gen_update_cc_op(s);
@@ -8181,9 +8181,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                     goto illegal_op;
                 }
                 gen_lea_modrm(env, s, modrm);
-                tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
+                tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
                                       cpu_regs[R_EDX]);
-                gen_helper_xsaveopt(cpu_env, s->A0, cpu_tmp1_i64);
+                gen_helper_xsaveopt(cpu_env, s->A0, s->tmp1_i64);
             }
             break;
 
@@ -8484,7 +8484,7 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
     dc->A0 = tcg_temp_new();
 
     dc->tmp0 = tcg_temp_new();
-    cpu_tmp1_i64 = tcg_temp_new_i64();
+    dc->tmp1_i64 = tcg_temp_new_i64();
     dc->tmp2_i32 = tcg_temp_new_i32();
     dc->tmp3_i32 = tcg_temp_new_i32();
     dc->tmp4 = tcg_temp_new();
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [Qemu-devel] [PATCH v3 12/13] target/i386: move x86_64_hregs to DisasContext
  2018-09-11 20:28 [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg Emilio G. Cota
                   ` (10 preceding siblings ...)
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 11/13] target/i386: move cpu_tmp1_i64 " Emilio G. Cota
@ 2018-09-11 20:28 ` Emilio G. Cota
  2018-09-11 20:58   ` Richard Henderson
  2018-09-13 14:31   ` Alex Bennée
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 13/13] configure: enable mttcg for i386 and x86_64 Emilio G. Cota
  2018-09-12 12:46 ` [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg Paolo Bonzini
  13 siblings, 2 replies; 32+ messages in thread
From: Emilio G. Cota @ 2018-09-11 20:28 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée

And convert it to a bool to use an existing hole
in the struct.

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 target/i386/translate.c | 307 ++++++++++++++++++++--------------------
 1 file changed, 154 insertions(+), 153 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 61a98ef872..b8222dc4ba 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -81,10 +81,6 @@ static TCGv_i64 cpu_bndu[4];
 
 #include "exec/gen-icount.h"
 
-#ifdef TARGET_X86_64
-static int x86_64_hregs;
-#endif
-
 typedef struct DisasContext {
     DisasContextBase base;
 
@@ -109,6 +105,9 @@ typedef struct DisasContext {
     int ss32;   /* 32 bit stack segment */
     CCOp cc_op;  /* current CC operation */
     bool cc_op_dirty;
+#ifdef TARGET_X86_64
+    bool x86_64_hregs;
+#endif
     int addseg; /* non zero if either DS/ES/SS have a non zero base */
     int f_st;   /* currently unused */
     int vm86;   /* vm86 mode */
@@ -307,13 +306,13 @@ static void gen_update_cc_op(DisasContext *s)
  * [AH, CH, DH, BH], ie "bits 15..8 of register N-4". Return
  * true for this special case, false otherwise.
  */
-static inline bool byte_reg_is_xH(int reg)
+static inline bool byte_reg_is_xH(DisasContext *s, int reg)
 {
     if (reg < 4) {
         return false;
     }
 #ifdef TARGET_X86_64
-    if (reg >= 8 || x86_64_hregs) {
+    if (reg >= 8 || s->x86_64_hregs) {
         return false;
     }
 #endif
@@ -360,11 +359,11 @@ static inline TCGMemOp mo_b_d32(int b, TCGMemOp ot)
     return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
 }
 
-static void gen_op_mov_reg_v(TCGMemOp ot, int reg, TCGv t0)
+static void gen_op_mov_reg_v(DisasContext *s, TCGMemOp ot, int reg, TCGv t0)
 {
     switch(ot) {
     case MO_8:
-        if (!byte_reg_is_xH(reg)) {
+        if (!byte_reg_is_xH(s, reg)) {
             tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
         } else {
             tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
@@ -388,9 +387,10 @@ static void gen_op_mov_reg_v(TCGMemOp ot, int reg, TCGv t0)
     }
 }
 
-static inline void gen_op_mov_v_reg(TCGMemOp ot, TCGv t0, int reg)
+static inline
+void gen_op_mov_v_reg(DisasContext *s, TCGMemOp ot, TCGv t0, int reg)
 {
-    if (ot == MO_8 && byte_reg_is_xH(reg)) {
+    if (ot == MO_8 && byte_reg_is_xH(s, reg)) {
         tcg_gen_extract_tl(t0, cpu_regs[reg - 4], 8, 8);
     } else {
         tcg_gen_mov_tl(t0, cpu_regs[reg]);
@@ -414,13 +414,13 @@ static inline
 void gen_op_add_reg_im(DisasContext *s, TCGMemOp size, int reg, int32_t val)
 {
     tcg_gen_addi_tl(s->tmp0, cpu_regs[reg], val);
-    gen_op_mov_reg_v(size, reg, s->tmp0);
+    gen_op_mov_reg_v(s, size, reg, s->tmp0);
 }
 
 static inline void gen_op_add_reg_T0(DisasContext *s, TCGMemOp size, int reg)
 {
     tcg_gen_add_tl(s->tmp0, cpu_regs[reg], s->T0);
-    gen_op_mov_reg_v(size, reg, s->tmp0);
+    gen_op_mov_reg_v(s, size, reg, s->tmp0);
 }
 
 static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
@@ -438,7 +438,7 @@ static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
     if (d == OR_TMP0) {
         gen_op_st_v(s, idx, s->T0, s->A0);
     } else {
-        gen_op_mov_reg_v(idx, d, s->T0);
+        gen_op_mov_reg_v(s, idx, d, s->T0);
     }
 }
 
@@ -1077,7 +1077,7 @@ static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
 
 static inline void gen_stos(DisasContext *s, TCGMemOp ot)
 {
-    gen_op_mov_v_reg(MO_32, s->T0, R_EAX);
+    gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
     gen_string_movl_A0_EDI(s);
     gen_op_st_v(s, ot, s->T0, s->A0);
     gen_op_movl_T0_Dshift(s, ot);
@@ -1088,7 +1088,7 @@ static inline void gen_lods(DisasContext *s, TCGMemOp ot)
 {
     gen_string_movl_A0_ESI(s);
     gen_op_ld_v(s, ot, s->T0, s->A0);
-    gen_op_mov_reg_v(ot, R_EAX, s->T0);
+    gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
     gen_op_movl_T0_Dshift(s, ot);
     gen_op_add_reg_T0(s, s->aflag, R_ESI);
 }
@@ -1272,7 +1272,7 @@ static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
 static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
 {
     if (d != OR_TMP0) {
-        gen_op_mov_v_reg(ot, s1->T0, d);
+        gen_op_mov_v_reg(s1, ot, s1->T0, d);
     } else if (!(s1->prefix & PREFIX_LOCK)) {
         gen_op_ld_v(s1, ot, s1->T0, s1->A0);
     }
@@ -1383,7 +1383,7 @@ static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c)
                                     s1->mem_index, ot | MO_LE);
     } else {
         if (d != OR_TMP0) {
-            gen_op_mov_v_reg(ot, s1->T0, d);
+            gen_op_mov_v_reg(s1, ot, s1->T0, d);
         } else {
             gen_op_ld_v(s1, ot, s1->T0, s1->A0);
         }
@@ -1450,7 +1450,7 @@ static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
     if (op1 == OR_TMP0) {
         gen_op_ld_v(s, ot, s->T0, s->A0);
     } else {
-        gen_op_mov_v_reg(ot, s->T0, op1);
+        gen_op_mov_v_reg(s, ot, s->T0, op1);
     }
 
     tcg_gen_andi_tl(s->T1, s->T1, mask);
@@ -1486,7 +1486,7 @@ static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
     if (op1 == OR_TMP0)
         gen_op_ld_v(s, ot, s->T0, s->A0);
     else
-        gen_op_mov_v_reg(ot, s->T0, op1);
+        gen_op_mov_v_reg(s, ot, s->T0, op1);
 
     op2 &= mask;
     if (op2 != 0) {
@@ -1526,7 +1526,7 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
     if (op1 == OR_TMP0) {
         gen_op_ld_v(s, ot, s->T0, s->A0);
     } else {
-        gen_op_mov_v_reg(ot, s->T0, op1);
+        gen_op_mov_v_reg(s, ot, s->T0, op1);
     }
 
     tcg_gen_andi_tl(s->T1, s->T1, mask);
@@ -1612,7 +1612,7 @@ static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
     if (op1 == OR_TMP0) {
         gen_op_ld_v(s, ot, s->T0, s->A0);
     } else {
-        gen_op_mov_v_reg(ot, s->T0, op1);
+        gen_op_mov_v_reg(s, ot, s->T0, op1);
     }
 
     op2 &= mask;
@@ -1690,7 +1690,7 @@ static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
     if (op1 == OR_TMP0)
         gen_op_ld_v(s, ot, s->T0, s->A0);
     else
-        gen_op_mov_v_reg(ot, s->T0, op1);
+        gen_op_mov_v_reg(s, ot, s->T0, op1);
     
     if (is_right) {
         switch (ot) {
@@ -1746,7 +1746,7 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
     if (op1 == OR_TMP0) {
         gen_op_ld_v(s, ot, s->T0, s->A0);
     } else {
-        gen_op_mov_v_reg(ot, s->T0, op1);
+        gen_op_mov_v_reg(s, ot, s->T0, op1);
     }
 
     count = tcg_temp_new();
@@ -1820,7 +1820,7 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
 static void gen_shift(DisasContext *s1, int op, TCGMemOp ot, int d, int s)
 {
     if (s != OR_TMP1)
-        gen_op_mov_v_reg(ot, s1->T1, s);
+        gen_op_mov_v_reg(s1, ot, s1->T1, s);
     switch(op) {
     case OP_ROL:
         gen_rot_rm_T1(s1, ot, d, 0);
@@ -2133,23 +2133,23 @@ static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
     if (mod == 3) {
         if (is_store) {
             if (reg != OR_TMP0)
-                gen_op_mov_v_reg(ot, s->T0, reg);
-            gen_op_mov_reg_v(ot, rm, s->T0);
+                gen_op_mov_v_reg(s, ot, s->T0, reg);
+            gen_op_mov_reg_v(s, ot, rm, s->T0);
         } else {
-            gen_op_mov_v_reg(ot, s->T0, rm);
+            gen_op_mov_v_reg(s, ot, s->T0, rm);
             if (reg != OR_TMP0)
-                gen_op_mov_reg_v(ot, reg, s->T0);
+                gen_op_mov_reg_v(s, ot, reg, s->T0);
         }
     } else {
         gen_lea_modrm(env, s, modrm);
         if (is_store) {
             if (reg != OR_TMP0)
-                gen_op_mov_v_reg(ot, s->T0, reg);
+                gen_op_mov_v_reg(s, ot, s->T0, reg);
             gen_op_st_v(s, ot, s->T0, s->A0);
         } else {
             gen_op_ld_v(s, ot, s->T0, s->A0);
             if (reg != OR_TMP0)
-                gen_op_mov_reg_v(ot, reg, s->T0);
+                gen_op_mov_reg_v(s, ot, reg, s->T0);
         }
     }
 }
@@ -2260,7 +2260,7 @@ static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
 
     tcg_gen_movcond_tl(cc.cond, s->T0, cc.reg, cc.reg2,
                        s->T0, cpu_regs[reg]);
-    gen_op_mov_reg_v(ot, reg, s->T0);
+    gen_op_mov_reg_v(s, ot, reg, s->T0);
 
     if (cc.mask != -1) {
         tcg_temp_free(cc.reg);
@@ -2354,7 +2354,7 @@ static void gen_push_v(DisasContext *s, TCGv val)
     }
 
     gen_op_st_v(s, d_ot, val, s->A0);
-    gen_op_mov_reg_v(a_ot, R_ESP, new_esp);
+    gen_op_mov_reg_v(s, a_ot, R_ESP, new_esp);
 }
 
 /* two step pop is necessary for precise exceptions */
@@ -2409,7 +2409,7 @@ static void gen_popa(DisasContext *s)
         tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], i * size);
         gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
         gen_op_ld_v(s, d_ot, s->T0, s->A0);
-        gen_op_mov_reg_v(d_ot, 7 - i, s->T0);
+        gen_op_mov_reg_v(s, d_ot, 7 - i, s->T0);
     }
 
     gen_stack_update(s, 8 * size);
@@ -2448,11 +2448,11 @@ static void gen_enter(DisasContext *s, int esp_addend, int level)
     }
 
     /* Copy the FrameTemp value to EBP.  */
-    gen_op_mov_reg_v(a_ot, R_EBP, s->T1);
+    gen_op_mov_reg_v(s, a_ot, R_EBP, s->T1);
 
     /* Compute the final value of ESP.  */
     tcg_gen_subi_tl(s->T1, s->T1, esp_addend + size * level);
-    gen_op_mov_reg_v(a_ot, R_ESP, s->T1);
+    gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
 }
 
 static void gen_leave(DisasContext *s)
@@ -2465,8 +2465,8 @@ static void gen_leave(DisasContext *s)
 
     tcg_gen_addi_tl(s->T1, cpu_regs[R_EBP], 1 << d_ot);
 
-    gen_op_mov_reg_v(d_ot, R_EBP, s->T0);
-    gen_op_mov_reg_v(a_ot, R_ESP, s->T1);
+    gen_op_mov_reg_v(s, d_ot, R_EBP, s->T0);
+    gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
 }
 
 static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
@@ -3598,7 +3598,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 goto illegal_op;
 #endif
             }
-            gen_op_mov_reg_v(ot, reg, s->T0);
+            gen_op_mov_reg_v(s, ot, reg, s->T0);
             break;
         case 0xc4: /* pinsrw */
         case 0x1c4:
@@ -3633,7 +3633,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                                 offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
             }
             reg = ((modrm >> 3) & 7) | rex_r;
-            gen_op_mov_reg_v(ot, reg, s->T0);
+            gen_op_mov_reg_v(s, ot, reg, s->T0);
             break;
         case 0x1d6: /* movq ea, xmm */
             if (mod != 3) {
@@ -3787,7 +3787,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                                  s->T0, tcg_const_i32(8 << ot));
 
                 ot = mo_64_32(s->dflag);
-                gen_op_mov_reg_v(ot, reg, s->T0);
+                gen_op_mov_reg_v(s, ot, reg, s->T0);
                 break;
 
             case 0x1f0: /* crc32 or movbe */
@@ -3814,7 +3814,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 if ((b & 1) == 0) {
                     tcg_gen_qemu_ld_tl(s->T0, s->A0,
                                        s->mem_index, ot | MO_BE);
-                    gen_op_mov_reg_v(ot, reg, s->T0);
+                    gen_op_mov_reg_v(s, ot, reg, s->T0);
                 } else {
                     tcg_gen_qemu_st_tl(cpu_regs[reg], s->A0,
                                        s->mem_index, ot | MO_BE);
@@ -3830,7 +3830,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 ot = mo_64_32(s->dflag);
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                 tcg_gen_andc_tl(s->T0, s->T0, cpu_regs[s->vex_v]);
-                gen_op_mov_reg_v(ot, reg, s->T0);
+                gen_op_mov_reg_v(s, ot, reg, s->T0);
                 gen_op_update1_cc(s);
                 set_cc_op(s, CC_OP_LOGICB + ot);
                 break;
@@ -3868,7 +3868,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     tcg_gen_subi_tl(s->T1, s->T1, 1);
                     tcg_gen_and_tl(s->T0, s->T0, s->T1);
 
-                    gen_op_mov_reg_v(ot, reg, s->T0);
+                    gen_op_mov_reg_v(s, ot, reg, s->T0);
                     gen_op_update1_cc(s);
                     set_cc_op(s, CC_OP_LOGICB + ot);
                 }
@@ -3896,7 +3896,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 tcg_gen_movi_tl(s->A0, -1);
                 tcg_gen_shl_tl(s->A0, s->A0, s->T1);
                 tcg_gen_andc_tl(s->T0, s->T0, s->A0);
-                gen_op_mov_reg_v(ot, reg, s->T0);
+                gen_op_mov_reg_v(s, ot, reg, s->T0);
                 gen_op_update1_cc(s);
                 set_cc_op(s, CC_OP_BMILGB + ot);
                 break;
@@ -4071,7 +4071,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     }
                     tcg_gen_shr_tl(s->T0, s->T0, s->T1);
                 }
-                gen_op_mov_reg_v(ot, reg, s->T0);
+                gen_op_mov_reg_v(s, ot, reg, s->T0);
                 break;
 
             case 0x0f3:
@@ -4104,7 +4104,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     goto unknown_op;
                 }
                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
-                gen_op_mov_reg_v(ot, s->vex_v, s->T0);
+                gen_op_mov_reg_v(s, ot, s->vex_v, s->T0);
                 set_cc_op(s, CC_OP_BMILGB + ot);
                 break;
 
@@ -4145,7 +4145,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     tcg_gen_ld8u_tl(s->T0, cpu_env, offsetof(CPUX86State,
                                             xmm_regs[reg].ZMM_B(val & 15)));
                     if (mod == 3) {
-                        gen_op_mov_reg_v(ot, rm, s->T0);
+                        gen_op_mov_reg_v(s, ot, rm, s->T0);
                     } else {
                         tcg_gen_qemu_st_tl(s->T0, s->A0,
                                            s->mem_index, MO_UB);
@@ -4155,7 +4155,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     tcg_gen_ld16u_tl(s->T0, cpu_env, offsetof(CPUX86State,
                                             xmm_regs[reg].ZMM_W(val & 7)));
                     if (mod == 3) {
-                        gen_op_mov_reg_v(ot, rm, s->T0);
+                        gen_op_mov_reg_v(s, ot, rm, s->T0);
                     } else {
                         tcg_gen_qemu_st_tl(s->T0, s->A0,
                                            s->mem_index, MO_LEUW);
@@ -4192,7 +4192,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
                                             xmm_regs[reg].ZMM_L(val & 3)));
                     if (mod == 3) {
-                        gen_op_mov_reg_v(ot, rm, s->T0);
+                        gen_op_mov_reg_v(s, ot, rm, s->T0);
                     } else {
                         tcg_gen_qemu_st_tl(s->T0, s->A0,
                                            s->mem_index, MO_LEUL);
@@ -4200,7 +4200,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     break;
                 case 0x20: /* pinsrb */
                     if (mod == 3) {
-                        gen_op_mov_v_reg(MO_32, s->T0, rm);
+                        gen_op_mov_v_reg(s, MO_32, s->T0, rm);
                     } else {
                         tcg_gen_qemu_ld_tl(s->T0, s->A0,
                                            s->mem_index, MO_UB);
@@ -4251,7 +4251,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     } else { /* pinsrq */
 #ifdef TARGET_X86_64
                         if (mod == 3) {
-                            gen_op_mov_v_reg(ot, s->tmp1_i64, rm);
+                            gen_op_mov_v_reg(s, ot, s->tmp1_i64, rm);
                         } else {
                             tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
                                                 s->mem_index, MO_LEQ);
@@ -4326,7 +4326,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b & 31);
                     tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
                 }
-                gen_op_mov_reg_v(ot, reg, s->T0);
+                gen_op_mov_reg_v(s, ot, reg, s->T0);
                 break;
 
             default:
@@ -4489,7 +4489,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
 #ifdef TARGET_X86_64
     s->rex_x = 0;
     s->rex_b = 0;
-    x86_64_hregs = 0;
+    s->x86_64_hregs = false;
 #endif
     s->rip_offset = 0; /* for relative ip address */
     s->vex_l = 0;
@@ -4548,7 +4548,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             rex_r = (b & 0x4) << 1;
             s->rex_x = (b & 0x2) << 2;
             REX_B(s) = (b & 0x1) << 3;
-            x86_64_hregs = 1; /* select uniform byte register addressing */
+            /* select uniform byte register addressing */
+            s->x86_64_hregs = true;
             goto next_byte;
         }
         break;
@@ -4576,7 +4577,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 goto illegal_op;
             }
 #ifdef TARGET_X86_64
-            if (x86_64_hregs) {
+            if (s->x86_64_hregs) {
                 goto illegal_op;
             }
 #endif
@@ -4681,12 +4682,12 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                     /* xor reg, reg optimisation */
                     set_cc_op(s, CC_OP_CLR);
                     tcg_gen_movi_tl(s->T0, 0);
-                    gen_op_mov_reg_v(ot, reg, s->T0);
+                    gen_op_mov_reg_v(s, ot, reg, s->T0);
                     break;
                 } else {
                     opreg = rm;
                 }
-                gen_op_mov_v_reg(ot, s->T1, reg);
+                gen_op_mov_v_reg(s, ot, s->T1, reg);
                 gen_op(s, op, ot, opreg);
                 break;
             case 1: /* OP Gv, Ev */
@@ -4700,7 +4701,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 } else if (op == OP_XORL && rm == reg) {
                     goto xor_zero;
                 } else {
-                    gen_op_mov_v_reg(ot, s->T1, rm);
+                    gen_op_mov_v_reg(s, ot, s->T1, rm);
                 }
                 gen_op(s, op, ot, reg);
                 break;
@@ -4786,7 +4787,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 gen_op_ld_v(s, ot, s->T0, s->A0);
             }
         } else {
-            gen_op_mov_v_reg(ot, s->T0, rm);
+            gen_op_mov_v_reg(s, ot, s->T0, rm);
         }
 
         switch(op) {
@@ -4809,7 +4810,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 if (mod != 3) {
                     gen_op_st_v(s, ot, s->T0, s->A0);
                 } else {
-                    gen_op_mov_reg_v(ot, rm, s->T0);
+                    gen_op_mov_reg_v(s, ot, rm, s->T0);
                 }
             }
             break;
@@ -4847,7 +4848,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 if (mod != 3) {
                     gen_op_st_v(s, ot, s->T0, s->A0);
                 } else {
-                    gen_op_mov_reg_v(ot, rm, s->T0);
+                    gen_op_mov_reg_v(s, ot, rm, s->T0);
                 }
             }
             gen_op_update_neg_cc(s);
@@ -4856,26 +4857,26 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         case 4: /* mul */
             switch(ot) {
             case MO_8:
-                gen_op_mov_v_reg(MO_8, s->T1, R_EAX);
+                gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
                 tcg_gen_ext8u_tl(s->T0, s->T0);
                 tcg_gen_ext8u_tl(s->T1, s->T1);
                 /* XXX: use 32 bit mul which could be faster */
                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
-                gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
+                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
                 tcg_gen_andi_tl(cpu_cc_src, s->T0, 0xff00);
                 set_cc_op(s, CC_OP_MULB);
                 break;
             case MO_16:
-                gen_op_mov_v_reg(MO_16, s->T1, R_EAX);
+                gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
                 tcg_gen_ext16u_tl(s->T0, s->T0);
                 tcg_gen_ext16u_tl(s->T1, s->T1);
                 /* XXX: use 32 bit mul which could be faster */
                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
-                gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
+                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
                 tcg_gen_shri_tl(s->T0, s->T0, 16);
-                gen_op_mov_reg_v(MO_16, R_EDX, s->T0);
+                gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
                 tcg_gen_mov_tl(cpu_cc_src, s->T0);
                 set_cc_op(s, CC_OP_MULW);
                 break;
@@ -4905,29 +4906,29 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         case 5: /* imul */
             switch(ot) {
             case MO_8:
-                gen_op_mov_v_reg(MO_8, s->T1, R_EAX);
+                gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
                 tcg_gen_ext8s_tl(s->T0, s->T0);
                 tcg_gen_ext8s_tl(s->T1, s->T1);
                 /* XXX: use 32 bit mul which could be faster */
                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
-                gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
+                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
                 tcg_gen_ext8s_tl(s->tmp0, s->T0);
                 tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
                 set_cc_op(s, CC_OP_MULB);
                 break;
             case MO_16:
-                gen_op_mov_v_reg(MO_16, s->T1, R_EAX);
+                gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
                 tcg_gen_ext16s_tl(s->T0, s->T0);
                 tcg_gen_ext16s_tl(s->T1, s->T1);
                 /* XXX: use 32 bit mul which could be faster */
                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
-                gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
+                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
                 tcg_gen_ext16s_tl(s->tmp0, s->T0);
                 tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
                 tcg_gen_shri_tl(s->T0, s->T0, 16);
-                gen_op_mov_reg_v(MO_16, R_EDX, s->T0);
+                gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
                 set_cc_op(s, CC_OP_MULW);
                 break;
             default:
@@ -5026,7 +5027,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             if (op >= 2 && op != 3 && op != 5)
                 gen_op_ld_v(s, ot, s->T0, s->A0);
         } else {
-            gen_op_mov_v_reg(ot, s->T0, rm);
+            gen_op_mov_v_reg(s, ot, s->T0, rm);
         }
 
         switch(op) {
@@ -5115,7 +5116,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         reg = ((modrm >> 3) & 7) | rex_r;
 
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-        gen_op_mov_v_reg(ot, s->T1, reg);
+        gen_op_mov_v_reg(s, ot, s->T1, reg);
         gen_op_testl_T0_T1_cc(s);
         set_cc_op(s, CC_OP_LOGICB + ot);
         break;
@@ -5125,7 +5126,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         ot = mo_b_d(b, dflag);
         val = insn_get(env, s, ot);
 
-        gen_op_mov_v_reg(ot, s->T0, OR_EAX);
+        gen_op_mov_v_reg(s, ot, s->T0, OR_EAX);
         tcg_gen_movi_tl(s->T1, val);
         gen_op_testl_T0_T1_cc(s);
         set_cc_op(s, CC_OP_LOGICB + ot);
@@ -5135,20 +5136,20 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         switch (dflag) {
 #ifdef TARGET_X86_64
         case MO_64:
-            gen_op_mov_v_reg(MO_32, s->T0, R_EAX);
+            gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
             tcg_gen_ext32s_tl(s->T0, s->T0);
-            gen_op_mov_reg_v(MO_64, R_EAX, s->T0);
+            gen_op_mov_reg_v(s, MO_64, R_EAX, s->T0);
             break;
 #endif
         case MO_32:
-            gen_op_mov_v_reg(MO_16, s->T0, R_EAX);
+            gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
             tcg_gen_ext16s_tl(s->T0, s->T0);
-            gen_op_mov_reg_v(MO_32, R_EAX, s->T0);
+            gen_op_mov_reg_v(s, MO_32, R_EAX, s->T0);
             break;
         case MO_16:
-            gen_op_mov_v_reg(MO_8, s->T0, R_EAX);
+            gen_op_mov_v_reg(s, MO_8, s->T0, R_EAX);
             tcg_gen_ext8s_tl(s->T0, s->T0);
-            gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
+            gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
             break;
         default:
             tcg_abort();
@@ -5158,22 +5159,22 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         switch (dflag) {
 #ifdef TARGET_X86_64
         case MO_64:
-            gen_op_mov_v_reg(MO_64, s->T0, R_EAX);
+            gen_op_mov_v_reg(s, MO_64, s->T0, R_EAX);
             tcg_gen_sari_tl(s->T0, s->T0, 63);
-            gen_op_mov_reg_v(MO_64, R_EDX, s->T0);
+            gen_op_mov_reg_v(s, MO_64, R_EDX, s->T0);
             break;
 #endif
         case MO_32:
-            gen_op_mov_v_reg(MO_32, s->T0, R_EAX);
+            gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
             tcg_gen_ext32s_tl(s->T0, s->T0);
             tcg_gen_sari_tl(s->T0, s->T0, 31);
-            gen_op_mov_reg_v(MO_32, R_EDX, s->T0);
+            gen_op_mov_reg_v(s, MO_32, R_EDX, s->T0);
             break;
         case MO_16:
-            gen_op_mov_v_reg(MO_16, s->T0, R_EAX);
+            gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
             tcg_gen_ext16s_tl(s->T0, s->T0);
             tcg_gen_sari_tl(s->T0, s->T0, 15);
-            gen_op_mov_reg_v(MO_16, R_EDX, s->T0);
+            gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
             break;
         default:
             tcg_abort();
@@ -5197,7 +5198,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             val = (int8_t)insn_get(env, s, MO_8);
             tcg_gen_movi_tl(s->T1, val);
         } else {
-            gen_op_mov_v_reg(ot, s->T1, reg);
+            gen_op_mov_v_reg(s, ot, s->T1, reg);
         }
         switch (ot) {
 #ifdef TARGET_X86_64
@@ -5227,7 +5228,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             tcg_gen_mov_tl(cpu_cc_dst, s->T0);
             tcg_gen_ext16s_tl(s->tmp0, s->T0);
             tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
-            gen_op_mov_reg_v(ot, reg, s->T0);
+            gen_op_mov_reg_v(s, ot, reg, s->T0);
             break;
         }
         set_cc_op(s, CC_OP_MULB + ot);
@@ -5238,13 +5239,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         modrm = x86_ldub_code(env, s);
         reg = ((modrm >> 3) & 7) | rex_r;
         mod = (modrm >> 6) & 3;
-        gen_op_mov_v_reg(ot, s->T0, reg);
+        gen_op_mov_v_reg(s, ot, s->T0, reg);
         if (mod == 3) {
             rm = (modrm & 7) | REX_B(s);
-            gen_op_mov_v_reg(ot, s->T1, rm);
+            gen_op_mov_v_reg(s, ot, s->T1, rm);
             tcg_gen_add_tl(s->T0, s->T0, s->T1);
-            gen_op_mov_reg_v(ot, reg, s->T1);
-            gen_op_mov_reg_v(ot, rm, s->T0);
+            gen_op_mov_reg_v(s, ot, reg, s->T1);
+            gen_op_mov_reg_v(s, ot, rm, s->T0);
         } else {
             gen_lea_modrm(env, s, modrm);
             if (s->prefix & PREFIX_LOCK) {
@@ -5256,7 +5257,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 tcg_gen_add_tl(s->T0, s->T0, s->T1);
                 gen_op_st_v(s, ot, s->T0, s->A0);
             }
-            gen_op_mov_reg_v(ot, reg, s->T1);
+            gen_op_mov_reg_v(s, ot, reg, s->T1);
         }
         gen_op_update2_cc(s);
         set_cc_op(s, CC_OP_ADDB + ot);
@@ -5273,7 +5274,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             oldv = tcg_temp_new();
             newv = tcg_temp_new();
             cmpv = tcg_temp_new();
-            gen_op_mov_v_reg(ot, newv, reg);
+            gen_op_mov_v_reg(s, ot, newv, reg);
             tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]);
 
             if (s->prefix & PREFIX_LOCK) {
@@ -5283,11 +5284,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 gen_lea_modrm(env, s, modrm);
                 tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
                                           s->mem_index, ot | MO_LE);
-                gen_op_mov_reg_v(ot, R_EAX, oldv);
+                gen_op_mov_reg_v(s, ot, R_EAX, oldv);
             } else {
                 if (mod == 3) {
                     rm = (modrm & 7) | REX_B(s);
-                    gen_op_mov_v_reg(ot, oldv, rm);
+                    gen_op_mov_v_reg(s, ot, oldv, rm);
                 } else {
                     gen_lea_modrm(env, s, modrm);
                     gen_op_ld_v(s, ot, oldv, s->A0);
@@ -5298,15 +5299,15 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 /* store value = (old == cmp ? new : old);  */
                 tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
                 if (mod == 3) {
-                    gen_op_mov_reg_v(ot, R_EAX, oldv);
-                    gen_op_mov_reg_v(ot, rm, newv);
+                    gen_op_mov_reg_v(s, ot, R_EAX, oldv);
+                    gen_op_mov_reg_v(s, ot, rm, newv);
                 } else {
                     /* Perform an unconditional store cycle like physical cpu;
                        must be before changing accumulator to ensure
                        idempotency if the store faults and the instruction
                        is restarted */
                     gen_op_st_v(s, ot, newv, s->A0);
-                    gen_op_mov_reg_v(ot, R_EAX, oldv);
+                    gen_op_mov_reg_v(s, ot, R_EAX, oldv);
                 }
             }
             tcg_gen_mov_tl(cpu_cc_src, oldv);
@@ -5351,14 +5352,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         /**************************/
         /* push/pop */
     case 0x50 ... 0x57: /* push */
-        gen_op_mov_v_reg(MO_32, s->T0, (b & 7) | REX_B(s));
+        gen_op_mov_v_reg(s, MO_32, s->T0, (b & 7) | REX_B(s));
         gen_push_v(s, s->T0);
         break;
     case 0x58 ... 0x5f: /* pop */
         ot = gen_pop_T0(s);
         /* NOTE: order is important for pop %sp */
         gen_pop_update(s, ot);
-        gen_op_mov_reg_v(ot, (b & 7) | REX_B(s), s->T0);
+        gen_op_mov_reg_v(s, ot, (b & 7) | REX_B(s), s->T0);
         break;
     case 0x60: /* pusha */
         if (CODE64(s))
@@ -5388,7 +5389,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             /* NOTE: order is important for pop %sp */
             gen_pop_update(s, ot);
             rm = (modrm & 7) | REX_B(s);
-            gen_op_mov_reg_v(ot, rm, s->T0);
+            gen_op_mov_reg_v(s, ot, rm, s->T0);
         } else {
             /* NOTE: order is important too for MMU exceptions */
             s->popl_esp_hack = 1 << ot;
@@ -5478,7 +5479,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         if (mod != 3) {
             gen_op_st_v(s, ot, s->T0, s->A0);
         } else {
-            gen_op_mov_reg_v(ot, (modrm & 7) | REX_B(s), s->T0);
+            gen_op_mov_reg_v(s, ot, (modrm & 7) | REX_B(s), s->T0);
         }
         break;
     case 0x8a:
@@ -5488,7 +5489,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         reg = ((modrm >> 3) & 7) | rex_r;
 
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-        gen_op_mov_reg_v(ot, reg, s->T0);
+        gen_op_mov_reg_v(s, ot, reg, s->T0);
         break;
     case 0x8e: /* mov seg, Gv */
         modrm = x86_ldub_code(env, s);
@@ -5540,10 +5541,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             rm = (modrm & 7) | REX_B(s);
 
             if (mod == 3) {
-                if (s_ot == MO_SB && byte_reg_is_xH(rm)) {
+                if (s_ot == MO_SB && byte_reg_is_xH(s, rm)) {
                     tcg_gen_sextract_tl(s->T0, cpu_regs[rm - 4], 8, 8);
                 } else {
-                    gen_op_mov_v_reg(ot, s->T0, rm);
+                    gen_op_mov_v_reg(s, ot, s->T0, rm);
                     switch (s_ot) {
                     case MO_UB:
                         tcg_gen_ext8u_tl(s->T0, s->T0);
@@ -5560,11 +5561,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                         break;
                     }
                 }
-                gen_op_mov_reg_v(d_ot, reg, s->T0);
+                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
             } else {
                 gen_lea_modrm(env, s, modrm);
                 gen_op_ld_v(s, s_ot, s->T0, s->A0);
-                gen_op_mov_reg_v(d_ot, reg, s->T0);
+                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
             }
         }
         break;
@@ -5579,7 +5580,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             AddressParts a = gen_lea_modrm_0(env, s, modrm);
             TCGv ea = gen_lea_modrm_1(s, a);
             gen_lea_v_seg(s, s->aflag, ea, -1, -1);
-            gen_op_mov_reg_v(dflag, reg, s->A0);
+            gen_op_mov_reg_v(s, dflag, reg, s->A0);
         }
         break;
 
@@ -5605,9 +5606,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_add_A0_ds_seg(s);
             if ((b & 2) == 0) {
                 gen_op_ld_v(s, ot, s->T0, s->A0);
-                gen_op_mov_reg_v(ot, R_EAX, s->T0);
+                gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
             } else {
-                gen_op_mov_v_reg(ot, s->T0, R_EAX);
+                gen_op_mov_v_reg(s, ot, s->T0, R_EAX);
                 gen_op_st_v(s, ot, s->T0, s->A0);
             }
         }
@@ -5619,12 +5620,12 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         gen_extu(s->aflag, s->A0);
         gen_add_A0_ds_seg(s);
         gen_op_ld_v(s, MO_8, s->T0, s->A0);
-        gen_op_mov_reg_v(MO_8, R_EAX, s->T0);
+        gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
         break;
     case 0xb0 ... 0xb7: /* mov R, Ib */
         val = insn_get(env, s, MO_8);
         tcg_gen_movi_tl(s->T0, val);
-        gen_op_mov_reg_v(MO_8, (b & 7) | REX_B(s), s->T0);
+        gen_op_mov_reg_v(s, MO_8, (b & 7) | REX_B(s), s->T0);
         break;
     case 0xb8 ... 0xbf: /* mov R, Iv */
 #ifdef TARGET_X86_64
@@ -5634,7 +5635,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             tmp = x86_ldq_code(env, s);
             reg = (b & 7) | REX_B(s);
             tcg_gen_movi_tl(s->T0, tmp);
-            gen_op_mov_reg_v(MO_64, reg, s->T0);
+            gen_op_mov_reg_v(s, MO_64, reg, s->T0);
         } else
 #endif
         {
@@ -5642,7 +5643,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             val = insn_get(env, s, ot);
             reg = (b & 7) | REX_B(s);
             tcg_gen_movi_tl(s->T0, val);
-            gen_op_mov_reg_v(ot, reg, s->T0);
+            gen_op_mov_reg_v(s, ot, reg, s->T0);
         }
         break;
 
@@ -5661,17 +5662,17 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         if (mod == 3) {
             rm = (modrm & 7) | REX_B(s);
         do_xchg_reg:
-            gen_op_mov_v_reg(ot, s->T0, reg);
-            gen_op_mov_v_reg(ot, s->T1, rm);
-            gen_op_mov_reg_v(ot, rm, s->T0);
-            gen_op_mov_reg_v(ot, reg, s->T1);
+            gen_op_mov_v_reg(s, ot, s->T0, reg);
+            gen_op_mov_v_reg(s, ot, s->T1, rm);
+            gen_op_mov_reg_v(s, ot, rm, s->T0);
+            gen_op_mov_reg_v(s, ot, reg, s->T1);
         } else {
             gen_lea_modrm(env, s, modrm);
-            gen_op_mov_v_reg(ot, s->T0, reg);
+            gen_op_mov_v_reg(s, ot, s->T0, reg);
             /* for xchg, lock is implicit */
             tcg_gen_atomic_xchg_tl(s->T1, s->A0, s->T0,
                                    s->mem_index, ot | MO_LE);
-            gen_op_mov_reg_v(ot, reg, s->T1);
+            gen_op_mov_reg_v(s, ot, reg, s->T1);
         }
         break;
     case 0xc4: /* les Gv */
@@ -5704,7 +5705,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         gen_op_ld_v(s, MO_16, s->T0, s->A0);
         gen_movl_seg_T0(s, op);
         /* then put the data */
-        gen_op_mov_reg_v(ot, reg, s->T1);
+        gen_op_mov_reg_v(s, ot, reg, s->T1);
         if (s->base.is_jmp) {
             gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob(s);
@@ -5783,7 +5784,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         } else {
             opreg = rm;
         }
-        gen_op_mov_v_reg(ot, s->T1, reg);
+        gen_op_mov_v_reg(s, ot, s->T1, reg);
 
         if (shift) {
             TCGv imm = tcg_const_tl(x86_ldub_code(env, s));
@@ -6244,7 +6245,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 case 0:
                     gen_helper_fnstsw(s->tmp2_i32, cpu_env);
                     tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
-                    gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
+                    gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
                     break;
                 default:
                     goto unknown_op;
@@ -6397,7 +6398,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
 	}
         tcg_gen_movi_i32(s->tmp2_i32, val);
         gen_helper_in_func(ot, s->T1, s->tmp2_i32);
-        gen_op_mov_reg_v(ot, R_EAX, s->T1);
+        gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
         gen_bpt_io(s, s->tmp2_i32, ot);
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_end();
@@ -6411,7 +6412,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         tcg_gen_movi_tl(s->T0, val);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      svm_is_rep(prefixes));
-        gen_op_mov_v_reg(ot, s->T1, R_EAX);
+        gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
 
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_start();
@@ -6436,7 +6437,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
 	}
         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
         gen_helper_in_func(ot, s->T1, s->tmp2_i32);
-        gen_op_mov_reg_v(ot, R_EAX, s->T1);
+        gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
         gen_bpt_io(s, s->tmp2_i32, ot);
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_end();
@@ -6449,7 +6450,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      svm_is_rep(prefixes));
-        gen_op_mov_v_reg(ot, s->T1, R_EAX);
+        gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
 
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_start();
@@ -6708,7 +6709,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0x9e: /* sahf */
         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
             goto illegal_op;
-        gen_op_mov_v_reg(MO_8, s->T0, R_AH);
+        gen_op_mov_v_reg(s, MO_8, s->T0, R_AH);
         gen_compute_eflags(s);
         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
         tcg_gen_andi_tl(s->T0, s->T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
@@ -6720,7 +6721,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         gen_compute_eflags(s);
         /* Note: gen_compute_eflags() only gives the condition codes */
         tcg_gen_ori_tl(s->T0, cpu_cc_src, 0x02);
-        gen_op_mov_reg_v(MO_8, R_AH, s->T0);
+        gen_op_mov_reg_v(s, MO_8, R_AH, s->T0);
         break;
     case 0xf5: /* cmc */
         gen_compute_eflags(s);
@@ -6758,7 +6759,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 gen_op_ld_v(s, ot, s->T0, s->A0);
             }
         } else {
-            gen_op_mov_v_reg(ot, s->T0, rm);
+            gen_op_mov_v_reg(s, ot, s->T0, rm);
         }
         /* load shift */
         val = x86_ldub_code(env, s);
@@ -6784,7 +6785,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         reg = ((modrm >> 3) & 7) | rex_r;
         mod = (modrm >> 6) & 3;
         rm = (modrm & 7) | REX_B(s);
-        gen_op_mov_v_reg(MO_32, s->T1, reg);
+        gen_op_mov_v_reg(s, MO_32, s->T1, reg);
         if (mod != 3) {
             AddressParts a = gen_lea_modrm_0(env, s, modrm);
             /* specific case: we need to add a displacement */
@@ -6797,7 +6798,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 gen_op_ld_v(s, ot, s->T0, s->A0);
             }
         } else {
-            gen_op_mov_v_reg(ot, s->T0, rm);
+            gen_op_mov_v_reg(s, ot, s->T0, rm);
         }
     bt_op:
         tcg_gen_andi_tl(s->T1, s->T1, (1 << (3 + ot)) - 1);
@@ -6847,7 +6848,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 if (mod != 3) {
                     gen_op_st_v(s, ot, s->T0, s->A0);
                 } else {
-                    gen_op_mov_reg_v(ot, rm, s->T0);
+                    gen_op_mov_reg_v(s, ot, rm, s->T0);
                 }
             }
         }
@@ -6930,7 +6931,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]);
             }
         }
-        gen_op_mov_reg_v(ot, reg, s->T0);
+        gen_op_mov_reg_v(s, ot, reg, s->T0);
         break;
         /************************/
         /* bcd */
@@ -7070,7 +7071,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
-        gen_op_mov_v_reg(ot, s->T0, reg);
+        gen_op_mov_v_reg(s, ot, s->T0, reg);
         gen_lea_modrm(env, s, modrm);
         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
         if (ot == MO_16) {
@@ -7083,16 +7084,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         reg = (b & 7) | REX_B(s);
 #ifdef TARGET_X86_64
         if (dflag == MO_64) {
-            gen_op_mov_v_reg(MO_64, s->T0, reg);
+            gen_op_mov_v_reg(s, MO_64, s->T0, reg);
             tcg_gen_bswap64_i64(s->T0, s->T0);
-            gen_op_mov_reg_v(MO_64, reg, s->T0);
+            gen_op_mov_reg_v(s, MO_64, reg, s->T0);
         } else
 #endif
         {
-            gen_op_mov_v_reg(MO_32, s->T0, reg);
+            gen_op_mov_v_reg(s, MO_32, s->T0, reg);
             tcg_gen_ext32u_tl(s->T0, s->T0);
             tcg_gen_bswap32_tl(s->T0, s->T0);
-            gen_op_mov_reg_v(MO_32, reg, s->T0);
+            gen_op_mov_reg_v(s, MO_32, reg, s->T0);
         }
         break;
     case 0xd6: /* salc */
@@ -7100,7 +7101,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             goto illegal_op;
         gen_compute_eflags_c(s, s->T0);
         tcg_gen_neg_tl(s->T0, s->T0);
-        gen_op_mov_reg_v(MO_8, R_EAX, s->T0);
+        gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
         break;
     case 0xe0: /* loopnz */
     case 0xe1: /* loopz */
@@ -7661,16 +7662,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             rm = (modrm & 7) | REX_B(s);
 
             if (mod == 3) {
-                gen_op_mov_v_reg(MO_32, s->T0, rm);
+                gen_op_mov_v_reg(s, MO_32, s->T0, rm);
                 /* sign extend */
                 if (d_ot == MO_64) {
                     tcg_gen_ext32s_tl(s->T0, s->T0);
                 }
-                gen_op_mov_reg_v(d_ot, reg, s->T0);
+                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
             } else {
                 gen_lea_modrm(env, s, modrm);
                 gen_op_ld_v(s, MO_32 | MO_SIGN, s->T0, s->A0);
-                gen_op_mov_reg_v(d_ot, reg, s->T0);
+                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
             }
         } else
 #endif
@@ -7694,10 +7695,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 a0 = tcg_temp_local_new();
                 tcg_gen_mov_tl(a0, s->A0);
             } else {
-                gen_op_mov_v_reg(ot, t0, rm);
+                gen_op_mov_v_reg(s, ot, t0, rm);
                 a0 = NULL;
             }
-            gen_op_mov_v_reg(ot, t1, reg);
+            gen_op_mov_v_reg(s, ot, t1, reg);
             tcg_gen_andi_tl(s->tmp0, t0, 3);
             tcg_gen_andi_tl(t1, t1, 3);
             tcg_gen_movi_tl(t2, 0);
@@ -7711,7 +7712,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 gen_op_st_v(s, ot, t0, a0);
                 tcg_temp_free(a0);
            } else {
-                gen_op_mov_reg_v(ot, rm, t0);
+                gen_op_mov_reg_v(s, ot, rm, t0);
             }
             gen_compute_eflags(s);
             tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
@@ -7742,7 +7743,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             tcg_gen_andi_tl(s->tmp0, cpu_cc_src, CC_Z);
             label1 = gen_new_label();
             tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
-            gen_op_mov_reg_v(ot, reg, t0);
+            gen_op_mov_reg_v(s, ot, reg, t0);
             gen_set_label(label1);
             set_cc_op(s, CC_OP_EFLAGS);
             tcg_temp_free(t0);
@@ -7996,7 +7997,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
                         gen_io_start();
                     }
-                    gen_op_mov_v_reg(ot, s->T0, rm);
+                    gen_op_mov_v_reg(s, ot, s->T0, rm);
                     gen_helper_write_crN(cpu_env, tcg_const_i32(reg),
                                          s->T0);
                     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
@@ -8009,7 +8010,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                         gen_io_start();
                     }
                     gen_helper_read_crN(s->T0, cpu_env, tcg_const_i32(reg));
-                    gen_op_mov_reg_v(ot, rm, s->T0);
+                    gen_op_mov_reg_v(s, ot, rm, s->T0);
                     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
                         gen_io_end();
                     }
@@ -8042,7 +8043,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             }
             if (b & 2) {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg);
-                gen_op_mov_v_reg(ot, s->T0, rm);
+                gen_op_mov_v_reg(s, ot, s->T0, rm);
                 tcg_gen_movi_i32(s->tmp2_i32, reg);
                 gen_helper_set_dr(cpu_env, s->tmp2_i32, s->T0);
                 gen_jmp_im(s, s->pc - s->cs_base);
@@ -8051,7 +8052,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
                 tcg_gen_movi_i32(s->tmp2_i32, reg);
                 gen_helper_get_dr(s->T0, cpu_env, s->tmp2_i32);
-                gen_op_mov_reg_v(ot, rm, s->T0);
+                gen_op_mov_reg_v(s, ot, rm, s->T0);
             }
         }
         break;
@@ -8313,7 +8314,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         gen_extu(ot, s->T0);
         tcg_gen_mov_tl(cpu_cc_src, s->T0);
         tcg_gen_ctpop_tl(s->T0, s->T0);
-        gen_op_mov_reg_v(ot, reg, s->T0);
+        gen_op_mov_reg_v(s, ot, reg, s->T0);
 
         set_cc_op(s, CC_OP_POPCNT);
         break;
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [Qemu-devel] [PATCH v3 13/13] configure: enable mttcg for i386 and x86_64
  2018-09-11 20:28 [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg Emilio G. Cota
                   ` (11 preceding siblings ...)
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 12/13] target/i386: move x86_64_hregs " Emilio G. Cota
@ 2018-09-11 20:28 ` Emilio G. Cota
  2018-09-12 12:46 ` [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg Paolo Bonzini
  13 siblings, 0 replies; 32+ messages in thread
From: Emilio G. Cota @ 2018-09-11 20:28 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 configure | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/configure b/configure
index 58862d2ae8..f715252c9f 100755
--- a/configure
+++ b/configure
@@ -7025,12 +7025,14 @@ TARGET_ABI_DIR=""
 
 case "$target_name" in
   i386)
+    mttcg="yes"
     gdb_xml_files="i386-32bit.xml i386-32bit-core.xml i386-32bit-sse.xml"
     target_compiler=$cross_cc_i386
     target_compiler_cflags=$cross_cc_ccflags_i386
   ;;
   x86_64)
     TARGET_BASE_ARCH=i386
+    mttcg="yes"
     gdb_xml_files="i386-64bit.xml i386-64bit-core.xml i386-64bit-sse.xml"
     target_compiler=$cross_cc_x86_64
   ;;
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH v3 01/13] target/i386: move cpu_cc_srcT to DisasContext
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 01/13] target/i386: move cpu_cc_srcT to DisasContext Emilio G. Cota
@ 2018-09-11 20:44   ` Richard Henderson
  2018-09-13 14:21   ` Alex Bennée
  1 sibling, 0 replies; 32+ messages in thread
From: Richard Henderson @ 2018-09-11 20:44 UTC (permalink / raw)
  To: Emilio G. Cota, qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Eduardo Habkost,
	Alex Bennée

On 09/11/2018 01:28 PM, Emilio G. Cota wrote:
> Signed-off-by: Emilio G. Cota <cota@braap.org>
> ---
>  target/i386/translate.c | 32 ++++++++++++++++++--------------
>  1 file changed, 18 insertions(+), 14 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH v3 02/13] target/i386: move cpu_A0 to DisasContext
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 02/13] target/i386: move cpu_A0 " Emilio G. Cota
@ 2018-09-11 20:45   ` Richard Henderson
  2018-09-13 14:23   ` Alex Bennée
  1 sibling, 0 replies; 32+ messages in thread
From: Richard Henderson @ 2018-09-11 20:45 UTC (permalink / raw)
  To: Emilio G. Cota, qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Eduardo Habkost,
	Alex Bennée

On 09/11/2018 01:28 PM, Emilio G. Cota wrote:
> Signed-off-by: Emilio G. Cota <cota@braap.org>
> ---
>  target/i386/translate.c | 472 ++++++++++++++++++++--------------------
>  1 file changed, 236 insertions(+), 236 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH v3 03/13] target/i386: move cpu_T0 to DisasContext
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 03/13] target/i386: move cpu_T0 " Emilio G. Cota
@ 2018-09-11 20:47   ` Richard Henderson
  2018-09-13 14:25   ` Alex Bennée
  1 sibling, 0 replies; 32+ messages in thread
From: Richard Henderson @ 2018-09-11 20:47 UTC (permalink / raw)
  To: Emilio G. Cota, qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Eduardo Habkost,
	Alex Bennée

On 09/11/2018 01:28 PM, Emilio G. Cota wrote:
> Signed-off-by: Emilio G. Cota <cota@braap.org>
> ---
>  target/i386/translate.c | 1174 ++++++++++++++++++++-------------------
>  1 file changed, 594 insertions(+), 580 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH v3 04/13] target/i386: move cpu_T1 to DisasContext
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 04/13] target/i386: move cpu_T1 " Emilio G. Cota
@ 2018-09-11 20:48   ` Richard Henderson
  2018-09-13 14:26   ` Alex Bennée
  1 sibling, 0 replies; 32+ messages in thread
From: Richard Henderson @ 2018-09-11 20:48 UTC (permalink / raw)
  To: Emilio G. Cota, qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Eduardo Habkost,
	Alex Bennée

On 09/11/2018 01:28 PM, Emilio G. Cota wrote:
> Signed-off-by: Emilio G. Cota <cota@braap.org>
> ---
>  target/i386/translate.c | 341 ++++++++++++++++++++--------------------
>  1 file changed, 170 insertions(+), 171 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH v3 05/13] target/i386: move cpu_tmp0 to DisasContext
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 05/13] target/i386: move cpu_tmp0 " Emilio G. Cota
@ 2018-09-11 20:51   ` Richard Henderson
  0 siblings, 0 replies; 32+ messages in thread
From: Richard Henderson @ 2018-09-11 20:51 UTC (permalink / raw)
  To: Emilio G. Cota, qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Eduardo Habkost,
	Alex Bennée

On 09/11/2018 01:28 PM, Emilio G. Cota wrote:
> Signed-off-by: Emilio G. Cota <cota@braap.org>
> ---
>  target/i386/translate.c | 282 ++++++++++++++++++++--------------------
>  1 file changed, 144 insertions(+), 138 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

I will note that these tmpN variables ought to be eliminated
completely, in favor of totally local temporary allocation.

But that should be done separately, because while they *ought*
to be local, in some cases it may be hard to see that they are.


r~

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH v3 06/13] target/i386: move cpu_tmp4 to DisasContext
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 06/13] target/i386: move cpu_tmp4 " Emilio G. Cota
@ 2018-09-11 20:52   ` Richard Henderson
  0 siblings, 0 replies; 32+ messages in thread
From: Richard Henderson @ 2018-09-11 20:52 UTC (permalink / raw)
  To: Emilio G. Cota, qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Eduardo Habkost,
	Alex Bennée

On 09/11/2018 01:28 PM, Emilio G. Cota wrote:
> Signed-off-by: Emilio G. Cota <cota@braap.org>
> ---
>  target/i386/translate.c | 78 ++++++++++++++++++++---------------------
>  1 file changed, 39 insertions(+), 39 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH v3 07/13] target/i386: move cpu_ptr0 to DisasContext
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 07/13] target/i386: move cpu_ptr0 " Emilio G. Cota
@ 2018-09-11 20:53   ` Richard Henderson
  0 siblings, 0 replies; 32+ messages in thread
From: Richard Henderson @ 2018-09-11 20:53 UTC (permalink / raw)
  To: Emilio G. Cota, qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Eduardo Habkost,
	Alex Bennée

On 09/11/2018 01:28 PM, Emilio G. Cota wrote:
> Signed-off-by: Emilio G. Cota <cota@braap.org>
> ---
>  target/i386/translate.c | 101 +++++++++++++++++++++-------------------
>  1 file changed, 52 insertions(+), 49 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH v3 08/13] target/i386: move cpu_ptr1 to DisasContext
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 08/13] target/i386: move cpu_ptr1 " Emilio G. Cota
@ 2018-09-11 20:54   ` Richard Henderson
  0 siblings, 0 replies; 32+ messages in thread
From: Richard Henderson @ 2018-09-11 20:54 UTC (permalink / raw)
  To: Emilio G. Cota, qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Eduardo Habkost,
	Alex Bennée

On 09/11/2018 01:28 PM, Emilio G. Cota wrote:
> Signed-off-by: Emilio G. Cota <cota@braap.org>
> ---
>  target/i386/translate.c | 52 ++++++++++++++++++++---------------------
>  1 file changed, 26 insertions(+), 26 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH v3 09/13] target/i386: move cpu_tmp2_i32 to DisasContext
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 09/13] target/i386: move cpu_tmp2_i32 " Emilio G. Cota
@ 2018-09-11 20:55   ` Richard Henderson
  0 siblings, 0 replies; 32+ messages in thread
From: Richard Henderson @ 2018-09-11 20:55 UTC (permalink / raw)
  To: Emilio G. Cota, qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Eduardo Habkost,
	Alex Bennée

On 09/11/2018 01:28 PM, Emilio G. Cota wrote:
> Signed-off-by: Emilio G. Cota <cota@braap.org>
> ---
>  target/i386/translate.c | 347 ++++++++++++++++++++--------------------
>  1 file changed, 174 insertions(+), 173 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH v3 10/13] target/i386: move cpu_tmp3_i32 to DisasContext
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 10/13] target/i386: move cpu_tmp3_i32 " Emilio G. Cota
@ 2018-09-11 20:56   ` Richard Henderson
  0 siblings, 0 replies; 32+ messages in thread
From: Richard Henderson @ 2018-09-11 20:56 UTC (permalink / raw)
  To: Emilio G. Cota, qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Eduardo Habkost,
	Alex Bennée

On 09/11/2018 01:28 PM, Emilio G. Cota wrote:
> Signed-off-by: Emilio G. Cota <cota@braap.org>
> ---
>  target/i386/translate.c | 64 ++++++++++++++++++++---------------------
>  1 file changed, 32 insertions(+), 32 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH v3 11/13] target/i386: move cpu_tmp1_i64 to DisasContext
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 11/13] target/i386: move cpu_tmp1_i64 " Emilio G. Cota
@ 2018-09-11 20:57   ` Richard Henderson
  0 siblings, 0 replies; 32+ messages in thread
From: Richard Henderson @ 2018-09-11 20:57 UTC (permalink / raw)
  To: Emilio G. Cota, qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Eduardo Habkost,
	Alex Bennée

On 09/11/2018 01:28 PM, Emilio G. Cota wrote:
> Signed-off-by: Emilio G. Cota <cota@braap.org>
> ---
>  target/i386/translate.c | 160 ++++++++++++++++++++--------------------
>  1 file changed, 80 insertions(+), 80 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH v3 12/13] target/i386: move x86_64_hregs to DisasContext
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 12/13] target/i386: move x86_64_hregs " Emilio G. Cota
@ 2018-09-11 20:58   ` Richard Henderson
  2018-09-13 14:31   ` Alex Bennée
  1 sibling, 0 replies; 32+ messages in thread
From: Richard Henderson @ 2018-09-11 20:58 UTC (permalink / raw)
  To: Emilio G. Cota, qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Eduardo Habkost,
	Alex Bennée

On 09/11/2018 01:28 PM, Emilio G. Cota wrote:
> And convert it to a bool to use an existing hole
> in the struct.
> 
> Signed-off-by: Emilio G. Cota <cota@braap.org>
> ---
>  target/i386/translate.c | 307 ++++++++++++++++++++--------------------
>  1 file changed, 154 insertions(+), 153 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg
  2018-09-11 20:28 [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg Emilio G. Cota
                   ` (12 preceding siblings ...)
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 13/13] configure: enable mttcg for i386 and x86_64 Emilio G. Cota
@ 2018-09-12 12:46 ` Paolo Bonzini
  13 siblings, 0 replies; 32+ messages in thread
From: Paolo Bonzini @ 2018-09-12 12:46 UTC (permalink / raw)
  To: Emilio G. Cota, qemu-devel
  Cc: Peter Crosthwaite, Richard Henderson, Eduardo Habkost,
	Alex Bennée

On 11/09/2018 22:28, Emilio G. Cota wrote:
> v2: https://lists.gnu.org/archive/html/qemu-devel/2018-09/msg01122.html
> 
> Changes since v2:
> 
> - Add rth's R-b tag to the last patch
> - Drop v2's first 10 patches, since Paolo already picked those up
> - Move TCG temps + x86_64_hregs to DisasContext
>   + While at it, drop the cpu_ prefix from the TCG temps,
>     e.g. cpu_A0 -> s->A0
>   + Split the conversion into separate patches to ease review.
>     The patches are quite boring and long because the temps
>     are everywhere, and I had to add DisasContext *s to quite a few
>     functions
> 
> The series is checkpatch-clean.
> 
> You can fetch these patches from:
>   https://github.com/cota/qemu/tree/i386-mttcg-v3

Great, thanks!

Paolo

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH v3 01/13] target/i386: move cpu_cc_srcT to DisasContext
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 01/13] target/i386: move cpu_cc_srcT to DisasContext Emilio G. Cota
  2018-09-11 20:44   ` Richard Henderson
@ 2018-09-13 14:21   ` Alex Bennée
  1 sibling, 0 replies; 32+ messages in thread
From: Alex Bennée @ 2018-09-13 14:21 UTC (permalink / raw)
  To: Emilio G. Cota
  Cc: qemu-devel, Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost


Emilio G. Cota <cota@braap.org> writes:

> Signed-off-by: Emilio G. Cota <cota@braap.org>

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

> ---
>  target/i386/translate.c | 32 ++++++++++++++++++--------------
>  1 file changed, 18 insertions(+), 14 deletions(-)
>
> diff --git a/target/i386/translate.c b/target/i386/translate.c
> index 1f9d1d9b24..e9f512472e 100644
> --- a/target/i386/translate.c
> +++ b/target/i386/translate.c
> @@ -73,7 +73,7 @@
>
>  /* global register indexes */
>  static TCGv cpu_A0;
> -static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT;
> +static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2;
>  static TCGv_i32 cpu_cc_op;
>  static TCGv cpu_regs[CPU_NB_REGS];
>  static TCGv cpu_seg_base[6];
> @@ -135,6 +135,10 @@ typedef struct DisasContext {
>      int cpuid_ext3_features;
>      int cpuid_7_0_ebx_features;
>      int cpuid_xsave_features;
> +
> +    /* TCG local temps */
> +    TCGv cc_srcT;
> +
>      sigjmp_buf jmpbuf;
>  } DisasContext;
>
> @@ -244,7 +248,7 @@ static void set_cc_op(DisasContext *s, CCOp op)
>          tcg_gen_discard_tl(cpu_cc_src2);
>      }
>      if (dead & USES_CC_SRCT) {
> -        tcg_gen_discard_tl(cpu_cc_srcT);
> +        tcg_gen_discard_tl(s->cc_srcT);
>      }
>
>      if (op == CC_OP_DYNAMIC) {
> @@ -667,11 +671,11 @@ static inline void gen_op_testl_T0_T1_cc(void)
>      tcg_gen_and_tl(cpu_cc_dst, cpu_T0, cpu_T1);
>  }
>
> -static void gen_op_update_neg_cc(void)
> +static void gen_op_update_neg_cc(DisasContext *s)
>  {
>      tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
>      tcg_gen_neg_tl(cpu_cc_src, cpu_T0);
> -    tcg_gen_movi_tl(cpu_cc_srcT, 0);
> +    tcg_gen_movi_tl(s->cc_srcT, 0);
>  }
>
>  /* compute all eflags to cc_src */
> @@ -742,7 +746,7 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
>          t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
>          /* If no temporary was used, be careful not to alias t1 and t0.  */
>          t0 = t1 == cpu_cc_src ? cpu_tmp0 : reg;
> -        tcg_gen_mov_tl(t0, cpu_cc_srcT);
> +        tcg_gen_mov_tl(t0, s->cc_srcT);
>          gen_extu(size, t0);
>          goto add_sub;
>
> @@ -899,7 +903,7 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
>          size = s->cc_op - CC_OP_SUBB;
>          switch (jcc_op) {
>          case JCC_BE:
> -            tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
> +            tcg_gen_mov_tl(cpu_tmp4, s->cc_srcT);
>              gen_extu(size, cpu_tmp4);
>              t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
>              cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = cpu_tmp4,
> @@ -912,7 +916,7 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
>          case JCC_LE:
>              cond = TCG_COND_LE;
>          fast_jcc_l:
> -            tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
> +            tcg_gen_mov_tl(cpu_tmp4, s->cc_srcT);
>              gen_exts(size, cpu_tmp4);
>              t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true);
>              cc = (CCPrepare) { .cond = cond, .reg = cpu_tmp4,
> @@ -1309,11 +1313,11 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
>      case OP_SUBL:
>          if (s1->prefix & PREFIX_LOCK) {
>              tcg_gen_neg_tl(cpu_T0, cpu_T1);
> -            tcg_gen_atomic_fetch_add_tl(cpu_cc_srcT, cpu_A0, cpu_T0,
> +            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, cpu_A0, cpu_T0,
>                                          s1->mem_index, ot | MO_LE);
> -            tcg_gen_sub_tl(cpu_T0, cpu_cc_srcT, cpu_T1);
> +            tcg_gen_sub_tl(cpu_T0, s1->cc_srcT, cpu_T1);
>          } else {
> -            tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0);
> +            tcg_gen_mov_tl(s1->cc_srcT, cpu_T0);
>              tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
>              gen_op_st_rm_T0_A0(s1, ot, d);
>          }
> @@ -1356,7 +1360,7 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
>          break;
>      case OP_CMPL:
>          tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
> -        tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0);
> +        tcg_gen_mov_tl(s1->cc_srcT, cpu_T0);
>          tcg_gen_sub_tl(cpu_cc_dst, cpu_T0, cpu_T1);
>          set_cc_op(s1, CC_OP_SUBB + ot);
>          break;
> @@ -4823,7 +4827,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                      gen_op_mov_reg_v(ot, rm, cpu_T0);
>                  }
>              }
> -            gen_op_update_neg_cc();
> +            gen_op_update_neg_cc(s);
>              set_cc_op(s, CC_OP_SUBB + ot);
>              break;
>          case 4: /* mul */
> @@ -5283,7 +5287,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  }
>              }
>              tcg_gen_mov_tl(cpu_cc_src, oldv);
> -            tcg_gen_mov_tl(cpu_cc_srcT, cmpv);
> +            tcg_gen_mov_tl(s->cc_srcT, cmpv);
>              tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv);
>              set_cc_op(s, CC_OP_SUBB + ot);
>              tcg_temp_free(oldv);
> @@ -8463,7 +8467,7 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
>      cpu_tmp4 = tcg_temp_new();
>      cpu_ptr0 = tcg_temp_new_ptr();
>      cpu_ptr1 = tcg_temp_new_ptr();
> -    cpu_cc_srcT = tcg_temp_local_new();
> +    dc->cc_srcT = tcg_temp_local_new();
>  }
>
>  static void i386_tr_tb_start(DisasContextBase *db, CPUState *cpu)


--
Alex Bennée

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH v3 02/13] target/i386: move cpu_A0 to DisasContext
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 02/13] target/i386: move cpu_A0 " Emilio G. Cota
  2018-09-11 20:45   ` Richard Henderson
@ 2018-09-13 14:23   ` Alex Bennée
  1 sibling, 0 replies; 32+ messages in thread
From: Alex Bennée @ 2018-09-13 14:23 UTC (permalink / raw)
  To: Emilio G. Cota
  Cc: qemu-devel, Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost


Emilio G. Cota <cota@braap.org> writes:

> Signed-off-by: Emilio G. Cota <cota@braap.org>

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

> ---
>  target/i386/translate.c | 472 ++++++++++++++++++++--------------------
>  1 file changed, 236 insertions(+), 236 deletions(-)
>
> diff --git a/target/i386/translate.c b/target/i386/translate.c
> index e9f512472e..c6b1baab9d 100644
> --- a/target/i386/translate.c
> +++ b/target/i386/translate.c
> @@ -72,7 +72,6 @@
>  //#define MACRO_TEST   1
>
>  /* global register indexes */
> -static TCGv cpu_A0;
>  static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2;
>  static TCGv_i32 cpu_cc_op;
>  static TCGv cpu_regs[CPU_NB_REGS];
> @@ -138,6 +137,7 @@ typedef struct DisasContext {
>
>      /* TCG local temps */
>      TCGv cc_srcT;
> +    TCGv A0;
>
>      sigjmp_buf jmpbuf;
>  } DisasContext;
> @@ -395,9 +395,9 @@ static inline void gen_op_mov_v_reg(TCGMemOp ot, TCGv t0, int reg)
>
>  static void gen_add_A0_im(DisasContext *s, int val)
>  {
> -    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
> +    tcg_gen_addi_tl(s->A0, s->A0, val);
>      if (!CODE64(s)) {
> -        tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
> +        tcg_gen_ext32u_tl(s->A0, s->A0);
>      }
>  }
>
> @@ -431,7 +431,7 @@ static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
>  static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
>  {
>      if (d == OR_TMP0) {
> -        gen_op_st_v(s, idx, cpu_T0, cpu_A0);
> +        gen_op_st_v(s, idx, cpu_T0, s->A0);
>      } else {
>          gen_op_mov_reg_v(idx, d, cpu_T0);
>      }
> @@ -453,7 +453,7 @@ static void gen_lea_v_seg(DisasContext *s, TCGMemOp aflag, TCGv a0,
>  #ifdef TARGET_X86_64
>      case MO_64:
>          if (ovr_seg < 0) {
> -            tcg_gen_mov_tl(cpu_A0, a0);
> +            tcg_gen_mov_tl(s->A0, a0);
>              return;
>          }
>          break;
> @@ -464,14 +464,14 @@ static void gen_lea_v_seg(DisasContext *s, TCGMemOp aflag, TCGv a0,
>              ovr_seg = def_seg;
>          }
>          if (ovr_seg < 0) {
> -            tcg_gen_ext32u_tl(cpu_A0, a0);
> +            tcg_gen_ext32u_tl(s->A0, a0);
>              return;
>          }
>          break;
>      case MO_16:
>          /* 16 bit address */
> -        tcg_gen_ext16u_tl(cpu_A0, a0);
> -        a0 = cpu_A0;
> +        tcg_gen_ext16u_tl(s->A0, a0);
> +        a0 = s->A0;
>          if (ovr_seg < 0) {
>              if (s->addseg) {
>                  ovr_seg = def_seg;
> @@ -488,13 +488,13 @@ static void gen_lea_v_seg(DisasContext *s, TCGMemOp aflag, TCGv a0,
>          TCGv seg = cpu_seg_base[ovr_seg];
>
>          if (aflag == MO_64) {
> -            tcg_gen_add_tl(cpu_A0, a0, seg);
> +            tcg_gen_add_tl(s->A0, a0, seg);
>          } else if (CODE64(s)) {
> -            tcg_gen_ext32u_tl(cpu_A0, a0);
> -            tcg_gen_add_tl(cpu_A0, cpu_A0, seg);
> +            tcg_gen_ext32u_tl(s->A0, a0);
> +            tcg_gen_add_tl(s->A0, s->A0, seg);
>          } else {
> -            tcg_gen_add_tl(cpu_A0, a0, seg);
> -            tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
> +            tcg_gen_add_tl(s->A0, a0, seg);
> +            tcg_gen_ext32u_tl(s->A0, s->A0);
>          }
>      }
>  }
> @@ -640,9 +640,9 @@ static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
>  static inline void gen_movs(DisasContext *s, TCGMemOp ot)
>  {
>      gen_string_movl_A0_ESI(s);
> -    gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
> +    gen_op_ld_v(s, ot, cpu_T0, s->A0);
>      gen_string_movl_A0_EDI(s);
> -    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
> +    gen_op_st_v(s, ot, cpu_T0, s->A0);
>      gen_op_movl_T0_Dshift(ot);
>      gen_op_add_reg_T0(s->aflag, R_ESI);
>      gen_op_add_reg_T0(s->aflag, R_EDI);
> @@ -1072,7 +1072,7 @@ static inline void gen_stos(DisasContext *s, TCGMemOp ot)
>  {
>      gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
>      gen_string_movl_A0_EDI(s);
> -    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
> +    gen_op_st_v(s, ot, cpu_T0, s->A0);
>      gen_op_movl_T0_Dshift(ot);
>      gen_op_add_reg_T0(s->aflag, R_EDI);
>  }
> @@ -1080,7 +1080,7 @@ static inline void gen_stos(DisasContext *s, TCGMemOp ot)
>  static inline void gen_lods(DisasContext *s, TCGMemOp ot)
>  {
>      gen_string_movl_A0_ESI(s);
> -    gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
> +    gen_op_ld_v(s, ot, cpu_T0, s->A0);
>      gen_op_mov_reg_v(ot, R_EAX, cpu_T0);
>      gen_op_movl_T0_Dshift(ot);
>      gen_op_add_reg_T0(s->aflag, R_ESI);
> @@ -1089,7 +1089,7 @@ static inline void gen_lods(DisasContext *s, TCGMemOp ot)
>  static inline void gen_scas(DisasContext *s, TCGMemOp ot)
>  {
>      gen_string_movl_A0_EDI(s);
> -    gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
> +    gen_op_ld_v(s, ot, cpu_T1, s->A0);
>      gen_op(s, OP_CMPL, ot, R_EAX);
>      gen_op_movl_T0_Dshift(ot);
>      gen_op_add_reg_T0(s->aflag, R_EDI);
> @@ -1098,7 +1098,7 @@ static inline void gen_scas(DisasContext *s, TCGMemOp ot)
>  static inline void gen_cmps(DisasContext *s, TCGMemOp ot)
>  {
>      gen_string_movl_A0_EDI(s);
> -    gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
> +    gen_op_ld_v(s, ot, cpu_T1, s->A0);
>      gen_string_movl_A0_ESI(s);
>      gen_op(s, OP_CMPL, ot, OR_TMP0);
>      gen_op_movl_T0_Dshift(ot);
> @@ -1128,11 +1128,11 @@ static inline void gen_ins(DisasContext *s, TCGMemOp ot)
>      /* Note: we must do this dummy write first to be restartable in
>         case of page fault. */
>      tcg_gen_movi_tl(cpu_T0, 0);
> -    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
> +    gen_op_st_v(s, ot, cpu_T0, s->A0);
>      tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
>      tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
>      gen_helper_in_func(ot, cpu_T0, cpu_tmp2_i32);
> -    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
> +    gen_op_st_v(s, ot, cpu_T0, s->A0);
>      gen_op_movl_T0_Dshift(ot);
>      gen_op_add_reg_T0(s->aflag, R_EDI);
>      gen_bpt_io(s, cpu_tmp2_i32, ot);
> @@ -1147,7 +1147,7 @@ static inline void gen_outs(DisasContext *s, TCGMemOp ot)
>          gen_io_start();
>      }
>      gen_string_movl_A0_ESI(s);
> -    gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
> +    gen_op_ld_v(s, ot, cpu_T0, s->A0);
>
>      tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
>      tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
> @@ -1267,14 +1267,14 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
>      if (d != OR_TMP0) {
>          gen_op_mov_v_reg(ot, cpu_T0, d);
>      } else if (!(s1->prefix & PREFIX_LOCK)) {
> -        gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
> +        gen_op_ld_v(s1, ot, cpu_T0, s1->A0);
>      }
>      switch(op) {
>      case OP_ADCL:
>          gen_compute_eflags_c(s1, cpu_tmp4);
>          if (s1->prefix & PREFIX_LOCK) {
>              tcg_gen_add_tl(cpu_T0, cpu_tmp4, cpu_T1);
> -            tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
> +            tcg_gen_atomic_add_fetch_tl(cpu_T0, s1->A0, cpu_T0,
>                                          s1->mem_index, ot | MO_LE);
>          } else {
>              tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
> @@ -1289,7 +1289,7 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
>          if (s1->prefix & PREFIX_LOCK) {
>              tcg_gen_add_tl(cpu_T0, cpu_T1, cpu_tmp4);
>              tcg_gen_neg_tl(cpu_T0, cpu_T0);
> -            tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
> +            tcg_gen_atomic_add_fetch_tl(cpu_T0, s1->A0, cpu_T0,
>                                          s1->mem_index, ot | MO_LE);
>          } else {
>              tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
> @@ -1301,7 +1301,7 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
>          break;
>      case OP_ADDL:
>          if (s1->prefix & PREFIX_LOCK) {
> -            tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
> +            tcg_gen_atomic_add_fetch_tl(cpu_T0, s1->A0, cpu_T1,
>                                          s1->mem_index, ot | MO_LE);
>          } else {
>              tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
> @@ -1313,7 +1313,7 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
>      case OP_SUBL:
>          if (s1->prefix & PREFIX_LOCK) {
>              tcg_gen_neg_tl(cpu_T0, cpu_T1);
> -            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, cpu_A0, cpu_T0,
> +            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, cpu_T0,
>                                          s1->mem_index, ot | MO_LE);
>              tcg_gen_sub_tl(cpu_T0, s1->cc_srcT, cpu_T1);
>          } else {
> @@ -1327,7 +1327,7 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
>      default:
>      case OP_ANDL:
>          if (s1->prefix & PREFIX_LOCK) {
> -            tcg_gen_atomic_and_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
> +            tcg_gen_atomic_and_fetch_tl(cpu_T0, s1->A0, cpu_T1,
>                                          s1->mem_index, ot | MO_LE);
>          } else {
>              tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
> @@ -1338,7 +1338,7 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
>          break;
>      case OP_ORL:
>          if (s1->prefix & PREFIX_LOCK) {
> -            tcg_gen_atomic_or_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
> +            tcg_gen_atomic_or_fetch_tl(cpu_T0, s1->A0, cpu_T1,
>                                         s1->mem_index, ot | MO_LE);
>          } else {
>              tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
> @@ -1349,7 +1349,7 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
>          break;
>      case OP_XORL:
>          if (s1->prefix & PREFIX_LOCK) {
> -            tcg_gen_atomic_xor_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
> +            tcg_gen_atomic_xor_fetch_tl(cpu_T0, s1->A0, cpu_T1,
>                                          s1->mem_index, ot | MO_LE);
>          } else {
>              tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
> @@ -1372,13 +1372,13 @@ static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c)
>  {
>      if (s1->prefix & PREFIX_LOCK) {
>          tcg_gen_movi_tl(cpu_T0, c > 0 ? 1 : -1);
> -        tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
> +        tcg_gen_atomic_add_fetch_tl(cpu_T0, s1->A0, cpu_T0,
>                                      s1->mem_index, ot | MO_LE);
>      } else {
>          if (d != OR_TMP0) {
>              gen_op_mov_v_reg(ot, cpu_T0, d);
>          } else {
> -            gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
> +            gen_op_ld_v(s1, ot, cpu_T0, s1->A0);
>          }
>          tcg_gen_addi_tl(cpu_T0, cpu_T0, (c > 0 ? 1 : -1));
>          gen_op_st_rm_T0_A0(s1, ot, d);
> @@ -1441,7 +1441,7 @@ static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>
>      /* load */
>      if (op1 == OR_TMP0) {
> -        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
> +        gen_op_ld_v(s, ot, cpu_T0, s->A0);
>      } else {
>          gen_op_mov_v_reg(ot, cpu_T0, op1);
>      }
> @@ -1477,7 +1477,7 @@ static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
>
>      /* load */
>      if (op1 == OR_TMP0)
> -        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
> +        gen_op_ld_v(s, ot, cpu_T0, s->A0);
>      else
>          gen_op_mov_v_reg(ot, cpu_T0, op1);
>
> @@ -1517,7 +1517,7 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
>
>      /* load */
>      if (op1 == OR_TMP0) {
> -        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
> +        gen_op_ld_v(s, ot, cpu_T0, s->A0);
>      } else {
>          gen_op_mov_v_reg(ot, cpu_T0, op1);
>      }
> @@ -1603,7 +1603,7 @@ static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
>
>      /* load */
>      if (op1 == OR_TMP0) {
> -        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
> +        gen_op_ld_v(s, ot, cpu_T0, s->A0);
>      } else {
>          gen_op_mov_v_reg(ot, cpu_T0, op1);
>      }
> @@ -1681,7 +1681,7 @@ static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>
>      /* load */
>      if (op1 == OR_TMP0)
> -        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
> +        gen_op_ld_v(s, ot, cpu_T0, s->A0);
>      else
>          gen_op_mov_v_reg(ot, cpu_T0, op1);
>
> @@ -1737,7 +1737,7 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>
>      /* load */
>      if (op1 == OR_TMP0) {
> -        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
> +        gen_op_ld_v(s, ot, cpu_T0, s->A0);
>      } else {
>          gen_op_mov_v_reg(ot, cpu_T0, op1);
>      }
> @@ -2052,7 +2052,7 @@ static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
>  }
>
>  /* Compute the address, with a minimum number of TCG ops.  */
> -static TCGv gen_lea_modrm_1(AddressParts a)
> +static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a)
>  {
>      TCGv ea = NULL;
>
> @@ -2060,22 +2060,22 @@ static TCGv gen_lea_modrm_1(AddressParts a)
>          if (a.scale == 0) {
>              ea = cpu_regs[a.index];
>          } else {
> -            tcg_gen_shli_tl(cpu_A0, cpu_regs[a.index], a.scale);
> -            ea = cpu_A0;
> +            tcg_gen_shli_tl(s->A0, cpu_regs[a.index], a.scale);
> +            ea = s->A0;
>          }
>          if (a.base >= 0) {
> -            tcg_gen_add_tl(cpu_A0, ea, cpu_regs[a.base]);
> -            ea = cpu_A0;
> +            tcg_gen_add_tl(s->A0, ea, cpu_regs[a.base]);
> +            ea = s->A0;
>          }
>      } else if (a.base >= 0) {
>          ea = cpu_regs[a.base];
>      }
>      if (!ea) {
> -        tcg_gen_movi_tl(cpu_A0, a.disp);
> -        ea = cpu_A0;
> +        tcg_gen_movi_tl(s->A0, a.disp);
> +        ea = s->A0;
>      } else if (a.disp != 0) {
> -        tcg_gen_addi_tl(cpu_A0, ea, a.disp);
> -        ea = cpu_A0;
> +        tcg_gen_addi_tl(s->A0, ea, a.disp);
> +        ea = s->A0;
>      }
>
>      return ea;
> @@ -2084,7 +2084,7 @@ static TCGv gen_lea_modrm_1(AddressParts a)
>  static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
>  {
>      AddressParts a = gen_lea_modrm_0(env, s, modrm);
> -    TCGv ea = gen_lea_modrm_1(a);
> +    TCGv ea = gen_lea_modrm_1(s, a);
>      gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
>  }
>
> @@ -2097,7 +2097,7 @@ static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
>  static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
>                        TCGCond cond, TCGv_i64 bndv)
>  {
> -    TCGv ea = gen_lea_modrm_1(gen_lea_modrm_0(env, s, modrm));
> +    TCGv ea = gen_lea_modrm_1(s, gen_lea_modrm_0(env, s, modrm));
>
>      tcg_gen_extu_tl_i64(cpu_tmp1_i64, ea);
>      if (!CODE64(s)) {
> @@ -2111,7 +2111,7 @@ static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
>  /* used for LEA and MOV AX, mem */
>  static void gen_add_A0_ds_seg(DisasContext *s)
>  {
> -    gen_lea_v_seg(s, s->aflag, cpu_A0, R_DS, s->override);
> +    gen_lea_v_seg(s, s->aflag, s->A0, R_DS, s->override);
>  }
>
>  /* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
> @@ -2138,9 +2138,9 @@ static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
>          if (is_store) {
>              if (reg != OR_TMP0)
>                  gen_op_mov_v_reg(ot, cpu_T0, reg);
> -            gen_op_st_v(s, ot, cpu_T0, cpu_A0);
> +            gen_op_st_v(s, ot, cpu_T0, s->A0);
>          } else {
> -            gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
> +            gen_op_ld_v(s, ot, cpu_T0, s->A0);
>              if (reg != OR_TMP0)
>                  gen_op_mov_reg_v(ot, reg, cpu_T0);
>          }
> @@ -2334,19 +2334,19 @@ static void gen_push_v(DisasContext *s, TCGv val)
>      TCGMemOp d_ot = mo_pushpop(s, s->dflag);
>      TCGMemOp a_ot = mo_stacksize(s);
>      int size = 1 << d_ot;
> -    TCGv new_esp = cpu_A0;
> +    TCGv new_esp = s->A0;
>
> -    tcg_gen_subi_tl(cpu_A0, cpu_regs[R_ESP], size);
> +    tcg_gen_subi_tl(s->A0, cpu_regs[R_ESP], size);
>
>      if (!CODE64(s)) {
>          if (s->addseg) {
>              new_esp = cpu_tmp4;
> -            tcg_gen_mov_tl(new_esp, cpu_A0);
> +            tcg_gen_mov_tl(new_esp, s->A0);
>          }
> -        gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
> +        gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
>      }
>
> -    gen_op_st_v(s, d_ot, val, cpu_A0);
> +    gen_op_st_v(s, d_ot, val, s->A0);
>      gen_op_mov_reg_v(a_ot, R_ESP, new_esp);
>  }
>
> @@ -2356,7 +2356,7 @@ static TCGMemOp gen_pop_T0(DisasContext *s)
>      TCGMemOp d_ot = mo_pushpop(s, s->dflag);
>
>      gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
> -    gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
> +    gen_op_ld_v(s, d_ot, cpu_T0, s->A0);
>
>      return d_ot;
>  }
> @@ -2379,9 +2379,9 @@ static void gen_pusha(DisasContext *s)
>      int i;
>
>      for (i = 0; i < 8; i++) {
> -        tcg_gen_addi_tl(cpu_A0, cpu_regs[R_ESP], (i - 8) * size);
> -        gen_lea_v_seg(s, s_ot, cpu_A0, R_SS, -1);
> -        gen_op_st_v(s, d_ot, cpu_regs[7 - i], cpu_A0);
> +        tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], (i - 8) * size);
> +        gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
> +        gen_op_st_v(s, d_ot, cpu_regs[7 - i], s->A0);
>      }
>
>      gen_stack_update(s, -8 * size);
> @@ -2399,9 +2399,9 @@ static void gen_popa(DisasContext *s)
>          if (7 - i == R_ESP) {
>              continue;
>          }
> -        tcg_gen_addi_tl(cpu_A0, cpu_regs[R_ESP], i * size);
> -        gen_lea_v_seg(s, s_ot, cpu_A0, R_SS, -1);
> -        gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
> +        tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], i * size);
> +        gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
> +        gen_op_ld_v(s, d_ot, cpu_T0, s->A0);
>          gen_op_mov_reg_v(d_ot, 7 - i, cpu_T0);
>      }
>
> @@ -2417,7 +2417,7 @@ static void gen_enter(DisasContext *s, int esp_addend, int level)
>      /* Push BP; compute FrameTemp into T1.  */
>      tcg_gen_subi_tl(cpu_T1, cpu_regs[R_ESP], size);
>      gen_lea_v_seg(s, a_ot, cpu_T1, R_SS, -1);
> -    gen_op_st_v(s, d_ot, cpu_regs[R_EBP], cpu_A0);
> +    gen_op_st_v(s, d_ot, cpu_regs[R_EBP], s->A0);
>
>      level &= 31;
>      if (level != 0) {
> @@ -2425,19 +2425,19 @@ static void gen_enter(DisasContext *s, int esp_addend, int level)
>
>          /* Copy level-1 pointers from the previous frame.  */
>          for (i = 1; i < level; ++i) {
> -            tcg_gen_subi_tl(cpu_A0, cpu_regs[R_EBP], size * i);
> -            gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
> -            gen_op_ld_v(s, d_ot, cpu_tmp0, cpu_A0);
> +            tcg_gen_subi_tl(s->A0, cpu_regs[R_EBP], size * i);
> +            gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
> +            gen_op_ld_v(s, d_ot, cpu_tmp0, s->A0);
>
> -            tcg_gen_subi_tl(cpu_A0, cpu_T1, size * i);
> -            gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
> -            gen_op_st_v(s, d_ot, cpu_tmp0, cpu_A0);
> +            tcg_gen_subi_tl(s->A0, cpu_T1, size * i);
> +            gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
> +            gen_op_st_v(s, d_ot, cpu_tmp0, s->A0);
>          }
>
>          /* Push the current FrameTemp as the last level.  */
> -        tcg_gen_subi_tl(cpu_A0, cpu_T1, size * level);
> -        gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
> -        gen_op_st_v(s, d_ot, cpu_T1, cpu_A0);
> +        tcg_gen_subi_tl(s->A0, cpu_T1, size * level);
> +        gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
> +        gen_op_st_v(s, d_ot, cpu_T1, s->A0);
>      }
>
>      /* Copy the FrameTemp value to EBP.  */
> @@ -2454,7 +2454,7 @@ static void gen_leave(DisasContext *s)
>      TCGMemOp a_ot = mo_stacksize(s);
>
>      gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
> -    gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
> +    gen_op_ld_v(s, d_ot, cpu_T0, s->A0);
>
>      tcg_gen_addi_tl(cpu_T1, cpu_regs[R_EBP], 1 << d_ot);
>
> @@ -2633,22 +2633,22 @@ static void gen_jmp(DisasContext *s, target_ulong eip)
>
>  static inline void gen_ldq_env_A0(DisasContext *s, int offset)
>  {
> -    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
> +    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0, s->mem_index, MO_LEQ);
>      tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset);
>  }
>
>  static inline void gen_stq_env_A0(DisasContext *s, int offset)
>  {
>      tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset);
> -    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
> +    tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->A0, s->mem_index, MO_LEQ);
>  }
>
>  static inline void gen_ldo_env_A0(DisasContext *s, int offset)
>  {
>      int mem_index = s->mem_index;
> -    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
> +    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0, mem_index, MO_LEQ);
>      tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
> -    tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
> +    tcg_gen_addi_tl(cpu_tmp0, s->A0, 8);
>      tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
>      tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
>  }
> @@ -2657,8 +2657,8 @@ static inline void gen_sto_env_A0(DisasContext *s, int offset)
>  {
>      int mem_index = s->mem_index;
>      tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
> -    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
> -    tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
> +    tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->A0, mem_index, MO_LEQ);
> +    tcg_gen_addi_tl(cpu_tmp0, s->A0, 8);
>      tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
>      tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
>  }
> @@ -3128,7 +3128,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>              } else {
>                  tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
>                      xmm_regs[reg].ZMM_L(0)));
> -                gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
> +                gen_op_st_v(s, MO_32, cpu_T0, s->A0);
>              }
>              break;
>          case 0x6e: /* movd mm, ea */
> @@ -3193,7 +3193,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>          case 0x210: /* movss xmm, ea */
>              if (mod != 3) {
>                  gen_lea_modrm(env, s, modrm);
> -                gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
> +                gen_op_ld_v(s, MO_32, cpu_T0, s->A0);
>                  tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
>                  tcg_gen_movi_tl(cpu_T0, 0);
>                  tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
> @@ -3380,7 +3380,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>              if (mod != 3) {
>                  gen_lea_modrm(env, s, modrm);
>                  tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
> -                gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
> +                gen_op_st_v(s, MO_32, cpu_T0, s->A0);
>              } else {
>                  rm = (modrm & 7) | REX_B(s);
>                  gen_op_movl(offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)),
> @@ -3555,7 +3555,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  if ((b >> 8) & 1) {
>                      gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0)));
>                  } else {
> -                    gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
> +                    gen_op_ld_v(s, MO_32, cpu_T0, s->A0);
>                      tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
>                  }
>                  op2_offset = offsetof(CPUX86State,xmm_t0);
> @@ -3694,13 +3694,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                          break;
>                      case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
>                      case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
> -                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
> +                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
>                                              s->mem_index, MO_LEUL);
>                          tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, op2_offset +
>                                          offsetof(ZMMReg, ZMM_L(0)));
>                          break;
>                      case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
> -                        tcg_gen_qemu_ld_tl(cpu_tmp0, cpu_A0,
> +                        tcg_gen_qemu_ld_tl(cpu_tmp0, s->A0,
>                                             s->mem_index, MO_LEUW);
>                          tcg_gen_st16_tl(cpu_tmp0, cpu_env, op2_offset +
>                                          offsetof(ZMMReg, ZMM_W(0)));
> @@ -3789,11 +3789,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>
>                  gen_lea_modrm(env, s, modrm);
>                  if ((b & 1) == 0) {
> -                    tcg_gen_qemu_ld_tl(cpu_T0, cpu_A0,
> +                    tcg_gen_qemu_ld_tl(cpu_T0, s->A0,
>                                         s->mem_index, ot | MO_BE);
>                      gen_op_mov_reg_v(ot, reg, cpu_T0);
>                  } else {
> -                    tcg_gen_qemu_st_tl(cpu_regs[reg], cpu_A0,
> +                    tcg_gen_qemu_st_tl(cpu_regs[reg], s->A0,
>                                         s->mem_index, ot | MO_BE);
>                  }
>                  break;
> @@ -3825,23 +3825,23 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
>                      /* Extract START, and shift the operand.
>                         Shifts larger than operand size get zeros.  */
> -                    tcg_gen_ext8u_tl(cpu_A0, cpu_regs[s->vex_v]);
> -                    tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_A0);
> +                    tcg_gen_ext8u_tl(s->A0, cpu_regs[s->vex_v]);
> +                    tcg_gen_shr_tl(cpu_T0, cpu_T0, s->A0);
>
>                      bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
>                      zero = tcg_const_tl(0);
> -                    tcg_gen_movcond_tl(TCG_COND_LEU, cpu_T0, cpu_A0, bound,
> +                    tcg_gen_movcond_tl(TCG_COND_LEU, cpu_T0, s->A0, bound,
>                                         cpu_T0, zero);
>                      tcg_temp_free(zero);
>
>                      /* Extract the LEN into a mask.  Lengths larger than
>                         operand size get all ones.  */
> -                    tcg_gen_extract_tl(cpu_A0, cpu_regs[s->vex_v], 8, 8);
> -                    tcg_gen_movcond_tl(TCG_COND_LEU, cpu_A0, cpu_A0, bound,
> -                                       cpu_A0, bound);
> +                    tcg_gen_extract_tl(s->A0, cpu_regs[s->vex_v], 8, 8);
> +                    tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound,
> +                                       s->A0, bound);
>                      tcg_temp_free(bound);
>                      tcg_gen_movi_tl(cpu_T1, 1);
> -                    tcg_gen_shl_tl(cpu_T1, cpu_T1, cpu_A0);
> +                    tcg_gen_shl_tl(cpu_T1, cpu_T1, s->A0);
>                      tcg_gen_subi_tl(cpu_T1, cpu_T1, 1);
>                      tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
>
> @@ -3870,9 +3870,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                                         bound, bound, cpu_T1);
>                      tcg_temp_free(bound);
>                  }
> -                tcg_gen_movi_tl(cpu_A0, -1);
> -                tcg_gen_shl_tl(cpu_A0, cpu_A0, cpu_T1);
> -                tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_A0);
> +                tcg_gen_movi_tl(s->A0, -1);
> +                tcg_gen_shl_tl(s->A0, s->A0, cpu_T1);
> +                tcg_gen_andc_tl(cpu_T0, cpu_T0, s->A0);
>                  gen_op_mov_reg_v(ot, reg, cpu_T0);
>                  gen_op_update1_cc();
>                  set_cc_op(s, CC_OP_BMILGB + ot);
> @@ -4124,7 +4124,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      if (mod == 3) {
>                          gen_op_mov_reg_v(ot, rm, cpu_T0);
>                      } else {
> -                        tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
> +                        tcg_gen_qemu_st_tl(cpu_T0, s->A0,
>                                             s->mem_index, MO_UB);
>                      }
>                      break;
> @@ -4134,7 +4134,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      if (mod == 3) {
>                          gen_op_mov_reg_v(ot, rm, cpu_T0);
>                      } else {
> -                        tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
> +                        tcg_gen_qemu_st_tl(cpu_T0, s->A0,
>                                             s->mem_index, MO_LEUW);
>                      }
>                      break;
> @@ -4146,7 +4146,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                          if (mod == 3) {
>                              tcg_gen_extu_i32_tl(cpu_regs[rm], cpu_tmp2_i32);
>                          } else {
> -                            tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
> +                            tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
>                                                  s->mem_index, MO_LEUL);
>                          }
>                      } else { /* pextrq */
> @@ -4157,7 +4157,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                          if (mod == 3) {
>                              tcg_gen_mov_i64(cpu_regs[rm], cpu_tmp1_i64);
>                          } else {
> -                            tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
> +                            tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->A0,
>                                                  s->mem_index, MO_LEQ);
>                          }
>  #else
> @@ -4171,7 +4171,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      if (mod == 3) {
>                          gen_op_mov_reg_v(ot, rm, cpu_T0);
>                      } else {
> -                        tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
> +                        tcg_gen_qemu_st_tl(cpu_T0, s->A0,
>                                             s->mem_index, MO_LEUL);
>                      }
>                      break;
> @@ -4179,7 +4179,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      if (mod == 3) {
>                          gen_op_mov_v_reg(MO_32, cpu_T0, rm);
>                      } else {
> -                        tcg_gen_qemu_ld_tl(cpu_T0, cpu_A0,
> +                        tcg_gen_qemu_ld_tl(cpu_T0, s->A0,
>                                             s->mem_index, MO_UB);
>                      }
>                      tcg_gen_st8_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
> @@ -4191,7 +4191,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                                          offsetof(CPUX86State,xmm_regs[rm]
>                                                  .ZMM_L((val >> 6) & 3)));
>                      } else {
> -                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
> +                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
>                                              s->mem_index, MO_LEUL);
>                      }
>                      tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
> @@ -4219,7 +4219,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                          if (mod == 3) {
>                              tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[rm]);
>                          } else {
> -                            tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
> +                            tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
>                                                  s->mem_index, MO_LEUL);
>                          }
>                          tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
> @@ -4230,7 +4230,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                          if (mod == 3) {
>                              gen_op_mov_v_reg(ot, cpu_tmp1_i64, rm);
>                          } else {
> -                            tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
> +                            tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0,
>                                                  s->mem_index, MO_LEQ);
>                          }
>                          tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
> @@ -4360,7 +4360,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  switch (sz) {
>                  case 2:
>                      /* 32 bit access */
> -                    gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
> +                    gen_op_ld_v(s, MO_32, cpu_T0, s->A0);
>                      tcg_gen_st32_tl(cpu_T0, cpu_env,
>                                      offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
>                      break;
> @@ -4426,15 +4426,15 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>              /* maskmov : we must prepare A0 */
>              if (mod != 3)
>                  goto illegal_op;
> -            tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EDI]);
> -            gen_extu(s->aflag, cpu_A0);
> +            tcg_gen_mov_tl(s->A0, cpu_regs[R_EDI]);
> +            gen_extu(s->aflag, s->A0);
>              gen_add_A0_ds_seg(s);
>
>              tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
>              tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
>              /* XXX: introduce a new table? */
>              sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
> -            sse_fn_eppt(cpu_env, cpu_ptr0, cpu_ptr1, cpu_A0);
> +            sse_fn_eppt(cpu_env, cpu_ptr0, cpu_ptr1, s->A0);
>              break;
>          default:
>              tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
> @@ -4673,7 +4673,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  rm = (modrm & 7) | REX_B(s);
>                  if (mod != 3) {
>                      gen_lea_modrm(env, s, modrm);
> -                    gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
> +                    gen_op_ld_v(s, ot, cpu_T1, s->A0);
>                  } else if (op == OP_XORL && rm == reg) {
>                      goto xor_zero;
>                  } else {
> @@ -4760,7 +4760,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              /* For those below that handle locked memory, don't load here.  */
>              if (!(s->prefix & PREFIX_LOCK)
>                  || op != 2) {
> -                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
> +                gen_op_ld_v(s, ot, cpu_T0, s->A0);
>              }
>          } else {
>              gen_op_mov_v_reg(ot, cpu_T0, rm);
> @@ -4779,12 +4779,12 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                      goto illegal_op;
>                  }
>                  tcg_gen_movi_tl(cpu_T0, ~0);
> -                tcg_gen_atomic_xor_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
> +                tcg_gen_atomic_xor_fetch_tl(cpu_T0, s->A0, cpu_T0,
>                                              s->mem_index, ot | MO_LE);
>              } else {
>                  tcg_gen_not_tl(cpu_T0, cpu_T0);
>                  if (mod != 3) {
> -                    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
> +                    gen_op_st_v(s, ot, cpu_T0, s->A0);
>                  } else {
>                      gen_op_mov_reg_v(ot, rm, cpu_T0);
>                  }
> @@ -4802,7 +4802,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  t0 = tcg_temp_local_new();
>                  label1 = gen_new_label();
>
> -                tcg_gen_mov_tl(a0, cpu_A0);
> +                tcg_gen_mov_tl(a0, s->A0);
>                  tcg_gen_mov_tl(t0, cpu_T0);
>
>                  gen_set_label(label1);
> @@ -4822,7 +4822,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              } else {
>                  tcg_gen_neg_tl(cpu_T0, cpu_T0);
>                  if (mod != 3) {
> -                    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
> +                    gen_op_st_v(s, ot, cpu_T0, s->A0);
>                  } else {
>                      gen_op_mov_reg_v(ot, rm, cpu_T0);
>                  }
> @@ -5001,7 +5001,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          if (mod != 3) {
>              gen_lea_modrm(env, s, modrm);
>              if (op >= 2 && op != 3 && op != 5)
> -                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
> +                gen_op_ld_v(s, ot, cpu_T0, s->A0);
>          } else {
>              gen_op_mov_v_reg(ot, cpu_T0, rm);
>          }
> @@ -5034,9 +5034,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              gen_jr(s, cpu_T0);
>              break;
>          case 3: /* lcall Ev */
> -            gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
> +            gen_op_ld_v(s, ot, cpu_T1, s->A0);
>              gen_add_A0_im(s, 1 << ot);
> -            gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
> +            gen_op_ld_v(s, MO_16, cpu_T0, s->A0);
>          do_lcall:
>              if (s->pe && !s->vm86) {
>                  tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> @@ -5061,9 +5061,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              gen_jr(s, cpu_T0);
>              break;
>          case 5: /* ljmp Ev */
> -            gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
> +            gen_op_ld_v(s, ot, cpu_T1, s->A0);
>              gen_add_A0_im(s, 1 << ot);
> -            gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
> +            gen_op_ld_v(s, MO_16, cpu_T0, s->A0);
>          do_ljmp:
>              if (s->pe && !s->vm86) {
>                  tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> @@ -5225,13 +5225,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          } else {
>              gen_lea_modrm(env, s, modrm);
>              if (s->prefix & PREFIX_LOCK) {
> -                tcg_gen_atomic_fetch_add_tl(cpu_T1, cpu_A0, cpu_T0,
> +                tcg_gen_atomic_fetch_add_tl(cpu_T1, s->A0, cpu_T0,
>                                              s->mem_index, ot | MO_LE);
>                  tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
>              } else {
> -                gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
> +                gen_op_ld_v(s, ot, cpu_T1, s->A0);
>                  tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
> -                gen_op_st_v(s, ot, cpu_T0, cpu_A0);
> +                gen_op_st_v(s, ot, cpu_T0, s->A0);
>              }
>              gen_op_mov_reg_v(ot, reg, cpu_T1);
>          }
> @@ -5258,7 +5258,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                      goto illegal_op;
>                  }
>                  gen_lea_modrm(env, s, modrm);
> -                tcg_gen_atomic_cmpxchg_tl(oldv, cpu_A0, cmpv, newv,
> +                tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
>                                            s->mem_index, ot | MO_LE);
>                  gen_op_mov_reg_v(ot, R_EAX, oldv);
>              } else {
> @@ -5267,7 +5267,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                      gen_op_mov_v_reg(ot, oldv, rm);
>                  } else {
>                      gen_lea_modrm(env, s, modrm);
> -                    gen_op_ld_v(s, ot, oldv, cpu_A0);
> +                    gen_op_ld_v(s, ot, oldv, s->A0);
>                      rm = 0; /* avoid warning */
>                  }
>                  gen_extu(ot, oldv);
> @@ -5282,7 +5282,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                         must be before changing accumulator to ensure
>                         idempotency if the store faults and the instruction
>                         is restarted */
> -                    gen_op_st_v(s, ot, newv, cpu_A0);
> +                    gen_op_st_v(s, ot, newv, s->A0);
>                      gen_op_mov_reg_v(ot, R_EAX, oldv);
>                  }
>              }
> @@ -5306,9 +5306,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  goto illegal_op;
>              gen_lea_modrm(env, s, modrm);
>              if ((s->prefix & PREFIX_LOCK) && (tb_cflags(s->base.tb) & CF_PARALLEL)) {
> -                gen_helper_cmpxchg16b(cpu_env, cpu_A0);
> +                gen_helper_cmpxchg16b(cpu_env, s->A0);
>              } else {
> -                gen_helper_cmpxchg16b_unlocked(cpu_env, cpu_A0);
> +                gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0);
>              }
>          } else
>  #endif
> @@ -5317,9 +5317,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  goto illegal_op;
>              gen_lea_modrm(env, s, modrm);
>              if ((s->prefix & PREFIX_LOCK) && (tb_cflags(s->base.tb) & CF_PARALLEL)) {
> -                gen_helper_cmpxchg8b(cpu_env, cpu_A0);
> +                gen_helper_cmpxchg8b(cpu_env, s->A0);
>              } else {
> -                gen_helper_cmpxchg8b_unlocked(cpu_env, cpu_A0);
> +                gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0);
>              }
>          }
>          set_cc_op(s, CC_OP_EFLAGS);
> @@ -5453,7 +5453,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          val = insn_get(env, s, ot);
>          tcg_gen_movi_tl(cpu_T0, val);
>          if (mod != 3) {
> -            gen_op_st_v(s, ot, cpu_T0, cpu_A0);
> +            gen_op_st_v(s, ot, cpu_T0, s->A0);
>          } else {
>              gen_op_mov_reg_v(ot, (modrm & 7) | REX_B(s), cpu_T0);
>          }
> @@ -5540,7 +5540,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  gen_op_mov_reg_v(d_ot, reg, cpu_T0);
>              } else {
>                  gen_lea_modrm(env, s, modrm);
> -                gen_op_ld_v(s, s_ot, cpu_T0, cpu_A0);
> +                gen_op_ld_v(s, s_ot, cpu_T0, s->A0);
>                  gen_op_mov_reg_v(d_ot, reg, cpu_T0);
>              }
>          }
> @@ -5554,9 +5554,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          reg = ((modrm >> 3) & 7) | rex_r;
>          {
>              AddressParts a = gen_lea_modrm_0(env, s, modrm);
> -            TCGv ea = gen_lea_modrm_1(a);
> +            TCGv ea = gen_lea_modrm_1(s, a);
>              gen_lea_v_seg(s, s->aflag, ea, -1, -1);
> -            gen_op_mov_reg_v(dflag, reg, cpu_A0);
> +            gen_op_mov_reg_v(dflag, reg, s->A0);
>          }
>          break;
>
> @@ -5578,24 +5578,24 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  offset_addr = insn_get(env, s, s->aflag);
>                  break;
>              }
> -            tcg_gen_movi_tl(cpu_A0, offset_addr);
> +            tcg_gen_movi_tl(s->A0, offset_addr);
>              gen_add_A0_ds_seg(s);
>              if ((b & 2) == 0) {
> -                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
> +                gen_op_ld_v(s, ot, cpu_T0, s->A0);
>                  gen_op_mov_reg_v(ot, R_EAX, cpu_T0);
>              } else {
>                  gen_op_mov_v_reg(ot, cpu_T0, R_EAX);
> -                gen_op_st_v(s, ot, cpu_T0, cpu_A0);
> +                gen_op_st_v(s, ot, cpu_T0, s->A0);
>              }
>          }
>          break;
>      case 0xd7: /* xlat */
> -        tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EBX]);
> +        tcg_gen_mov_tl(s->A0, cpu_regs[R_EBX]);
>          tcg_gen_ext8u_tl(cpu_T0, cpu_regs[R_EAX]);
> -        tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_T0);
> -        gen_extu(s->aflag, cpu_A0);
> +        tcg_gen_add_tl(s->A0, s->A0, cpu_T0);
> +        gen_extu(s->aflag, s->A0);
>          gen_add_A0_ds_seg(s);
> -        gen_op_ld_v(s, MO_8, cpu_T0, cpu_A0);
> +        gen_op_ld_v(s, MO_8, cpu_T0, s->A0);
>          gen_op_mov_reg_v(MO_8, R_EAX, cpu_T0);
>          break;
>      case 0xb0 ... 0xb7: /* mov R, Ib */
> @@ -5646,7 +5646,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              gen_lea_modrm(env, s, modrm);
>              gen_op_mov_v_reg(ot, cpu_T0, reg);
>              /* for xchg, lock is implicit */
> -            tcg_gen_atomic_xchg_tl(cpu_T1, cpu_A0, cpu_T0,
> +            tcg_gen_atomic_xchg_tl(cpu_T1, s->A0, cpu_T0,
>                                     s->mem_index, ot | MO_LE);
>              gen_op_mov_reg_v(ot, reg, cpu_T1);
>          }
> @@ -5675,10 +5675,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          if (mod == 3)
>              goto illegal_op;
>          gen_lea_modrm(env, s, modrm);
> -        gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
> +        gen_op_ld_v(s, ot, cpu_T1, s->A0);
>          gen_add_A0_im(s, 1 << ot);
>          /* load the segment first to handle exceptions properly */
> -        gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
> +        gen_op_ld_v(s, MO_16, cpu_T0, s->A0);
>          gen_movl_seg_T0(s, op);
>          /* then put the data */
>          gen_op_mov_reg_v(ot, reg, cpu_T1);
> @@ -5798,23 +5798,23 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>
>                      switch(op >> 4) {
>                      case 0:
> -                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
> +                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
>                                              s->mem_index, MO_LEUL);
>                          gen_helper_flds_FT0(cpu_env, cpu_tmp2_i32);
>                          break;
>                      case 1:
> -                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
> +                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
>                                              s->mem_index, MO_LEUL);
>                          gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32);
>                          break;
>                      case 2:
> -                        tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
> +                        tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0,
>                                              s->mem_index, MO_LEQ);
>                          gen_helper_fldl_FT0(cpu_env, cpu_tmp1_i64);
>                          break;
>                      case 3:
>                      default:
> -                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
> +                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
>                                              s->mem_index, MO_LESW);
>                          gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32);
>                          break;
> @@ -5837,23 +5837,23 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  case 0:
>                      switch(op >> 4) {
>                      case 0:
> -                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
> +                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
>                                              s->mem_index, MO_LEUL);
>                          gen_helper_flds_ST0(cpu_env, cpu_tmp2_i32);
>                          break;
>                      case 1:
> -                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
> +                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
>                                              s->mem_index, MO_LEUL);
>                          gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32);
>                          break;
>                      case 2:
> -                        tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
> +                        tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0,
>                                              s->mem_index, MO_LEQ);
>                          gen_helper_fldl_ST0(cpu_env, cpu_tmp1_i64);
>                          break;
>                      case 3:
>                      default:
> -                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
> +                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
>                                              s->mem_index, MO_LESW);
>                          gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32);
>                          break;
> @@ -5864,18 +5864,18 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                      switch(op >> 4) {
>                      case 1:
>                          gen_helper_fisttl_ST0(cpu_tmp2_i32, cpu_env);
> -                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
> +                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
>                                              s->mem_index, MO_LEUL);
>                          break;
>                      case 2:
>                          gen_helper_fisttll_ST0(cpu_tmp1_i64, cpu_env);
> -                        tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
> +                        tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->A0,
>                                              s->mem_index, MO_LEQ);
>                          break;
>                      case 3:
>                      default:
>                          gen_helper_fistt_ST0(cpu_tmp2_i32, cpu_env);
> -                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
> +                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
>                                              s->mem_index, MO_LEUW);
>                          break;
>                      }
> @@ -5885,23 +5885,23 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                      switch(op >> 4) {
>                      case 0:
>                          gen_helper_fsts_ST0(cpu_tmp2_i32, cpu_env);
> -                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
> +                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
>                                              s->mem_index, MO_LEUL);
>                          break;
>                      case 1:
>                          gen_helper_fistl_ST0(cpu_tmp2_i32, cpu_env);
> -                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
> +                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
>                                              s->mem_index, MO_LEUL);
>                          break;
>                      case 2:
>                          gen_helper_fstl_ST0(cpu_tmp1_i64, cpu_env);
> -                        tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
> +                        tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->A0,
>                                              s->mem_index, MO_LEQ);
>                          break;
>                      case 3:
>                      default:
>                          gen_helper_fist_ST0(cpu_tmp2_i32, cpu_env);
> -                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
> +                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
>                                              s->mem_index, MO_LEUW);
>                          break;
>                      }
> @@ -5911,53 +5911,53 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  }
>                  break;
>              case 0x0c: /* fldenv mem */
> -                gen_helper_fldenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
> +                gen_helper_fldenv(cpu_env, s->A0, tcg_const_i32(dflag - 1));
>                  break;
>              case 0x0d: /* fldcw mem */
> -                tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
> +                tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
>                                      s->mem_index, MO_LEUW);
>                  gen_helper_fldcw(cpu_env, cpu_tmp2_i32);
>                  break;
>              case 0x0e: /* fnstenv mem */
> -                gen_helper_fstenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
> +                gen_helper_fstenv(cpu_env, s->A0, tcg_const_i32(dflag - 1));
>                  break;
>              case 0x0f: /* fnstcw mem */
>                  gen_helper_fnstcw(cpu_tmp2_i32, cpu_env);
> -                tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
> +                tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
>                                      s->mem_index, MO_LEUW);
>                  break;
>              case 0x1d: /* fldt mem */
> -                gen_helper_fldt_ST0(cpu_env, cpu_A0);
> +                gen_helper_fldt_ST0(cpu_env, s->A0);
>                  break;
>              case 0x1f: /* fstpt mem */
> -                gen_helper_fstt_ST0(cpu_env, cpu_A0);
> +                gen_helper_fstt_ST0(cpu_env, s->A0);
>                  gen_helper_fpop(cpu_env);
>                  break;
>              case 0x2c: /* frstor mem */
> -                gen_helper_frstor(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
> +                gen_helper_frstor(cpu_env, s->A0, tcg_const_i32(dflag - 1));
>                  break;
>              case 0x2e: /* fnsave mem */
> -                gen_helper_fsave(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
> +                gen_helper_fsave(cpu_env, s->A0, tcg_const_i32(dflag - 1));
>                  break;
>              case 0x2f: /* fnstsw mem */
>                  gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
> -                tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
> +                tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
>                                      s->mem_index, MO_LEUW);
>                  break;
>              case 0x3c: /* fbld */
> -                gen_helper_fbld_ST0(cpu_env, cpu_A0);
> +                gen_helper_fbld_ST0(cpu_env, s->A0);
>                  break;
>              case 0x3e: /* fbstp */
> -                gen_helper_fbst_ST0(cpu_env, cpu_A0);
> +                gen_helper_fbst_ST0(cpu_env, s->A0);
>                  gen_helper_fpop(cpu_env);
>                  break;
>              case 0x3d: /* fildll */
> -                tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
> +                tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0, s->mem_index, MO_LEQ);
>                  gen_helper_fildll_ST0(cpu_env, cpu_tmp1_i64);
>                  break;
>              case 0x3f: /* fistpll */
>                  gen_helper_fistll_ST0(cpu_tmp1_i64, cpu_env);
> -                tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
> +                tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->A0, s->mem_index, MO_LEQ);
>                  gen_helper_fpop(cpu_env);
>                  break;
>              default:
> @@ -6471,13 +6471,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          } else {
>              gen_stack_A0(s);
>              /* pop offset */
> -            gen_op_ld_v(s, dflag, cpu_T0, cpu_A0);
> +            gen_op_ld_v(s, dflag, cpu_T0, s->A0);
>              /* NOTE: keeping EIP updated is not a problem in case of
>                 exception */
>              gen_op_jmp_v(cpu_T0);
>              /* pop selector */
>              gen_add_A0_im(s, 1 << dflag);
> -            gen_op_ld_v(s, dflag, cpu_T0, cpu_A0);
> +            gen_op_ld_v(s, dflag, cpu_T0, s->A0);
>              gen_op_movl_seg_T0_vm(R_CS);
>              /* add stack offset */
>              gen_stack_update(s, val + (2 << dflag));
> @@ -6732,7 +6732,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              s->rip_offset = 1;
>              gen_lea_modrm(env, s, modrm);
>              if (!(s->prefix & PREFIX_LOCK)) {
> -                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
> +                gen_op_ld_v(s, ot, cpu_T0, s->A0);
>              }
>          } else {
>              gen_op_mov_v_reg(ot, cpu_T0, rm);
> @@ -6768,10 +6768,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              gen_exts(ot, cpu_T1);
>              tcg_gen_sari_tl(cpu_tmp0, cpu_T1, 3 + ot);
>              tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, ot);
> -            tcg_gen_add_tl(cpu_A0, gen_lea_modrm_1(a), cpu_tmp0);
> -            gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override);
> +            tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), cpu_tmp0);
> +            gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
>              if (!(s->prefix & PREFIX_LOCK)) {
> -                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
> +                gen_op_ld_v(s, ot, cpu_T0, s->A0);
>              }
>          } else {
>              gen_op_mov_v_reg(ot, cpu_T0, rm);
> @@ -6785,20 +6785,20 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              case 0: /* bt */
>                  /* Needs no atomic ops; we surpressed the normal
>                     memory load for LOCK above so do it now.  */
> -                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
> +                gen_op_ld_v(s, ot, cpu_T0, s->A0);
>                  break;
>              case 1: /* bts */
> -                tcg_gen_atomic_fetch_or_tl(cpu_T0, cpu_A0, cpu_tmp0,
> +                tcg_gen_atomic_fetch_or_tl(cpu_T0, s->A0, cpu_tmp0,
>                                             s->mem_index, ot | MO_LE);
>                  break;
>              case 2: /* btr */
>                  tcg_gen_not_tl(cpu_tmp0, cpu_tmp0);
> -                tcg_gen_atomic_fetch_and_tl(cpu_T0, cpu_A0, cpu_tmp0,
> +                tcg_gen_atomic_fetch_and_tl(cpu_T0, s->A0, cpu_tmp0,
>                                              s->mem_index, ot | MO_LE);
>                  break;
>              default:
>              case 3: /* btc */
> -                tcg_gen_atomic_fetch_xor_tl(cpu_T0, cpu_A0, cpu_tmp0,
> +                tcg_gen_atomic_fetch_xor_tl(cpu_T0, s->A0, cpu_tmp0,
>                                              s->mem_index, ot | MO_LE);
>                  break;
>              }
> @@ -6822,7 +6822,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              }
>              if (op != 0) {
>                  if (mod != 3) {
> -                    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
> +                    gen_op_st_v(s, ot, cpu_T0, s->A0);
>                  } else {
>                      gen_op_mov_reg_v(ot, rm, cpu_T0);
>                  }
> @@ -7051,9 +7051,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          gen_lea_modrm(env, s, modrm);
>          tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
>          if (ot == MO_16) {
> -            gen_helper_boundw(cpu_env, cpu_A0, cpu_tmp2_i32);
> +            gen_helper_boundw(cpu_env, s->A0, cpu_tmp2_i32);
>          } else {
> -            gen_helper_boundl(cpu_env, cpu_A0, cpu_tmp2_i32);
> +            gen_helper_boundl(cpu_env, s->A0, cpu_tmp2_i32);
>          }
>          break;
>      case 0x1c8 ... 0x1cf: /* bswap reg */
> @@ -7293,13 +7293,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              gen_lea_modrm(env, s, modrm);
>              tcg_gen_ld32u_tl(cpu_T0,
>                               cpu_env, offsetof(CPUX86State, gdt.limit));
> -            gen_op_st_v(s, MO_16, cpu_T0, cpu_A0);
> +            gen_op_st_v(s, MO_16, cpu_T0, s->A0);
>              gen_add_A0_im(s, 2);
>              tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, gdt.base));
>              if (dflag == MO_16) {
>                  tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
>              }
> -            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
> +            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, s->A0);
>              break;
>
>          case 0xc8: /* monitor */
> @@ -7308,10 +7308,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              }
>              gen_update_cc_op(s);
>              gen_jmp_im(pc_start - s->cs_base);
> -            tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EAX]);
> -            gen_extu(s->aflag, cpu_A0);
> +            tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]);
> +            gen_extu(s->aflag, s->A0);
>              gen_add_A0_ds_seg(s);
> -            gen_helper_monitor(cpu_env, cpu_A0);
> +            gen_helper_monitor(cpu_env, s->A0);
>              break;
>
>          case 0xc9: /* mwait */
> @@ -7348,13 +7348,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
>              gen_lea_modrm(env, s, modrm);
>              tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.limit));
> -            gen_op_st_v(s, MO_16, cpu_T0, cpu_A0);
> +            gen_op_st_v(s, MO_16, cpu_T0, s->A0);
>              gen_add_A0_im(s, 2);
>              tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.base));
>              if (dflag == MO_16) {
>                  tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
>              }
> -            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
> +            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, s->A0);
>              break;
>
>          case 0xd0: /* xgetbv */
> @@ -7498,9 +7498,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              }
>              gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_WRITE);
>              gen_lea_modrm(env, s, modrm);
> -            gen_op_ld_v(s, MO_16, cpu_T1, cpu_A0);
> +            gen_op_ld_v(s, MO_16, cpu_T1, s->A0);
>              gen_add_A0_im(s, 2);
> -            gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
> +            gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, s->A0);
>              if (dflag == MO_16) {
>                  tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
>              }
> @@ -7515,9 +7515,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              }
>              gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_WRITE);
>              gen_lea_modrm(env, s, modrm);
> -            gen_op_ld_v(s, MO_16, cpu_T1, cpu_A0);
> +            gen_op_ld_v(s, MO_16, cpu_T1, s->A0);
>              gen_add_A0_im(s, 2);
> -            gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
> +            gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, s->A0);
>              if (dflag == MO_16) {
>                  tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
>              }
> @@ -7573,7 +7573,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              gen_update_cc_op(s);
>              gen_jmp_im(pc_start - s->cs_base);
>              gen_lea_modrm(env, s, modrm);
> -            gen_helper_invlpg(cpu_env, cpu_A0);
> +            gen_helper_invlpg(cpu_env, s->A0);
>              gen_jmp_im(s->pc - s->cs_base);
>              gen_eob(s);
>              break;
> @@ -7646,7 +7646,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  gen_op_mov_reg_v(d_ot, reg, cpu_T0);
>              } else {
>                  gen_lea_modrm(env, s, modrm);
> -                gen_op_ld_v(s, MO_32 | MO_SIGN, cpu_T0, cpu_A0);
> +                gen_op_ld_v(s, MO_32 | MO_SIGN, cpu_T0, s->A0);
>                  gen_op_mov_reg_v(d_ot, reg, cpu_T0);
>              }
>          } else
> @@ -7667,9 +7667,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              rm = modrm & 7;
>              if (mod != 3) {
>                  gen_lea_modrm(env, s, modrm);
> -                gen_op_ld_v(s, ot, t0, cpu_A0);
> +                gen_op_ld_v(s, ot, t0, s->A0);
>                  a0 = tcg_temp_local_new();
> -                tcg_gen_mov_tl(a0, cpu_A0);
> +                tcg_gen_mov_tl(a0, s->A0);
>              } else {
>                  gen_op_mov_v_reg(ot, t0, rm);
>                  a0 = NULL;
> @@ -7785,16 +7785,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  } else {
>                      gen_lea_modrm(env, s, modrm);
>                      if (CODE64(s)) {
> -                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], cpu_A0,
> +                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
>                                              s->mem_index, MO_LEQ);
> -                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 8);
> -                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], cpu_A0,
> +                        tcg_gen_addi_tl(s->A0, s->A0, 8);
> +                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
>                                              s->mem_index, MO_LEQ);
>                      } else {
> -                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], cpu_A0,
> +                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
>                                              s->mem_index, MO_LEUL);
> -                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 4);
> -                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], cpu_A0,
> +                        tcg_gen_addi_tl(s->A0, s->A0, 4);
> +                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
>                                              s->mem_index, MO_LEUL);
>                      }
>                      /* bnd registers are now in-use */
> @@ -7810,22 +7810,22 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                      goto illegal_op;
>                  }
>                  if (a.base >= 0) {
> -                    tcg_gen_addi_tl(cpu_A0, cpu_regs[a.base], a.disp);
> +                    tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
>                  } else {
> -                    tcg_gen_movi_tl(cpu_A0, 0);
> +                    tcg_gen_movi_tl(s->A0, 0);
>                  }
> -                gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override);
> +                gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
>                  if (a.index >= 0) {
>                      tcg_gen_mov_tl(cpu_T0, cpu_regs[a.index]);
>                  } else {
>                      tcg_gen_movi_tl(cpu_T0, 0);
>                  }
>                  if (CODE64(s)) {
> -                    gen_helper_bndldx64(cpu_bndl[reg], cpu_env, cpu_A0, cpu_T0);
> +                    gen_helper_bndldx64(cpu_bndl[reg], cpu_env, s->A0, cpu_T0);
>                      tcg_gen_ld_i64(cpu_bndu[reg], cpu_env,
>                                     offsetof(CPUX86State, mmx_t0.MMX_Q(0)));
>                  } else {
> -                    gen_helper_bndldx32(cpu_bndu[reg], cpu_env, cpu_A0, cpu_T0);
> +                    gen_helper_bndldx32(cpu_bndu[reg], cpu_env, s->A0, cpu_T0);
>                      tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndu[reg]);
>                      tcg_gen_shri_i64(cpu_bndu[reg], cpu_bndu[reg], 32);
>                  }
> @@ -7859,11 +7859,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                      /* rip-relative generates #ud */
>                      goto illegal_op;
>                  }
> -                tcg_gen_not_tl(cpu_A0, gen_lea_modrm_1(a));
> +                tcg_gen_not_tl(s->A0, gen_lea_modrm_1(s, a));
>                  if (!CODE64(s)) {
> -                    tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
> +                    tcg_gen_ext32u_tl(s->A0, s->A0);
>                  }
> -                tcg_gen_extu_tl_i64(cpu_bndu[reg], cpu_A0);
> +                tcg_gen_extu_tl_i64(cpu_bndu[reg], s->A0);
>                  /* bnd registers are now in-use */
>                  gen_set_hflag(s, HF_MPX_IU_MASK);
>                  break;
> @@ -7892,16 +7892,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  } else {
>                      gen_lea_modrm(env, s, modrm);
>                      if (CODE64(s)) {
> -                        tcg_gen_qemu_st_i64(cpu_bndl[reg], cpu_A0,
> +                        tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
>                                              s->mem_index, MO_LEQ);
> -                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 8);
> -                        tcg_gen_qemu_st_i64(cpu_bndu[reg], cpu_A0,
> +                        tcg_gen_addi_tl(s->A0, s->A0, 8);
> +                        tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
>                                              s->mem_index, MO_LEQ);
>                      } else {
> -                        tcg_gen_qemu_st_i64(cpu_bndl[reg], cpu_A0,
> +                        tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
>                                              s->mem_index, MO_LEUL);
> -                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 4);
> -                        tcg_gen_qemu_st_i64(cpu_bndu[reg], cpu_A0,
> +                        tcg_gen_addi_tl(s->A0, s->A0, 4);
> +                        tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
>                                              s->mem_index, MO_LEUL);
>                      }
>                  }
> @@ -7915,21 +7915,21 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                      goto illegal_op;
>                  }
>                  if (a.base >= 0) {
> -                    tcg_gen_addi_tl(cpu_A0, cpu_regs[a.base], a.disp);
> +                    tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
>                  } else {
> -                    tcg_gen_movi_tl(cpu_A0, 0);
> +                    tcg_gen_movi_tl(s->A0, 0);
>                  }
> -                gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override);
> +                gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
>                  if (a.index >= 0) {
>                      tcg_gen_mov_tl(cpu_T0, cpu_regs[a.index]);
>                  } else {
>                      tcg_gen_movi_tl(cpu_T0, 0);
>                  }
>                  if (CODE64(s)) {
> -                    gen_helper_bndstx64(cpu_env, cpu_A0, cpu_T0,
> +                    gen_helper_bndstx64(cpu_env, s->A0, cpu_T0,
>                                          cpu_bndl[reg], cpu_bndu[reg]);
>                  } else {
> -                    gen_helper_bndstx32(cpu_env, cpu_A0, cpu_T0,
> +                    gen_helper_bndstx32(cpu_env, s->A0, cpu_T0,
>                                          cpu_bndl[reg], cpu_bndu[reg]);
>                  }
>              }
> @@ -8069,7 +8069,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  break;
>              }
>              gen_lea_modrm(env, s, modrm);
> -            gen_helper_fxsave(cpu_env, cpu_A0);
> +            gen_helper_fxsave(cpu_env, s->A0);
>              break;
>
>          CASE_MODRM_MEM_OP(1): /* fxrstor */
> @@ -8082,7 +8082,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  break;
>              }
>              gen_lea_modrm(env, s, modrm);
> -            gen_helper_fxrstor(cpu_env, cpu_A0);
> +            gen_helper_fxrstor(cpu_env, s->A0);
>              break;
>
>          CASE_MODRM_MEM_OP(2): /* ldmxcsr */
> @@ -8094,7 +8094,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  break;
>              }
>              gen_lea_modrm(env, s, modrm);
> -            tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0, s->mem_index, MO_LEUL);
> +            tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0, s->mem_index, MO_LEUL);
>              gen_helper_ldmxcsr(cpu_env, cpu_tmp2_i32);
>              break;
>
> @@ -8108,7 +8108,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              }
>              gen_lea_modrm(env, s, modrm);
>              tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, mxcsr));
> -            gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
> +            gen_op_st_v(s, MO_32, cpu_T0, s->A0);
>              break;
>
>          CASE_MODRM_MEM_OP(4): /* xsave */
> @@ -8120,7 +8120,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              gen_lea_modrm(env, s, modrm);
>              tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
>                                    cpu_regs[R_EDX]);
> -            gen_helper_xsave(cpu_env, cpu_A0, cpu_tmp1_i64);
> +            gen_helper_xsave(cpu_env, s->A0, cpu_tmp1_i64);
>              break;
>
>          CASE_MODRM_MEM_OP(5): /* xrstor */
> @@ -8132,7 +8132,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              gen_lea_modrm(env, s, modrm);
>              tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
>                                    cpu_regs[R_EDX]);
> -            gen_helper_xrstor(cpu_env, cpu_A0, cpu_tmp1_i64);
> +            gen_helper_xrstor(cpu_env, s->A0, cpu_tmp1_i64);
>              /* XRSTOR is how MPX is enabled, which changes how
>                 we translate.  Thus we need to end the TB.  */
>              gen_update_cc_op(s);
> @@ -8160,7 +8160,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  gen_lea_modrm(env, s, modrm);
>                  tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
>                                        cpu_regs[R_EDX]);
> -                gen_helper_xsaveopt(cpu_env, cpu_A0, cpu_tmp1_i64);
> +                gen_helper_xsaveopt(cpu_env, s->A0, cpu_tmp1_i64);
>              }
>              break;
>
> @@ -8458,7 +8458,7 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
>
>      cpu_T0 = tcg_temp_new();
>      cpu_T1 = tcg_temp_new();
> -    cpu_A0 = tcg_temp_new();
> +    dc->A0 = tcg_temp_new();
>
>      cpu_tmp0 = tcg_temp_new();
>      cpu_tmp1_i64 = tcg_temp_new_i64();


--
Alex Bennée

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH v3 03/13] target/i386: move cpu_T0 to DisasContext
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 03/13] target/i386: move cpu_T0 " Emilio G. Cota
  2018-09-11 20:47   ` Richard Henderson
@ 2018-09-13 14:25   ` Alex Bennée
  1 sibling, 0 replies; 32+ messages in thread
From: Alex Bennée @ 2018-09-13 14:25 UTC (permalink / raw)
  To: Emilio G. Cota
  Cc: qemu-devel, Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost


Emilio G. Cota <cota@braap.org> writes:

> Signed-off-by: Emilio G. Cota <cota@braap.org>

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

> ---
>  target/i386/translate.c | 1174 ++++++++++++++++++++-------------------
>  1 file changed, 594 insertions(+), 580 deletions(-)
>
> diff --git a/target/i386/translate.c b/target/i386/translate.c
> index c6b1baab9d..73fd7e5b9a 100644
> --- a/target/i386/translate.c
> +++ b/target/i386/translate.c
> @@ -79,7 +79,7 @@ static TCGv cpu_seg_base[6];
>  static TCGv_i64 cpu_bndl[4];
>  static TCGv_i64 cpu_bndu[4];
>  /* local temps */
> -static TCGv cpu_T0, cpu_T1;
> +static TCGv cpu_T1;
>  /* local register indexes (only used inside old micro ops) */
>  static TCGv cpu_tmp0, cpu_tmp4;
>  static TCGv_ptr cpu_ptr0, cpu_ptr1;
> @@ -138,6 +138,7 @@ typedef struct DisasContext {
>      /* TCG local temps */
>      TCGv cc_srcT;
>      TCGv A0;
> +    TCGv T0;
>
>      sigjmp_buf jmpbuf;
>  } DisasContext;
> @@ -412,9 +413,9 @@ static inline void gen_op_add_reg_im(TCGMemOp size, int reg, int32_t val)
>      gen_op_mov_reg_v(size, reg, cpu_tmp0);
>  }
>
> -static inline void gen_op_add_reg_T0(TCGMemOp size, int reg)
> +static inline void gen_op_add_reg_T0(DisasContext *s, TCGMemOp size, int reg)
>  {
> -    tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T0);
> +    tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], s->T0);
>      gen_op_mov_reg_v(size, reg, cpu_tmp0);
>  }
>
> @@ -431,9 +432,9 @@ static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
>  static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
>  {
>      if (d == OR_TMP0) {
> -        gen_op_st_v(s, idx, cpu_T0, s->A0);
> +        gen_op_st_v(s, idx, s->T0, s->A0);
>      } else {
> -        gen_op_mov_reg_v(idx, d, cpu_T0);
> +        gen_op_mov_reg_v(idx, d, s->T0);
>      }
>  }
>
> @@ -509,10 +510,10 @@ static inline void gen_string_movl_A0_EDI(DisasContext *s)
>      gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1);
>  }
>
> -static inline void gen_op_movl_T0_Dshift(TCGMemOp ot)
> +static inline void gen_op_movl_T0_Dshift(DisasContext *s, TCGMemOp ot)
>  {
> -    tcg_gen_ld32s_tl(cpu_T0, cpu_env, offsetof(CPUX86State, df));
> -    tcg_gen_shli_tl(cpu_T0, cpu_T0, ot);
> +    tcg_gen_ld32s_tl(s->T0, cpu_env, offsetof(CPUX86State, df));
> +    tcg_gen_shli_tl(s->T0, s->T0, ot);
>  };
>
>  static TCGv gen_ext_tl(TCGv dst, TCGv src, TCGMemOp size, bool sign)
> @@ -610,7 +611,7 @@ static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
>      target_ulong next_eip;
>
>      if (s->pe && (s->cpl > s->iopl || s->vm86)) {
> -        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>          switch (ot) {
>          case MO_8:
>              gen_helper_check_iob(cpu_env, cpu_tmp2_i32);
> @@ -630,7 +631,7 @@ static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
>          gen_jmp_im(cur_eip);
>          svm_flags |= (1 << (4 + ot));
>          next_eip = s->pc - s->cs_base;
> -        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>          gen_helper_svm_check_io(cpu_env, cpu_tmp2_i32,
>                                  tcg_const_i32(svm_flags),
>                                  tcg_const_i32(next_eip - cur_eip));
> @@ -640,41 +641,41 @@ static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
>  static inline void gen_movs(DisasContext *s, TCGMemOp ot)
>  {
>      gen_string_movl_A0_ESI(s);
> -    gen_op_ld_v(s, ot, cpu_T0, s->A0);
> +    gen_op_ld_v(s, ot, s->T0, s->A0);
>      gen_string_movl_A0_EDI(s);
> -    gen_op_st_v(s, ot, cpu_T0, s->A0);
> -    gen_op_movl_T0_Dshift(ot);
> -    gen_op_add_reg_T0(s->aflag, R_ESI);
> -    gen_op_add_reg_T0(s->aflag, R_EDI);
> +    gen_op_st_v(s, ot, s->T0, s->A0);
> +    gen_op_movl_T0_Dshift(s, ot);
> +    gen_op_add_reg_T0(s, s->aflag, R_ESI);
> +    gen_op_add_reg_T0(s, s->aflag, R_EDI);
>  }
>
> -static void gen_op_update1_cc(void)
> +static void gen_op_update1_cc(DisasContext *s)
>  {
> -    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
> +    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
>  }
>
> -static void gen_op_update2_cc(void)
> +static void gen_op_update2_cc(DisasContext *s)
>  {
>      tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
> -    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
> +    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
>  }
>
> -static void gen_op_update3_cc(TCGv reg)
> +static void gen_op_update3_cc(DisasContext *s, TCGv reg)
>  {
>      tcg_gen_mov_tl(cpu_cc_src2, reg);
>      tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
> -    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
> +    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
>  }
>
> -static inline void gen_op_testl_T0_T1_cc(void)
> +static inline void gen_op_testl_T0_T1_cc(DisasContext *s)
>  {
> -    tcg_gen_and_tl(cpu_cc_dst, cpu_T0, cpu_T1);
> +    tcg_gen_and_tl(cpu_cc_dst, s->T0, cpu_T1);
>  }
>
>  static void gen_op_update_neg_cc(DisasContext *s)
>  {
> -    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
> -    tcg_gen_neg_tl(cpu_cc_src, cpu_T0);
> +    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
> +    tcg_gen_neg_tl(cpu_cc_src, s->T0);
>      tcg_gen_movi_tl(s->cc_srcT, 0);
>  }
>
> @@ -1022,11 +1023,11 @@ static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
>     value 'b'. In the fast case, T0 is guaranted not to be used. */
>  static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
>  {
> -    CCPrepare cc = gen_prepare_cc(s, b, cpu_T0);
> +    CCPrepare cc = gen_prepare_cc(s, b, s->T0);
>
>      if (cc.mask != -1) {
> -        tcg_gen_andi_tl(cpu_T0, cc.reg, cc.mask);
> -        cc.reg = cpu_T0;
> +        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
> +        cc.reg = s->T0;
>      }
>      if (cc.use_reg2) {
>          tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
> @@ -1040,12 +1041,12 @@ static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
>     A translation block must end soon.  */
>  static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
>  {
> -    CCPrepare cc = gen_prepare_cc(s, b, cpu_T0);
> +    CCPrepare cc = gen_prepare_cc(s, b, s->T0);
>
>      gen_update_cc_op(s);
>      if (cc.mask != -1) {
> -        tcg_gen_andi_tl(cpu_T0, cc.reg, cc.mask);
> -        cc.reg = cpu_T0;
> +        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
> +        cc.reg = s->T0;
>      }
>      set_cc_op(s, CC_OP_DYNAMIC);
>      if (cc.use_reg2) {
> @@ -1070,20 +1071,20 @@ static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
>
>  static inline void gen_stos(DisasContext *s, TCGMemOp ot)
>  {
> -    gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
> +    gen_op_mov_v_reg(MO_32, s->T0, R_EAX);
>      gen_string_movl_A0_EDI(s);
> -    gen_op_st_v(s, ot, cpu_T0, s->A0);
> -    gen_op_movl_T0_Dshift(ot);
> -    gen_op_add_reg_T0(s->aflag, R_EDI);
> +    gen_op_st_v(s, ot, s->T0, s->A0);
> +    gen_op_movl_T0_Dshift(s, ot);
> +    gen_op_add_reg_T0(s, s->aflag, R_EDI);
>  }
>
>  static inline void gen_lods(DisasContext *s, TCGMemOp ot)
>  {
>      gen_string_movl_A0_ESI(s);
> -    gen_op_ld_v(s, ot, cpu_T0, s->A0);
> -    gen_op_mov_reg_v(ot, R_EAX, cpu_T0);
> -    gen_op_movl_T0_Dshift(ot);
> -    gen_op_add_reg_T0(s->aflag, R_ESI);
> +    gen_op_ld_v(s, ot, s->T0, s->A0);
> +    gen_op_mov_reg_v(ot, R_EAX, s->T0);
> +    gen_op_movl_T0_Dshift(s, ot);
> +    gen_op_add_reg_T0(s, s->aflag, R_ESI);
>  }
>
>  static inline void gen_scas(DisasContext *s, TCGMemOp ot)
> @@ -1091,8 +1092,8 @@ static inline void gen_scas(DisasContext *s, TCGMemOp ot)
>      gen_string_movl_A0_EDI(s);
>      gen_op_ld_v(s, ot, cpu_T1, s->A0);
>      gen_op(s, OP_CMPL, ot, R_EAX);
> -    gen_op_movl_T0_Dshift(ot);
> -    gen_op_add_reg_T0(s->aflag, R_EDI);
> +    gen_op_movl_T0_Dshift(s, ot);
> +    gen_op_add_reg_T0(s, s->aflag, R_EDI);
>  }
>
>  static inline void gen_cmps(DisasContext *s, TCGMemOp ot)
> @@ -1101,9 +1102,9 @@ static inline void gen_cmps(DisasContext *s, TCGMemOp ot)
>      gen_op_ld_v(s, ot, cpu_T1, s->A0);
>      gen_string_movl_A0_ESI(s);
>      gen_op(s, OP_CMPL, ot, OR_TMP0);
> -    gen_op_movl_T0_Dshift(ot);
> -    gen_op_add_reg_T0(s->aflag, R_ESI);
> -    gen_op_add_reg_T0(s->aflag, R_EDI);
> +    gen_op_movl_T0_Dshift(s, ot);
> +    gen_op_add_reg_T0(s, s->aflag, R_ESI);
> +    gen_op_add_reg_T0(s, s->aflag, R_EDI);
>  }
>
>  static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
> @@ -1127,14 +1128,14 @@ static inline void gen_ins(DisasContext *s, TCGMemOp ot)
>      gen_string_movl_A0_EDI(s);
>      /* Note: we must do this dummy write first to be restartable in
>         case of page fault. */
> -    tcg_gen_movi_tl(cpu_T0, 0);
> -    gen_op_st_v(s, ot, cpu_T0, s->A0);
> +    tcg_gen_movi_tl(s->T0, 0);
> +    gen_op_st_v(s, ot, s->T0, s->A0);
>      tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
>      tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
> -    gen_helper_in_func(ot, cpu_T0, cpu_tmp2_i32);
> -    gen_op_st_v(s, ot, cpu_T0, s->A0);
> -    gen_op_movl_T0_Dshift(ot);
> -    gen_op_add_reg_T0(s->aflag, R_EDI);
> +    gen_helper_in_func(ot, s->T0, cpu_tmp2_i32);
> +    gen_op_st_v(s, ot, s->T0, s->A0);
> +    gen_op_movl_T0_Dshift(s, ot);
> +    gen_op_add_reg_T0(s, s->aflag, R_EDI);
>      gen_bpt_io(s, cpu_tmp2_i32, ot);
>      if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
>          gen_io_end();
> @@ -1147,14 +1148,14 @@ static inline void gen_outs(DisasContext *s, TCGMemOp ot)
>          gen_io_start();
>      }
>      gen_string_movl_A0_ESI(s);
> -    gen_op_ld_v(s, ot, cpu_T0, s->A0);
> +    gen_op_ld_v(s, ot, s->T0, s->A0);
>
>      tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
>      tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
> -    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T0);
> +    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T0);
>      gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
> -    gen_op_movl_T0_Dshift(ot);
> -    gen_op_add_reg_T0(s->aflag, R_ESI);
> +    gen_op_movl_T0_Dshift(s, ot);
> +    gen_op_add_reg_T0(s, s->aflag, R_ESI);
>      gen_bpt_io(s, cpu_tmp2_i32, ot);
>      if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
>          gen_io_end();
> @@ -1265,103 +1266,103 @@ static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
>  static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
>  {
>      if (d != OR_TMP0) {
> -        gen_op_mov_v_reg(ot, cpu_T0, d);
> +        gen_op_mov_v_reg(ot, s1->T0, d);
>      } else if (!(s1->prefix & PREFIX_LOCK)) {
> -        gen_op_ld_v(s1, ot, cpu_T0, s1->A0);
> +        gen_op_ld_v(s1, ot, s1->T0, s1->A0);
>      }
>      switch(op) {
>      case OP_ADCL:
>          gen_compute_eflags_c(s1, cpu_tmp4);
>          if (s1->prefix & PREFIX_LOCK) {
> -            tcg_gen_add_tl(cpu_T0, cpu_tmp4, cpu_T1);
> -            tcg_gen_atomic_add_fetch_tl(cpu_T0, s1->A0, cpu_T0,
> +            tcg_gen_add_tl(s1->T0, cpu_tmp4, cpu_T1);
> +            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
>                                          s1->mem_index, ot | MO_LE);
>          } else {
> -            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
> -            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_tmp4);
> +            tcg_gen_add_tl(s1->T0, s1->T0, cpu_T1);
> +            tcg_gen_add_tl(s1->T0, s1->T0, cpu_tmp4);
>              gen_op_st_rm_T0_A0(s1, ot, d);
>          }
> -        gen_op_update3_cc(cpu_tmp4);
> +        gen_op_update3_cc(s1, cpu_tmp4);
>          set_cc_op(s1, CC_OP_ADCB + ot);
>          break;
>      case OP_SBBL:
>          gen_compute_eflags_c(s1, cpu_tmp4);
>          if (s1->prefix & PREFIX_LOCK) {
> -            tcg_gen_add_tl(cpu_T0, cpu_T1, cpu_tmp4);
> -            tcg_gen_neg_tl(cpu_T0, cpu_T0);
> -            tcg_gen_atomic_add_fetch_tl(cpu_T0, s1->A0, cpu_T0,
> +            tcg_gen_add_tl(s1->T0, cpu_T1, cpu_tmp4);
> +            tcg_gen_neg_tl(s1->T0, s1->T0);
> +            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
>                                          s1->mem_index, ot | MO_LE);
>          } else {
> -            tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
> -            tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_tmp4);
> +            tcg_gen_sub_tl(s1->T0, s1->T0, cpu_T1);
> +            tcg_gen_sub_tl(s1->T0, s1->T0, cpu_tmp4);
>              gen_op_st_rm_T0_A0(s1, ot, d);
>          }
> -        gen_op_update3_cc(cpu_tmp4);
> +        gen_op_update3_cc(s1, cpu_tmp4);
>          set_cc_op(s1, CC_OP_SBBB + ot);
>          break;
>      case OP_ADDL:
>          if (s1->prefix & PREFIX_LOCK) {
> -            tcg_gen_atomic_add_fetch_tl(cpu_T0, s1->A0, cpu_T1,
> +            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, cpu_T1,
>                                          s1->mem_index, ot | MO_LE);
>          } else {
> -            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
> +            tcg_gen_add_tl(s1->T0, s1->T0, cpu_T1);
>              gen_op_st_rm_T0_A0(s1, ot, d);
>          }
> -        gen_op_update2_cc();
> +        gen_op_update2_cc(s1);
>          set_cc_op(s1, CC_OP_ADDB + ot);
>          break;
>      case OP_SUBL:
>          if (s1->prefix & PREFIX_LOCK) {
> -            tcg_gen_neg_tl(cpu_T0, cpu_T1);
> -            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, cpu_T0,
> +            tcg_gen_neg_tl(s1->T0, cpu_T1);
> +            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, s1->T0,
>                                          s1->mem_index, ot | MO_LE);
> -            tcg_gen_sub_tl(cpu_T0, s1->cc_srcT, cpu_T1);
> +            tcg_gen_sub_tl(s1->T0, s1->cc_srcT, cpu_T1);
>          } else {
> -            tcg_gen_mov_tl(s1->cc_srcT, cpu_T0);
> -            tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
> +            tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
> +            tcg_gen_sub_tl(s1->T0, s1->T0, cpu_T1);
>              gen_op_st_rm_T0_A0(s1, ot, d);
>          }
> -        gen_op_update2_cc();
> +        gen_op_update2_cc(s1);
>          set_cc_op(s1, CC_OP_SUBB + ot);
>          break;
>      default:
>      case OP_ANDL:
>          if (s1->prefix & PREFIX_LOCK) {
> -            tcg_gen_atomic_and_fetch_tl(cpu_T0, s1->A0, cpu_T1,
> +            tcg_gen_atomic_and_fetch_tl(s1->T0, s1->A0, cpu_T1,
>                                          s1->mem_index, ot | MO_LE);
>          } else {
> -            tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
> +            tcg_gen_and_tl(s1->T0, s1->T0, cpu_T1);
>              gen_op_st_rm_T0_A0(s1, ot, d);
>          }
> -        gen_op_update1_cc();
> +        gen_op_update1_cc(s1);
>          set_cc_op(s1, CC_OP_LOGICB + ot);
>          break;
>      case OP_ORL:
>          if (s1->prefix & PREFIX_LOCK) {
> -            tcg_gen_atomic_or_fetch_tl(cpu_T0, s1->A0, cpu_T1,
> +            tcg_gen_atomic_or_fetch_tl(s1->T0, s1->A0, cpu_T1,
>                                         s1->mem_index, ot | MO_LE);
>          } else {
> -            tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
> +            tcg_gen_or_tl(s1->T0, s1->T0, cpu_T1);
>              gen_op_st_rm_T0_A0(s1, ot, d);
>          }
> -        gen_op_update1_cc();
> +        gen_op_update1_cc(s1);
>          set_cc_op(s1, CC_OP_LOGICB + ot);
>          break;
>      case OP_XORL:
>          if (s1->prefix & PREFIX_LOCK) {
> -            tcg_gen_atomic_xor_fetch_tl(cpu_T0, s1->A0, cpu_T1,
> +            tcg_gen_atomic_xor_fetch_tl(s1->T0, s1->A0, cpu_T1,
>                                          s1->mem_index, ot | MO_LE);
>          } else {
> -            tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
> +            tcg_gen_xor_tl(s1->T0, s1->T0, cpu_T1);
>              gen_op_st_rm_T0_A0(s1, ot, d);
>          }
> -        gen_op_update1_cc();
> +        gen_op_update1_cc(s1);
>          set_cc_op(s1, CC_OP_LOGICB + ot);
>          break;
>      case OP_CMPL:
>          tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
> -        tcg_gen_mov_tl(s1->cc_srcT, cpu_T0);
> -        tcg_gen_sub_tl(cpu_cc_dst, cpu_T0, cpu_T1);
> +        tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
> +        tcg_gen_sub_tl(cpu_cc_dst, s1->T0, cpu_T1);
>          set_cc_op(s1, CC_OP_SUBB + ot);
>          break;
>      }
> @@ -1371,21 +1372,21 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
>  static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c)
>  {
>      if (s1->prefix & PREFIX_LOCK) {
> -        tcg_gen_movi_tl(cpu_T0, c > 0 ? 1 : -1);
> -        tcg_gen_atomic_add_fetch_tl(cpu_T0, s1->A0, cpu_T0,
> +        tcg_gen_movi_tl(s1->T0, c > 0 ? 1 : -1);
> +        tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
>                                      s1->mem_index, ot | MO_LE);
>      } else {
>          if (d != OR_TMP0) {
> -            gen_op_mov_v_reg(ot, cpu_T0, d);
> +            gen_op_mov_v_reg(ot, s1->T0, d);
>          } else {
> -            gen_op_ld_v(s1, ot, cpu_T0, s1->A0);
> +            gen_op_ld_v(s1, ot, s1->T0, s1->A0);
>          }
> -        tcg_gen_addi_tl(cpu_T0, cpu_T0, (c > 0 ? 1 : -1));
> +        tcg_gen_addi_tl(s1->T0, s1->T0, (c > 0 ? 1 : -1));
>          gen_op_st_rm_T0_A0(s1, ot, d);
>      }
>
>      gen_compute_eflags_c(s1, cpu_cc_src);
> -    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
> +    tcg_gen_mov_tl(cpu_cc_dst, s1->T0);
>      set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot);
>  }
>
> @@ -1441,9 +1442,9 @@ static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>
>      /* load */
>      if (op1 == OR_TMP0) {
> -        gen_op_ld_v(s, ot, cpu_T0, s->A0);
> +        gen_op_ld_v(s, ot, s->T0, s->A0);
>      } else {
> -        gen_op_mov_v_reg(ot, cpu_T0, op1);
> +        gen_op_mov_v_reg(ot, s->T0, op1);
>      }
>
>      tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);
> @@ -1451,23 +1452,23 @@ static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>
>      if (is_right) {
>          if (is_arith) {
> -            gen_exts(ot, cpu_T0);
> -            tcg_gen_sar_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
> -            tcg_gen_sar_tl(cpu_T0, cpu_T0, cpu_T1);
> +            gen_exts(ot, s->T0);
> +            tcg_gen_sar_tl(cpu_tmp0, s->T0, cpu_tmp0);
> +            tcg_gen_sar_tl(s->T0, s->T0, cpu_T1);
>          } else {
> -            gen_extu(ot, cpu_T0);
> -            tcg_gen_shr_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
> -            tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_T1);
> +            gen_extu(ot, s->T0);
> +            tcg_gen_shr_tl(cpu_tmp0, s->T0, cpu_tmp0);
> +            tcg_gen_shr_tl(s->T0, s->T0, cpu_T1);
>          }
>      } else {
> -        tcg_gen_shl_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
> -        tcg_gen_shl_tl(cpu_T0, cpu_T0, cpu_T1);
> +        tcg_gen_shl_tl(cpu_tmp0, s->T0, cpu_tmp0);
> +        tcg_gen_shl_tl(s->T0, s->T0, cpu_T1);
>      }
>
>      /* store */
>      gen_op_st_rm_T0_A0(s, ot, op1);
>
> -    gen_shift_flags(s, ot, cpu_T0, cpu_tmp0, cpu_T1, is_right);
> +    gen_shift_flags(s, ot, s->T0, cpu_tmp0, cpu_T1, is_right);
>  }
>
>  static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
> @@ -1477,25 +1478,25 @@ static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
>
>      /* load */
>      if (op1 == OR_TMP0)
> -        gen_op_ld_v(s, ot, cpu_T0, s->A0);
> +        gen_op_ld_v(s, ot, s->T0, s->A0);
>      else
> -        gen_op_mov_v_reg(ot, cpu_T0, op1);
> +        gen_op_mov_v_reg(ot, s->T0, op1);
>
>      op2 &= mask;
>      if (op2 != 0) {
>          if (is_right) {
>              if (is_arith) {
> -                gen_exts(ot, cpu_T0);
> -                tcg_gen_sari_tl(cpu_tmp4, cpu_T0, op2 - 1);
> -                tcg_gen_sari_tl(cpu_T0, cpu_T0, op2);
> +                gen_exts(ot, s->T0);
> +                tcg_gen_sari_tl(cpu_tmp4, s->T0, op2 - 1);
> +                tcg_gen_sari_tl(s->T0, s->T0, op2);
>              } else {
> -                gen_extu(ot, cpu_T0);
> -                tcg_gen_shri_tl(cpu_tmp4, cpu_T0, op2 - 1);
> -                tcg_gen_shri_tl(cpu_T0, cpu_T0, op2);
> +                gen_extu(ot, s->T0);
> +                tcg_gen_shri_tl(cpu_tmp4, s->T0, op2 - 1);
> +                tcg_gen_shri_tl(s->T0, s->T0, op2);
>              }
>          } else {
> -            tcg_gen_shli_tl(cpu_tmp4, cpu_T0, op2 - 1);
> -            tcg_gen_shli_tl(cpu_T0, cpu_T0, op2);
> +            tcg_gen_shli_tl(cpu_tmp4, s->T0, op2 - 1);
> +            tcg_gen_shli_tl(s->T0, s->T0, op2);
>          }
>      }
>
> @@ -1505,7 +1506,7 @@ static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
>      /* update eflags if non zero shift */
>      if (op2 != 0) {
>          tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
> -        tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
> +        tcg_gen_mov_tl(cpu_cc_dst, s->T0);
>          set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
>      }
>  }
> @@ -1517,9 +1518,9 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
>
>      /* load */
>      if (op1 == OR_TMP0) {
> -        gen_op_ld_v(s, ot, cpu_T0, s->A0);
> +        gen_op_ld_v(s, ot, s->T0, s->A0);
>      } else {
> -        gen_op_mov_v_reg(ot, cpu_T0, op1);
> +        gen_op_mov_v_reg(ot, s->T0, op1);
>      }
>
>      tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);
> @@ -1527,31 +1528,31 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
>      switch (ot) {
>      case MO_8:
>          /* Replicate the 8-bit input so that a 32-bit rotate works.  */
> -        tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
> -        tcg_gen_muli_tl(cpu_T0, cpu_T0, 0x01010101);
> +        tcg_gen_ext8u_tl(s->T0, s->T0);
> +        tcg_gen_muli_tl(s->T0, s->T0, 0x01010101);
>          goto do_long;
>      case MO_16:
>          /* Replicate the 16-bit input so that a 32-bit rotate works.  */
> -        tcg_gen_deposit_tl(cpu_T0, cpu_T0, cpu_T0, 16, 16);
> +        tcg_gen_deposit_tl(s->T0, s->T0, s->T0, 16, 16);
>          goto do_long;
>      do_long:
>  #ifdef TARGET_X86_64
>      case MO_32:
> -        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>          tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
>          if (is_right) {
>              tcg_gen_rotr_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
>          } else {
>              tcg_gen_rotl_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
>          }
> -        tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
> +        tcg_gen_extu_i32_tl(s->T0, cpu_tmp2_i32);
>          break;
>  #endif
>      default:
>          if (is_right) {
> -            tcg_gen_rotr_tl(cpu_T0, cpu_T0, cpu_T1);
> +            tcg_gen_rotr_tl(s->T0, s->T0, cpu_T1);
>          } else {
> -            tcg_gen_rotl_tl(cpu_T0, cpu_T0, cpu_T1);
> +            tcg_gen_rotl_tl(s->T0, s->T0, cpu_T1);
>          }
>          break;
>      }
> @@ -1567,12 +1568,12 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
>         since we've computed the flags into CC_SRC, these variables are
>         currently dead.  */
>      if (is_right) {
> -        tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask - 1);
> -        tcg_gen_shri_tl(cpu_cc_dst, cpu_T0, mask);
> +        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
> +        tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
>          tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
>      } else {
> -        tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask);
> -        tcg_gen_andi_tl(cpu_cc_dst, cpu_T0, 1);
> +        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
> +        tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
>      }
>      tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
>      tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
> @@ -1603,9 +1604,9 @@ static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
>
>      /* load */
>      if (op1 == OR_TMP0) {
> -        gen_op_ld_v(s, ot, cpu_T0, s->A0);
> +        gen_op_ld_v(s, ot, s->T0, s->A0);
>      } else {
> -        gen_op_mov_v_reg(ot, cpu_T0, op1);
> +        gen_op_mov_v_reg(ot, s->T0, op1);
>      }
>
>      op2 &= mask;
> @@ -1613,20 +1614,20 @@ static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
>          switch (ot) {
>  #ifdef TARGET_X86_64
>          case MO_32:
> -            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>              if (is_right) {
>                  tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
>              } else {
>                  tcg_gen_rotli_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
>              }
> -            tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
> +            tcg_gen_extu_i32_tl(s->T0, cpu_tmp2_i32);
>              break;
>  #endif
>          default:
>              if (is_right) {
> -                tcg_gen_rotri_tl(cpu_T0, cpu_T0, op2);
> +                tcg_gen_rotri_tl(s->T0, s->T0, op2);
>              } else {
> -                tcg_gen_rotli_tl(cpu_T0, cpu_T0, op2);
> +                tcg_gen_rotli_tl(s->T0, s->T0, op2);
>              }
>              break;
>          case MO_8:
> @@ -1639,10 +1640,10 @@ static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
>              if (is_right) {
>                  shift = mask + 1 - shift;
>              }
> -            gen_extu(ot, cpu_T0);
> -            tcg_gen_shli_tl(cpu_tmp0, cpu_T0, shift);
> -            tcg_gen_shri_tl(cpu_T0, cpu_T0, mask + 1 - shift);
> -            tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0);
> +            gen_extu(ot, s->T0);
> +            tcg_gen_shli_tl(cpu_tmp0, s->T0, shift);
> +            tcg_gen_shri_tl(s->T0, s->T0, mask + 1 - shift);
> +            tcg_gen_or_tl(s->T0, s->T0, cpu_tmp0);
>              break;
>          }
>      }
> @@ -1659,12 +1660,12 @@ static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
>             since we've computed the flags into CC_SRC, these variables are
>             currently dead.  */
>          if (is_right) {
> -            tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask - 1);
> -            tcg_gen_shri_tl(cpu_cc_dst, cpu_T0, mask);
> +            tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
> +            tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
>              tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
>          } else {
> -            tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask);
> -            tcg_gen_andi_tl(cpu_cc_dst, cpu_T0, 1);
> +            tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
> +            tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
>          }
>          tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
>          tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
> @@ -1681,24 +1682,24 @@ static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>
>      /* load */
>      if (op1 == OR_TMP0)
> -        gen_op_ld_v(s, ot, cpu_T0, s->A0);
> +        gen_op_ld_v(s, ot, s->T0, s->A0);
>      else
> -        gen_op_mov_v_reg(ot, cpu_T0, op1);
> +        gen_op_mov_v_reg(ot, s->T0, op1);
>
>      if (is_right) {
>          switch (ot) {
>          case MO_8:
> -            gen_helper_rcrb(cpu_T0, cpu_env, cpu_T0, cpu_T1);
> +            gen_helper_rcrb(s->T0, cpu_env, s->T0, cpu_T1);
>              break;
>          case MO_16:
> -            gen_helper_rcrw(cpu_T0, cpu_env, cpu_T0, cpu_T1);
> +            gen_helper_rcrw(s->T0, cpu_env, s->T0, cpu_T1);
>              break;
>          case MO_32:
> -            gen_helper_rcrl(cpu_T0, cpu_env, cpu_T0, cpu_T1);
> +            gen_helper_rcrl(s->T0, cpu_env, s->T0, cpu_T1);
>              break;
>  #ifdef TARGET_X86_64
>          case MO_64:
> -            gen_helper_rcrq(cpu_T0, cpu_env, cpu_T0, cpu_T1);
> +            gen_helper_rcrq(s->T0, cpu_env, s->T0, cpu_T1);
>              break;
>  #endif
>          default:
> @@ -1707,17 +1708,17 @@ static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>      } else {
>          switch (ot) {
>          case MO_8:
> -            gen_helper_rclb(cpu_T0, cpu_env, cpu_T0, cpu_T1);
> +            gen_helper_rclb(s->T0, cpu_env, s->T0, cpu_T1);
>              break;
>          case MO_16:
> -            gen_helper_rclw(cpu_T0, cpu_env, cpu_T0, cpu_T1);
> +            gen_helper_rclw(s->T0, cpu_env, s->T0, cpu_T1);
>              break;
>          case MO_32:
> -            gen_helper_rcll(cpu_T0, cpu_env, cpu_T0, cpu_T1);
> +            gen_helper_rcll(s->T0, cpu_env, s->T0, cpu_T1);
>              break;
>  #ifdef TARGET_X86_64
>          case MO_64:
> -            gen_helper_rclq(cpu_T0, cpu_env, cpu_T0, cpu_T1);
> +            gen_helper_rclq(s->T0, cpu_env, s->T0, cpu_T1);
>              break;
>  #endif
>          default:
> @@ -1737,9 +1738,9 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>
>      /* load */
>      if (op1 == OR_TMP0) {
> -        gen_op_ld_v(s, ot, cpu_T0, s->A0);
> +        gen_op_ld_v(s, ot, s->T0, s->A0);
>      } else {
> -        gen_op_mov_v_reg(ot, cpu_T0, op1);
> +        gen_op_mov_v_reg(ot, s->T0, op1);
>      }
>
>      count = tcg_temp_new();
> @@ -1751,11 +1752,11 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>             This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
>             portion by constructing it as a 32-bit value.  */
>          if (is_right) {
> -            tcg_gen_deposit_tl(cpu_tmp0, cpu_T0, cpu_T1, 16, 16);
> -            tcg_gen_mov_tl(cpu_T1, cpu_T0);
> -            tcg_gen_mov_tl(cpu_T0, cpu_tmp0);
> +            tcg_gen_deposit_tl(cpu_tmp0, s->T0, cpu_T1, 16, 16);
> +            tcg_gen_mov_tl(cpu_T1, s->T0);
> +            tcg_gen_mov_tl(s->T0, cpu_tmp0);
>          } else {
> -            tcg_gen_deposit_tl(cpu_T1, cpu_T0, cpu_T1, 16, 16);
> +            tcg_gen_deposit_tl(cpu_T1, s->T0, cpu_T1, 16, 16);
>          }
>          /* FALLTHRU */
>  #ifdef TARGET_X86_64
> @@ -1763,28 +1764,28 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>          /* Concatenate the two 32-bit values and use a 64-bit shift.  */
>          tcg_gen_subi_tl(cpu_tmp0, count, 1);
>          if (is_right) {
> -            tcg_gen_concat_tl_i64(cpu_T0, cpu_T0, cpu_T1);
> -            tcg_gen_shr_i64(cpu_tmp0, cpu_T0, cpu_tmp0);
> -            tcg_gen_shr_i64(cpu_T0, cpu_T0, count);
> +            tcg_gen_concat_tl_i64(s->T0, s->T0, cpu_T1);
> +            tcg_gen_shr_i64(cpu_tmp0, s->T0, cpu_tmp0);
> +            tcg_gen_shr_i64(s->T0, s->T0, count);
>          } else {
> -            tcg_gen_concat_tl_i64(cpu_T0, cpu_T1, cpu_T0);
> -            tcg_gen_shl_i64(cpu_tmp0, cpu_T0, cpu_tmp0);
> -            tcg_gen_shl_i64(cpu_T0, cpu_T0, count);
> +            tcg_gen_concat_tl_i64(s->T0, cpu_T1, s->T0);
> +            tcg_gen_shl_i64(cpu_tmp0, s->T0, cpu_tmp0);
> +            tcg_gen_shl_i64(s->T0, s->T0, count);
>              tcg_gen_shri_i64(cpu_tmp0, cpu_tmp0, 32);
> -            tcg_gen_shri_i64(cpu_T0, cpu_T0, 32);
> +            tcg_gen_shri_i64(s->T0, s->T0, 32);
>          }
>          break;
>  #endif
>      default:
>          tcg_gen_subi_tl(cpu_tmp0, count, 1);
>          if (is_right) {
> -            tcg_gen_shr_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
> +            tcg_gen_shr_tl(cpu_tmp0, s->T0, cpu_tmp0);
>
>              tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
> -            tcg_gen_shr_tl(cpu_T0, cpu_T0, count);
> +            tcg_gen_shr_tl(s->T0, s->T0, count);
>              tcg_gen_shl_tl(cpu_T1, cpu_T1, cpu_tmp4);
>          } else {
> -            tcg_gen_shl_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
> +            tcg_gen_shl_tl(cpu_tmp0, s->T0, cpu_tmp0);
>              if (ot == MO_16) {
>                  /* Only needed if count > 16, for Intel behaviour.  */
>                  tcg_gen_subfi_tl(cpu_tmp4, 33, count);
> @@ -1793,20 +1794,20 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>              }
>
>              tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
> -            tcg_gen_shl_tl(cpu_T0, cpu_T0, count);
> +            tcg_gen_shl_tl(s->T0, s->T0, count);
>              tcg_gen_shr_tl(cpu_T1, cpu_T1, cpu_tmp4);
>          }
>          tcg_gen_movi_tl(cpu_tmp4, 0);
>          tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T1, count, cpu_tmp4,
>                             cpu_tmp4, cpu_T1);
> -        tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
> +        tcg_gen_or_tl(s->T0, s->T0, cpu_T1);
>          break;
>      }
>
>      /* store */
>      gen_op_st_rm_T0_A0(s, ot, op1);
>
> -    gen_shift_flags(s, ot, cpu_T0, cpu_tmp0, count, is_right);
> +    gen_shift_flags(s, ot, s->T0, cpu_tmp0, count, is_right);
>      tcg_temp_free(count);
>  }
>
> @@ -2126,23 +2127,23 @@ static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
>      if (mod == 3) {
>          if (is_store) {
>              if (reg != OR_TMP0)
> -                gen_op_mov_v_reg(ot, cpu_T0, reg);
> -            gen_op_mov_reg_v(ot, rm, cpu_T0);
> +                gen_op_mov_v_reg(ot, s->T0, reg);
> +            gen_op_mov_reg_v(ot, rm, s->T0);
>          } else {
> -            gen_op_mov_v_reg(ot, cpu_T0, rm);
> +            gen_op_mov_v_reg(ot, s->T0, rm);
>              if (reg != OR_TMP0)
> -                gen_op_mov_reg_v(ot, reg, cpu_T0);
> +                gen_op_mov_reg_v(ot, reg, s->T0);
>          }
>      } else {
>          gen_lea_modrm(env, s, modrm);
>          if (is_store) {
>              if (reg != OR_TMP0)
> -                gen_op_mov_v_reg(ot, cpu_T0, reg);
> -            gen_op_st_v(s, ot, cpu_T0, s->A0);
> +                gen_op_mov_v_reg(ot, s->T0, reg);
> +            gen_op_st_v(s, ot, s->T0, s->A0);
>          } else {
> -            gen_op_ld_v(s, ot, cpu_T0, s->A0);
> +            gen_op_ld_v(s, ot, s->T0, s->A0);
>              if (reg != OR_TMP0)
> -                gen_op_mov_reg_v(ot, reg, cpu_T0);
> +                gen_op_mov_reg_v(ot, reg, s->T0);
>          }
>      }
>  }
> @@ -2251,9 +2252,9 @@ static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
>          cc.reg2 = tcg_const_tl(cc.imm);
>      }
>
> -    tcg_gen_movcond_tl(cc.cond, cpu_T0, cc.reg, cc.reg2,
> -                       cpu_T0, cpu_regs[reg]);
> -    gen_op_mov_reg_v(ot, reg, cpu_T0);
> +    tcg_gen_movcond_tl(cc.cond, s->T0, cc.reg, cc.reg2,
> +                       s->T0, cpu_regs[reg]);
> +    gen_op_mov_reg_v(ot, reg, s->T0);
>
>      if (cc.mask != -1) {
>          tcg_temp_free(cc.reg);
> @@ -2263,18 +2264,18 @@ static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
>      }
>  }
>
> -static inline void gen_op_movl_T0_seg(int seg_reg)
> +static inline void gen_op_movl_T0_seg(DisasContext *s, int seg_reg)
>  {
> -    tcg_gen_ld32u_tl(cpu_T0, cpu_env,
> +    tcg_gen_ld32u_tl(s->T0, cpu_env,
>                       offsetof(CPUX86State,segs[seg_reg].selector));
>  }
>
> -static inline void gen_op_movl_seg_T0_vm(int seg_reg)
> +static inline void gen_op_movl_seg_T0_vm(DisasContext *s, int seg_reg)
>  {
> -    tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
> -    tcg_gen_st32_tl(cpu_T0, cpu_env,
> +    tcg_gen_ext16u_tl(s->T0, s->T0);
> +    tcg_gen_st32_tl(s->T0, cpu_env,
>                      offsetof(CPUX86State,segs[seg_reg].selector));
> -    tcg_gen_shli_tl(cpu_seg_base[seg_reg], cpu_T0, 4);
> +    tcg_gen_shli_tl(cpu_seg_base[seg_reg], s->T0, 4);
>  }
>
>  /* move T0 to seg_reg and compute if the CPU state may change. Never
> @@ -2282,7 +2283,7 @@ static inline void gen_op_movl_seg_T0_vm(int seg_reg)
>  static void gen_movl_seg_T0(DisasContext *s, int seg_reg)
>  {
>      if (s->pe && !s->vm86) {
> -        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>          gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), cpu_tmp2_i32);
>          /* abort translation because the addseg value may change or
>             because ss32 may change. For R_SS, translation must always
> @@ -2292,7 +2293,7 @@ static void gen_movl_seg_T0(DisasContext *s, int seg_reg)
>              s->base.is_jmp = DISAS_TOO_MANY;
>          }
>      } else {
> -        gen_op_movl_seg_T0_vm(seg_reg);
> +        gen_op_movl_seg_T0_vm(s, seg_reg);
>          if (seg_reg == R_SS) {
>              s->base.is_jmp = DISAS_TOO_MANY;
>          }
> @@ -2356,7 +2357,7 @@ static TCGMemOp gen_pop_T0(DisasContext *s)
>      TCGMemOp d_ot = mo_pushpop(s, s->dflag);
>
>      gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
> -    gen_op_ld_v(s, d_ot, cpu_T0, s->A0);
> +    gen_op_ld_v(s, d_ot, s->T0, s->A0);
>
>      return d_ot;
>  }
> @@ -2401,8 +2402,8 @@ static void gen_popa(DisasContext *s)
>          }
>          tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], i * size);
>          gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
> -        gen_op_ld_v(s, d_ot, cpu_T0, s->A0);
> -        gen_op_mov_reg_v(d_ot, 7 - i, cpu_T0);
> +        gen_op_ld_v(s, d_ot, s->T0, s->A0);
> +        gen_op_mov_reg_v(d_ot, 7 - i, s->T0);
>      }
>
>      gen_stack_update(s, 8 * size);
> @@ -2454,11 +2455,11 @@ static void gen_leave(DisasContext *s)
>      TCGMemOp a_ot = mo_stacksize(s);
>
>      gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
> -    gen_op_ld_v(s, d_ot, cpu_T0, s->A0);
> +    gen_op_ld_v(s, d_ot, s->T0, s->A0);
>
>      tcg_gen_addi_tl(cpu_T1, cpu_regs[R_EBP], 1 << d_ot);
>
> -    gen_op_mov_reg_v(d_ot, R_EBP, cpu_T0);
> +    gen_op_mov_reg_v(d_ot, R_EBP, s->T0);
>      gen_op_mov_reg_v(a_ot, R_ESP, cpu_T1);
>  }
>
> @@ -3126,23 +3127,24 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  gen_stq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].ZMM_Q(0)));
>              } else {
> -                tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
> +                tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
>                      xmm_regs[reg].ZMM_L(0)));
> -                gen_op_st_v(s, MO_32, cpu_T0, s->A0);
> +                gen_op_st_v(s, MO_32, s->T0, s->A0);
>              }
>              break;
>          case 0x6e: /* movd mm, ea */
>  #ifdef TARGET_X86_64
>              if (s->dflag == MO_64) {
>                  gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
> -                tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
> +                tcg_gen_st_tl(s->T0, cpu_env,
> +                              offsetof(CPUX86State, fpregs[reg].mmx));
>              } else
>  #endif
>              {
>                  gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
>                  tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
>                                   offsetof(CPUX86State,fpregs[reg].mmx));
> -                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>                  gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
>              }
>              break;
> @@ -3152,14 +3154,14 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
>                  tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
>                                   offsetof(CPUX86State,xmm_regs[reg]));
> -                gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T0);
> +                gen_helper_movq_mm_T0_xmm(cpu_ptr0, s->T0);
>              } else
>  #endif
>              {
>                  gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
>                  tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
>                                   offsetof(CPUX86State,xmm_regs[reg]));
> -                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>                  gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
>              }
>              break;
> @@ -3193,12 +3195,16 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>          case 0x210: /* movss xmm, ea */
>              if (mod != 3) {
>                  gen_lea_modrm(env, s, modrm);
> -                gen_op_ld_v(s, MO_32, cpu_T0, s->A0);
> -                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
> -                tcg_gen_movi_tl(cpu_T0, 0);
> -                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
> -                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
> -                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
> +                gen_op_ld_v(s, MO_32, s->T0, s->A0);
> +                tcg_gen_st32_tl(s->T0, cpu_env,
> +                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
> +                tcg_gen_movi_tl(s->T0, 0);
> +                tcg_gen_st32_tl(s->T0, cpu_env,
> +                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)));
> +                tcg_gen_st32_tl(s->T0, cpu_env,
> +                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
> +                tcg_gen_st32_tl(s->T0, cpu_env,
> +                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
>              } else {
>                  rm = (modrm & 7) | REX_B(s);
>                  gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
> @@ -3210,9 +3216,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  gen_lea_modrm(env, s, modrm);
>                  gen_ldq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].ZMM_Q(0)));
> -                tcg_gen_movi_tl(cpu_T0, 0);
> -                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
> -                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
> +                tcg_gen_movi_tl(s->T0, 0);
> +                tcg_gen_st32_tl(s->T0, cpu_env,
> +                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
> +                tcg_gen_st32_tl(s->T0, cpu_env,
> +                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
>              } else {
>                  rm = (modrm & 7) | REX_B(s);
>                  gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
> @@ -3314,13 +3322,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>          case 0x7e: /* movd ea, mm */
>  #ifdef TARGET_X86_64
>              if (s->dflag == MO_64) {
> -                tcg_gen_ld_i64(cpu_T0, cpu_env,
> +                tcg_gen_ld_i64(s->T0, cpu_env,
>                                 offsetof(CPUX86State,fpregs[reg].mmx));
>                  gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
>              } else
>  #endif
>              {
> -                tcg_gen_ld32u_tl(cpu_T0, cpu_env,
> +                tcg_gen_ld32u_tl(s->T0, cpu_env,
>                                   offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
>                  gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
>              }
> @@ -3328,13 +3336,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>          case 0x17e: /* movd ea, xmm */
>  #ifdef TARGET_X86_64
>              if (s->dflag == MO_64) {
> -                tcg_gen_ld_i64(cpu_T0, cpu_env,
> +                tcg_gen_ld_i64(s->T0, cpu_env,
>                                 offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
>                  gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
>              } else
>  #endif
>              {
> -                tcg_gen_ld32u_tl(cpu_T0, cpu_env,
> +                tcg_gen_ld32u_tl(s->T0, cpu_env,
>                                   offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
>                  gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
>              }
> @@ -3379,8 +3387,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>          case 0x211: /* movss ea, xmm */
>              if (mod != 3) {
>                  gen_lea_modrm(env, s, modrm);
> -                tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
> -                gen_op_st_v(s, MO_32, cpu_T0, s->A0);
> +                tcg_gen_ld32u_tl(s->T0, cpu_env,
> +                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
> +                gen_op_st_v(s, MO_32, s->T0, s->A0);
>              } else {
>                  rm = (modrm & 7) | REX_B(s);
>                  gen_op_movl(offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)),
> @@ -3429,16 +3438,20 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>              }
>              val = x86_ldub_code(env, s);
>              if (is_xmm) {
> -                tcg_gen_movi_tl(cpu_T0, val);
> -                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
> -                tcg_gen_movi_tl(cpu_T0, 0);
> -                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(1)));
> +                tcg_gen_movi_tl(s->T0, val);
> +                tcg_gen_st32_tl(s->T0, cpu_env,
> +                                offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
> +                tcg_gen_movi_tl(s->T0, 0);
> +                tcg_gen_st32_tl(s->T0, cpu_env,
> +                                offsetof(CPUX86State, xmm_t0.ZMM_L(1)));
>                  op1_offset = offsetof(CPUX86State,xmm_t0);
>              } else {
> -                tcg_gen_movi_tl(cpu_T0, val);
> -                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(0)));
> -                tcg_gen_movi_tl(cpu_T0, 0);
> -                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(1)));
> +                tcg_gen_movi_tl(s->T0, val);
> +                tcg_gen_st32_tl(s->T0, cpu_env,
> +                                offsetof(CPUX86State, mmx_t0.MMX_L(0)));
> +                tcg_gen_movi_tl(s->T0, 0);
> +                tcg_gen_st32_tl(s->T0, cpu_env,
> +                                offsetof(CPUX86State, mmx_t0.MMX_L(1)));
>                  op1_offset = offsetof(CPUX86State,mmx_t0);
>              }
>              sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 +
> @@ -3503,12 +3516,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>              tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
>              if (ot == MO_32) {
>                  SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
> -                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>                  sse_fn_epi(cpu_env, cpu_ptr0, cpu_tmp2_i32);
>              } else {
>  #ifdef TARGET_X86_64
>                  SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
> -                sse_fn_epl(cpu_env, cpu_ptr0, cpu_T0);
> +                sse_fn_epl(cpu_env, cpu_ptr0, s->T0);
>  #else
>                  goto illegal_op;
>  #endif
> @@ -3555,8 +3568,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  if ((b >> 8) & 1) {
>                      gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0)));
>                  } else {
> -                    gen_op_ld_v(s, MO_32, cpu_T0, s->A0);
> -                    tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
> +                    gen_op_ld_v(s, MO_32, s->T0, s->A0);
> +                    tcg_gen_st32_tl(s->T0, cpu_env,
> +                                    offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
>                  }
>                  op2_offset = offsetof(CPUX86State,xmm_t0);
>              } else {
> @@ -3568,17 +3582,17 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  SSEFunc_i_ep sse_fn_i_ep =
>                      sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
>                  sse_fn_i_ep(cpu_tmp2_i32, cpu_env, cpu_ptr0);
> -                tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
> +                tcg_gen_extu_i32_tl(s->T0, cpu_tmp2_i32);
>              } else {
>  #ifdef TARGET_X86_64
>                  SSEFunc_l_ep sse_fn_l_ep =
>                      sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
> -                sse_fn_l_ep(cpu_T0, cpu_env, cpu_ptr0);
> +                sse_fn_l_ep(s->T0, cpu_env, cpu_ptr0);
>  #else
>                  goto illegal_op;
>  #endif
>              }
> -            gen_op_mov_reg_v(ot, reg, cpu_T0);
> +            gen_op_mov_reg_v(ot, reg, s->T0);
>              break;
>          case 0xc4: /* pinsrw */
>          case 0x1c4:
> @@ -3587,11 +3601,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>              val = x86_ldub_code(env, s);
>              if (b1) {
>                  val &= 7;
> -                tcg_gen_st16_tl(cpu_T0, cpu_env,
> +                tcg_gen_st16_tl(s->T0, cpu_env,
>                                  offsetof(CPUX86State,xmm_regs[reg].ZMM_W(val)));
>              } else {
>                  val &= 3;
> -                tcg_gen_st16_tl(cpu_T0, cpu_env,
> +                tcg_gen_st16_tl(s->T0, cpu_env,
>                                  offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
>              }
>              break;
> @@ -3604,16 +3618,16 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>              if (b1) {
>                  val &= 7;
>                  rm = (modrm & 7) | REX_B(s);
> -                tcg_gen_ld16u_tl(cpu_T0, cpu_env,
> +                tcg_gen_ld16u_tl(s->T0, cpu_env,
>                                   offsetof(CPUX86State,xmm_regs[rm].ZMM_W(val)));
>              } else {
>                  val &= 3;
>                  rm = (modrm & 7);
> -                tcg_gen_ld16u_tl(cpu_T0, cpu_env,
> +                tcg_gen_ld16u_tl(s->T0, cpu_env,
>                                  offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
>              }
>              reg = ((modrm >> 3) & 7) | rex_r;
> -            gen_op_mov_reg_v(ot, reg, cpu_T0);
> +            gen_op_mov_reg_v(ot, reg, s->T0);
>              break;
>          case 0x1d6: /* movq ea, xmm */
>              if (mod != 3) {
> @@ -3760,11 +3774,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>
>                  tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[reg]);
>                  gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> -                gen_helper_crc32(cpu_T0, cpu_tmp2_i32,
> -                                 cpu_T0, tcg_const_i32(8 << ot));
> +                gen_helper_crc32(s->T0, cpu_tmp2_i32,
> +                                 s->T0, tcg_const_i32(8 << ot));
>
>                  ot = mo_64_32(s->dflag);
> -                gen_op_mov_reg_v(ot, reg, cpu_T0);
> +                gen_op_mov_reg_v(ot, reg, s->T0);
>                  break;
>
>              case 0x1f0: /* crc32 or movbe */
> @@ -3789,9 +3803,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>
>                  gen_lea_modrm(env, s, modrm);
>                  if ((b & 1) == 0) {
> -                    tcg_gen_qemu_ld_tl(cpu_T0, s->A0,
> +                    tcg_gen_qemu_ld_tl(s->T0, s->A0,
>                                         s->mem_index, ot | MO_BE);
> -                    gen_op_mov_reg_v(ot, reg, cpu_T0);
> +                    gen_op_mov_reg_v(ot, reg, s->T0);
>                  } else {
>                      tcg_gen_qemu_st_tl(cpu_regs[reg], s->A0,
>                                         s->mem_index, ot | MO_BE);
> @@ -3806,9 +3820,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  }
>                  ot = mo_64_32(s->dflag);
>                  gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> -                tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_regs[s->vex_v]);
> -                gen_op_mov_reg_v(ot, reg, cpu_T0);
> -                gen_op_update1_cc();
> +                tcg_gen_andc_tl(s->T0, s->T0, cpu_regs[s->vex_v]);
> +                gen_op_mov_reg_v(ot, reg, s->T0);
> +                gen_op_update1_cc(s);
>                  set_cc_op(s, CC_OP_LOGICB + ot);
>                  break;
>
> @@ -3826,12 +3840,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      /* Extract START, and shift the operand.
>                         Shifts larger than operand size get zeros.  */
>                      tcg_gen_ext8u_tl(s->A0, cpu_regs[s->vex_v]);
> -                    tcg_gen_shr_tl(cpu_T0, cpu_T0, s->A0);
> +                    tcg_gen_shr_tl(s->T0, s->T0, s->A0);
>
>                      bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
>                      zero = tcg_const_tl(0);
> -                    tcg_gen_movcond_tl(TCG_COND_LEU, cpu_T0, s->A0, bound,
> -                                       cpu_T0, zero);
> +                    tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound,
> +                                       s->T0, zero);
>                      tcg_temp_free(zero);
>
>                      /* Extract the LEN into a mask.  Lengths larger than
> @@ -3843,10 +3857,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      tcg_gen_movi_tl(cpu_T1, 1);
>                      tcg_gen_shl_tl(cpu_T1, cpu_T1, s->A0);
>                      tcg_gen_subi_tl(cpu_T1, cpu_T1, 1);
> -                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
> +                    tcg_gen_and_tl(s->T0, s->T0, cpu_T1);
>
> -                    gen_op_mov_reg_v(ot, reg, cpu_T0);
> -                    gen_op_update1_cc();
> +                    gen_op_mov_reg_v(ot, reg, s->T0);
> +                    gen_op_update1_cc(s);
>                      set_cc_op(s, CC_OP_LOGICB + ot);
>                  }
>                  break;
> @@ -3872,9 +3886,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  }
>                  tcg_gen_movi_tl(s->A0, -1);
>                  tcg_gen_shl_tl(s->A0, s->A0, cpu_T1);
> -                tcg_gen_andc_tl(cpu_T0, cpu_T0, s->A0);
> -                gen_op_mov_reg_v(ot, reg, cpu_T0);
> -                gen_op_update1_cc();
> +                tcg_gen_andc_tl(s->T0, s->T0, s->A0);
> +                gen_op_mov_reg_v(ot, reg, s->T0);
> +                gen_op_update1_cc(s);
>                  set_cc_op(s, CC_OP_BMILGB + ot);
>                  break;
>
> @@ -3888,7 +3902,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
>                  switch (ot) {
>                  default:
> -                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>                      tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EDX]);
>                      tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
>                                        cpu_tmp2_i32, cpu_tmp3_i32);
> @@ -3897,9 +3911,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      break;
>  #ifdef TARGET_X86_64
>                  case MO_64:
> -                    tcg_gen_mulu2_i64(cpu_T0, cpu_T1,
> -                                      cpu_T0, cpu_regs[R_EDX]);
> -                    tcg_gen_mov_i64(cpu_regs[s->vex_v], cpu_T0);
> +                    tcg_gen_mulu2_i64(s->T0, cpu_T1,
> +                                      s->T0, cpu_regs[R_EDX]);
> +                    tcg_gen_mov_i64(cpu_regs[s->vex_v], s->T0);
>                      tcg_gen_mov_i64(cpu_regs[reg], cpu_T1);
>                      break;
>  #endif
> @@ -3921,7 +3935,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  } else {
>                      tcg_gen_ext32u_tl(cpu_T1, cpu_regs[s->vex_v]);
>                  }
> -                gen_helper_pdep(cpu_regs[reg], cpu_T0, cpu_T1);
> +                gen_helper_pdep(cpu_regs[reg], s->T0, cpu_T1);
>                  break;
>
>              case 0x2f5: /* pext Gy, By, Ey */
> @@ -3939,7 +3953,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  } else {
>                      tcg_gen_ext32u_tl(cpu_T1, cpu_regs[s->vex_v]);
>                  }
> -                gen_helper_pext(cpu_regs[reg], cpu_T0, cpu_T1);
> +                gen_helper_pext(cpu_regs[reg], s->T0, cpu_T1);
>                  break;
>
>              case 0x1f6: /* adcx Gy, Ey */
> @@ -3997,22 +4011,22 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                          /* If we know TL is 64-bit, and we want a 32-bit
>                             result, just do everything in 64-bit arithmetic.  */
>                          tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
> -                        tcg_gen_ext32u_i64(cpu_T0, cpu_T0);
> -                        tcg_gen_add_i64(cpu_T0, cpu_T0, cpu_regs[reg]);
> -                        tcg_gen_add_i64(cpu_T0, cpu_T0, carry_in);
> -                        tcg_gen_ext32u_i64(cpu_regs[reg], cpu_T0);
> -                        tcg_gen_shri_i64(carry_out, cpu_T0, 32);
> +                        tcg_gen_ext32u_i64(s->T0, s->T0);
> +                        tcg_gen_add_i64(s->T0, s->T0, cpu_regs[reg]);
> +                        tcg_gen_add_i64(s->T0, s->T0, carry_in);
> +                        tcg_gen_ext32u_i64(cpu_regs[reg], s->T0);
> +                        tcg_gen_shri_i64(carry_out, s->T0, 32);
>                          break;
>  #endif
>                      default:
>                          /* Otherwise compute the carry-out in two steps.  */
>                          zero = tcg_const_tl(0);
> -                        tcg_gen_add2_tl(cpu_T0, carry_out,
> -                                        cpu_T0, zero,
> +                        tcg_gen_add2_tl(s->T0, carry_out,
> +                                        s->T0, zero,
>                                          carry_in, zero);
>                          tcg_gen_add2_tl(cpu_regs[reg], carry_out,
>                                          cpu_regs[reg], carry_out,
> -                                        cpu_T0, zero);
> +                                        s->T0, zero);
>                          tcg_temp_free(zero);
>                          break;
>                      }
> @@ -4036,19 +4050,19 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      tcg_gen_andi_tl(cpu_T1, cpu_regs[s->vex_v], 31);
>                  }
>                  if (b == 0x1f7) {
> -                    tcg_gen_shl_tl(cpu_T0, cpu_T0, cpu_T1);
> +                    tcg_gen_shl_tl(s->T0, s->T0, cpu_T1);
>                  } else if (b == 0x2f7) {
>                      if (ot != MO_64) {
> -                        tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
> +                        tcg_gen_ext32s_tl(s->T0, s->T0);
>                      }
> -                    tcg_gen_sar_tl(cpu_T0, cpu_T0, cpu_T1);
> +                    tcg_gen_sar_tl(s->T0, s->T0, cpu_T1);
>                  } else {
>                      if (ot != MO_64) {
> -                        tcg_gen_ext32u_tl(cpu_T0, cpu_T0);
> +                        tcg_gen_ext32u_tl(s->T0, s->T0);
>                      }
> -                    tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_T1);
> +                    tcg_gen_shr_tl(s->T0, s->T0, cpu_T1);
>                  }
> -                gen_op_mov_reg_v(ot, reg, cpu_T0);
> +                gen_op_mov_reg_v(ot, reg, s->T0);
>                  break;
>
>              case 0x0f3:
> @@ -4063,25 +4077,25 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  ot = mo_64_32(s->dflag);
>                  gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
>
> -                tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
> +                tcg_gen_mov_tl(cpu_cc_src, s->T0);
>                  switch (reg & 7) {
>                  case 1: /* blsr By,Ey */
> -                    tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
> -                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
> +                    tcg_gen_subi_tl(cpu_T1, s->T0, 1);
> +                    tcg_gen_and_tl(s->T0, s->T0, cpu_T1);
>                      break;
>                  case 2: /* blsmsk By,Ey */
> -                    tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
> -                    tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
> +                    tcg_gen_subi_tl(cpu_T1, s->T0, 1);
> +                    tcg_gen_xor_tl(s->T0, s->T0, cpu_T1);
>                      break;
>                  case 3: /* blsi By, Ey */
> -                    tcg_gen_neg_tl(cpu_T1, cpu_T0);
> -                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
> +                    tcg_gen_neg_tl(cpu_T1, s->T0);
> +                    tcg_gen_and_tl(s->T0, s->T0, cpu_T1);
>                      break;
>                  default:
>                      goto unknown_op;
>                  }
> -                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
> -                gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
> +                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
> +                gen_op_mov_reg_v(ot, s->vex_v, s->T0);
>                  set_cc_op(s, CC_OP_BMILGB + ot);
>                  break;
>
> @@ -4119,22 +4133,22 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  val = x86_ldub_code(env, s);
>                  switch (b) {
>                  case 0x14: /* pextrb */
> -                    tcg_gen_ld8u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
> +                    tcg_gen_ld8u_tl(s->T0, cpu_env, offsetof(CPUX86State,
>                                              xmm_regs[reg].ZMM_B(val & 15)));
>                      if (mod == 3) {
> -                        gen_op_mov_reg_v(ot, rm, cpu_T0);
> +                        gen_op_mov_reg_v(ot, rm, s->T0);
>                      } else {
> -                        tcg_gen_qemu_st_tl(cpu_T0, s->A0,
> +                        tcg_gen_qemu_st_tl(s->T0, s->A0,
>                                             s->mem_index, MO_UB);
>                      }
>                      break;
>                  case 0x15: /* pextrw */
> -                    tcg_gen_ld16u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
> +                    tcg_gen_ld16u_tl(s->T0, cpu_env, offsetof(CPUX86State,
>                                              xmm_regs[reg].ZMM_W(val & 7)));
>                      if (mod == 3) {
> -                        gen_op_mov_reg_v(ot, rm, cpu_T0);
> +                        gen_op_mov_reg_v(ot, rm, s->T0);
>                      } else {
> -                        tcg_gen_qemu_st_tl(cpu_T0, s->A0,
> +                        tcg_gen_qemu_st_tl(s->T0, s->A0,
>                                             s->mem_index, MO_LEUW);
>                      }
>                      break;
> @@ -4166,23 +4180,23 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      }
>                      break;
>                  case 0x17: /* extractps */
> -                    tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
> +                    tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
>                                              xmm_regs[reg].ZMM_L(val & 3)));
>                      if (mod == 3) {
> -                        gen_op_mov_reg_v(ot, rm, cpu_T0);
> +                        gen_op_mov_reg_v(ot, rm, s->T0);
>                      } else {
> -                        tcg_gen_qemu_st_tl(cpu_T0, s->A0,
> +                        tcg_gen_qemu_st_tl(s->T0, s->A0,
>                                             s->mem_index, MO_LEUL);
>                      }
>                      break;
>                  case 0x20: /* pinsrb */
>                      if (mod == 3) {
> -                        gen_op_mov_v_reg(MO_32, cpu_T0, rm);
> +                        gen_op_mov_v_reg(MO_32, s->T0, rm);
>                      } else {
> -                        tcg_gen_qemu_ld_tl(cpu_T0, s->A0,
> +                        tcg_gen_qemu_ld_tl(s->T0, s->A0,
>                                             s->mem_index, MO_UB);
>                      }
> -                    tcg_gen_st8_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
> +                    tcg_gen_st8_tl(s->T0, cpu_env, offsetof(CPUX86State,
>                                              xmm_regs[reg].ZMM_B(val & 15)));
>                      break;
>                  case 0x21: /* insertps */
> @@ -4297,13 +4311,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
>                  b = x86_ldub_code(env, s);
>                  if (ot == MO_64) {
> -                    tcg_gen_rotri_tl(cpu_T0, cpu_T0, b & 63);
> +                    tcg_gen_rotri_tl(s->T0, s->T0, b & 63);
>                  } else {
> -                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>                      tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, b & 31);
> -                    tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
> +                    tcg_gen_extu_i32_tl(s->T0, cpu_tmp2_i32);
>                  }
> -                gen_op_mov_reg_v(ot, reg, cpu_T0);
> +                gen_op_mov_reg_v(ot, reg, s->T0);
>                  break;
>
>              default:
> @@ -4360,8 +4374,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  switch (sz) {
>                  case 2:
>                      /* 32 bit access */
> -                    gen_op_ld_v(s, MO_32, cpu_T0, s->A0);
> -                    tcg_gen_st32_tl(cpu_T0, cpu_env,
> +                    gen_op_ld_v(s, MO_32, s->T0, s->A0);
> +                    tcg_gen_st32_tl(s->T0, cpu_env,
>                                      offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
>                      break;
>                  case 3:
> @@ -4657,8 +4671,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  xor_zero:
>                      /* xor reg, reg optimisation */
>                      set_cc_op(s, CC_OP_CLR);
> -                    tcg_gen_movi_tl(cpu_T0, 0);
> -                    gen_op_mov_reg_v(ot, reg, cpu_T0);
> +                    tcg_gen_movi_tl(s->T0, 0);
> +                    gen_op_mov_reg_v(ot, reg, s->T0);
>                      break;
>                  } else {
>                      opreg = rm;
> @@ -4760,17 +4774,17 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              /* For those below that handle locked memory, don't load here.  */
>              if (!(s->prefix & PREFIX_LOCK)
>                  || op != 2) {
> -                gen_op_ld_v(s, ot, cpu_T0, s->A0);
> +                gen_op_ld_v(s, ot, s->T0, s->A0);
>              }
>          } else {
> -            gen_op_mov_v_reg(ot, cpu_T0, rm);
> +            gen_op_mov_v_reg(ot, s->T0, rm);
>          }
>
>          switch(op) {
>          case 0: /* test */
>              val = insn_get(env, s, ot);
>              tcg_gen_movi_tl(cpu_T1, val);
> -            gen_op_testl_T0_T1_cc();
> +            gen_op_testl_T0_T1_cc(s);
>              set_cc_op(s, CC_OP_LOGICB + ot);
>              break;
>          case 2: /* not */
> @@ -4778,15 +4792,15 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  if (mod == 3) {
>                      goto illegal_op;
>                  }
> -                tcg_gen_movi_tl(cpu_T0, ~0);
> -                tcg_gen_atomic_xor_fetch_tl(cpu_T0, s->A0, cpu_T0,
> +                tcg_gen_movi_tl(s->T0, ~0);
> +                tcg_gen_atomic_xor_fetch_tl(s->T0, s->A0, s->T0,
>                                              s->mem_index, ot | MO_LE);
>              } else {
> -                tcg_gen_not_tl(cpu_T0, cpu_T0);
> +                tcg_gen_not_tl(s->T0, s->T0);
>                  if (mod != 3) {
> -                    gen_op_st_v(s, ot, cpu_T0, s->A0);
> +                    gen_op_st_v(s, ot, s->T0, s->A0);
>                  } else {
> -                    gen_op_mov_reg_v(ot, rm, cpu_T0);
> +                    gen_op_mov_reg_v(ot, rm, s->T0);
>                  }
>              }
>              break;
> @@ -4803,7 +4817,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  label1 = gen_new_label();
>
>                  tcg_gen_mov_tl(a0, s->A0);
> -                tcg_gen_mov_tl(t0, cpu_T0);
> +                tcg_gen_mov_tl(t0, s->T0);
>
>                  gen_set_label(label1);
>                  t1 = tcg_temp_new();
> @@ -4817,14 +4831,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>
>                  tcg_temp_free(t2);
>                  tcg_temp_free(a0);
> -                tcg_gen_mov_tl(cpu_T0, t0);
> +                tcg_gen_mov_tl(s->T0, t0);
>                  tcg_temp_free(t0);
>              } else {
> -                tcg_gen_neg_tl(cpu_T0, cpu_T0);
> +                tcg_gen_neg_tl(s->T0, s->T0);
>                  if (mod != 3) {
> -                    gen_op_st_v(s, ot, cpu_T0, s->A0);
> +                    gen_op_st_v(s, ot, s->T0, s->A0);
>                  } else {
> -                    gen_op_mov_reg_v(ot, rm, cpu_T0);
> +                    gen_op_mov_reg_v(ot, rm, s->T0);
>                  }
>              }
>              gen_op_update_neg_cc(s);
> @@ -4834,31 +4848,31 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              switch(ot) {
>              case MO_8:
>                  gen_op_mov_v_reg(MO_8, cpu_T1, R_EAX);
> -                tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
> +                tcg_gen_ext8u_tl(s->T0, s->T0);
>                  tcg_gen_ext8u_tl(cpu_T1, cpu_T1);
>                  /* XXX: use 32 bit mul which could be faster */
> -                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
> -                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
> -                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
> -                tcg_gen_andi_tl(cpu_cc_src, cpu_T0, 0xff00);
> +                tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
> +                gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
> +                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
> +                tcg_gen_andi_tl(cpu_cc_src, s->T0, 0xff00);
>                  set_cc_op(s, CC_OP_MULB);
>                  break;
>              case MO_16:
>                  gen_op_mov_v_reg(MO_16, cpu_T1, R_EAX);
> -                tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
> +                tcg_gen_ext16u_tl(s->T0, s->T0);
>                  tcg_gen_ext16u_tl(cpu_T1, cpu_T1);
>                  /* XXX: use 32 bit mul which could be faster */
> -                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
> -                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
> -                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
> -                tcg_gen_shri_tl(cpu_T0, cpu_T0, 16);
> -                gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
> -                tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
> +                tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
> +                gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
> +                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
> +                tcg_gen_shri_tl(s->T0, s->T0, 16);
> +                gen_op_mov_reg_v(MO_16, R_EDX, s->T0);
> +                tcg_gen_mov_tl(cpu_cc_src, s->T0);
>                  set_cc_op(s, CC_OP_MULW);
>                  break;
>              default:
>              case MO_32:
> -                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>                  tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
>                  tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
>                                    cpu_tmp2_i32, cpu_tmp3_i32);
> @@ -4871,7 +4885,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>  #ifdef TARGET_X86_64
>              case MO_64:
>                  tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
> -                                  cpu_T0, cpu_regs[R_EAX]);
> +                                  s->T0, cpu_regs[R_EAX]);
>                  tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
>                  tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
>                  set_cc_op(s, CC_OP_MULQ);
> @@ -4883,33 +4897,33 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              switch(ot) {
>              case MO_8:
>                  gen_op_mov_v_reg(MO_8, cpu_T1, R_EAX);
> -                tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
> +                tcg_gen_ext8s_tl(s->T0, s->T0);
>                  tcg_gen_ext8s_tl(cpu_T1, cpu_T1);
>                  /* XXX: use 32 bit mul which could be faster */
> -                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
> -                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
> -                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
> -                tcg_gen_ext8s_tl(cpu_tmp0, cpu_T0);
> -                tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
> +                tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
> +                gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
> +                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
> +                tcg_gen_ext8s_tl(cpu_tmp0, s->T0);
> +                tcg_gen_sub_tl(cpu_cc_src, s->T0, cpu_tmp0);
>                  set_cc_op(s, CC_OP_MULB);
>                  break;
>              case MO_16:
>                  gen_op_mov_v_reg(MO_16, cpu_T1, R_EAX);
> -                tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
> +                tcg_gen_ext16s_tl(s->T0, s->T0);
>                  tcg_gen_ext16s_tl(cpu_T1, cpu_T1);
>                  /* XXX: use 32 bit mul which could be faster */
> -                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
> -                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
> -                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
> -                tcg_gen_ext16s_tl(cpu_tmp0, cpu_T0);
> -                tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
> -                tcg_gen_shri_tl(cpu_T0, cpu_T0, 16);
> -                gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
> +                tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
> +                gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
> +                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
> +                tcg_gen_ext16s_tl(cpu_tmp0, s->T0);
> +                tcg_gen_sub_tl(cpu_cc_src, s->T0, cpu_tmp0);
> +                tcg_gen_shri_tl(s->T0, s->T0, 16);
> +                gen_op_mov_reg_v(MO_16, R_EDX, s->T0);
>                  set_cc_op(s, CC_OP_MULW);
>                  break;
>              default:
>              case MO_32:
> -                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>                  tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
>                  tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
>                                    cpu_tmp2_i32, cpu_tmp3_i32);
> @@ -4924,7 +4938,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>  #ifdef TARGET_X86_64
>              case MO_64:
>                  tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
> -                                  cpu_T0, cpu_regs[R_EAX]);
> +                                  s->T0, cpu_regs[R_EAX]);
>                  tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
>                  tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
>                  tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
> @@ -4936,18 +4950,18 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          case 6: /* div */
>              switch(ot) {
>              case MO_8:
> -                gen_helper_divb_AL(cpu_env, cpu_T0);
> +                gen_helper_divb_AL(cpu_env, s->T0);
>                  break;
>              case MO_16:
> -                gen_helper_divw_AX(cpu_env, cpu_T0);
> +                gen_helper_divw_AX(cpu_env, s->T0);
>                  break;
>              default:
>              case MO_32:
> -                gen_helper_divl_EAX(cpu_env, cpu_T0);
> +                gen_helper_divl_EAX(cpu_env, s->T0);
>                  break;
>  #ifdef TARGET_X86_64
>              case MO_64:
> -                gen_helper_divq_EAX(cpu_env, cpu_T0);
> +                gen_helper_divq_EAX(cpu_env, s->T0);
>                  break;
>  #endif
>              }
> @@ -4955,18 +4969,18 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          case 7: /* idiv */
>              switch(ot) {
>              case MO_8:
> -                gen_helper_idivb_AL(cpu_env, cpu_T0);
> +                gen_helper_idivb_AL(cpu_env, s->T0);
>                  break;
>              case MO_16:
> -                gen_helper_idivw_AX(cpu_env, cpu_T0);
> +                gen_helper_idivw_AX(cpu_env, s->T0);
>                  break;
>              default:
>              case MO_32:
> -                gen_helper_idivl_EAX(cpu_env, cpu_T0);
> +                gen_helper_idivl_EAX(cpu_env, s->T0);
>                  break;
>  #ifdef TARGET_X86_64
>              case MO_64:
> -                gen_helper_idivq_EAX(cpu_env, cpu_T0);
> +                gen_helper_idivq_EAX(cpu_env, s->T0);
>                  break;
>  #endif
>              }
> @@ -5001,9 +5015,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          if (mod != 3) {
>              gen_lea_modrm(env, s, modrm);
>              if (op >= 2 && op != 3 && op != 5)
> -                gen_op_ld_v(s, ot, cpu_T0, s->A0);
> +                gen_op_ld_v(s, ot, s->T0, s->A0);
>          } else {
> -            gen_op_mov_v_reg(ot, cpu_T0, rm);
> +            gen_op_mov_v_reg(ot, s->T0, rm);
>          }
>
>          switch(op) {
> @@ -5024,27 +5038,27 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          case 2: /* call Ev */
>              /* XXX: optimize if memory (no 'and' is necessary) */
>              if (dflag == MO_16) {
> -                tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
> +                tcg_gen_ext16u_tl(s->T0, s->T0);
>              }
>              next_eip = s->pc - s->cs_base;
>              tcg_gen_movi_tl(cpu_T1, next_eip);
>              gen_push_v(s, cpu_T1);
> -            gen_op_jmp_v(cpu_T0);
> +            gen_op_jmp_v(s->T0);
>              gen_bnd_jmp(s);
> -            gen_jr(s, cpu_T0);
> +            gen_jr(s, s->T0);
>              break;
>          case 3: /* lcall Ev */
>              gen_op_ld_v(s, ot, cpu_T1, s->A0);
>              gen_add_A0_im(s, 1 << ot);
> -            gen_op_ld_v(s, MO_16, cpu_T0, s->A0);
> +            gen_op_ld_v(s, MO_16, s->T0, s->A0);
>          do_lcall:
>              if (s->pe && !s->vm86) {
> -                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>                  gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, cpu_T1,
>                                             tcg_const_i32(dflag - 1),
>                                             tcg_const_tl(s->pc - s->cs_base));
>              } else {
> -                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>                  gen_helper_lcall_real(cpu_env, cpu_tmp2_i32, cpu_T1,
>                                        tcg_const_i32(dflag - 1),
>                                        tcg_const_i32(s->pc - s->cs_base));
> @@ -5054,30 +5068,30 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              break;
>          case 4: /* jmp Ev */
>              if (dflag == MO_16) {
> -                tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
> +                tcg_gen_ext16u_tl(s->T0, s->T0);
>              }
> -            gen_op_jmp_v(cpu_T0);
> +            gen_op_jmp_v(s->T0);
>              gen_bnd_jmp(s);
> -            gen_jr(s, cpu_T0);
> +            gen_jr(s, s->T0);
>              break;
>          case 5: /* ljmp Ev */
>              gen_op_ld_v(s, ot, cpu_T1, s->A0);
>              gen_add_A0_im(s, 1 << ot);
> -            gen_op_ld_v(s, MO_16, cpu_T0, s->A0);
> +            gen_op_ld_v(s, MO_16, s->T0, s->A0);
>          do_ljmp:
>              if (s->pe && !s->vm86) {
> -                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>                  gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, cpu_T1,
>                                            tcg_const_tl(s->pc - s->cs_base));
>              } else {
> -                gen_op_movl_seg_T0_vm(R_CS);
> +                gen_op_movl_seg_T0_vm(s, R_CS);
>                  gen_op_jmp_v(cpu_T1);
>              }
>              tcg_gen_ld_tl(cpu_tmp4, cpu_env, offsetof(CPUX86State, eip));
>              gen_jr(s, cpu_tmp4);
>              break;
>          case 6: /* push Ev */
> -            gen_push_v(s, cpu_T0);
> +            gen_push_v(s, s->T0);
>              break;
>          default:
>              goto unknown_op;
> @@ -5093,7 +5107,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>
>          gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
>          gen_op_mov_v_reg(ot, cpu_T1, reg);
> -        gen_op_testl_T0_T1_cc();
> +        gen_op_testl_T0_T1_cc(s);
>          set_cc_op(s, CC_OP_LOGICB + ot);
>          break;
>
> @@ -5102,9 +5116,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          ot = mo_b_d(b, dflag);
>          val = insn_get(env, s, ot);
>
> -        gen_op_mov_v_reg(ot, cpu_T0, OR_EAX);
> +        gen_op_mov_v_reg(ot, s->T0, OR_EAX);
>          tcg_gen_movi_tl(cpu_T1, val);
> -        gen_op_testl_T0_T1_cc();
> +        gen_op_testl_T0_T1_cc(s);
>          set_cc_op(s, CC_OP_LOGICB + ot);
>          break;
>
> @@ -5112,20 +5126,20 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          switch (dflag) {
>  #ifdef TARGET_X86_64
>          case MO_64:
> -            gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
> -            tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
> -            gen_op_mov_reg_v(MO_64, R_EAX, cpu_T0);
> +            gen_op_mov_v_reg(MO_32, s->T0, R_EAX);
> +            tcg_gen_ext32s_tl(s->T0, s->T0);
> +            gen_op_mov_reg_v(MO_64, R_EAX, s->T0);
>              break;
>  #endif
>          case MO_32:
> -            gen_op_mov_v_reg(MO_16, cpu_T0, R_EAX);
> -            tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
> -            gen_op_mov_reg_v(MO_32, R_EAX, cpu_T0);
> +            gen_op_mov_v_reg(MO_16, s->T0, R_EAX);
> +            tcg_gen_ext16s_tl(s->T0, s->T0);
> +            gen_op_mov_reg_v(MO_32, R_EAX, s->T0);
>              break;
>          case MO_16:
> -            gen_op_mov_v_reg(MO_8, cpu_T0, R_EAX);
> -            tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
> -            gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
> +            gen_op_mov_v_reg(MO_8, s->T0, R_EAX);
> +            tcg_gen_ext8s_tl(s->T0, s->T0);
> +            gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
>              break;
>          default:
>              tcg_abort();
> @@ -5135,22 +5149,22 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          switch (dflag) {
>  #ifdef TARGET_X86_64
>          case MO_64:
> -            gen_op_mov_v_reg(MO_64, cpu_T0, R_EAX);
> -            tcg_gen_sari_tl(cpu_T0, cpu_T0, 63);
> -            gen_op_mov_reg_v(MO_64, R_EDX, cpu_T0);
> +            gen_op_mov_v_reg(MO_64, s->T0, R_EAX);
> +            tcg_gen_sari_tl(s->T0, s->T0, 63);
> +            gen_op_mov_reg_v(MO_64, R_EDX, s->T0);
>              break;
>  #endif
>          case MO_32:
> -            gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
> -            tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
> -            tcg_gen_sari_tl(cpu_T0, cpu_T0, 31);
> -            gen_op_mov_reg_v(MO_32, R_EDX, cpu_T0);
> +            gen_op_mov_v_reg(MO_32, s->T0, R_EAX);
> +            tcg_gen_ext32s_tl(s->T0, s->T0);
> +            tcg_gen_sari_tl(s->T0, s->T0, 31);
> +            gen_op_mov_reg_v(MO_32, R_EDX, s->T0);
>              break;
>          case MO_16:
> -            gen_op_mov_v_reg(MO_16, cpu_T0, R_EAX);
> -            tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
> -            tcg_gen_sari_tl(cpu_T0, cpu_T0, 15);
> -            gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
> +            gen_op_mov_v_reg(MO_16, s->T0, R_EAX);
> +            tcg_gen_ext16s_tl(s->T0, s->T0);
> +            tcg_gen_sari_tl(s->T0, s->T0, 15);
> +            gen_op_mov_reg_v(MO_16, R_EDX, s->T0);
>              break;
>          default:
>              tcg_abort();
> @@ -5179,14 +5193,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          switch (ot) {
>  #ifdef TARGET_X86_64
>          case MO_64:
> -            tcg_gen_muls2_i64(cpu_regs[reg], cpu_T1, cpu_T0, cpu_T1);
> +            tcg_gen_muls2_i64(cpu_regs[reg], cpu_T1, s->T0, cpu_T1);
>              tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
>              tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
>              tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_T1);
>              break;
>  #endif
>          case MO_32:
> -            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>              tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
>              tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
>                                cpu_tmp2_i32, cpu_tmp3_i32);
> @@ -5197,14 +5211,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
>              break;
>          default:
> -            tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
> +            tcg_gen_ext16s_tl(s->T0, s->T0);
>              tcg_gen_ext16s_tl(cpu_T1, cpu_T1);
>              /* XXX: use 32 bit mul which could be faster */
> -            tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
> -            tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
> -            tcg_gen_ext16s_tl(cpu_tmp0, cpu_T0);
> -            tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
> -            gen_op_mov_reg_v(ot, reg, cpu_T0);
> +            tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
> +            tcg_gen_mov_tl(cpu_cc_dst, s->T0);
> +            tcg_gen_ext16s_tl(cpu_tmp0, s->T0);
> +            tcg_gen_sub_tl(cpu_cc_src, s->T0, cpu_tmp0);
> +            gen_op_mov_reg_v(ot, reg, s->T0);
>              break;
>          }
>          set_cc_op(s, CC_OP_MULB + ot);
> @@ -5215,27 +5229,27 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          modrm = x86_ldub_code(env, s);
>          reg = ((modrm >> 3) & 7) | rex_r;
>          mod = (modrm >> 6) & 3;
> -        gen_op_mov_v_reg(ot, cpu_T0, reg);
> +        gen_op_mov_v_reg(ot, s->T0, reg);
>          if (mod == 3) {
>              rm = (modrm & 7) | REX_B(s);
>              gen_op_mov_v_reg(ot, cpu_T1, rm);
> -            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
> +            tcg_gen_add_tl(s->T0, s->T0, cpu_T1);
>              gen_op_mov_reg_v(ot, reg, cpu_T1);
> -            gen_op_mov_reg_v(ot, rm, cpu_T0);
> +            gen_op_mov_reg_v(ot, rm, s->T0);
>          } else {
>              gen_lea_modrm(env, s, modrm);
>              if (s->prefix & PREFIX_LOCK) {
> -                tcg_gen_atomic_fetch_add_tl(cpu_T1, s->A0, cpu_T0,
> +                tcg_gen_atomic_fetch_add_tl(cpu_T1, s->A0, s->T0,
>                                              s->mem_index, ot | MO_LE);
> -                tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
> +                tcg_gen_add_tl(s->T0, s->T0, cpu_T1);
>              } else {
>                  gen_op_ld_v(s, ot, cpu_T1, s->A0);
> -                tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
> -                gen_op_st_v(s, ot, cpu_T0, s->A0);
> +                tcg_gen_add_tl(s->T0, s->T0, cpu_T1);
> +                gen_op_st_v(s, ot, s->T0, s->A0);
>              }
>              gen_op_mov_reg_v(ot, reg, cpu_T1);
>          }
> -        gen_op_update2_cc();
> +        gen_op_update2_cc(s);
>          set_cc_op(s, CC_OP_ADDB + ot);
>          break;
>      case 0x1b0:
> @@ -5328,14 +5342,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          /**************************/
>          /* push/pop */
>      case 0x50 ... 0x57: /* push */
> -        gen_op_mov_v_reg(MO_32, cpu_T0, (b & 7) | REX_B(s));
> -        gen_push_v(s, cpu_T0);
> +        gen_op_mov_v_reg(MO_32, s->T0, (b & 7) | REX_B(s));
> +        gen_push_v(s, s->T0);
>          break;
>      case 0x58 ... 0x5f: /* pop */
>          ot = gen_pop_T0(s);
>          /* NOTE: order is important for pop %sp */
>          gen_pop_update(s, ot);
> -        gen_op_mov_reg_v(ot, (b & 7) | REX_B(s), cpu_T0);
> +        gen_op_mov_reg_v(ot, (b & 7) | REX_B(s), s->T0);
>          break;
>      case 0x60: /* pusha */
>          if (CODE64(s))
> @@ -5354,8 +5368,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              val = insn_get(env, s, ot);
>          else
>              val = (int8_t)insn_get(env, s, MO_8);
> -        tcg_gen_movi_tl(cpu_T0, val);
> -        gen_push_v(s, cpu_T0);
> +        tcg_gen_movi_tl(s->T0, val);
> +        gen_push_v(s, s->T0);
>          break;
>      case 0x8f: /* pop Ev */
>          modrm = x86_ldub_code(env, s);
> @@ -5365,7 +5379,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              /* NOTE: order is important for pop %sp */
>              gen_pop_update(s, ot);
>              rm = (modrm & 7) | REX_B(s);
> -            gen_op_mov_reg_v(ot, rm, cpu_T0);
> +            gen_op_mov_reg_v(ot, rm, s->T0);
>          } else {
>              /* NOTE: order is important too for MMU exceptions */
>              s->popl_esp_hack = 1 << ot;
> @@ -5391,13 +5405,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>      case 0x1e: /* push ds */
>          if (CODE64(s))
>              goto illegal_op;
> -        gen_op_movl_T0_seg(b >> 3);
> -        gen_push_v(s, cpu_T0);
> +        gen_op_movl_T0_seg(s, b >> 3);
> +        gen_push_v(s, s->T0);
>          break;
>      case 0x1a0: /* push fs */
>      case 0x1a8: /* push gs */
> -        gen_op_movl_T0_seg((b >> 3) & 7);
> -        gen_push_v(s, cpu_T0);
> +        gen_op_movl_T0_seg(s, (b >> 3) & 7);
> +        gen_push_v(s, s->T0);
>          break;
>      case 0x07: /* pop es */
>      case 0x17: /* pop ss */
> @@ -5451,11 +5465,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              gen_lea_modrm(env, s, modrm);
>          }
>          val = insn_get(env, s, ot);
> -        tcg_gen_movi_tl(cpu_T0, val);
> +        tcg_gen_movi_tl(s->T0, val);
>          if (mod != 3) {
> -            gen_op_st_v(s, ot, cpu_T0, s->A0);
> +            gen_op_st_v(s, ot, s->T0, s->A0);
>          } else {
> -            gen_op_mov_reg_v(ot, (modrm & 7) | REX_B(s), cpu_T0);
> +            gen_op_mov_reg_v(ot, (modrm & 7) | REX_B(s), s->T0);
>          }
>          break;
>      case 0x8a:
> @@ -5465,7 +5479,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          reg = ((modrm >> 3) & 7) | rex_r;
>
>          gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> -        gen_op_mov_reg_v(ot, reg, cpu_T0);
> +        gen_op_mov_reg_v(ot, reg, s->T0);
>          break;
>      case 0x8e: /* mov seg, Gv */
>          modrm = x86_ldub_code(env, s);
> @@ -5491,7 +5505,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          mod = (modrm >> 6) & 3;
>          if (reg >= 6)
>              goto illegal_op;
> -        gen_op_movl_T0_seg(reg);
> +        gen_op_movl_T0_seg(s, reg);
>          ot = mod == 3 ? dflag : MO_16;
>          gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
>          break;
> @@ -5518,30 +5532,30 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>
>              if (mod == 3) {
>                  if (s_ot == MO_SB && byte_reg_is_xH(rm)) {
> -                    tcg_gen_sextract_tl(cpu_T0, cpu_regs[rm - 4], 8, 8);
> +                    tcg_gen_sextract_tl(s->T0, cpu_regs[rm - 4], 8, 8);
>                  } else {
> -                    gen_op_mov_v_reg(ot, cpu_T0, rm);
> +                    gen_op_mov_v_reg(ot, s->T0, rm);
>                      switch (s_ot) {
>                      case MO_UB:
> -                        tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
> +                        tcg_gen_ext8u_tl(s->T0, s->T0);
>                          break;
>                      case MO_SB:
> -                        tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
> +                        tcg_gen_ext8s_tl(s->T0, s->T0);
>                          break;
>                      case MO_UW:
> -                        tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
> +                        tcg_gen_ext16u_tl(s->T0, s->T0);
>                          break;
>                      default:
>                      case MO_SW:
> -                        tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
> +                        tcg_gen_ext16s_tl(s->T0, s->T0);
>                          break;
>                      }
>                  }
> -                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
> +                gen_op_mov_reg_v(d_ot, reg, s->T0);
>              } else {
>                  gen_lea_modrm(env, s, modrm);
> -                gen_op_ld_v(s, s_ot, cpu_T0, s->A0);
> -                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
> +                gen_op_ld_v(s, s_ot, s->T0, s->A0);
> +                gen_op_mov_reg_v(d_ot, reg, s->T0);
>              }
>          }
>          break;
> @@ -5581,27 +5595,27 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              tcg_gen_movi_tl(s->A0, offset_addr);
>              gen_add_A0_ds_seg(s);
>              if ((b & 2) == 0) {
> -                gen_op_ld_v(s, ot, cpu_T0, s->A0);
> -                gen_op_mov_reg_v(ot, R_EAX, cpu_T0);
> +                gen_op_ld_v(s, ot, s->T0, s->A0);
> +                gen_op_mov_reg_v(ot, R_EAX, s->T0);
>              } else {
> -                gen_op_mov_v_reg(ot, cpu_T0, R_EAX);
> -                gen_op_st_v(s, ot, cpu_T0, s->A0);
> +                gen_op_mov_v_reg(ot, s->T0, R_EAX);
> +                gen_op_st_v(s, ot, s->T0, s->A0);
>              }
>          }
>          break;
>      case 0xd7: /* xlat */
>          tcg_gen_mov_tl(s->A0, cpu_regs[R_EBX]);
> -        tcg_gen_ext8u_tl(cpu_T0, cpu_regs[R_EAX]);
> -        tcg_gen_add_tl(s->A0, s->A0, cpu_T0);
> +        tcg_gen_ext8u_tl(s->T0, cpu_regs[R_EAX]);
> +        tcg_gen_add_tl(s->A0, s->A0, s->T0);
>          gen_extu(s->aflag, s->A0);
>          gen_add_A0_ds_seg(s);
> -        gen_op_ld_v(s, MO_8, cpu_T0, s->A0);
> -        gen_op_mov_reg_v(MO_8, R_EAX, cpu_T0);
> +        gen_op_ld_v(s, MO_8, s->T0, s->A0);
> +        gen_op_mov_reg_v(MO_8, R_EAX, s->T0);
>          break;
>      case 0xb0 ... 0xb7: /* mov R, Ib */
>          val = insn_get(env, s, MO_8);
> -        tcg_gen_movi_tl(cpu_T0, val);
> -        gen_op_mov_reg_v(MO_8, (b & 7) | REX_B(s), cpu_T0);
> +        tcg_gen_movi_tl(s->T0, val);
> +        gen_op_mov_reg_v(MO_8, (b & 7) | REX_B(s), s->T0);
>          break;
>      case 0xb8 ... 0xbf: /* mov R, Iv */
>  #ifdef TARGET_X86_64
> @@ -5610,16 +5624,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              /* 64 bit case */
>              tmp = x86_ldq_code(env, s);
>              reg = (b & 7) | REX_B(s);
> -            tcg_gen_movi_tl(cpu_T0, tmp);
> -            gen_op_mov_reg_v(MO_64, reg, cpu_T0);
> +            tcg_gen_movi_tl(s->T0, tmp);
> +            gen_op_mov_reg_v(MO_64, reg, s->T0);
>          } else
>  #endif
>          {
>              ot = dflag;
>              val = insn_get(env, s, ot);
>              reg = (b & 7) | REX_B(s);
> -            tcg_gen_movi_tl(cpu_T0, val);
> -            gen_op_mov_reg_v(ot, reg, cpu_T0);
> +            tcg_gen_movi_tl(s->T0, val);
> +            gen_op_mov_reg_v(ot, reg, s->T0);
>          }
>          break;
>
> @@ -5638,15 +5652,15 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          if (mod == 3) {
>              rm = (modrm & 7) | REX_B(s);
>          do_xchg_reg:
> -            gen_op_mov_v_reg(ot, cpu_T0, reg);
> +            gen_op_mov_v_reg(ot, s->T0, reg);
>              gen_op_mov_v_reg(ot, cpu_T1, rm);
> -            gen_op_mov_reg_v(ot, rm, cpu_T0);
> +            gen_op_mov_reg_v(ot, rm, s->T0);
>              gen_op_mov_reg_v(ot, reg, cpu_T1);
>          } else {
>              gen_lea_modrm(env, s, modrm);
> -            gen_op_mov_v_reg(ot, cpu_T0, reg);
> +            gen_op_mov_v_reg(ot, s->T0, reg);
>              /* for xchg, lock is implicit */
> -            tcg_gen_atomic_xchg_tl(cpu_T1, s->A0, cpu_T0,
> +            tcg_gen_atomic_xchg_tl(cpu_T1, s->A0, s->T0,
>                                     s->mem_index, ot | MO_LE);
>              gen_op_mov_reg_v(ot, reg, cpu_T1);
>          }
> @@ -5678,7 +5692,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          gen_op_ld_v(s, ot, cpu_T1, s->A0);
>          gen_add_A0_im(s, 1 << ot);
>          /* load the segment first to handle exceptions properly */
> -        gen_op_ld_v(s, MO_16, cpu_T0, s->A0);
> +        gen_op_ld_v(s, MO_16, s->T0, s->A0);
>          gen_movl_seg_T0(s, op);
>          /* then put the data */
>          gen_op_mov_reg_v(ot, reg, cpu_T1);
> @@ -6220,8 +6234,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  switch(rm) {
>                  case 0:
>                      gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
> -                    tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
> -                    gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
> +                    tcg_gen_extu_i32_tl(s->T0, cpu_tmp2_i32);
> +                    gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
>                      break;
>                  default:
>                      goto unknown_op;
> @@ -6331,7 +6345,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>      case 0x6c: /* insS */
>      case 0x6d:
>          ot = mo_b_d32(b, dflag);
> -        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
> +        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
>          gen_check_io(s, ot, pc_start - s->cs_base,
>                       SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4);
>          if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
> @@ -6346,7 +6360,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>      case 0x6e: /* outsS */
>      case 0x6f:
>          ot = mo_b_d32(b, dflag);
> -        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
> +        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
>          gen_check_io(s, ot, pc_start - s->cs_base,
>                       svm_is_rep(prefixes) | 4);
>          if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
> @@ -6366,7 +6380,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>      case 0xe5:
>          ot = mo_b_d32(b, dflag);
>          val = x86_ldub_code(env, s);
> -        tcg_gen_movi_tl(cpu_T0, val);
> +        tcg_gen_movi_tl(s->T0, val);
>          gen_check_io(s, ot, pc_start - s->cs_base,
>                       SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
>          if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
> @@ -6385,7 +6399,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>      case 0xe7:
>          ot = mo_b_d32(b, dflag);
>          val = x86_ldub_code(env, s);
> -        tcg_gen_movi_tl(cpu_T0, val);
> +        tcg_gen_movi_tl(s->T0, val);
>          gen_check_io(s, ot, pc_start - s->cs_base,
>                       svm_is_rep(prefixes));
>          gen_op_mov_v_reg(ot, cpu_T1, R_EAX);
> @@ -6405,13 +6419,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>      case 0xec:
>      case 0xed:
>          ot = mo_b_d32(b, dflag);
> -        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
> +        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
>          gen_check_io(s, ot, pc_start - s->cs_base,
>                       SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
>          if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
>              gen_io_start();
>  	}
> -        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>          gen_helper_in_func(ot, cpu_T1, cpu_tmp2_i32);
>          gen_op_mov_reg_v(ot, R_EAX, cpu_T1);
>          gen_bpt_io(s, cpu_tmp2_i32, ot);
> @@ -6423,7 +6437,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>      case 0xee:
>      case 0xef:
>          ot = mo_b_d32(b, dflag);
> -        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
> +        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
>          gen_check_io(s, ot, pc_start - s->cs_base,
>                       svm_is_rep(prefixes));
>          gen_op_mov_v_reg(ot, cpu_T1, R_EAX);
> @@ -6431,7 +6445,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
>              gen_io_start();
>  	}
> -        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>          tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
>          gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
>          gen_bpt_io(s, cpu_tmp2_i32, ot);
> @@ -6448,17 +6462,17 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          ot = gen_pop_T0(s);
>          gen_stack_update(s, val + (1 << ot));
>          /* Note that gen_pop_T0 uses a zero-extending load.  */
> -        gen_op_jmp_v(cpu_T0);
> +        gen_op_jmp_v(s->T0);
>          gen_bnd_jmp(s);
> -        gen_jr(s, cpu_T0);
> +        gen_jr(s, s->T0);
>          break;
>      case 0xc3: /* ret */
>          ot = gen_pop_T0(s);
>          gen_pop_update(s, ot);
>          /* Note that gen_pop_T0 uses a zero-extending load.  */
> -        gen_op_jmp_v(cpu_T0);
> +        gen_op_jmp_v(s->T0);
>          gen_bnd_jmp(s);
> -        gen_jr(s, cpu_T0);
> +        gen_jr(s, s->T0);
>          break;
>      case 0xca: /* lret im */
>          val = x86_ldsw_code(env, s);
> @@ -6471,14 +6485,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          } else {
>              gen_stack_A0(s);
>              /* pop offset */
> -            gen_op_ld_v(s, dflag, cpu_T0, s->A0);
> +            gen_op_ld_v(s, dflag, s->T0, s->A0);
>              /* NOTE: keeping EIP updated is not a problem in case of
>                 exception */
> -            gen_op_jmp_v(cpu_T0);
> +            gen_op_jmp_v(s->T0);
>              /* pop selector */
>              gen_add_A0_im(s, 1 << dflag);
> -            gen_op_ld_v(s, dflag, cpu_T0, s->A0);
> -            gen_op_movl_seg_T0_vm(R_CS);
> +            gen_op_ld_v(s, dflag, s->T0, s->A0);
> +            gen_op_movl_seg_T0_vm(s, R_CS);
>              /* add stack offset */
>              gen_stack_update(s, val + (2 << dflag));
>          }
> @@ -6521,8 +6535,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              } else if (!CODE64(s)) {
>                  tval &= 0xffffffff;
>              }
> -            tcg_gen_movi_tl(cpu_T0, next_eip);
> -            gen_push_v(s, cpu_T0);
> +            tcg_gen_movi_tl(s->T0, next_eip);
> +            gen_push_v(s, s->T0);
>              gen_bnd_jmp(s);
>              gen_jmp(s, tval);
>          }
> @@ -6537,7 +6551,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              offset = insn_get(env, s, ot);
>              selector = insn_get(env, s, MO_16);
>
> -            tcg_gen_movi_tl(cpu_T0, selector);
> +            tcg_gen_movi_tl(s->T0, selector);
>              tcg_gen_movi_tl(cpu_T1, offset);
>          }
>          goto do_lcall;
> @@ -6566,7 +6580,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              offset = insn_get(env, s, ot);
>              selector = insn_get(env, s, MO_16);
>
> -            tcg_gen_movi_tl(cpu_T0, selector);
> +            tcg_gen_movi_tl(s->T0, selector);
>              tcg_gen_movi_tl(cpu_T1, offset);
>          }
>          goto do_ljmp;
> @@ -6599,7 +6613,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>
>      case 0x190 ... 0x19f: /* setcc Gv */
>          modrm = x86_ldub_code(env, s);
> -        gen_setcc1(s, b, cpu_T0);
> +        gen_setcc1(s, b, s->T0);
>          gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
>          break;
>      case 0x140 ... 0x14f: /* cmov Gv, Ev */
> @@ -6620,8 +6634,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
>          } else {
>              gen_update_cc_op(s);
> -            gen_helper_read_eflags(cpu_T0, cpu_env);
> -            gen_push_v(s, cpu_T0);
> +            gen_helper_read_eflags(s->T0, cpu_env);
> +            gen_push_v(s, s->T0);
>          }
>          break;
>      case 0x9d: /* popf */
> @@ -6632,13 +6646,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              ot = gen_pop_T0(s);
>              if (s->cpl == 0) {
>                  if (dflag != MO_16) {
> -                    gen_helper_write_eflags(cpu_env, cpu_T0,
> +                    gen_helper_write_eflags(cpu_env, s->T0,
>                                              tcg_const_i32((TF_MASK | AC_MASK |
>                                                             ID_MASK | NT_MASK |
>                                                             IF_MASK |
>                                                             IOPL_MASK)));
>                  } else {
> -                    gen_helper_write_eflags(cpu_env, cpu_T0,
> +                    gen_helper_write_eflags(cpu_env, s->T0,
>                                              tcg_const_i32((TF_MASK | AC_MASK |
>                                                             ID_MASK | NT_MASK |
>                                                             IF_MASK | IOPL_MASK)
> @@ -6647,14 +6661,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              } else {
>                  if (s->cpl <= s->iopl) {
>                      if (dflag != MO_16) {
> -                        gen_helper_write_eflags(cpu_env, cpu_T0,
> +                        gen_helper_write_eflags(cpu_env, s->T0,
>                                                  tcg_const_i32((TF_MASK |
>                                                                 AC_MASK |
>                                                                 ID_MASK |
>                                                                 NT_MASK |
>                                                                 IF_MASK)));
>                      } else {
> -                        gen_helper_write_eflags(cpu_env, cpu_T0,
> +                        gen_helper_write_eflags(cpu_env, s->T0,
>                                                  tcg_const_i32((TF_MASK |
>                                                                 AC_MASK |
>                                                                 ID_MASK |
> @@ -6664,11 +6678,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                      }
>                  } else {
>                      if (dflag != MO_16) {
> -                        gen_helper_write_eflags(cpu_env, cpu_T0,
> +                        gen_helper_write_eflags(cpu_env, s->T0,
>                                             tcg_const_i32((TF_MASK | AC_MASK |
>                                                            ID_MASK | NT_MASK)));
>                      } else {
> -                        gen_helper_write_eflags(cpu_env, cpu_T0,
> +                        gen_helper_write_eflags(cpu_env, s->T0,
>                                             tcg_const_i32((TF_MASK | AC_MASK |
>                                                            ID_MASK | NT_MASK)
>                                                           & 0xffff));
> @@ -6685,19 +6699,19 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>      case 0x9e: /* sahf */
>          if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
>              goto illegal_op;
> -        gen_op_mov_v_reg(MO_8, cpu_T0, R_AH);
> +        gen_op_mov_v_reg(MO_8, s->T0, R_AH);
>          gen_compute_eflags(s);
>          tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
> -        tcg_gen_andi_tl(cpu_T0, cpu_T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
> -        tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T0);
> +        tcg_gen_andi_tl(s->T0, s->T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
> +        tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, s->T0);
>          break;
>      case 0x9f: /* lahf */
>          if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
>              goto illegal_op;
>          gen_compute_eflags(s);
>          /* Note: gen_compute_eflags() only gives the condition codes */
> -        tcg_gen_ori_tl(cpu_T0, cpu_cc_src, 0x02);
> -        gen_op_mov_reg_v(MO_8, R_AH, cpu_T0);
> +        tcg_gen_ori_tl(s->T0, cpu_cc_src, 0x02);
> +        gen_op_mov_reg_v(MO_8, R_AH, s->T0);
>          break;
>      case 0xf5: /* cmc */
>          gen_compute_eflags(s);
> @@ -6732,10 +6746,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              s->rip_offset = 1;
>              gen_lea_modrm(env, s, modrm);
>              if (!(s->prefix & PREFIX_LOCK)) {
> -                gen_op_ld_v(s, ot, cpu_T0, s->A0);
> +                gen_op_ld_v(s, ot, s->T0, s->A0);
>              }
>          } else {
> -            gen_op_mov_v_reg(ot, cpu_T0, rm);
> +            gen_op_mov_v_reg(ot, s->T0, rm);
>          }
>          /* load shift */
>          val = x86_ldub_code(env, s);
> @@ -6771,10 +6785,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), cpu_tmp0);
>              gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
>              if (!(s->prefix & PREFIX_LOCK)) {
> -                gen_op_ld_v(s, ot, cpu_T0, s->A0);
> +                gen_op_ld_v(s, ot, s->T0, s->A0);
>              }
>          } else {
> -            gen_op_mov_v_reg(ot, cpu_T0, rm);
> +            gen_op_mov_v_reg(ot, s->T0, rm);
>          }
>      bt_op:
>          tcg_gen_andi_tl(cpu_T1, cpu_T1, (1 << (3 + ot)) - 1);
> @@ -6785,46 +6799,46 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              case 0: /* bt */
>                  /* Needs no atomic ops; we surpressed the normal
>                     memory load for LOCK above so do it now.  */
> -                gen_op_ld_v(s, ot, cpu_T0, s->A0);
> +                gen_op_ld_v(s, ot, s->T0, s->A0);
>                  break;
>              case 1: /* bts */
> -                tcg_gen_atomic_fetch_or_tl(cpu_T0, s->A0, cpu_tmp0,
> +                tcg_gen_atomic_fetch_or_tl(s->T0, s->A0, cpu_tmp0,
>                                             s->mem_index, ot | MO_LE);
>                  break;
>              case 2: /* btr */
>                  tcg_gen_not_tl(cpu_tmp0, cpu_tmp0);
> -                tcg_gen_atomic_fetch_and_tl(cpu_T0, s->A0, cpu_tmp0,
> +                tcg_gen_atomic_fetch_and_tl(s->T0, s->A0, cpu_tmp0,
>                                              s->mem_index, ot | MO_LE);
>                  break;
>              default:
>              case 3: /* btc */
> -                tcg_gen_atomic_fetch_xor_tl(cpu_T0, s->A0, cpu_tmp0,
> +                tcg_gen_atomic_fetch_xor_tl(s->T0, s->A0, cpu_tmp0,
>                                              s->mem_index, ot | MO_LE);
>                  break;
>              }
> -            tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1);
> +            tcg_gen_shr_tl(cpu_tmp4, s->T0, cpu_T1);
>          } else {
> -            tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1);
> +            tcg_gen_shr_tl(cpu_tmp4, s->T0, cpu_T1);
>              switch (op) {
>              case 0: /* bt */
>                  /* Data already loaded; nothing to do.  */
>                  break;
>              case 1: /* bts */
> -                tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0);
> +                tcg_gen_or_tl(s->T0, s->T0, cpu_tmp0);
>                  break;
>              case 2: /* btr */
> -                tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_tmp0);
> +                tcg_gen_andc_tl(s->T0, s->T0, cpu_tmp0);
>                  break;
>              default:
>              case 3: /* btc */
> -                tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_tmp0);
> +                tcg_gen_xor_tl(s->T0, s->T0, cpu_tmp0);
>                  break;
>              }
>              if (op != 0) {
>                  if (mod != 3) {
> -                    gen_op_st_v(s, ot, cpu_T0, s->A0);
> +                    gen_op_st_v(s, ot, s->T0, s->A0);
>                  } else {
> -                    gen_op_mov_reg_v(ot, rm, cpu_T0);
> +                    gen_op_mov_reg_v(ot, rm, s->T0);
>                  }
>              }
>          }
> @@ -6865,7 +6879,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          modrm = x86_ldub_code(env, s);
>          reg = ((modrm >> 3) & 7) | rex_r;
>          gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> -        gen_extu(ot, cpu_T0);
> +        gen_extu(ot, s->T0);
>
>          /* Note that lzcnt and tzcnt are in different extensions.  */
>          if ((prefixes & PREFIX_REPZ)
> @@ -6874,23 +6888,23 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
>              int size = 8 << ot;
>              /* For lzcnt/tzcnt, C bit is defined related to the input. */
> -            tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
> +            tcg_gen_mov_tl(cpu_cc_src, s->T0);
>              if (b & 1) {
>                  /* For lzcnt, reduce the target_ulong result by the
>                     number of zeros that we expect to find at the top.  */
> -                tcg_gen_clzi_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS);
> -                tcg_gen_subi_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS - size);
> +                tcg_gen_clzi_tl(s->T0, s->T0, TARGET_LONG_BITS);
> +                tcg_gen_subi_tl(s->T0, s->T0, TARGET_LONG_BITS - size);
>              } else {
>                  /* For tzcnt, a zero input must return the operand size.  */
> -                tcg_gen_ctzi_tl(cpu_T0, cpu_T0, size);
> +                tcg_gen_ctzi_tl(s->T0, s->T0, size);
>              }
>              /* For lzcnt/tzcnt, Z bit is defined related to the result.  */
> -            gen_op_update1_cc();
> +            gen_op_update1_cc(s);
>              set_cc_op(s, CC_OP_BMILGB + ot);
>          } else {
>              /* For bsr/bsf, only the Z bit is defined and it is related
>                 to the input and not the result.  */
> -            tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
> +            tcg_gen_mov_tl(cpu_cc_dst, s->T0);
>              set_cc_op(s, CC_OP_LOGICB + ot);
>
>              /* ??? The manual says that the output is undefined when the
> @@ -6901,13 +6915,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  /* For bsr, return the bit index of the first 1 bit,
>                     not the count of leading zeros.  */
>                  tcg_gen_xori_tl(cpu_T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
> -                tcg_gen_clz_tl(cpu_T0, cpu_T0, cpu_T1);
> -                tcg_gen_xori_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS - 1);
> +                tcg_gen_clz_tl(s->T0, s->T0, cpu_T1);
> +                tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1);
>              } else {
> -                tcg_gen_ctz_tl(cpu_T0, cpu_T0, cpu_regs[reg]);
> +                tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]);
>              }
>          }
> -        gen_op_mov_reg_v(ot, reg, cpu_T0);
> +        gen_op_mov_reg_v(ot, reg, s->T0);
>          break;
>          /************************/
>          /* bcd */
> @@ -7047,9 +7061,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          mod = (modrm >> 6) & 3;
>          if (mod == 3)
>              goto illegal_op;
> -        gen_op_mov_v_reg(ot, cpu_T0, reg);
> +        gen_op_mov_v_reg(ot, s->T0, reg);
>          gen_lea_modrm(env, s, modrm);
> -        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>          if (ot == MO_16) {
>              gen_helper_boundw(cpu_env, s->A0, cpu_tmp2_i32);
>          } else {
> @@ -7060,24 +7074,24 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          reg = (b & 7) | REX_B(s);
>  #ifdef TARGET_X86_64
>          if (dflag == MO_64) {
> -            gen_op_mov_v_reg(MO_64, cpu_T0, reg);
> -            tcg_gen_bswap64_i64(cpu_T0, cpu_T0);
> -            gen_op_mov_reg_v(MO_64, reg, cpu_T0);
> +            gen_op_mov_v_reg(MO_64, s->T0, reg);
> +            tcg_gen_bswap64_i64(s->T0, s->T0);
> +            gen_op_mov_reg_v(MO_64, reg, s->T0);
>          } else
>  #endif
>          {
> -            gen_op_mov_v_reg(MO_32, cpu_T0, reg);
> -            tcg_gen_ext32u_tl(cpu_T0, cpu_T0);
> -            tcg_gen_bswap32_tl(cpu_T0, cpu_T0);
> -            gen_op_mov_reg_v(MO_32, reg, cpu_T0);
> +            gen_op_mov_v_reg(MO_32, s->T0, reg);
> +            tcg_gen_ext32u_tl(s->T0, s->T0);
> +            tcg_gen_bswap32_tl(s->T0, s->T0);
> +            gen_op_mov_reg_v(MO_32, reg, s->T0);
>          }
>          break;
>      case 0xd6: /* salc */
>          if (CODE64(s))
>              goto illegal_op;
> -        gen_compute_eflags_c(s, cpu_T0);
> -        tcg_gen_neg_tl(cpu_T0, cpu_T0);
> -        gen_op_mov_reg_v(MO_8, R_EAX, cpu_T0);
> +        gen_compute_eflags_c(s, s->T0);
> +        tcg_gen_neg_tl(s->T0, s->T0);
> +        gen_op_mov_reg_v(MO_8, R_EAX, s->T0);
>          break;
>      case 0xe0: /* loopnz */
>      case 0xe1: /* loopz */
> @@ -7229,7 +7243,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              if (!s->pe || s->vm86)
>                  goto illegal_op;
>              gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
> -            tcg_gen_ld32u_tl(cpu_T0, cpu_env,
> +            tcg_gen_ld32u_tl(s->T0, cpu_env,
>                               offsetof(CPUX86State, ldt.selector));
>              ot = mod == 3 ? dflag : MO_16;
>              gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
> @@ -7242,7 +7256,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              } else {
>                  gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
>                  gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
> -                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>                  gen_helper_lldt(cpu_env, cpu_tmp2_i32);
>              }
>              break;
> @@ -7250,7 +7264,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              if (!s->pe || s->vm86)
>                  goto illegal_op;
>              gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
> -            tcg_gen_ld32u_tl(cpu_T0, cpu_env,
> +            tcg_gen_ld32u_tl(s->T0, cpu_env,
>                               offsetof(CPUX86State, tr.selector));
>              ot = mod == 3 ? dflag : MO_16;
>              gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
> @@ -7263,7 +7277,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              } else {
>                  gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
>                  gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
> -                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>                  gen_helper_ltr(cpu_env, cpu_tmp2_i32);
>              }
>              break;
> @@ -7274,9 +7288,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
>              gen_update_cc_op(s);
>              if (op == 4) {
> -                gen_helper_verr(cpu_env, cpu_T0);
> +                gen_helper_verr(cpu_env, s->T0);
>              } else {
> -                gen_helper_verw(cpu_env, cpu_T0);
> +                gen_helper_verw(cpu_env, s->T0);
>              }
>              set_cc_op(s, CC_OP_EFLAGS);
>              break;
> @@ -7291,15 +7305,15 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          CASE_MODRM_MEM_OP(0): /* sgdt */
>              gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
>              gen_lea_modrm(env, s, modrm);
> -            tcg_gen_ld32u_tl(cpu_T0,
> +            tcg_gen_ld32u_tl(s->T0,
>                               cpu_env, offsetof(CPUX86State, gdt.limit));
> -            gen_op_st_v(s, MO_16, cpu_T0, s->A0);
> +            gen_op_st_v(s, MO_16, s->T0, s->A0);
>              gen_add_A0_im(s, 2);
> -            tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, gdt.base));
> +            tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
>              if (dflag == MO_16) {
> -                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
> +                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
>              }
> -            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, s->A0);
> +            gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
>              break;
>
>          case 0xc8: /* monitor */
> @@ -7347,14 +7361,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          CASE_MODRM_MEM_OP(1): /* sidt */
>              gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
>              gen_lea_modrm(env, s, modrm);
> -            tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.limit));
> -            gen_op_st_v(s, MO_16, cpu_T0, s->A0);
> +            tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.limit));
> +            gen_op_st_v(s, MO_16, s->T0, s->A0);
>              gen_add_A0_im(s, 2);
> -            tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.base));
> +            tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
>              if (dflag == MO_16) {
> -                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
> +                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
>              }
> -            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, s->A0);
> +            gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
>              break;
>
>          case 0xd0: /* xgetbv */
> @@ -7500,11 +7514,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              gen_lea_modrm(env, s, modrm);
>              gen_op_ld_v(s, MO_16, cpu_T1, s->A0);
>              gen_add_A0_im(s, 2);
> -            gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, s->A0);
> +            gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
>              if (dflag == MO_16) {
> -                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
> +                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
>              }
> -            tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State, gdt.base));
> +            tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
>              tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, gdt.limit));
>              break;
>
> @@ -7517,17 +7531,17 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              gen_lea_modrm(env, s, modrm);
>              gen_op_ld_v(s, MO_16, cpu_T1, s->A0);
>              gen_add_A0_im(s, 2);
> -            gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, s->A0);
> +            gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
>              if (dflag == MO_16) {
> -                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
> +                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
>              }
> -            tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.base));
> +            tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
>              tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, idt.limit));
>              break;
>
>          CASE_MODRM_OP(4): /* smsw */
>              gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0);
> -            tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, cr[0]));
> +            tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, cr[0]));
>              if (CODE64(s)) {
>                  mod = (modrm >> 6) & 3;
>                  ot = (mod != 3 ? MO_16 : s->dflag);
> @@ -7560,7 +7574,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              }
>              gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
>              gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
> -            gen_helper_lmsw(cpu_env, cpu_T0);
> +            gen_helper_lmsw(cpu_env, s->T0);
>              gen_jmp_im(s->pc - s->cs_base);
>              gen_eob(s);
>              break;
> @@ -7584,10 +7598,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  if (s->cpl != 0) {
>                      gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
>                  } else {
> -                    tcg_gen_mov_tl(cpu_T0, cpu_seg_base[R_GS]);
> +                    tcg_gen_mov_tl(s->T0, cpu_seg_base[R_GS]);
>                      tcg_gen_ld_tl(cpu_seg_base[R_GS], cpu_env,
>                                    offsetof(CPUX86State, kernelgsbase));
> -                    tcg_gen_st_tl(cpu_T0, cpu_env,
> +                    tcg_gen_st_tl(s->T0, cpu_env,
>                                    offsetof(CPUX86State, kernelgsbase));
>                  }
>                  break;
> @@ -7638,16 +7652,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              rm = (modrm & 7) | REX_B(s);
>
>              if (mod == 3) {
> -                gen_op_mov_v_reg(MO_32, cpu_T0, rm);
> +                gen_op_mov_v_reg(MO_32, s->T0, rm);
>                  /* sign extend */
>                  if (d_ot == MO_64) {
> -                    tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
> +                    tcg_gen_ext32s_tl(s->T0, s->T0);
>                  }
> -                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
> +                gen_op_mov_reg_v(d_ot, reg, s->T0);
>              } else {
>                  gen_lea_modrm(env, s, modrm);
> -                gen_op_ld_v(s, MO_32 | MO_SIGN, cpu_T0, s->A0);
> -                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
> +                gen_op_ld_v(s, MO_32 | MO_SIGN, s->T0, s->A0);
> +                gen_op_mov_reg_v(d_ot, reg, s->T0);
>              }
>          } else
>  #endif
> @@ -7712,9 +7726,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              t0 = tcg_temp_local_new();
>              gen_update_cc_op(s);
>              if (b == 0x102) {
> -                gen_helper_lar(t0, cpu_env, cpu_T0);
> +                gen_helper_lar(t0, cpu_env, s->T0);
>              } else {
> -                gen_helper_lsl(t0, cpu_env, cpu_T0);
> +                gen_helper_lsl(t0, cpu_env, s->T0);
>              }
>              tcg_gen_andi_tl(cpu_tmp0, cpu_cc_src, CC_Z);
>              label1 = gen_new_label();
> @@ -7816,16 +7830,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  }
>                  gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
>                  if (a.index >= 0) {
> -                    tcg_gen_mov_tl(cpu_T0, cpu_regs[a.index]);
> +                    tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
>                  } else {
> -                    tcg_gen_movi_tl(cpu_T0, 0);
> +                    tcg_gen_movi_tl(s->T0, 0);
>                  }
>                  if (CODE64(s)) {
> -                    gen_helper_bndldx64(cpu_bndl[reg], cpu_env, s->A0, cpu_T0);
> +                    gen_helper_bndldx64(cpu_bndl[reg], cpu_env, s->A0, s->T0);
>                      tcg_gen_ld_i64(cpu_bndu[reg], cpu_env,
>                                     offsetof(CPUX86State, mmx_t0.MMX_Q(0)));
>                  } else {
> -                    gen_helper_bndldx32(cpu_bndu[reg], cpu_env, s->A0, cpu_T0);
> +                    gen_helper_bndldx32(cpu_bndu[reg], cpu_env, s->A0, s->T0);
>                      tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndu[reg]);
>                      tcg_gen_shri_i64(cpu_bndu[reg], cpu_bndu[reg], 32);
>                  }
> @@ -7921,15 +7935,15 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  }
>                  gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
>                  if (a.index >= 0) {
> -                    tcg_gen_mov_tl(cpu_T0, cpu_regs[a.index]);
> +                    tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
>                  } else {
> -                    tcg_gen_movi_tl(cpu_T0, 0);
> +                    tcg_gen_movi_tl(s->T0, 0);
>                  }
>                  if (CODE64(s)) {
> -                    gen_helper_bndstx64(cpu_env, s->A0, cpu_T0,
> +                    gen_helper_bndstx64(cpu_env, s->A0, s->T0,
>                                          cpu_bndl[reg], cpu_bndu[reg]);
>                  } else {
> -                    gen_helper_bndstx32(cpu_env, s->A0, cpu_T0,
> +                    gen_helper_bndstx32(cpu_env, s->A0, s->T0,
>                                          cpu_bndl[reg], cpu_bndu[reg]);
>                  }
>              }
> @@ -7973,9 +7987,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                      if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
>                          gen_io_start();
>                      }
> -                    gen_op_mov_v_reg(ot, cpu_T0, rm);
> +                    gen_op_mov_v_reg(ot, s->T0, rm);
>                      gen_helper_write_crN(cpu_env, tcg_const_i32(reg),
> -                                         cpu_T0);
> +                                         s->T0);
>                      if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
>                          gen_io_end();
>                      }
> @@ -7985,8 +7999,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                      if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
>                          gen_io_start();
>                      }
> -                    gen_helper_read_crN(cpu_T0, cpu_env, tcg_const_i32(reg));
> -                    gen_op_mov_reg_v(ot, rm, cpu_T0);
> +                    gen_helper_read_crN(s->T0, cpu_env, tcg_const_i32(reg));
> +                    gen_op_mov_reg_v(ot, rm, s->T0);
>                      if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
>                          gen_io_end();
>                      }
> @@ -8019,16 +8033,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              }
>              if (b & 2) {
>                  gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg);
> -                gen_op_mov_v_reg(ot, cpu_T0, rm);
> +                gen_op_mov_v_reg(ot, s->T0, rm);
>                  tcg_gen_movi_i32(cpu_tmp2_i32, reg);
> -                gen_helper_set_dr(cpu_env, cpu_tmp2_i32, cpu_T0);
> +                gen_helper_set_dr(cpu_env, cpu_tmp2_i32, s->T0);
>                  gen_jmp_im(s->pc - s->cs_base);
>                  gen_eob(s);
>              } else {
>                  gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
>                  tcg_gen_movi_i32(cpu_tmp2_i32, reg);
> -                gen_helper_get_dr(cpu_T0, cpu_env, cpu_tmp2_i32);
> -                gen_op_mov_reg_v(ot, rm, cpu_T0);
> +                gen_helper_get_dr(s->T0, cpu_env, cpu_tmp2_i32);
> +                gen_op_mov_reg_v(ot, rm, s->T0);
>              }
>          }
>          break;
> @@ -8107,8 +8121,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  break;
>              }
>              gen_lea_modrm(env, s, modrm);
> -            tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, mxcsr));
> -            gen_op_st_v(s, MO_32, cpu_T0, s->A0);
> +            tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, mxcsr));
> +            gen_op_st_v(s, MO_32, s->T0, s->A0);
>              break;
>
>          CASE_MODRM_MEM_OP(4): /* xsave */
> @@ -8287,10 +8301,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          }
>
>          gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> -        gen_extu(ot, cpu_T0);
> -        tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
> -        tcg_gen_ctpop_tl(cpu_T0, cpu_T0);
> -        gen_op_mov_reg_v(ot, reg, cpu_T0);
> +        gen_extu(ot, s->T0);
> +        tcg_gen_mov_tl(cpu_cc_src, s->T0);
> +        tcg_gen_ctpop_tl(s->T0, s->T0);
> +        gen_op_mov_reg_v(ot, reg, s->T0);
>
>          set_cc_op(s, CC_OP_POPCNT);
>          break;
> @@ -8456,7 +8470,7 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
>          printf("ERROR addseg\n");
>  #endif
>
> -    cpu_T0 = tcg_temp_new();
> +    dc->T0 = tcg_temp_new();
>      cpu_T1 = tcg_temp_new();
>      dc->A0 = tcg_temp_new();


--
Alex Bennée

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH v3 04/13] target/i386: move cpu_T1 to DisasContext
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 04/13] target/i386: move cpu_T1 " Emilio G. Cota
  2018-09-11 20:48   ` Richard Henderson
@ 2018-09-13 14:26   ` Alex Bennée
  1 sibling, 0 replies; 32+ messages in thread
From: Alex Bennée @ 2018-09-13 14:26 UTC (permalink / raw)
  To: Emilio G. Cota
  Cc: qemu-devel, Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost


Emilio G. Cota <cota@braap.org> writes:

> Signed-off-by: Emilio G. Cota <cota@braap.org>

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

> ---
>  target/i386/translate.c | 341 ++++++++++++++++++++--------------------
>  1 file changed, 170 insertions(+), 171 deletions(-)
>
> diff --git a/target/i386/translate.c b/target/i386/translate.c
> index 73fd7e5b9a..bd27e65344 100644
> --- a/target/i386/translate.c
> +++ b/target/i386/translate.c
> @@ -78,8 +78,6 @@ static TCGv cpu_regs[CPU_NB_REGS];
>  static TCGv cpu_seg_base[6];
>  static TCGv_i64 cpu_bndl[4];
>  static TCGv_i64 cpu_bndu[4];
> -/* local temps */
> -static TCGv cpu_T1;
>  /* local register indexes (only used inside old micro ops) */
>  static TCGv cpu_tmp0, cpu_tmp4;
>  static TCGv_ptr cpu_ptr0, cpu_ptr1;
> @@ -139,6 +137,7 @@ typedef struct DisasContext {
>      TCGv cc_srcT;
>      TCGv A0;
>      TCGv T0;
> +    TCGv T1;
>
>      sigjmp_buf jmpbuf;
>  } DisasContext;
> @@ -656,20 +655,20 @@ static void gen_op_update1_cc(DisasContext *s)
>
>  static void gen_op_update2_cc(DisasContext *s)
>  {
> -    tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
> +    tcg_gen_mov_tl(cpu_cc_src, s->T1);
>      tcg_gen_mov_tl(cpu_cc_dst, s->T0);
>  }
>
>  static void gen_op_update3_cc(DisasContext *s, TCGv reg)
>  {
>      tcg_gen_mov_tl(cpu_cc_src2, reg);
> -    tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
> +    tcg_gen_mov_tl(cpu_cc_src, s->T1);
>      tcg_gen_mov_tl(cpu_cc_dst, s->T0);
>  }
>
>  static inline void gen_op_testl_T0_T1_cc(DisasContext *s)
>  {
> -    tcg_gen_and_tl(cpu_cc_dst, s->T0, cpu_T1);
> +    tcg_gen_and_tl(cpu_cc_dst, s->T0, s->T1);
>  }
>
>  static void gen_op_update_neg_cc(DisasContext *s)
> @@ -1090,7 +1089,7 @@ static inline void gen_lods(DisasContext *s, TCGMemOp ot)
>  static inline void gen_scas(DisasContext *s, TCGMemOp ot)
>  {
>      gen_string_movl_A0_EDI(s);
> -    gen_op_ld_v(s, ot, cpu_T1, s->A0);
> +    gen_op_ld_v(s, ot, s->T1, s->A0);
>      gen_op(s, OP_CMPL, ot, R_EAX);
>      gen_op_movl_T0_Dshift(s, ot);
>      gen_op_add_reg_T0(s, s->aflag, R_EDI);
> @@ -1099,7 +1098,7 @@ static inline void gen_scas(DisasContext *s, TCGMemOp ot)
>  static inline void gen_cmps(DisasContext *s, TCGMemOp ot)
>  {
>      gen_string_movl_A0_EDI(s);
> -    gen_op_ld_v(s, ot, cpu_T1, s->A0);
> +    gen_op_ld_v(s, ot, s->T1, s->A0);
>      gen_string_movl_A0_ESI(s);
>      gen_op(s, OP_CMPL, ot, OR_TMP0);
>      gen_op_movl_T0_Dshift(s, ot);
> @@ -1274,11 +1273,11 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
>      case OP_ADCL:
>          gen_compute_eflags_c(s1, cpu_tmp4);
>          if (s1->prefix & PREFIX_LOCK) {
> -            tcg_gen_add_tl(s1->T0, cpu_tmp4, cpu_T1);
> +            tcg_gen_add_tl(s1->T0, cpu_tmp4, s1->T1);
>              tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
>                                          s1->mem_index, ot | MO_LE);
>          } else {
> -            tcg_gen_add_tl(s1->T0, s1->T0, cpu_T1);
> +            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
>              tcg_gen_add_tl(s1->T0, s1->T0, cpu_tmp4);
>              gen_op_st_rm_T0_A0(s1, ot, d);
>          }
> @@ -1288,12 +1287,12 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
>      case OP_SBBL:
>          gen_compute_eflags_c(s1, cpu_tmp4);
>          if (s1->prefix & PREFIX_LOCK) {
> -            tcg_gen_add_tl(s1->T0, cpu_T1, cpu_tmp4);
> +            tcg_gen_add_tl(s1->T0, s1->T1, cpu_tmp4);
>              tcg_gen_neg_tl(s1->T0, s1->T0);
>              tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
>                                          s1->mem_index, ot | MO_LE);
>          } else {
> -            tcg_gen_sub_tl(s1->T0, s1->T0, cpu_T1);
> +            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
>              tcg_gen_sub_tl(s1->T0, s1->T0, cpu_tmp4);
>              gen_op_st_rm_T0_A0(s1, ot, d);
>          }
> @@ -1302,10 +1301,10 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
>          break;
>      case OP_ADDL:
>          if (s1->prefix & PREFIX_LOCK) {
> -            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, cpu_T1,
> +            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T1,
>                                          s1->mem_index, ot | MO_LE);
>          } else {
> -            tcg_gen_add_tl(s1->T0, s1->T0, cpu_T1);
> +            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
>              gen_op_st_rm_T0_A0(s1, ot, d);
>          }
>          gen_op_update2_cc(s1);
> @@ -1313,13 +1312,13 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
>          break;
>      case OP_SUBL:
>          if (s1->prefix & PREFIX_LOCK) {
> -            tcg_gen_neg_tl(s1->T0, cpu_T1);
> +            tcg_gen_neg_tl(s1->T0, s1->T1);
>              tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, s1->T0,
>                                          s1->mem_index, ot | MO_LE);
> -            tcg_gen_sub_tl(s1->T0, s1->cc_srcT, cpu_T1);
> +            tcg_gen_sub_tl(s1->T0, s1->cc_srcT, s1->T1);
>          } else {
>              tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
> -            tcg_gen_sub_tl(s1->T0, s1->T0, cpu_T1);
> +            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
>              gen_op_st_rm_T0_A0(s1, ot, d);
>          }
>          gen_op_update2_cc(s1);
> @@ -1328,10 +1327,10 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
>      default:
>      case OP_ANDL:
>          if (s1->prefix & PREFIX_LOCK) {
> -            tcg_gen_atomic_and_fetch_tl(s1->T0, s1->A0, cpu_T1,
> +            tcg_gen_atomic_and_fetch_tl(s1->T0, s1->A0, s1->T1,
>                                          s1->mem_index, ot | MO_LE);
>          } else {
> -            tcg_gen_and_tl(s1->T0, s1->T0, cpu_T1);
> +            tcg_gen_and_tl(s1->T0, s1->T0, s1->T1);
>              gen_op_st_rm_T0_A0(s1, ot, d);
>          }
>          gen_op_update1_cc(s1);
> @@ -1339,10 +1338,10 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
>          break;
>      case OP_ORL:
>          if (s1->prefix & PREFIX_LOCK) {
> -            tcg_gen_atomic_or_fetch_tl(s1->T0, s1->A0, cpu_T1,
> +            tcg_gen_atomic_or_fetch_tl(s1->T0, s1->A0, s1->T1,
>                                         s1->mem_index, ot | MO_LE);
>          } else {
> -            tcg_gen_or_tl(s1->T0, s1->T0, cpu_T1);
> +            tcg_gen_or_tl(s1->T0, s1->T0, s1->T1);
>              gen_op_st_rm_T0_A0(s1, ot, d);
>          }
>          gen_op_update1_cc(s1);
> @@ -1350,19 +1349,19 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
>          break;
>      case OP_XORL:
>          if (s1->prefix & PREFIX_LOCK) {
> -            tcg_gen_atomic_xor_fetch_tl(s1->T0, s1->A0, cpu_T1,
> +            tcg_gen_atomic_xor_fetch_tl(s1->T0, s1->A0, s1->T1,
>                                          s1->mem_index, ot | MO_LE);
>          } else {
> -            tcg_gen_xor_tl(s1->T0, s1->T0, cpu_T1);
> +            tcg_gen_xor_tl(s1->T0, s1->T0, s1->T1);
>              gen_op_st_rm_T0_A0(s1, ot, d);
>          }
>          gen_op_update1_cc(s1);
>          set_cc_op(s1, CC_OP_LOGICB + ot);
>          break;
>      case OP_CMPL:
> -        tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
> +        tcg_gen_mov_tl(cpu_cc_src, s1->T1);
>          tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
> -        tcg_gen_sub_tl(cpu_cc_dst, s1->T0, cpu_T1);
> +        tcg_gen_sub_tl(cpu_cc_dst, s1->T0, s1->T1);
>          set_cc_op(s1, CC_OP_SUBB + ot);
>          break;
>      }
> @@ -1447,28 +1446,28 @@ static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>          gen_op_mov_v_reg(ot, s->T0, op1);
>      }
>
> -    tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);
> -    tcg_gen_subi_tl(cpu_tmp0, cpu_T1, 1);
> +    tcg_gen_andi_tl(s->T1, s->T1, mask);
> +    tcg_gen_subi_tl(cpu_tmp0, s->T1, 1);
>
>      if (is_right) {
>          if (is_arith) {
>              gen_exts(ot, s->T0);
>              tcg_gen_sar_tl(cpu_tmp0, s->T0, cpu_tmp0);
> -            tcg_gen_sar_tl(s->T0, s->T0, cpu_T1);
> +            tcg_gen_sar_tl(s->T0, s->T0, s->T1);
>          } else {
>              gen_extu(ot, s->T0);
>              tcg_gen_shr_tl(cpu_tmp0, s->T0, cpu_tmp0);
> -            tcg_gen_shr_tl(s->T0, s->T0, cpu_T1);
> +            tcg_gen_shr_tl(s->T0, s->T0, s->T1);
>          }
>      } else {
>          tcg_gen_shl_tl(cpu_tmp0, s->T0, cpu_tmp0);
> -        tcg_gen_shl_tl(s->T0, s->T0, cpu_T1);
> +        tcg_gen_shl_tl(s->T0, s->T0, s->T1);
>      }
>
>      /* store */
>      gen_op_st_rm_T0_A0(s, ot, op1);
>
> -    gen_shift_flags(s, ot, s->T0, cpu_tmp0, cpu_T1, is_right);
> +    gen_shift_flags(s, ot, s->T0, cpu_tmp0, s->T1, is_right);
>  }
>
>  static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
> @@ -1523,7 +1522,7 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
>          gen_op_mov_v_reg(ot, s->T0, op1);
>      }
>
> -    tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);
> +    tcg_gen_andi_tl(s->T1, s->T1, mask);
>
>      switch (ot) {
>      case MO_8:
> @@ -1539,7 +1538,7 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
>  #ifdef TARGET_X86_64
>      case MO_32:
>          tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
> -        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
> +        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T1);
>          if (is_right) {
>              tcg_gen_rotr_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
>          } else {
> @@ -1550,9 +1549,9 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
>  #endif
>      default:
>          if (is_right) {
> -            tcg_gen_rotr_tl(s->T0, s->T0, cpu_T1);
> +            tcg_gen_rotr_tl(s->T0, s->T0, s->T1);
>          } else {
> -            tcg_gen_rotl_tl(s->T0, s->T0, cpu_T1);
> +            tcg_gen_rotl_tl(s->T0, s->T0, s->T1);
>          }
>          break;
>      }
> @@ -1584,7 +1583,7 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
>         exactly as we computed above.  */
>      t0 = tcg_const_i32(0);
>      t1 = tcg_temp_new_i32();
> -    tcg_gen_trunc_tl_i32(t1, cpu_T1);
> +    tcg_gen_trunc_tl_i32(t1, s->T1);
>      tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX);
>      tcg_gen_movi_i32(cpu_tmp3_i32, CC_OP_EFLAGS);
>      tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
> @@ -1689,17 +1688,17 @@ static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>      if (is_right) {
>          switch (ot) {
>          case MO_8:
> -            gen_helper_rcrb(s->T0, cpu_env, s->T0, cpu_T1);
> +            gen_helper_rcrb(s->T0, cpu_env, s->T0, s->T1);
>              break;
>          case MO_16:
> -            gen_helper_rcrw(s->T0, cpu_env, s->T0, cpu_T1);
> +            gen_helper_rcrw(s->T0, cpu_env, s->T0, s->T1);
>              break;
>          case MO_32:
> -            gen_helper_rcrl(s->T0, cpu_env, s->T0, cpu_T1);
> +            gen_helper_rcrl(s->T0, cpu_env, s->T0, s->T1);
>              break;
>  #ifdef TARGET_X86_64
>          case MO_64:
> -            gen_helper_rcrq(s->T0, cpu_env, s->T0, cpu_T1);
> +            gen_helper_rcrq(s->T0, cpu_env, s->T0, s->T1);
>              break;
>  #endif
>          default:
> @@ -1708,17 +1707,17 @@ static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>      } else {
>          switch (ot) {
>          case MO_8:
> -            gen_helper_rclb(s->T0, cpu_env, s->T0, cpu_T1);
> +            gen_helper_rclb(s->T0, cpu_env, s->T0, s->T1);
>              break;
>          case MO_16:
> -            gen_helper_rclw(s->T0, cpu_env, s->T0, cpu_T1);
> +            gen_helper_rclw(s->T0, cpu_env, s->T0, s->T1);
>              break;
>          case MO_32:
> -            gen_helper_rcll(s->T0, cpu_env, s->T0, cpu_T1);
> +            gen_helper_rcll(s->T0, cpu_env, s->T0, s->T1);
>              break;
>  #ifdef TARGET_X86_64
>          case MO_64:
> -            gen_helper_rclq(s->T0, cpu_env, s->T0, cpu_T1);
> +            gen_helper_rclq(s->T0, cpu_env, s->T0, s->T1);
>              break;
>  #endif
>          default:
> @@ -1752,11 +1751,11 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>             This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
>             portion by constructing it as a 32-bit value.  */
>          if (is_right) {
> -            tcg_gen_deposit_tl(cpu_tmp0, s->T0, cpu_T1, 16, 16);
> -            tcg_gen_mov_tl(cpu_T1, s->T0);
> +            tcg_gen_deposit_tl(cpu_tmp0, s->T0, s->T1, 16, 16);
> +            tcg_gen_mov_tl(s->T1, s->T0);
>              tcg_gen_mov_tl(s->T0, cpu_tmp0);
>          } else {
> -            tcg_gen_deposit_tl(cpu_T1, s->T0, cpu_T1, 16, 16);
> +            tcg_gen_deposit_tl(s->T1, s->T0, s->T1, 16, 16);
>          }
>          /* FALLTHRU */
>  #ifdef TARGET_X86_64
> @@ -1764,11 +1763,11 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>          /* Concatenate the two 32-bit values and use a 64-bit shift.  */
>          tcg_gen_subi_tl(cpu_tmp0, count, 1);
>          if (is_right) {
> -            tcg_gen_concat_tl_i64(s->T0, s->T0, cpu_T1);
> +            tcg_gen_concat_tl_i64(s->T0, s->T0, s->T1);
>              tcg_gen_shr_i64(cpu_tmp0, s->T0, cpu_tmp0);
>              tcg_gen_shr_i64(s->T0, s->T0, count);
>          } else {
> -            tcg_gen_concat_tl_i64(s->T0, cpu_T1, s->T0);
> +            tcg_gen_concat_tl_i64(s->T0, s->T1, s->T0);
>              tcg_gen_shl_i64(cpu_tmp0, s->T0, cpu_tmp0);
>              tcg_gen_shl_i64(s->T0, s->T0, count);
>              tcg_gen_shri_i64(cpu_tmp0, cpu_tmp0, 32);
> @@ -1783,24 +1782,24 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>
>              tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
>              tcg_gen_shr_tl(s->T0, s->T0, count);
> -            tcg_gen_shl_tl(cpu_T1, cpu_T1, cpu_tmp4);
> +            tcg_gen_shl_tl(s->T1, s->T1, cpu_tmp4);
>          } else {
>              tcg_gen_shl_tl(cpu_tmp0, s->T0, cpu_tmp0);
>              if (ot == MO_16) {
>                  /* Only needed if count > 16, for Intel behaviour.  */
>                  tcg_gen_subfi_tl(cpu_tmp4, 33, count);
> -                tcg_gen_shr_tl(cpu_tmp4, cpu_T1, cpu_tmp4);
> +                tcg_gen_shr_tl(cpu_tmp4, s->T1, cpu_tmp4);
>                  tcg_gen_or_tl(cpu_tmp0, cpu_tmp0, cpu_tmp4);
>              }
>
>              tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
>              tcg_gen_shl_tl(s->T0, s->T0, count);
> -            tcg_gen_shr_tl(cpu_T1, cpu_T1, cpu_tmp4);
> +            tcg_gen_shr_tl(s->T1, s->T1, cpu_tmp4);
>          }
>          tcg_gen_movi_tl(cpu_tmp4, 0);
> -        tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T1, count, cpu_tmp4,
> -                           cpu_tmp4, cpu_T1);
> -        tcg_gen_or_tl(s->T0, s->T0, cpu_T1);
> +        tcg_gen_movcond_tl(TCG_COND_EQ, s->T1, count, cpu_tmp4,
> +                           cpu_tmp4, s->T1);
> +        tcg_gen_or_tl(s->T0, s->T0, s->T1);
>          break;
>      }
>
> @@ -1814,7 +1813,7 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>  static void gen_shift(DisasContext *s1, int op, TCGMemOp ot, int d, int s)
>  {
>      if (s != OR_TMP1)
> -        gen_op_mov_v_reg(ot, cpu_T1, s);
> +        gen_op_mov_v_reg(ot, s1->T1, s);
>      switch(op) {
>      case OP_ROL:
>          gen_rot_rm_T1(s1, ot, d, 0);
> @@ -1862,7 +1861,7 @@ static void gen_shifti(DisasContext *s1, int op, TCGMemOp ot, int d, int c)
>          break;
>      default:
>          /* currently not optimized */
> -        tcg_gen_movi_tl(cpu_T1, c);
> +        tcg_gen_movi_tl(s1->T1, c);
>          gen_shift(s1, op, ot, d, OR_TMP1);
>          break;
>      }
> @@ -2242,7 +2241,7 @@ static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
>
>      gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
>
> -    cc = gen_prepare_cc(s, b, cpu_T1);
> +    cc = gen_prepare_cc(s, b, s->T1);
>      if (cc.mask != -1) {
>          TCGv t0 = tcg_temp_new();
>          tcg_gen_andi_tl(t0, cc.reg, cc.mask);
> @@ -2416,8 +2415,8 @@ static void gen_enter(DisasContext *s, int esp_addend, int level)
>      int size = 1 << d_ot;
>
>      /* Push BP; compute FrameTemp into T1.  */
> -    tcg_gen_subi_tl(cpu_T1, cpu_regs[R_ESP], size);
> -    gen_lea_v_seg(s, a_ot, cpu_T1, R_SS, -1);
> +    tcg_gen_subi_tl(s->T1, cpu_regs[R_ESP], size);
> +    gen_lea_v_seg(s, a_ot, s->T1, R_SS, -1);
>      gen_op_st_v(s, d_ot, cpu_regs[R_EBP], s->A0);
>
>      level &= 31;
> @@ -2430,23 +2429,23 @@ static void gen_enter(DisasContext *s, int esp_addend, int level)
>              gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
>              gen_op_ld_v(s, d_ot, cpu_tmp0, s->A0);
>
> -            tcg_gen_subi_tl(s->A0, cpu_T1, size * i);
> +            tcg_gen_subi_tl(s->A0, s->T1, size * i);
>              gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
>              gen_op_st_v(s, d_ot, cpu_tmp0, s->A0);
>          }
>
>          /* Push the current FrameTemp as the last level.  */
> -        tcg_gen_subi_tl(s->A0, cpu_T1, size * level);
> +        tcg_gen_subi_tl(s->A0, s->T1, size * level);
>          gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
> -        gen_op_st_v(s, d_ot, cpu_T1, s->A0);
> +        gen_op_st_v(s, d_ot, s->T1, s->A0);
>      }
>
>      /* Copy the FrameTemp value to EBP.  */
> -    gen_op_mov_reg_v(a_ot, R_EBP, cpu_T1);
> +    gen_op_mov_reg_v(a_ot, R_EBP, s->T1);
>
>      /* Compute the final value of ESP.  */
> -    tcg_gen_subi_tl(cpu_T1, cpu_T1, esp_addend + size * level);
> -    gen_op_mov_reg_v(a_ot, R_ESP, cpu_T1);
> +    tcg_gen_subi_tl(s->T1, s->T1, esp_addend + size * level);
> +    gen_op_mov_reg_v(a_ot, R_ESP, s->T1);
>  }
>
>  static void gen_leave(DisasContext *s)
> @@ -2457,10 +2456,10 @@ static void gen_leave(DisasContext *s)
>      gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
>      gen_op_ld_v(s, d_ot, s->T0, s->A0);
>
> -    tcg_gen_addi_tl(cpu_T1, cpu_regs[R_EBP], 1 << d_ot);
> +    tcg_gen_addi_tl(s->T1, cpu_regs[R_EBP], 1 << d_ot);
>
>      gen_op_mov_reg_v(d_ot, R_EBP, s->T0);
> -    gen_op_mov_reg_v(a_ot, R_ESP, cpu_T1);
> +    gen_op_mov_reg_v(a_ot, R_ESP, s->T1);
>  }
>
>  static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
> @@ -3854,10 +3853,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound,
>                                         s->A0, bound);
>                      tcg_temp_free(bound);
> -                    tcg_gen_movi_tl(cpu_T1, 1);
> -                    tcg_gen_shl_tl(cpu_T1, cpu_T1, s->A0);
> -                    tcg_gen_subi_tl(cpu_T1, cpu_T1, 1);
> -                    tcg_gen_and_tl(s->T0, s->T0, cpu_T1);
> +                    tcg_gen_movi_tl(s->T1, 1);
> +                    tcg_gen_shl_tl(s->T1, s->T1, s->A0);
> +                    tcg_gen_subi_tl(s->T1, s->T1, 1);
> +                    tcg_gen_and_tl(s->T0, s->T0, s->T1);
>
>                      gen_op_mov_reg_v(ot, reg, s->T0);
>                      gen_op_update1_cc(s);
> @@ -3873,19 +3872,19 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  }
>                  ot = mo_64_32(s->dflag);
>                  gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> -                tcg_gen_ext8u_tl(cpu_T1, cpu_regs[s->vex_v]);
> +                tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]);
>                  {
>                      TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
>                      /* Note that since we're using BMILG (in order to get O
>                         cleared) we need to store the inverse into C.  */
>                      tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src,
> -                                       cpu_T1, bound);
> -                    tcg_gen_movcond_tl(TCG_COND_GT, cpu_T1, cpu_T1,
> -                                       bound, bound, cpu_T1);
> +                                       s->T1, bound);
> +                    tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1,
> +                                       bound, bound, s->T1);
>                      tcg_temp_free(bound);
>                  }
>                  tcg_gen_movi_tl(s->A0, -1);
> -                tcg_gen_shl_tl(s->A0, s->A0, cpu_T1);
> +                tcg_gen_shl_tl(s->A0, s->A0, s->T1);
>                  tcg_gen_andc_tl(s->T0, s->T0, s->A0);
>                  gen_op_mov_reg_v(ot, reg, s->T0);
>                  gen_op_update1_cc(s);
> @@ -3911,10 +3910,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      break;
>  #ifdef TARGET_X86_64
>                  case MO_64:
> -                    tcg_gen_mulu2_i64(s->T0, cpu_T1,
> +                    tcg_gen_mulu2_i64(s->T0, s->T1,
>                                        s->T0, cpu_regs[R_EDX]);
>                      tcg_gen_mov_i64(cpu_regs[s->vex_v], s->T0);
> -                    tcg_gen_mov_i64(cpu_regs[reg], cpu_T1);
> +                    tcg_gen_mov_i64(cpu_regs[reg], s->T1);
>                      break;
>  #endif
>                  }
> @@ -3931,11 +3930,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  /* Note that by zero-extending the mask operand, we
>                     automatically handle zero-extending the result.  */
>                  if (ot == MO_64) {
> -                    tcg_gen_mov_tl(cpu_T1, cpu_regs[s->vex_v]);
> +                    tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
>                  } else {
> -                    tcg_gen_ext32u_tl(cpu_T1, cpu_regs[s->vex_v]);
> +                    tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
>                  }
> -                gen_helper_pdep(cpu_regs[reg], s->T0, cpu_T1);
> +                gen_helper_pdep(cpu_regs[reg], s->T0, s->T1);
>                  break;
>
>              case 0x2f5: /* pext Gy, By, Ey */
> @@ -3949,11 +3948,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  /* Note that by zero-extending the mask operand, we
>                     automatically handle zero-extending the result.  */
>                  if (ot == MO_64) {
> -                    tcg_gen_mov_tl(cpu_T1, cpu_regs[s->vex_v]);
> +                    tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
>                  } else {
> -                    tcg_gen_ext32u_tl(cpu_T1, cpu_regs[s->vex_v]);
> +                    tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
>                  }
> -                gen_helper_pext(cpu_regs[reg], s->T0, cpu_T1);
> +                gen_helper_pext(cpu_regs[reg], s->T0, s->T1);
>                  break;
>
>              case 0x1f6: /* adcx Gy, Ey */
> @@ -4045,22 +4044,22 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  ot = mo_64_32(s->dflag);
>                  gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
>                  if (ot == MO_64) {
> -                    tcg_gen_andi_tl(cpu_T1, cpu_regs[s->vex_v], 63);
> +                    tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 63);
>                  } else {
> -                    tcg_gen_andi_tl(cpu_T1, cpu_regs[s->vex_v], 31);
> +                    tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 31);
>                  }
>                  if (b == 0x1f7) {
> -                    tcg_gen_shl_tl(s->T0, s->T0, cpu_T1);
> +                    tcg_gen_shl_tl(s->T0, s->T0, s->T1);
>                  } else if (b == 0x2f7) {
>                      if (ot != MO_64) {
>                          tcg_gen_ext32s_tl(s->T0, s->T0);
>                      }
> -                    tcg_gen_sar_tl(s->T0, s->T0, cpu_T1);
> +                    tcg_gen_sar_tl(s->T0, s->T0, s->T1);
>                  } else {
>                      if (ot != MO_64) {
>                          tcg_gen_ext32u_tl(s->T0, s->T0);
>                      }
> -                    tcg_gen_shr_tl(s->T0, s->T0, cpu_T1);
> +                    tcg_gen_shr_tl(s->T0, s->T0, s->T1);
>                  }
>                  gen_op_mov_reg_v(ot, reg, s->T0);
>                  break;
> @@ -4080,16 +4079,16 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  tcg_gen_mov_tl(cpu_cc_src, s->T0);
>                  switch (reg & 7) {
>                  case 1: /* blsr By,Ey */
> -                    tcg_gen_subi_tl(cpu_T1, s->T0, 1);
> -                    tcg_gen_and_tl(s->T0, s->T0, cpu_T1);
> +                    tcg_gen_subi_tl(s->T1, s->T0, 1);
> +                    tcg_gen_and_tl(s->T0, s->T0, s->T1);
>                      break;
>                  case 2: /* blsmsk By,Ey */
> -                    tcg_gen_subi_tl(cpu_T1, s->T0, 1);
> -                    tcg_gen_xor_tl(s->T0, s->T0, cpu_T1);
> +                    tcg_gen_subi_tl(s->T1, s->T0, 1);
> +                    tcg_gen_xor_tl(s->T0, s->T0, s->T1);
>                      break;
>                  case 3: /* blsi By, Ey */
> -                    tcg_gen_neg_tl(cpu_T1, s->T0);
> -                    tcg_gen_and_tl(s->T0, s->T0, cpu_T1);
> +                    tcg_gen_neg_tl(s->T1, s->T0);
> +                    tcg_gen_and_tl(s->T0, s->T0, s->T1);
>                      break;
>                  default:
>                      goto unknown_op;
> @@ -4677,7 +4676,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  } else {
>                      opreg = rm;
>                  }
> -                gen_op_mov_v_reg(ot, cpu_T1, reg);
> +                gen_op_mov_v_reg(ot, s->T1, reg);
>                  gen_op(s, op, ot, opreg);
>                  break;
>              case 1: /* OP Gv, Ev */
> @@ -4687,17 +4686,17 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  rm = (modrm & 7) | REX_B(s);
>                  if (mod != 3) {
>                      gen_lea_modrm(env, s, modrm);
> -                    gen_op_ld_v(s, ot, cpu_T1, s->A0);
> +                    gen_op_ld_v(s, ot, s->T1, s->A0);
>                  } else if (op == OP_XORL && rm == reg) {
>                      goto xor_zero;
>                  } else {
> -                    gen_op_mov_v_reg(ot, cpu_T1, rm);
> +                    gen_op_mov_v_reg(ot, s->T1, rm);
>                  }
>                  gen_op(s, op, ot, reg);
>                  break;
>              case 2: /* OP A, Iv */
>                  val = insn_get(env, s, ot);
> -                tcg_gen_movi_tl(cpu_T1, val);
> +                tcg_gen_movi_tl(s->T1, val);
>                  gen_op(s, op, ot, OR_EAX);
>                  break;
>              }
> @@ -4743,7 +4742,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  val = (int8_t)insn_get(env, s, MO_8);
>                  break;
>              }
> -            tcg_gen_movi_tl(cpu_T1, val);
> +            tcg_gen_movi_tl(s->T1, val);
>              gen_op(s, op, ot, opreg);
>          }
>          break;
> @@ -4783,7 +4782,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          switch(op) {
>          case 0: /* test */
>              val = insn_get(env, s, ot);
> -            tcg_gen_movi_tl(cpu_T1, val);
> +            tcg_gen_movi_tl(s->T1, val);
>              gen_op_testl_T0_T1_cc(s);
>              set_cc_op(s, CC_OP_LOGICB + ot);
>              break;
> @@ -4847,22 +4846,22 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          case 4: /* mul */
>              switch(ot) {
>              case MO_8:
> -                gen_op_mov_v_reg(MO_8, cpu_T1, R_EAX);
> +                gen_op_mov_v_reg(MO_8, s->T1, R_EAX);
>                  tcg_gen_ext8u_tl(s->T0, s->T0);
> -                tcg_gen_ext8u_tl(cpu_T1, cpu_T1);
> +                tcg_gen_ext8u_tl(s->T1, s->T1);
>                  /* XXX: use 32 bit mul which could be faster */
> -                tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
> +                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
>                  gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
>                  tcg_gen_mov_tl(cpu_cc_dst, s->T0);
>                  tcg_gen_andi_tl(cpu_cc_src, s->T0, 0xff00);
>                  set_cc_op(s, CC_OP_MULB);
>                  break;
>              case MO_16:
> -                gen_op_mov_v_reg(MO_16, cpu_T1, R_EAX);
> +                gen_op_mov_v_reg(MO_16, s->T1, R_EAX);
>                  tcg_gen_ext16u_tl(s->T0, s->T0);
> -                tcg_gen_ext16u_tl(cpu_T1, cpu_T1);
> +                tcg_gen_ext16u_tl(s->T1, s->T1);
>                  /* XXX: use 32 bit mul which could be faster */
> -                tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
> +                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
>                  gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
>                  tcg_gen_mov_tl(cpu_cc_dst, s->T0);
>                  tcg_gen_shri_tl(s->T0, s->T0, 16);
> @@ -4896,11 +4895,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          case 5: /* imul */
>              switch(ot) {
>              case MO_8:
> -                gen_op_mov_v_reg(MO_8, cpu_T1, R_EAX);
> +                gen_op_mov_v_reg(MO_8, s->T1, R_EAX);
>                  tcg_gen_ext8s_tl(s->T0, s->T0);
> -                tcg_gen_ext8s_tl(cpu_T1, cpu_T1);
> +                tcg_gen_ext8s_tl(s->T1, s->T1);
>                  /* XXX: use 32 bit mul which could be faster */
> -                tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
> +                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
>                  gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
>                  tcg_gen_mov_tl(cpu_cc_dst, s->T0);
>                  tcg_gen_ext8s_tl(cpu_tmp0, s->T0);
> @@ -4908,11 +4907,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  set_cc_op(s, CC_OP_MULB);
>                  break;
>              case MO_16:
> -                gen_op_mov_v_reg(MO_16, cpu_T1, R_EAX);
> +                gen_op_mov_v_reg(MO_16, s->T1, R_EAX);
>                  tcg_gen_ext16s_tl(s->T0, s->T0);
> -                tcg_gen_ext16s_tl(cpu_T1, cpu_T1);
> +                tcg_gen_ext16s_tl(s->T1, s->T1);
>                  /* XXX: use 32 bit mul which could be faster */
> -                tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
> +                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
>                  gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
>                  tcg_gen_mov_tl(cpu_cc_dst, s->T0);
>                  tcg_gen_ext16s_tl(cpu_tmp0, s->T0);
> @@ -5041,25 +5040,25 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  tcg_gen_ext16u_tl(s->T0, s->T0);
>              }
>              next_eip = s->pc - s->cs_base;
> -            tcg_gen_movi_tl(cpu_T1, next_eip);
> -            gen_push_v(s, cpu_T1);
> +            tcg_gen_movi_tl(s->T1, next_eip);
> +            gen_push_v(s, s->T1);
>              gen_op_jmp_v(s->T0);
>              gen_bnd_jmp(s);
>              gen_jr(s, s->T0);
>              break;
>          case 3: /* lcall Ev */
> -            gen_op_ld_v(s, ot, cpu_T1, s->A0);
> +            gen_op_ld_v(s, ot, s->T1, s->A0);
>              gen_add_A0_im(s, 1 << ot);
>              gen_op_ld_v(s, MO_16, s->T0, s->A0);
>          do_lcall:
>              if (s->pe && !s->vm86) {
>                  tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
> -                gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, cpu_T1,
> +                gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, s->T1,
>                                             tcg_const_i32(dflag - 1),
>                                             tcg_const_tl(s->pc - s->cs_base));
>              } else {
>                  tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
> -                gen_helper_lcall_real(cpu_env, cpu_tmp2_i32, cpu_T1,
> +                gen_helper_lcall_real(cpu_env, cpu_tmp2_i32, s->T1,
>                                        tcg_const_i32(dflag - 1),
>                                        tcg_const_i32(s->pc - s->cs_base));
>              }
> @@ -5075,17 +5074,17 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              gen_jr(s, s->T0);
>              break;
>          case 5: /* ljmp Ev */
> -            gen_op_ld_v(s, ot, cpu_T1, s->A0);
> +            gen_op_ld_v(s, ot, s->T1, s->A0);
>              gen_add_A0_im(s, 1 << ot);
>              gen_op_ld_v(s, MO_16, s->T0, s->A0);
>          do_ljmp:
>              if (s->pe && !s->vm86) {
>                  tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
> -                gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, cpu_T1,
> +                gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, s->T1,
>                                            tcg_const_tl(s->pc - s->cs_base));
>              } else {
>                  gen_op_movl_seg_T0_vm(s, R_CS);
> -                gen_op_jmp_v(cpu_T1);
> +                gen_op_jmp_v(s->T1);
>              }
>              tcg_gen_ld_tl(cpu_tmp4, cpu_env, offsetof(CPUX86State, eip));
>              gen_jr(s, cpu_tmp4);
> @@ -5106,7 +5105,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          reg = ((modrm >> 3) & 7) | rex_r;
>
>          gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> -        gen_op_mov_v_reg(ot, cpu_T1, reg);
> +        gen_op_mov_v_reg(ot, s->T1, reg);
>          gen_op_testl_T0_T1_cc(s);
>          set_cc_op(s, CC_OP_LOGICB + ot);
>          break;
> @@ -5117,7 +5116,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          val = insn_get(env, s, ot);
>
>          gen_op_mov_v_reg(ot, s->T0, OR_EAX);
> -        tcg_gen_movi_tl(cpu_T1, val);
> +        tcg_gen_movi_tl(s->T1, val);
>          gen_op_testl_T0_T1_cc(s);
>          set_cc_op(s, CC_OP_LOGICB + ot);
>          break;
> @@ -5183,25 +5182,25 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
>          if (b == 0x69) {
>              val = insn_get(env, s, ot);
> -            tcg_gen_movi_tl(cpu_T1, val);
> +            tcg_gen_movi_tl(s->T1, val);
>          } else if (b == 0x6b) {
>              val = (int8_t)insn_get(env, s, MO_8);
> -            tcg_gen_movi_tl(cpu_T1, val);
> +            tcg_gen_movi_tl(s->T1, val);
>          } else {
> -            gen_op_mov_v_reg(ot, cpu_T1, reg);
> +            gen_op_mov_v_reg(ot, s->T1, reg);
>          }
>          switch (ot) {
>  #ifdef TARGET_X86_64
>          case MO_64:
> -            tcg_gen_muls2_i64(cpu_regs[reg], cpu_T1, s->T0, cpu_T1);
> +            tcg_gen_muls2_i64(cpu_regs[reg], s->T1, s->T0, s->T1);
>              tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
>              tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
> -            tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_T1);
> +            tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, s->T1);
>              break;
>  #endif
>          case MO_32:
>              tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
> -            tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
> +            tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T1);
>              tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
>                                cpu_tmp2_i32, cpu_tmp3_i32);
>              tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
> @@ -5212,9 +5211,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              break;
>          default:
>              tcg_gen_ext16s_tl(s->T0, s->T0);
> -            tcg_gen_ext16s_tl(cpu_T1, cpu_T1);
> +            tcg_gen_ext16s_tl(s->T1, s->T1);
>              /* XXX: use 32 bit mul which could be faster */
> -            tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
> +            tcg_gen_mul_tl(s->T0, s->T0, s->T1);
>              tcg_gen_mov_tl(cpu_cc_dst, s->T0);
>              tcg_gen_ext16s_tl(cpu_tmp0, s->T0);
>              tcg_gen_sub_tl(cpu_cc_src, s->T0, cpu_tmp0);
> @@ -5232,22 +5231,22 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          gen_op_mov_v_reg(ot, s->T0, reg);
>          if (mod == 3) {
>              rm = (modrm & 7) | REX_B(s);
> -            gen_op_mov_v_reg(ot, cpu_T1, rm);
> -            tcg_gen_add_tl(s->T0, s->T0, cpu_T1);
> -            gen_op_mov_reg_v(ot, reg, cpu_T1);
> +            gen_op_mov_v_reg(ot, s->T1, rm);
> +            tcg_gen_add_tl(s->T0, s->T0, s->T1);
> +            gen_op_mov_reg_v(ot, reg, s->T1);
>              gen_op_mov_reg_v(ot, rm, s->T0);
>          } else {
>              gen_lea_modrm(env, s, modrm);
>              if (s->prefix & PREFIX_LOCK) {
> -                tcg_gen_atomic_fetch_add_tl(cpu_T1, s->A0, s->T0,
> +                tcg_gen_atomic_fetch_add_tl(s->T1, s->A0, s->T0,
>                                              s->mem_index, ot | MO_LE);
> -                tcg_gen_add_tl(s->T0, s->T0, cpu_T1);
> +                tcg_gen_add_tl(s->T0, s->T0, s->T1);
>              } else {
> -                gen_op_ld_v(s, ot, cpu_T1, s->A0);
> -                tcg_gen_add_tl(s->T0, s->T0, cpu_T1);
> +                gen_op_ld_v(s, ot, s->T1, s->A0);
> +                tcg_gen_add_tl(s->T0, s->T0, s->T1);
>                  gen_op_st_v(s, ot, s->T0, s->A0);
>              }
> -            gen_op_mov_reg_v(ot, reg, cpu_T1);
> +            gen_op_mov_reg_v(ot, reg, s->T1);
>          }
>          gen_op_update2_cc(s);
>          set_cc_op(s, CC_OP_ADDB + ot);
> @@ -5653,16 +5652,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              rm = (modrm & 7) | REX_B(s);
>          do_xchg_reg:
>              gen_op_mov_v_reg(ot, s->T0, reg);
> -            gen_op_mov_v_reg(ot, cpu_T1, rm);
> +            gen_op_mov_v_reg(ot, s->T1, rm);
>              gen_op_mov_reg_v(ot, rm, s->T0);
> -            gen_op_mov_reg_v(ot, reg, cpu_T1);
> +            gen_op_mov_reg_v(ot, reg, s->T1);
>          } else {
>              gen_lea_modrm(env, s, modrm);
>              gen_op_mov_v_reg(ot, s->T0, reg);
>              /* for xchg, lock is implicit */
> -            tcg_gen_atomic_xchg_tl(cpu_T1, s->A0, s->T0,
> +            tcg_gen_atomic_xchg_tl(s->T1, s->A0, s->T0,
>                                     s->mem_index, ot | MO_LE);
> -            gen_op_mov_reg_v(ot, reg, cpu_T1);
> +            gen_op_mov_reg_v(ot, reg, s->T1);
>          }
>          break;
>      case 0xc4: /* les Gv */
> @@ -5689,13 +5688,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          if (mod == 3)
>              goto illegal_op;
>          gen_lea_modrm(env, s, modrm);
> -        gen_op_ld_v(s, ot, cpu_T1, s->A0);
> +        gen_op_ld_v(s, ot, s->T1, s->A0);
>          gen_add_A0_im(s, 1 << ot);
>          /* load the segment first to handle exceptions properly */
>          gen_op_ld_v(s, MO_16, s->T0, s->A0);
>          gen_movl_seg_T0(s, op);
>          /* then put the data */
> -        gen_op_mov_reg_v(ot, reg, cpu_T1);
> +        gen_op_mov_reg_v(ot, reg, s->T1);
>          if (s->base.is_jmp) {
>              gen_jmp_im(s->pc - s->cs_base);
>              gen_eob(s);
> @@ -5774,7 +5773,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          } else {
>              opreg = rm;
>          }
> -        gen_op_mov_v_reg(ot, cpu_T1, reg);
> +        gen_op_mov_v_reg(ot, s->T1, reg);
>
>          if (shift) {
>              TCGv imm = tcg_const_tl(x86_ldub_code(env, s));
> @@ -6387,8 +6386,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              gen_io_start();
>  	}
>          tcg_gen_movi_i32(cpu_tmp2_i32, val);
> -        gen_helper_in_func(ot, cpu_T1, cpu_tmp2_i32);
> -        gen_op_mov_reg_v(ot, R_EAX, cpu_T1);
> +        gen_helper_in_func(ot, s->T1, cpu_tmp2_i32);
> +        gen_op_mov_reg_v(ot, R_EAX, s->T1);
>          gen_bpt_io(s, cpu_tmp2_i32, ot);
>          if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
>              gen_io_end();
> @@ -6402,13 +6401,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          tcg_gen_movi_tl(s->T0, val);
>          gen_check_io(s, ot, pc_start - s->cs_base,
>                       svm_is_rep(prefixes));
> -        gen_op_mov_v_reg(ot, cpu_T1, R_EAX);
> +        gen_op_mov_v_reg(ot, s->T1, R_EAX);
>
>          if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
>              gen_io_start();
>  	}
>          tcg_gen_movi_i32(cpu_tmp2_i32, val);
> -        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
> +        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T1);
>          gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
>          gen_bpt_io(s, cpu_tmp2_i32, ot);
>          if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
> @@ -6426,8 +6425,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              gen_io_start();
>  	}
>          tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
> -        gen_helper_in_func(ot, cpu_T1, cpu_tmp2_i32);
> -        gen_op_mov_reg_v(ot, R_EAX, cpu_T1);
> +        gen_helper_in_func(ot, s->T1, cpu_tmp2_i32);
> +        gen_op_mov_reg_v(ot, R_EAX, s->T1);
>          gen_bpt_io(s, cpu_tmp2_i32, ot);
>          if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
>              gen_io_end();
> @@ -6440,13 +6439,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
>          gen_check_io(s, ot, pc_start - s->cs_base,
>                       svm_is_rep(prefixes));
> -        gen_op_mov_v_reg(ot, cpu_T1, R_EAX);
> +        gen_op_mov_v_reg(ot, s->T1, R_EAX);
>
>          if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
>              gen_io_start();
>  	}
>          tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
> -        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
> +        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T1);
>          gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
>          gen_bpt_io(s, cpu_tmp2_i32, ot);
>          if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
> @@ -6552,7 +6551,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              selector = insn_get(env, s, MO_16);
>
>              tcg_gen_movi_tl(s->T0, selector);
> -            tcg_gen_movi_tl(cpu_T1, offset);
> +            tcg_gen_movi_tl(s->T1, offset);
>          }
>          goto do_lcall;
>      case 0xe9: /* jmp im */
> @@ -6581,7 +6580,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              selector = insn_get(env, s, MO_16);
>
>              tcg_gen_movi_tl(s->T0, selector);
> -            tcg_gen_movi_tl(cpu_T1, offset);
> +            tcg_gen_movi_tl(s->T1, offset);
>          }
>          goto do_ljmp;
>      case 0xeb: /* jmp Jb */
> @@ -6753,7 +6752,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          }
>          /* load shift */
>          val = x86_ldub_code(env, s);
> -        tcg_gen_movi_tl(cpu_T1, val);
> +        tcg_gen_movi_tl(s->T1, val);
>          if (op < 4)
>              goto unknown_op;
>          op -= 4;
> @@ -6775,12 +6774,12 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          reg = ((modrm >> 3) & 7) | rex_r;
>          mod = (modrm >> 6) & 3;
>          rm = (modrm & 7) | REX_B(s);
> -        gen_op_mov_v_reg(MO_32, cpu_T1, reg);
> +        gen_op_mov_v_reg(MO_32, s->T1, reg);
>          if (mod != 3) {
>              AddressParts a = gen_lea_modrm_0(env, s, modrm);
>              /* specific case: we need to add a displacement */
> -            gen_exts(ot, cpu_T1);
> -            tcg_gen_sari_tl(cpu_tmp0, cpu_T1, 3 + ot);
> +            gen_exts(ot, s->T1);
> +            tcg_gen_sari_tl(cpu_tmp0, s->T1, 3 + ot);
>              tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, ot);
>              tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), cpu_tmp0);
>              gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
> @@ -6791,9 +6790,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              gen_op_mov_v_reg(ot, s->T0, rm);
>          }
>      bt_op:
> -        tcg_gen_andi_tl(cpu_T1, cpu_T1, (1 << (3 + ot)) - 1);
> +        tcg_gen_andi_tl(s->T1, s->T1, (1 << (3 + ot)) - 1);
>          tcg_gen_movi_tl(cpu_tmp0, 1);
> -        tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1);
> +        tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, s->T1);
>          if (s->prefix & PREFIX_LOCK) {
>              switch (op) {
>              case 0: /* bt */
> @@ -6816,9 +6815,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                                              s->mem_index, ot | MO_LE);
>                  break;
>              }
> -            tcg_gen_shr_tl(cpu_tmp4, s->T0, cpu_T1);
> +            tcg_gen_shr_tl(cpu_tmp4, s->T0, s->T1);
>          } else {
> -            tcg_gen_shr_tl(cpu_tmp4, s->T0, cpu_T1);
> +            tcg_gen_shr_tl(cpu_tmp4, s->T0, s->T1);
>              switch (op) {
>              case 0: /* bt */
>                  /* Data already loaded; nothing to do.  */
> @@ -6914,8 +6913,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              if (b & 1) {
>                  /* For bsr, return the bit index of the first 1 bit,
>                     not the count of leading zeros.  */
> -                tcg_gen_xori_tl(cpu_T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
> -                tcg_gen_clz_tl(s->T0, s->T0, cpu_T1);
> +                tcg_gen_xori_tl(s->T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
> +                tcg_gen_clz_tl(s->T0, s->T0, s->T1);
>                  tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1);
>              } else {
>                  tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]);
> @@ -7512,14 +7511,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              }
>              gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_WRITE);
>              gen_lea_modrm(env, s, modrm);
> -            gen_op_ld_v(s, MO_16, cpu_T1, s->A0);
> +            gen_op_ld_v(s, MO_16, s->T1, s->A0);
>              gen_add_A0_im(s, 2);
>              gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
>              if (dflag == MO_16) {
>                  tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
>              }
>              tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
> -            tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, gdt.limit));
> +            tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, gdt.limit));
>              break;
>
>          CASE_MODRM_MEM_OP(3): /* lidt */
> @@ -7529,14 +7528,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              }
>              gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_WRITE);
>              gen_lea_modrm(env, s, modrm);
> -            gen_op_ld_v(s, MO_16, cpu_T1, s->A0);
> +            gen_op_ld_v(s, MO_16, s->T1, s->A0);
>              gen_add_A0_im(s, 2);
>              gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
>              if (dflag == MO_16) {
>                  tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
>              }
>              tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
> -            tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, idt.limit));
> +            tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, idt.limit));
>              break;
>
>          CASE_MODRM_OP(4): /* smsw */
> @@ -8471,7 +8470,7 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
>  #endif
>
>      dc->T0 = tcg_temp_new();
> -    cpu_T1 = tcg_temp_new();
> +    dc->T1 = tcg_temp_new();
>      dc->A0 = tcg_temp_new();
>
>      cpu_tmp0 = tcg_temp_new();


--
Alex Bennée

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH v3 12/13] target/i386: move x86_64_hregs to DisasContext
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 12/13] target/i386: move x86_64_hregs " Emilio G. Cota
  2018-09-11 20:58   ` Richard Henderson
@ 2018-09-13 14:31   ` Alex Bennée
  1 sibling, 0 replies; 32+ messages in thread
From: Alex Bennée @ 2018-09-13 14:31 UTC (permalink / raw)
  To: Emilio G. Cota
  Cc: qemu-devel, Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost


Emilio G. Cota <cota@braap.org> writes:

> And convert it to a bool to use an existing hole
> in the struct.
>
> Signed-off-by: Emilio G. Cota <cota@braap.org>

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

> ---
>  target/i386/translate.c | 307 ++++++++++++++++++++--------------------
>  1 file changed, 154 insertions(+), 153 deletions(-)
>
> diff --git a/target/i386/translate.c b/target/i386/translate.c
> index 61a98ef872..b8222dc4ba 100644
> --- a/target/i386/translate.c
> +++ b/target/i386/translate.c
> @@ -81,10 +81,6 @@ static TCGv_i64 cpu_bndu[4];
>
>  #include "exec/gen-icount.h"
>
> -#ifdef TARGET_X86_64
> -static int x86_64_hregs;
> -#endif
> -
>  typedef struct DisasContext {
>      DisasContextBase base;
>
> @@ -109,6 +105,9 @@ typedef struct DisasContext {
>      int ss32;   /* 32 bit stack segment */
>      CCOp cc_op;  /* current CC operation */
>      bool cc_op_dirty;
> +#ifdef TARGET_X86_64
> +    bool x86_64_hregs;
> +#endif
>      int addseg; /* non zero if either DS/ES/SS have a non zero base */
>      int f_st;   /* currently unused */
>      int vm86;   /* vm86 mode */
> @@ -307,13 +306,13 @@ static void gen_update_cc_op(DisasContext *s)
>   * [AH, CH, DH, BH], ie "bits 15..8 of register N-4". Return
>   * true for this special case, false otherwise.
>   */
> -static inline bool byte_reg_is_xH(int reg)
> +static inline bool byte_reg_is_xH(DisasContext *s, int reg)
>  {
>      if (reg < 4) {
>          return false;
>      }
>  #ifdef TARGET_X86_64
> -    if (reg >= 8 || x86_64_hregs) {
> +    if (reg >= 8 || s->x86_64_hregs) {
>          return false;
>      }
>  #endif
> @@ -360,11 +359,11 @@ static inline TCGMemOp mo_b_d32(int b, TCGMemOp ot)
>      return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
>  }
>
> -static void gen_op_mov_reg_v(TCGMemOp ot, int reg, TCGv t0)
> +static void gen_op_mov_reg_v(DisasContext *s, TCGMemOp ot, int reg, TCGv t0)
>  {
>      switch(ot) {
>      case MO_8:
> -        if (!byte_reg_is_xH(reg)) {
> +        if (!byte_reg_is_xH(s, reg)) {
>              tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
>          } else {
>              tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
> @@ -388,9 +387,10 @@ static void gen_op_mov_reg_v(TCGMemOp ot, int reg, TCGv t0)
>      }
>  }
>
> -static inline void gen_op_mov_v_reg(TCGMemOp ot, TCGv t0, int reg)
> +static inline
> +void gen_op_mov_v_reg(DisasContext *s, TCGMemOp ot, TCGv t0, int reg)
>  {
> -    if (ot == MO_8 && byte_reg_is_xH(reg)) {
> +    if (ot == MO_8 && byte_reg_is_xH(s, reg)) {
>          tcg_gen_extract_tl(t0, cpu_regs[reg - 4], 8, 8);
>      } else {
>          tcg_gen_mov_tl(t0, cpu_regs[reg]);
> @@ -414,13 +414,13 @@ static inline
>  void gen_op_add_reg_im(DisasContext *s, TCGMemOp size, int reg, int32_t val)
>  {
>      tcg_gen_addi_tl(s->tmp0, cpu_regs[reg], val);
> -    gen_op_mov_reg_v(size, reg, s->tmp0);
> +    gen_op_mov_reg_v(s, size, reg, s->tmp0);
>  }
>
>  static inline void gen_op_add_reg_T0(DisasContext *s, TCGMemOp size, int reg)
>  {
>      tcg_gen_add_tl(s->tmp0, cpu_regs[reg], s->T0);
> -    gen_op_mov_reg_v(size, reg, s->tmp0);
> +    gen_op_mov_reg_v(s, size, reg, s->tmp0);
>  }
>
>  static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
> @@ -438,7 +438,7 @@ static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
>      if (d == OR_TMP0) {
>          gen_op_st_v(s, idx, s->T0, s->A0);
>      } else {
> -        gen_op_mov_reg_v(idx, d, s->T0);
> +        gen_op_mov_reg_v(s, idx, d, s->T0);
>      }
>  }
>
> @@ -1077,7 +1077,7 @@ static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
>
>  static inline void gen_stos(DisasContext *s, TCGMemOp ot)
>  {
> -    gen_op_mov_v_reg(MO_32, s->T0, R_EAX);
> +    gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
>      gen_string_movl_A0_EDI(s);
>      gen_op_st_v(s, ot, s->T0, s->A0);
>      gen_op_movl_T0_Dshift(s, ot);
> @@ -1088,7 +1088,7 @@ static inline void gen_lods(DisasContext *s, TCGMemOp ot)
>  {
>      gen_string_movl_A0_ESI(s);
>      gen_op_ld_v(s, ot, s->T0, s->A0);
> -    gen_op_mov_reg_v(ot, R_EAX, s->T0);
> +    gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
>      gen_op_movl_T0_Dshift(s, ot);
>      gen_op_add_reg_T0(s, s->aflag, R_ESI);
>  }
> @@ -1272,7 +1272,7 @@ static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
>  static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
>  {
>      if (d != OR_TMP0) {
> -        gen_op_mov_v_reg(ot, s1->T0, d);
> +        gen_op_mov_v_reg(s1, ot, s1->T0, d);
>      } else if (!(s1->prefix & PREFIX_LOCK)) {
>          gen_op_ld_v(s1, ot, s1->T0, s1->A0);
>      }
> @@ -1383,7 +1383,7 @@ static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c)
>                                      s1->mem_index, ot | MO_LE);
>      } else {
>          if (d != OR_TMP0) {
> -            gen_op_mov_v_reg(ot, s1->T0, d);
> +            gen_op_mov_v_reg(s1, ot, s1->T0, d);
>          } else {
>              gen_op_ld_v(s1, ot, s1->T0, s1->A0);
>          }
> @@ -1450,7 +1450,7 @@ static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>      if (op1 == OR_TMP0) {
>          gen_op_ld_v(s, ot, s->T0, s->A0);
>      } else {
> -        gen_op_mov_v_reg(ot, s->T0, op1);
> +        gen_op_mov_v_reg(s, ot, s->T0, op1);
>      }
>
>      tcg_gen_andi_tl(s->T1, s->T1, mask);
> @@ -1486,7 +1486,7 @@ static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
>      if (op1 == OR_TMP0)
>          gen_op_ld_v(s, ot, s->T0, s->A0);
>      else
> -        gen_op_mov_v_reg(ot, s->T0, op1);
> +        gen_op_mov_v_reg(s, ot, s->T0, op1);
>
>      op2 &= mask;
>      if (op2 != 0) {
> @@ -1526,7 +1526,7 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
>      if (op1 == OR_TMP0) {
>          gen_op_ld_v(s, ot, s->T0, s->A0);
>      } else {
> -        gen_op_mov_v_reg(ot, s->T0, op1);
> +        gen_op_mov_v_reg(s, ot, s->T0, op1);
>      }
>
>      tcg_gen_andi_tl(s->T1, s->T1, mask);
> @@ -1612,7 +1612,7 @@ static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
>      if (op1 == OR_TMP0) {
>          gen_op_ld_v(s, ot, s->T0, s->A0);
>      } else {
> -        gen_op_mov_v_reg(ot, s->T0, op1);
> +        gen_op_mov_v_reg(s, ot, s->T0, op1);
>      }
>
>      op2 &= mask;
> @@ -1690,7 +1690,7 @@ static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>      if (op1 == OR_TMP0)
>          gen_op_ld_v(s, ot, s->T0, s->A0);
>      else
> -        gen_op_mov_v_reg(ot, s->T0, op1);
> +        gen_op_mov_v_reg(s, ot, s->T0, op1);
>
>      if (is_right) {
>          switch (ot) {
> @@ -1746,7 +1746,7 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>      if (op1 == OR_TMP0) {
>          gen_op_ld_v(s, ot, s->T0, s->A0);
>      } else {
> -        gen_op_mov_v_reg(ot, s->T0, op1);
> +        gen_op_mov_v_reg(s, ot, s->T0, op1);
>      }
>
>      count = tcg_temp_new();
> @@ -1820,7 +1820,7 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>  static void gen_shift(DisasContext *s1, int op, TCGMemOp ot, int d, int s)
>  {
>      if (s != OR_TMP1)
> -        gen_op_mov_v_reg(ot, s1->T1, s);
> +        gen_op_mov_v_reg(s1, ot, s1->T1, s);
>      switch(op) {
>      case OP_ROL:
>          gen_rot_rm_T1(s1, ot, d, 0);
> @@ -2133,23 +2133,23 @@ static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
>      if (mod == 3) {
>          if (is_store) {
>              if (reg != OR_TMP0)
> -                gen_op_mov_v_reg(ot, s->T0, reg);
> -            gen_op_mov_reg_v(ot, rm, s->T0);
> +                gen_op_mov_v_reg(s, ot, s->T0, reg);
> +            gen_op_mov_reg_v(s, ot, rm, s->T0);
>          } else {
> -            gen_op_mov_v_reg(ot, s->T0, rm);
> +            gen_op_mov_v_reg(s, ot, s->T0, rm);
>              if (reg != OR_TMP0)
> -                gen_op_mov_reg_v(ot, reg, s->T0);
> +                gen_op_mov_reg_v(s, ot, reg, s->T0);
>          }
>      } else {
>          gen_lea_modrm(env, s, modrm);
>          if (is_store) {
>              if (reg != OR_TMP0)
> -                gen_op_mov_v_reg(ot, s->T0, reg);
> +                gen_op_mov_v_reg(s, ot, s->T0, reg);
>              gen_op_st_v(s, ot, s->T0, s->A0);
>          } else {
>              gen_op_ld_v(s, ot, s->T0, s->A0);
>              if (reg != OR_TMP0)
> -                gen_op_mov_reg_v(ot, reg, s->T0);
> +                gen_op_mov_reg_v(s, ot, reg, s->T0);
>          }
>      }
>  }
> @@ -2260,7 +2260,7 @@ static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
>
>      tcg_gen_movcond_tl(cc.cond, s->T0, cc.reg, cc.reg2,
>                         s->T0, cpu_regs[reg]);
> -    gen_op_mov_reg_v(ot, reg, s->T0);
> +    gen_op_mov_reg_v(s, ot, reg, s->T0);
>
>      if (cc.mask != -1) {
>          tcg_temp_free(cc.reg);
> @@ -2354,7 +2354,7 @@ static void gen_push_v(DisasContext *s, TCGv val)
>      }
>
>      gen_op_st_v(s, d_ot, val, s->A0);
> -    gen_op_mov_reg_v(a_ot, R_ESP, new_esp);
> +    gen_op_mov_reg_v(s, a_ot, R_ESP, new_esp);
>  }
>
>  /* two step pop is necessary for precise exceptions */
> @@ -2409,7 +2409,7 @@ static void gen_popa(DisasContext *s)
>          tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], i * size);
>          gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
>          gen_op_ld_v(s, d_ot, s->T0, s->A0);
> -        gen_op_mov_reg_v(d_ot, 7 - i, s->T0);
> +        gen_op_mov_reg_v(s, d_ot, 7 - i, s->T0);
>      }
>
>      gen_stack_update(s, 8 * size);
> @@ -2448,11 +2448,11 @@ static void gen_enter(DisasContext *s, int esp_addend, int level)
>      }
>
>      /* Copy the FrameTemp value to EBP.  */
> -    gen_op_mov_reg_v(a_ot, R_EBP, s->T1);
> +    gen_op_mov_reg_v(s, a_ot, R_EBP, s->T1);
>
>      /* Compute the final value of ESP.  */
>      tcg_gen_subi_tl(s->T1, s->T1, esp_addend + size * level);
> -    gen_op_mov_reg_v(a_ot, R_ESP, s->T1);
> +    gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
>  }
>
>  static void gen_leave(DisasContext *s)
> @@ -2465,8 +2465,8 @@ static void gen_leave(DisasContext *s)
>
>      tcg_gen_addi_tl(s->T1, cpu_regs[R_EBP], 1 << d_ot);
>
> -    gen_op_mov_reg_v(d_ot, R_EBP, s->T0);
> -    gen_op_mov_reg_v(a_ot, R_ESP, s->T1);
> +    gen_op_mov_reg_v(s, d_ot, R_EBP, s->T0);
> +    gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
>  }
>
>  static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
> @@ -3598,7 +3598,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  goto illegal_op;
>  #endif
>              }
> -            gen_op_mov_reg_v(ot, reg, s->T0);
> +            gen_op_mov_reg_v(s, ot, reg, s->T0);
>              break;
>          case 0xc4: /* pinsrw */
>          case 0x1c4:
> @@ -3633,7 +3633,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                                  offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
>              }
>              reg = ((modrm >> 3) & 7) | rex_r;
> -            gen_op_mov_reg_v(ot, reg, s->T0);
> +            gen_op_mov_reg_v(s, ot, reg, s->T0);
>              break;
>          case 0x1d6: /* movq ea, xmm */
>              if (mod != 3) {
> @@ -3787,7 +3787,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                                   s->T0, tcg_const_i32(8 << ot));
>
>                  ot = mo_64_32(s->dflag);
> -                gen_op_mov_reg_v(ot, reg, s->T0);
> +                gen_op_mov_reg_v(s, ot, reg, s->T0);
>                  break;
>
>              case 0x1f0: /* crc32 or movbe */
> @@ -3814,7 +3814,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  if ((b & 1) == 0) {
>                      tcg_gen_qemu_ld_tl(s->T0, s->A0,
>                                         s->mem_index, ot | MO_BE);
> -                    gen_op_mov_reg_v(ot, reg, s->T0);
> +                    gen_op_mov_reg_v(s, ot, reg, s->T0);
>                  } else {
>                      tcg_gen_qemu_st_tl(cpu_regs[reg], s->A0,
>                                         s->mem_index, ot | MO_BE);
> @@ -3830,7 +3830,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  ot = mo_64_32(s->dflag);
>                  gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
>                  tcg_gen_andc_tl(s->T0, s->T0, cpu_regs[s->vex_v]);
> -                gen_op_mov_reg_v(ot, reg, s->T0);
> +                gen_op_mov_reg_v(s, ot, reg, s->T0);
>                  gen_op_update1_cc(s);
>                  set_cc_op(s, CC_OP_LOGICB + ot);
>                  break;
> @@ -3868,7 +3868,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      tcg_gen_subi_tl(s->T1, s->T1, 1);
>                      tcg_gen_and_tl(s->T0, s->T0, s->T1);
>
> -                    gen_op_mov_reg_v(ot, reg, s->T0);
> +                    gen_op_mov_reg_v(s, ot, reg, s->T0);
>                      gen_op_update1_cc(s);
>                      set_cc_op(s, CC_OP_LOGICB + ot);
>                  }
> @@ -3896,7 +3896,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  tcg_gen_movi_tl(s->A0, -1);
>                  tcg_gen_shl_tl(s->A0, s->A0, s->T1);
>                  tcg_gen_andc_tl(s->T0, s->T0, s->A0);
> -                gen_op_mov_reg_v(ot, reg, s->T0);
> +                gen_op_mov_reg_v(s, ot, reg, s->T0);
>                  gen_op_update1_cc(s);
>                  set_cc_op(s, CC_OP_BMILGB + ot);
>                  break;
> @@ -4071,7 +4071,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      }
>                      tcg_gen_shr_tl(s->T0, s->T0, s->T1);
>                  }
> -                gen_op_mov_reg_v(ot, reg, s->T0);
> +                gen_op_mov_reg_v(s, ot, reg, s->T0);
>                  break;
>
>              case 0x0f3:
> @@ -4104,7 +4104,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      goto unknown_op;
>                  }
>                  tcg_gen_mov_tl(cpu_cc_dst, s->T0);
> -                gen_op_mov_reg_v(ot, s->vex_v, s->T0);
> +                gen_op_mov_reg_v(s, ot, s->vex_v, s->T0);
>                  set_cc_op(s, CC_OP_BMILGB + ot);
>                  break;
>
> @@ -4145,7 +4145,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      tcg_gen_ld8u_tl(s->T0, cpu_env, offsetof(CPUX86State,
>                                              xmm_regs[reg].ZMM_B(val & 15)));
>                      if (mod == 3) {
> -                        gen_op_mov_reg_v(ot, rm, s->T0);
> +                        gen_op_mov_reg_v(s, ot, rm, s->T0);
>                      } else {
>                          tcg_gen_qemu_st_tl(s->T0, s->A0,
>                                             s->mem_index, MO_UB);
> @@ -4155,7 +4155,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      tcg_gen_ld16u_tl(s->T0, cpu_env, offsetof(CPUX86State,
>                                              xmm_regs[reg].ZMM_W(val & 7)));
>                      if (mod == 3) {
> -                        gen_op_mov_reg_v(ot, rm, s->T0);
> +                        gen_op_mov_reg_v(s, ot, rm, s->T0);
>                      } else {
>                          tcg_gen_qemu_st_tl(s->T0, s->A0,
>                                             s->mem_index, MO_LEUW);
> @@ -4192,7 +4192,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
>                                              xmm_regs[reg].ZMM_L(val & 3)));
>                      if (mod == 3) {
> -                        gen_op_mov_reg_v(ot, rm, s->T0);
> +                        gen_op_mov_reg_v(s, ot, rm, s->T0);
>                      } else {
>                          tcg_gen_qemu_st_tl(s->T0, s->A0,
>                                             s->mem_index, MO_LEUL);
> @@ -4200,7 +4200,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      break;
>                  case 0x20: /* pinsrb */
>                      if (mod == 3) {
> -                        gen_op_mov_v_reg(MO_32, s->T0, rm);
> +                        gen_op_mov_v_reg(s, MO_32, s->T0, rm);
>                      } else {
>                          tcg_gen_qemu_ld_tl(s->T0, s->A0,
>                                             s->mem_index, MO_UB);
> @@ -4251,7 +4251,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      } else { /* pinsrq */
>  #ifdef TARGET_X86_64
>                          if (mod == 3) {
> -                            gen_op_mov_v_reg(ot, s->tmp1_i64, rm);
> +                            gen_op_mov_v_reg(s, ot, s->tmp1_i64, rm);
>                          } else {
>                              tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
>                                                  s->mem_index, MO_LEQ);
> @@ -4326,7 +4326,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b & 31);
>                      tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
>                  }
> -                gen_op_mov_reg_v(ot, reg, s->T0);
> +                gen_op_mov_reg_v(s, ot, reg, s->T0);
>                  break;
>
>              default:
> @@ -4489,7 +4489,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>  #ifdef TARGET_X86_64
>      s->rex_x = 0;
>      s->rex_b = 0;
> -    x86_64_hregs = 0;
> +    s->x86_64_hregs = false;
>  #endif
>      s->rip_offset = 0; /* for relative ip address */
>      s->vex_l = 0;
> @@ -4548,7 +4548,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              rex_r = (b & 0x4) << 1;
>              s->rex_x = (b & 0x2) << 2;
>              REX_B(s) = (b & 0x1) << 3;
> -            x86_64_hregs = 1; /* select uniform byte register addressing */
> +            /* select uniform byte register addressing */
> +            s->x86_64_hregs = true;
>              goto next_byte;
>          }
>          break;
> @@ -4576,7 +4577,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  goto illegal_op;
>              }
>  #ifdef TARGET_X86_64
> -            if (x86_64_hregs) {
> +            if (s->x86_64_hregs) {
>                  goto illegal_op;
>              }
>  #endif
> @@ -4681,12 +4682,12 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                      /* xor reg, reg optimisation */
>                      set_cc_op(s, CC_OP_CLR);
>                      tcg_gen_movi_tl(s->T0, 0);
> -                    gen_op_mov_reg_v(ot, reg, s->T0);
> +                    gen_op_mov_reg_v(s, ot, reg, s->T0);
>                      break;
>                  } else {
>                      opreg = rm;
>                  }
> -                gen_op_mov_v_reg(ot, s->T1, reg);
> +                gen_op_mov_v_reg(s, ot, s->T1, reg);
>                  gen_op(s, op, ot, opreg);
>                  break;
>              case 1: /* OP Gv, Ev */
> @@ -4700,7 +4701,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  } else if (op == OP_XORL && rm == reg) {
>                      goto xor_zero;
>                  } else {
> -                    gen_op_mov_v_reg(ot, s->T1, rm);
> +                    gen_op_mov_v_reg(s, ot, s->T1, rm);
>                  }
>                  gen_op(s, op, ot, reg);
>                  break;
> @@ -4786,7 +4787,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  gen_op_ld_v(s, ot, s->T0, s->A0);
>              }
>          } else {
> -            gen_op_mov_v_reg(ot, s->T0, rm);
> +            gen_op_mov_v_reg(s, ot, s->T0, rm);
>          }
>
>          switch(op) {
> @@ -4809,7 +4810,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  if (mod != 3) {
>                      gen_op_st_v(s, ot, s->T0, s->A0);
>                  } else {
> -                    gen_op_mov_reg_v(ot, rm, s->T0);
> +                    gen_op_mov_reg_v(s, ot, rm, s->T0);
>                  }
>              }
>              break;
> @@ -4847,7 +4848,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  if (mod != 3) {
>                      gen_op_st_v(s, ot, s->T0, s->A0);
>                  } else {
> -                    gen_op_mov_reg_v(ot, rm, s->T0);
> +                    gen_op_mov_reg_v(s, ot, rm, s->T0);
>                  }
>              }
>              gen_op_update_neg_cc(s);
> @@ -4856,26 +4857,26 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          case 4: /* mul */
>              switch(ot) {
>              case MO_8:
> -                gen_op_mov_v_reg(MO_8, s->T1, R_EAX);
> +                gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
>                  tcg_gen_ext8u_tl(s->T0, s->T0);
>                  tcg_gen_ext8u_tl(s->T1, s->T1);
>                  /* XXX: use 32 bit mul which could be faster */
>                  tcg_gen_mul_tl(s->T0, s->T0, s->T1);
> -                gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
> +                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
>                  tcg_gen_mov_tl(cpu_cc_dst, s->T0);
>                  tcg_gen_andi_tl(cpu_cc_src, s->T0, 0xff00);
>                  set_cc_op(s, CC_OP_MULB);
>                  break;
>              case MO_16:
> -                gen_op_mov_v_reg(MO_16, s->T1, R_EAX);
> +                gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
>                  tcg_gen_ext16u_tl(s->T0, s->T0);
>                  tcg_gen_ext16u_tl(s->T1, s->T1);
>                  /* XXX: use 32 bit mul which could be faster */
>                  tcg_gen_mul_tl(s->T0, s->T0, s->T1);
> -                gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
> +                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
>                  tcg_gen_mov_tl(cpu_cc_dst, s->T0);
>                  tcg_gen_shri_tl(s->T0, s->T0, 16);
> -                gen_op_mov_reg_v(MO_16, R_EDX, s->T0);
> +                gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
>                  tcg_gen_mov_tl(cpu_cc_src, s->T0);
>                  set_cc_op(s, CC_OP_MULW);
>                  break;
> @@ -4905,29 +4906,29 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          case 5: /* imul */
>              switch(ot) {
>              case MO_8:
> -                gen_op_mov_v_reg(MO_8, s->T1, R_EAX);
> +                gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
>                  tcg_gen_ext8s_tl(s->T0, s->T0);
>                  tcg_gen_ext8s_tl(s->T1, s->T1);
>                  /* XXX: use 32 bit mul which could be faster */
>                  tcg_gen_mul_tl(s->T0, s->T0, s->T1);
> -                gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
> +                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
>                  tcg_gen_mov_tl(cpu_cc_dst, s->T0);
>                  tcg_gen_ext8s_tl(s->tmp0, s->T0);
>                  tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
>                  set_cc_op(s, CC_OP_MULB);
>                  break;
>              case MO_16:
> -                gen_op_mov_v_reg(MO_16, s->T1, R_EAX);
> +                gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
>                  tcg_gen_ext16s_tl(s->T0, s->T0);
>                  tcg_gen_ext16s_tl(s->T1, s->T1);
>                  /* XXX: use 32 bit mul which could be faster */
>                  tcg_gen_mul_tl(s->T0, s->T0, s->T1);
> -                gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
> +                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
>                  tcg_gen_mov_tl(cpu_cc_dst, s->T0);
>                  tcg_gen_ext16s_tl(s->tmp0, s->T0);
>                  tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
>                  tcg_gen_shri_tl(s->T0, s->T0, 16);
> -                gen_op_mov_reg_v(MO_16, R_EDX, s->T0);
> +                gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
>                  set_cc_op(s, CC_OP_MULW);
>                  break;
>              default:
> @@ -5026,7 +5027,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              if (op >= 2 && op != 3 && op != 5)
>                  gen_op_ld_v(s, ot, s->T0, s->A0);
>          } else {
> -            gen_op_mov_v_reg(ot, s->T0, rm);
> +            gen_op_mov_v_reg(s, ot, s->T0, rm);
>          }
>
>          switch(op) {
> @@ -5115,7 +5116,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          reg = ((modrm >> 3) & 7) | rex_r;
>
>          gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> -        gen_op_mov_v_reg(ot, s->T1, reg);
> +        gen_op_mov_v_reg(s, ot, s->T1, reg);
>          gen_op_testl_T0_T1_cc(s);
>          set_cc_op(s, CC_OP_LOGICB + ot);
>          break;
> @@ -5125,7 +5126,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          ot = mo_b_d(b, dflag);
>          val = insn_get(env, s, ot);
>
> -        gen_op_mov_v_reg(ot, s->T0, OR_EAX);
> +        gen_op_mov_v_reg(s, ot, s->T0, OR_EAX);
>          tcg_gen_movi_tl(s->T1, val);
>          gen_op_testl_T0_T1_cc(s);
>          set_cc_op(s, CC_OP_LOGICB + ot);
> @@ -5135,20 +5136,20 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          switch (dflag) {
>  #ifdef TARGET_X86_64
>          case MO_64:
> -            gen_op_mov_v_reg(MO_32, s->T0, R_EAX);
> +            gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
>              tcg_gen_ext32s_tl(s->T0, s->T0);
> -            gen_op_mov_reg_v(MO_64, R_EAX, s->T0);
> +            gen_op_mov_reg_v(s, MO_64, R_EAX, s->T0);
>              break;
>  #endif
>          case MO_32:
> -            gen_op_mov_v_reg(MO_16, s->T0, R_EAX);
> +            gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
>              tcg_gen_ext16s_tl(s->T0, s->T0);
> -            gen_op_mov_reg_v(MO_32, R_EAX, s->T0);
> +            gen_op_mov_reg_v(s, MO_32, R_EAX, s->T0);
>              break;
>          case MO_16:
> -            gen_op_mov_v_reg(MO_8, s->T0, R_EAX);
> +            gen_op_mov_v_reg(s, MO_8, s->T0, R_EAX);
>              tcg_gen_ext8s_tl(s->T0, s->T0);
> -            gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
> +            gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
>              break;
>          default:
>              tcg_abort();
> @@ -5158,22 +5159,22 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          switch (dflag) {
>  #ifdef TARGET_X86_64
>          case MO_64:
> -            gen_op_mov_v_reg(MO_64, s->T0, R_EAX);
> +            gen_op_mov_v_reg(s, MO_64, s->T0, R_EAX);
>              tcg_gen_sari_tl(s->T0, s->T0, 63);
> -            gen_op_mov_reg_v(MO_64, R_EDX, s->T0);
> +            gen_op_mov_reg_v(s, MO_64, R_EDX, s->T0);
>              break;
>  #endif
>          case MO_32:
> -            gen_op_mov_v_reg(MO_32, s->T0, R_EAX);
> +            gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
>              tcg_gen_ext32s_tl(s->T0, s->T0);
>              tcg_gen_sari_tl(s->T0, s->T0, 31);
> -            gen_op_mov_reg_v(MO_32, R_EDX, s->T0);
> +            gen_op_mov_reg_v(s, MO_32, R_EDX, s->T0);
>              break;
>          case MO_16:
> -            gen_op_mov_v_reg(MO_16, s->T0, R_EAX);
> +            gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
>              tcg_gen_ext16s_tl(s->T0, s->T0);
>              tcg_gen_sari_tl(s->T0, s->T0, 15);
> -            gen_op_mov_reg_v(MO_16, R_EDX, s->T0);
> +            gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
>              break;
>          default:
>              tcg_abort();
> @@ -5197,7 +5198,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              val = (int8_t)insn_get(env, s, MO_8);
>              tcg_gen_movi_tl(s->T1, val);
>          } else {
> -            gen_op_mov_v_reg(ot, s->T1, reg);
> +            gen_op_mov_v_reg(s, ot, s->T1, reg);
>          }
>          switch (ot) {
>  #ifdef TARGET_X86_64
> @@ -5227,7 +5228,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              tcg_gen_mov_tl(cpu_cc_dst, s->T0);
>              tcg_gen_ext16s_tl(s->tmp0, s->T0);
>              tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
> -            gen_op_mov_reg_v(ot, reg, s->T0);
> +            gen_op_mov_reg_v(s, ot, reg, s->T0);
>              break;
>          }
>          set_cc_op(s, CC_OP_MULB + ot);
> @@ -5238,13 +5239,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          modrm = x86_ldub_code(env, s);
>          reg = ((modrm >> 3) & 7) | rex_r;
>          mod = (modrm >> 6) & 3;
> -        gen_op_mov_v_reg(ot, s->T0, reg);
> +        gen_op_mov_v_reg(s, ot, s->T0, reg);
>          if (mod == 3) {
>              rm = (modrm & 7) | REX_B(s);
> -            gen_op_mov_v_reg(ot, s->T1, rm);
> +            gen_op_mov_v_reg(s, ot, s->T1, rm);
>              tcg_gen_add_tl(s->T0, s->T0, s->T1);
> -            gen_op_mov_reg_v(ot, reg, s->T1);
> -            gen_op_mov_reg_v(ot, rm, s->T0);
> +            gen_op_mov_reg_v(s, ot, reg, s->T1);
> +            gen_op_mov_reg_v(s, ot, rm, s->T0);
>          } else {
>              gen_lea_modrm(env, s, modrm);
>              if (s->prefix & PREFIX_LOCK) {
> @@ -5256,7 +5257,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  tcg_gen_add_tl(s->T0, s->T0, s->T1);
>                  gen_op_st_v(s, ot, s->T0, s->A0);
>              }
> -            gen_op_mov_reg_v(ot, reg, s->T1);
> +            gen_op_mov_reg_v(s, ot, reg, s->T1);
>          }
>          gen_op_update2_cc(s);
>          set_cc_op(s, CC_OP_ADDB + ot);
> @@ -5273,7 +5274,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              oldv = tcg_temp_new();
>              newv = tcg_temp_new();
>              cmpv = tcg_temp_new();
> -            gen_op_mov_v_reg(ot, newv, reg);
> +            gen_op_mov_v_reg(s, ot, newv, reg);
>              tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]);
>
>              if (s->prefix & PREFIX_LOCK) {
> @@ -5283,11 +5284,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  gen_lea_modrm(env, s, modrm);
>                  tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
>                                            s->mem_index, ot | MO_LE);
> -                gen_op_mov_reg_v(ot, R_EAX, oldv);
> +                gen_op_mov_reg_v(s, ot, R_EAX, oldv);
>              } else {
>                  if (mod == 3) {
>                      rm = (modrm & 7) | REX_B(s);
> -                    gen_op_mov_v_reg(ot, oldv, rm);
> +                    gen_op_mov_v_reg(s, ot, oldv, rm);
>                  } else {
>                      gen_lea_modrm(env, s, modrm);
>                      gen_op_ld_v(s, ot, oldv, s->A0);
> @@ -5298,15 +5299,15 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  /* store value = (old == cmp ? new : old);  */
>                  tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
>                  if (mod == 3) {
> -                    gen_op_mov_reg_v(ot, R_EAX, oldv);
> -                    gen_op_mov_reg_v(ot, rm, newv);
> +                    gen_op_mov_reg_v(s, ot, R_EAX, oldv);
> +                    gen_op_mov_reg_v(s, ot, rm, newv);
>                  } else {
>                      /* Perform an unconditional store cycle like physical cpu;
>                         must be before changing accumulator to ensure
>                         idempotency if the store faults and the instruction
>                         is restarted */
>                      gen_op_st_v(s, ot, newv, s->A0);
> -                    gen_op_mov_reg_v(ot, R_EAX, oldv);
> +                    gen_op_mov_reg_v(s, ot, R_EAX, oldv);
>                  }
>              }
>              tcg_gen_mov_tl(cpu_cc_src, oldv);
> @@ -5351,14 +5352,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          /**************************/
>          /* push/pop */
>      case 0x50 ... 0x57: /* push */
> -        gen_op_mov_v_reg(MO_32, s->T0, (b & 7) | REX_B(s));
> +        gen_op_mov_v_reg(s, MO_32, s->T0, (b & 7) | REX_B(s));
>          gen_push_v(s, s->T0);
>          break;
>      case 0x58 ... 0x5f: /* pop */
>          ot = gen_pop_T0(s);
>          /* NOTE: order is important for pop %sp */
>          gen_pop_update(s, ot);
> -        gen_op_mov_reg_v(ot, (b & 7) | REX_B(s), s->T0);
> +        gen_op_mov_reg_v(s, ot, (b & 7) | REX_B(s), s->T0);
>          break;
>      case 0x60: /* pusha */
>          if (CODE64(s))
> @@ -5388,7 +5389,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              /* NOTE: order is important for pop %sp */
>              gen_pop_update(s, ot);
>              rm = (modrm & 7) | REX_B(s);
> -            gen_op_mov_reg_v(ot, rm, s->T0);
> +            gen_op_mov_reg_v(s, ot, rm, s->T0);
>          } else {
>              /* NOTE: order is important too for MMU exceptions */
>              s->popl_esp_hack = 1 << ot;
> @@ -5478,7 +5479,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          if (mod != 3) {
>              gen_op_st_v(s, ot, s->T0, s->A0);
>          } else {
> -            gen_op_mov_reg_v(ot, (modrm & 7) | REX_B(s), s->T0);
> +            gen_op_mov_reg_v(s, ot, (modrm & 7) | REX_B(s), s->T0);
>          }
>          break;
>      case 0x8a:
> @@ -5488,7 +5489,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          reg = ((modrm >> 3) & 7) | rex_r;
>
>          gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> -        gen_op_mov_reg_v(ot, reg, s->T0);
> +        gen_op_mov_reg_v(s, ot, reg, s->T0);
>          break;
>      case 0x8e: /* mov seg, Gv */
>          modrm = x86_ldub_code(env, s);
> @@ -5540,10 +5541,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              rm = (modrm & 7) | REX_B(s);
>
>              if (mod == 3) {
> -                if (s_ot == MO_SB && byte_reg_is_xH(rm)) {
> +                if (s_ot == MO_SB && byte_reg_is_xH(s, rm)) {
>                      tcg_gen_sextract_tl(s->T0, cpu_regs[rm - 4], 8, 8);
>                  } else {
> -                    gen_op_mov_v_reg(ot, s->T0, rm);
> +                    gen_op_mov_v_reg(s, ot, s->T0, rm);
>                      switch (s_ot) {
>                      case MO_UB:
>                          tcg_gen_ext8u_tl(s->T0, s->T0);
> @@ -5560,11 +5561,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                          break;
>                      }
>                  }
> -                gen_op_mov_reg_v(d_ot, reg, s->T0);
> +                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
>              } else {
>                  gen_lea_modrm(env, s, modrm);
>                  gen_op_ld_v(s, s_ot, s->T0, s->A0);
> -                gen_op_mov_reg_v(d_ot, reg, s->T0);
> +                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
>              }
>          }
>          break;
> @@ -5579,7 +5580,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              AddressParts a = gen_lea_modrm_0(env, s, modrm);
>              TCGv ea = gen_lea_modrm_1(s, a);
>              gen_lea_v_seg(s, s->aflag, ea, -1, -1);
> -            gen_op_mov_reg_v(dflag, reg, s->A0);
> +            gen_op_mov_reg_v(s, dflag, reg, s->A0);
>          }
>          break;
>
> @@ -5605,9 +5606,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              gen_add_A0_ds_seg(s);
>              if ((b & 2) == 0) {
>                  gen_op_ld_v(s, ot, s->T0, s->A0);
> -                gen_op_mov_reg_v(ot, R_EAX, s->T0);
> +                gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
>              } else {
> -                gen_op_mov_v_reg(ot, s->T0, R_EAX);
> +                gen_op_mov_v_reg(s, ot, s->T0, R_EAX);
>                  gen_op_st_v(s, ot, s->T0, s->A0);
>              }
>          }
> @@ -5619,12 +5620,12 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          gen_extu(s->aflag, s->A0);
>          gen_add_A0_ds_seg(s);
>          gen_op_ld_v(s, MO_8, s->T0, s->A0);
> -        gen_op_mov_reg_v(MO_8, R_EAX, s->T0);
> +        gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
>          break;
>      case 0xb0 ... 0xb7: /* mov R, Ib */
>          val = insn_get(env, s, MO_8);
>          tcg_gen_movi_tl(s->T0, val);
> -        gen_op_mov_reg_v(MO_8, (b & 7) | REX_B(s), s->T0);
> +        gen_op_mov_reg_v(s, MO_8, (b & 7) | REX_B(s), s->T0);
>          break;
>      case 0xb8 ... 0xbf: /* mov R, Iv */
>  #ifdef TARGET_X86_64
> @@ -5634,7 +5635,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              tmp = x86_ldq_code(env, s);
>              reg = (b & 7) | REX_B(s);
>              tcg_gen_movi_tl(s->T0, tmp);
> -            gen_op_mov_reg_v(MO_64, reg, s->T0);
> +            gen_op_mov_reg_v(s, MO_64, reg, s->T0);
>          } else
>  #endif
>          {
> @@ -5642,7 +5643,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              val = insn_get(env, s, ot);
>              reg = (b & 7) | REX_B(s);
>              tcg_gen_movi_tl(s->T0, val);
> -            gen_op_mov_reg_v(ot, reg, s->T0);
> +            gen_op_mov_reg_v(s, ot, reg, s->T0);
>          }
>          break;
>
> @@ -5661,17 +5662,17 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          if (mod == 3) {
>              rm = (modrm & 7) | REX_B(s);
>          do_xchg_reg:
> -            gen_op_mov_v_reg(ot, s->T0, reg);
> -            gen_op_mov_v_reg(ot, s->T1, rm);
> -            gen_op_mov_reg_v(ot, rm, s->T0);
> -            gen_op_mov_reg_v(ot, reg, s->T1);
> +            gen_op_mov_v_reg(s, ot, s->T0, reg);
> +            gen_op_mov_v_reg(s, ot, s->T1, rm);
> +            gen_op_mov_reg_v(s, ot, rm, s->T0);
> +            gen_op_mov_reg_v(s, ot, reg, s->T1);
>          } else {
>              gen_lea_modrm(env, s, modrm);
> -            gen_op_mov_v_reg(ot, s->T0, reg);
> +            gen_op_mov_v_reg(s, ot, s->T0, reg);
>              /* for xchg, lock is implicit */
>              tcg_gen_atomic_xchg_tl(s->T1, s->A0, s->T0,
>                                     s->mem_index, ot | MO_LE);
> -            gen_op_mov_reg_v(ot, reg, s->T1);
> +            gen_op_mov_reg_v(s, ot, reg, s->T1);
>          }
>          break;
>      case 0xc4: /* les Gv */
> @@ -5704,7 +5705,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          gen_op_ld_v(s, MO_16, s->T0, s->A0);
>          gen_movl_seg_T0(s, op);
>          /* then put the data */
> -        gen_op_mov_reg_v(ot, reg, s->T1);
> +        gen_op_mov_reg_v(s, ot, reg, s->T1);
>          if (s->base.is_jmp) {
>              gen_jmp_im(s, s->pc - s->cs_base);
>              gen_eob(s);
> @@ -5783,7 +5784,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          } else {
>              opreg = rm;
>          }
> -        gen_op_mov_v_reg(ot, s->T1, reg);
> +        gen_op_mov_v_reg(s, ot, s->T1, reg);
>
>          if (shift) {
>              TCGv imm = tcg_const_tl(x86_ldub_code(env, s));
> @@ -6244,7 +6245,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  case 0:
>                      gen_helper_fnstsw(s->tmp2_i32, cpu_env);
>                      tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
> -                    gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
> +                    gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
>                      break;
>                  default:
>                      goto unknown_op;
> @@ -6397,7 +6398,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>  	}
>          tcg_gen_movi_i32(s->tmp2_i32, val);
>          gen_helper_in_func(ot, s->T1, s->tmp2_i32);
> -        gen_op_mov_reg_v(ot, R_EAX, s->T1);
> +        gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
>          gen_bpt_io(s, s->tmp2_i32, ot);
>          if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
>              gen_io_end();
> @@ -6411,7 +6412,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          tcg_gen_movi_tl(s->T0, val);
>          gen_check_io(s, ot, pc_start - s->cs_base,
>                       svm_is_rep(prefixes));
> -        gen_op_mov_v_reg(ot, s->T1, R_EAX);
> +        gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
>
>          if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
>              gen_io_start();
> @@ -6436,7 +6437,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>  	}
>          tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
>          gen_helper_in_func(ot, s->T1, s->tmp2_i32);
> -        gen_op_mov_reg_v(ot, R_EAX, s->T1);
> +        gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
>          gen_bpt_io(s, s->tmp2_i32, ot);
>          if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
>              gen_io_end();
> @@ -6449,7 +6450,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
>          gen_check_io(s, ot, pc_start - s->cs_base,
>                       svm_is_rep(prefixes));
> -        gen_op_mov_v_reg(ot, s->T1, R_EAX);
> +        gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
>
>          if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
>              gen_io_start();
> @@ -6708,7 +6709,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>      case 0x9e: /* sahf */
>          if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
>              goto illegal_op;
> -        gen_op_mov_v_reg(MO_8, s->T0, R_AH);
> +        gen_op_mov_v_reg(s, MO_8, s->T0, R_AH);
>          gen_compute_eflags(s);
>          tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
>          tcg_gen_andi_tl(s->T0, s->T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
> @@ -6720,7 +6721,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          gen_compute_eflags(s);
>          /* Note: gen_compute_eflags() only gives the condition codes */
>          tcg_gen_ori_tl(s->T0, cpu_cc_src, 0x02);
> -        gen_op_mov_reg_v(MO_8, R_AH, s->T0);
> +        gen_op_mov_reg_v(s, MO_8, R_AH, s->T0);
>          break;
>      case 0xf5: /* cmc */
>          gen_compute_eflags(s);
> @@ -6758,7 +6759,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  gen_op_ld_v(s, ot, s->T0, s->A0);
>              }
>          } else {
> -            gen_op_mov_v_reg(ot, s->T0, rm);
> +            gen_op_mov_v_reg(s, ot, s->T0, rm);
>          }
>          /* load shift */
>          val = x86_ldub_code(env, s);
> @@ -6784,7 +6785,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          reg = ((modrm >> 3) & 7) | rex_r;
>          mod = (modrm >> 6) & 3;
>          rm = (modrm & 7) | REX_B(s);
> -        gen_op_mov_v_reg(MO_32, s->T1, reg);
> +        gen_op_mov_v_reg(s, MO_32, s->T1, reg);
>          if (mod != 3) {
>              AddressParts a = gen_lea_modrm_0(env, s, modrm);
>              /* specific case: we need to add a displacement */
> @@ -6797,7 +6798,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  gen_op_ld_v(s, ot, s->T0, s->A0);
>              }
>          } else {
> -            gen_op_mov_v_reg(ot, s->T0, rm);
> +            gen_op_mov_v_reg(s, ot, s->T0, rm);
>          }
>      bt_op:
>          tcg_gen_andi_tl(s->T1, s->T1, (1 << (3 + ot)) - 1);
> @@ -6847,7 +6848,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  if (mod != 3) {
>                      gen_op_st_v(s, ot, s->T0, s->A0);
>                  } else {
> -                    gen_op_mov_reg_v(ot, rm, s->T0);
> +                    gen_op_mov_reg_v(s, ot, rm, s->T0);
>                  }
>              }
>          }
> @@ -6930,7 +6931,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]);
>              }
>          }
> -        gen_op_mov_reg_v(ot, reg, s->T0);
> +        gen_op_mov_reg_v(s, ot, reg, s->T0);
>          break;
>          /************************/
>          /* bcd */
> @@ -7070,7 +7071,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          mod = (modrm >> 6) & 3;
>          if (mod == 3)
>              goto illegal_op;
> -        gen_op_mov_v_reg(ot, s->T0, reg);
> +        gen_op_mov_v_reg(s, ot, s->T0, reg);
>          gen_lea_modrm(env, s, modrm);
>          tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
>          if (ot == MO_16) {
> @@ -7083,16 +7084,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          reg = (b & 7) | REX_B(s);
>  #ifdef TARGET_X86_64
>          if (dflag == MO_64) {
> -            gen_op_mov_v_reg(MO_64, s->T0, reg);
> +            gen_op_mov_v_reg(s, MO_64, s->T0, reg);
>              tcg_gen_bswap64_i64(s->T0, s->T0);
> -            gen_op_mov_reg_v(MO_64, reg, s->T0);
> +            gen_op_mov_reg_v(s, MO_64, reg, s->T0);
>          } else
>  #endif
>          {
> -            gen_op_mov_v_reg(MO_32, s->T0, reg);
> +            gen_op_mov_v_reg(s, MO_32, s->T0, reg);
>              tcg_gen_ext32u_tl(s->T0, s->T0);
>              tcg_gen_bswap32_tl(s->T0, s->T0);
> -            gen_op_mov_reg_v(MO_32, reg, s->T0);
> +            gen_op_mov_reg_v(s, MO_32, reg, s->T0);
>          }
>          break;
>      case 0xd6: /* salc */
> @@ -7100,7 +7101,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              goto illegal_op;
>          gen_compute_eflags_c(s, s->T0);
>          tcg_gen_neg_tl(s->T0, s->T0);
> -        gen_op_mov_reg_v(MO_8, R_EAX, s->T0);
> +        gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
>          break;
>      case 0xe0: /* loopnz */
>      case 0xe1: /* loopz */
> @@ -7661,16 +7662,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              rm = (modrm & 7) | REX_B(s);
>
>              if (mod == 3) {
> -                gen_op_mov_v_reg(MO_32, s->T0, rm);
> +                gen_op_mov_v_reg(s, MO_32, s->T0, rm);
>                  /* sign extend */
>                  if (d_ot == MO_64) {
>                      tcg_gen_ext32s_tl(s->T0, s->T0);
>                  }
> -                gen_op_mov_reg_v(d_ot, reg, s->T0);
> +                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
>              } else {
>                  gen_lea_modrm(env, s, modrm);
>                  gen_op_ld_v(s, MO_32 | MO_SIGN, s->T0, s->A0);
> -                gen_op_mov_reg_v(d_ot, reg, s->T0);
> +                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
>              }
>          } else
>  #endif
> @@ -7694,10 +7695,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  a0 = tcg_temp_local_new();
>                  tcg_gen_mov_tl(a0, s->A0);
>              } else {
> -                gen_op_mov_v_reg(ot, t0, rm);
> +                gen_op_mov_v_reg(s, ot, t0, rm);
>                  a0 = NULL;
>              }
> -            gen_op_mov_v_reg(ot, t1, reg);
> +            gen_op_mov_v_reg(s, ot, t1, reg);
>              tcg_gen_andi_tl(s->tmp0, t0, 3);
>              tcg_gen_andi_tl(t1, t1, 3);
>              tcg_gen_movi_tl(t2, 0);
> @@ -7711,7 +7712,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  gen_op_st_v(s, ot, t0, a0);
>                  tcg_temp_free(a0);
>             } else {
> -                gen_op_mov_reg_v(ot, rm, t0);
> +                gen_op_mov_reg_v(s, ot, rm, t0);
>              }
>              gen_compute_eflags(s);
>              tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
> @@ -7742,7 +7743,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              tcg_gen_andi_tl(s->tmp0, cpu_cc_src, CC_Z);
>              label1 = gen_new_label();
>              tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
> -            gen_op_mov_reg_v(ot, reg, t0);
> +            gen_op_mov_reg_v(s, ot, reg, t0);
>              gen_set_label(label1);
>              set_cc_op(s, CC_OP_EFLAGS);
>              tcg_temp_free(t0);
> @@ -7996,7 +7997,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                      if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
>                          gen_io_start();
>                      }
> -                    gen_op_mov_v_reg(ot, s->T0, rm);
> +                    gen_op_mov_v_reg(s, ot, s->T0, rm);
>                      gen_helper_write_crN(cpu_env, tcg_const_i32(reg),
>                                           s->T0);
>                      if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
> @@ -8009,7 +8010,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                          gen_io_start();
>                      }
>                      gen_helper_read_crN(s->T0, cpu_env, tcg_const_i32(reg));
> -                    gen_op_mov_reg_v(ot, rm, s->T0);
> +                    gen_op_mov_reg_v(s, ot, rm, s->T0);
>                      if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
>                          gen_io_end();
>                      }
> @@ -8042,7 +8043,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              }
>              if (b & 2) {
>                  gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg);
> -                gen_op_mov_v_reg(ot, s->T0, rm);
> +                gen_op_mov_v_reg(s, ot, s->T0, rm);
>                  tcg_gen_movi_i32(s->tmp2_i32, reg);
>                  gen_helper_set_dr(cpu_env, s->tmp2_i32, s->T0);
>                  gen_jmp_im(s, s->pc - s->cs_base);
> @@ -8051,7 +8052,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
>                  tcg_gen_movi_i32(s->tmp2_i32, reg);
>                  gen_helper_get_dr(s->T0, cpu_env, s->tmp2_i32);
> -                gen_op_mov_reg_v(ot, rm, s->T0);
> +                gen_op_mov_reg_v(s, ot, rm, s->T0);
>              }
>          }
>          break;
> @@ -8313,7 +8314,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          gen_extu(ot, s->T0);
>          tcg_gen_mov_tl(cpu_cc_src, s->T0);
>          tcg_gen_ctpop_tl(s->T0, s->T0);
> -        gen_op_mov_reg_v(ot, reg, s->T0);
> +        gen_op_mov_reg_v(s, ot, reg, s->T0);
>
>          set_cc_op(s, CC_OP_POPCNT);
>          break;


--
Alex Bennée

^ permalink raw reply	[flat|nested] 32+ messages in thread

end of thread, other threads:[~2018-09-13 14:31 UTC | newest]

Thread overview: 32+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-09-11 20:28 [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg Emilio G. Cota
2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 01/13] target/i386: move cpu_cc_srcT to DisasContext Emilio G. Cota
2018-09-11 20:44   ` Richard Henderson
2018-09-13 14:21   ` Alex Bennée
2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 02/13] target/i386: move cpu_A0 " Emilio G. Cota
2018-09-11 20:45   ` Richard Henderson
2018-09-13 14:23   ` Alex Bennée
2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 03/13] target/i386: move cpu_T0 " Emilio G. Cota
2018-09-11 20:47   ` Richard Henderson
2018-09-13 14:25   ` Alex Bennée
2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 04/13] target/i386: move cpu_T1 " Emilio G. Cota
2018-09-11 20:48   ` Richard Henderson
2018-09-13 14:26   ` Alex Bennée
2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 05/13] target/i386: move cpu_tmp0 " Emilio G. Cota
2018-09-11 20:51   ` Richard Henderson
2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 06/13] target/i386: move cpu_tmp4 " Emilio G. Cota
2018-09-11 20:52   ` Richard Henderson
2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 07/13] target/i386: move cpu_ptr0 " Emilio G. Cota
2018-09-11 20:53   ` Richard Henderson
2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 08/13] target/i386: move cpu_ptr1 " Emilio G. Cota
2018-09-11 20:54   ` Richard Henderson
2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 09/13] target/i386: move cpu_tmp2_i32 " Emilio G. Cota
2018-09-11 20:55   ` Richard Henderson
2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 10/13] target/i386: move cpu_tmp3_i32 " Emilio G. Cota
2018-09-11 20:56   ` Richard Henderson
2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 11/13] target/i386: move cpu_tmp1_i64 " Emilio G. Cota
2018-09-11 20:57   ` Richard Henderson
2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 12/13] target/i386: move x86_64_hregs " Emilio G. Cota
2018-09-11 20:58   ` Richard Henderson
2018-09-13 14:31   ` Alex Bennée
2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 13/13] configure: enable mttcg for i386 and x86_64 Emilio G. Cota
2018-09-12 12:46 ` [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg Paolo Bonzini

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).