Date: Sun, 27 Sep 2009 00:41:56 +0200
From: Aurelien Jarno <aurelien@aurel32.net>
To: Laurent Desnogues
Cc: qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH][RFC] x86: use globals for CPU registers
Message-ID: <20090926224156.GA22663@hall.aurel32.net>
In-Reply-To: <761ea48b0909131400i33efc212nce026adb75a4f5d2@mail.gmail.com>

On Sun, Sep 13, 2009 at 11:00:08PM +0200, Laurent Desnogues wrote:
> Hello,
>
> this patch is a proposal to use globals for the 8 or 16 CPU
> registers on i386 and x86_64.
>
> I measured the improvement in the following conditions:
>
> - Machine: i7 920
> - Software: Fedora 11 x86_64, gcc 4.4.1
> - Benchmark: SPEC2000 gcc with expr.i input
> - User mode
> - i386 and x86_64 hosts and targets, with and without the patch
>   (8 combinations)
>
> The results are:
>
> qemu-i386_on-i386          15.82user 0.05system 0:15.91elapsed
> qemu-i386_on-i386-reg      15.40user 0.02system 0:15.43elapsed
> qemu-i386_on-x86_64        15.65user 0.05system 0:15.71elapsed
> qemu-i386_on-x86_64-reg    15.11user 0.03system 0:15.15elapsed
> qemu-x86_64_on-i386        mmap: No such device or address
> qemu-x86_64_on-i386-reg    mmap: No such device or address
> qemu-x86_64_on-x86_64      18.42user 0.07system 0:18.49elapsed
> qemu-x86_64_on-x86_64-reg  13.22user 0.06system 0:13.31elapsed
>
> Given my lack of knowledge of system QEMU, I will leave it to
> someone else to measure the speedup.

I'll try to provide benchmarks later.

> A previous version of this patch, which only handled the i386
> target, was tested by Malc, who got a speedup running OpenSSL on
> his G4. It was also sent to Fabrice, who asked me to send it to
> the mailing list.
>
> The use of globals is controlled by USE_REGS so that reviewers
> can quickly test the benefit (or the lack of it).
>
> Comments are welcome (except about the obvious presence of //
> comments, which are only temporary). I need to optimize a few
> things once I'm sure the temporaries (cpu_tmp0, ...) are not used
> outside of the modified functions. x86_64 was coded in a hurry
> and is perhaps buggy.

It basically looks good. Please find my comments inline.
> Laurent
>
> Signed-off-by: Laurent Desnogues
> diff --git a/target-i386/translate.c b/target-i386/translate.c
> index 335fc08..dc2fcde 100644
> --- a/target-i386/translate.c
> +++ b/target-i386/translate.c
> @@ -58,10 +58,15 @@
>
>  //#define MACRO_TEST 1
>
> +#define USE_REGS
> +
>  /* global register indexes */
>  static TCGv_ptr cpu_env;
>  static TCGv cpu_A0, cpu_cc_src, cpu_cc_dst, cpu_cc_tmp;
>  static TCGv_i32 cpu_cc_op;
> +#ifdef USE_REGS
> +static TCGv cpu_regs[CPU_NB_REGS];
> +#endif
>  /* local temps */
>  static TCGv cpu_T[2], cpu_T3;
>  /* local register indexes (only used inside old micro ops) */
> @@ -269,70 +274,95 @@ static inline void gen_op_andl_A0_ffff(void)
>  #define REG_LH_OFFSET 4
>  #endif
>
> +#ifdef USE_REGS
> +#ifdef TARGET_X86_64
> +/* #warning NYI */
> +#endif
> +
>  static inline void gen_op_mov_reg_v(int ot, int reg, TCGv t0)
>  {
> +    TCGv tmp;
> +
>      switch(ot) {
>      case OT_BYTE:
> +        tmp = tcg_temp_new();
> +        tcg_gen_andi_tl(tmp, t0, 0xff);

tcg_gen_ext8u_tl(tmp, t0); ?

>          if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
> -            tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_B_OFFSET);
> +            tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xff);
> +            tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
> +            //tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_B_OFFSET);
>          } else {
> -            tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
> +            tcg_gen_shli_tl(tmp, tmp, 8);
> +            tcg_gen_andi_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], ~0xff00);
> +            tcg_gen_or_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], tmp);
> +            //tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
>          }
> +        tcg_temp_free(tmp);
>          break;
>      case OT_WORD:
> -        tcg_gen_st16_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
> +        tmp = tcg_temp_new();
> +        tcg_gen_andi_tl(tmp, t0, 0xffff);

tcg_gen_ext16u_tl(tmp, t0); ?

> +        tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff);
> +        tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
> +        tcg_temp_free(tmp);
> +        //tcg_gen_st16_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
>          break;
>  #ifdef TARGET_X86_64
>      case OT_LONG:
> -        tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
>          /* high part of register set to zero */
> -        tcg_gen_movi_tl(cpu_tmp0, 0);
> -        tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
> +        tcg_gen_ext32u_tl(cpu_regs[reg], t0);
> +        //tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
> +        /* high part of register set to zero */
> +        //tcg_gen_movi_tl(cpu_tmp0, 0);
> +        //tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
>          break;
>      default:
>      case OT_QUAD:
> -        tcg_gen_st_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
> +        tcg_gen_mov_tl(cpu_regs[reg], t0);
> +        //tcg_gen_st_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
>          break;
>  #else
>      default:
>      case OT_LONG:
> -        tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
> +        tcg_gen_mov_tl(cpu_regs[reg], t0);
> +        //tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
>          break;
>  #endif

tcg_gen_ext32u_tl(cpu_regs[reg], t0) is equivalent to
tcg_gen_mov_tl(cpu_regs[reg], t0) if TARGET_LONG_BITS == 32, i.e. if
!TARGET_X86_64. This means the OT_LONG case can be made common, with
the #ifdef only around OT_QUAD.
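Combined with the ext8u/ext16u suggestions above, the whole switch
could then look something like this (untested sketch):

  static inline void gen_op_mov_reg_v(int ot, int reg, TCGv t0)
  {
      TCGv tmp;

      switch(ot) {
      case OT_BYTE:
          tmp = tcg_temp_new();
          tcg_gen_ext8u_tl(tmp, t0);
          if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
              tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xff);
              tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
          } else {
              tcg_gen_shli_tl(tmp, tmp, 8);
              tcg_gen_andi_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], ~0xff00);
              tcg_gen_or_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], tmp);
          }
          tcg_temp_free(tmp);
          break;
      case OT_WORD:
          tmp = tcg_temp_new();
          tcg_gen_ext16u_tl(tmp, t0);
          tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff);
          tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
          tcg_temp_free(tmp);
          break;
      default:
      case OT_LONG:
          /* zero-extends on x86_64; on i386 ext32u is a plain mov */
          tcg_gen_ext32u_tl(cpu_regs[reg], t0);
          break;
  #ifdef TARGET_X86_64
      case OT_QUAD:
          tcg_gen_mov_tl(cpu_regs[reg], t0);
          break;
  #endif
      }
  }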
>      }
>  }
>
> -static inline void gen_op_mov_reg_T0(int ot, int reg)
> -{
> -    gen_op_mov_reg_v(ot, reg, cpu_T[0]);
> -}
> -
> -static inline void gen_op_mov_reg_T1(int ot, int reg)
> -{
> -    gen_op_mov_reg_v(ot, reg, cpu_T[1]);
> -}
> -
>  static inline void gen_op_mov_reg_A0(int size, int reg)
>  {
> +    TCGv tmp;
> +
>      switch(size) {
>      case 0:
> -        tcg_gen_st16_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
> +        tmp = tcg_temp_new();
> +        tcg_gen_andi_tl(tmp, cpu_A0, 0xffff);

tcg_gen_ext16u_tl(tmp, cpu_A0); ?

> +        tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff);
> +        tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
> +        tcg_temp_free(tmp);
> +        //tcg_gen_st16_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
>          break;
>  #ifdef TARGET_X86_64
>      case 1:
> -        tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
>          /* high part of register set to zero */
> -        tcg_gen_movi_tl(cpu_tmp0, 0);
> -        tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
> +        tcg_gen_ext32u_tl(cpu_regs[reg], cpu_A0);
> +        //tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
> +        /* high part of register set to zero */
> +        //tcg_gen_movi_tl(cpu_tmp0, 0);
> +        //tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
>          break;
>      default:
>      case 2:
> -        tcg_gen_st_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]));
> +        tcg_gen_mov_tl(cpu_regs[reg], cpu_A0);
> +        //tcg_gen_st_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]));
>          break;
>  #else
>      default:
>      case 1:
> -        tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
> +        tcg_gen_mov_tl(cpu_regs[reg], cpu_A0);
> +        //tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
>          break;
>  #endif

Same comment as previously, to share more code between x86 and x86_64.

>      }
> @@ -345,59 +375,213 @@ static inline void gen_op_mov_v_reg(int ot, TCGv t0, int reg)
>          if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
>              goto std_case;
>          } else {
> -            tcg_gen_ld8u_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
> +            tcg_gen_shri_tl(t0, cpu_regs[reg - 4], 8);
> +            tcg_gen_andi_tl(t0, t0, 0xff);

tcg_gen_ext8u_tl(t0, t0) ?
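i.e. (untested):

      tcg_gen_shri_tl(t0, cpu_regs[reg - 4], 8);
      tcg_gen_ext8u_tl(t0, t0);

which makes it explicit that only the low byte of the shifted value
is kept.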
> +            //tcg_gen_ld8u_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
>          }
>          break;
>      default:
>      std_case:
> -        tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
> +        tcg_gen_mov_tl(t0, cpu_regs[reg]);
> +        //tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
>          break;
>      }
>  }
>
> -static inline void gen_op_mov_TN_reg(int ot, int t_index, int reg)
> +static inline void gen_op_movl_A0_reg(int reg)
>  {
> -    gen_op_mov_v_reg(ot, cpu_T[t_index], reg);
> +    tcg_gen_mov_tl(cpu_A0, cpu_regs[reg]);
> +    //tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
>  }
>
> -static inline void gen_op_movl_A0_reg(int reg)
> +static inline void gen_op_add_reg_im(int size, int reg, int32_t val)
>  {
> -    tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
> +    TCGv tmp;
> +
> +    switch(size) {
> +    case 0:
> +        // TODO optimize
> +        tmp = tcg_temp_new();
> +        tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
> +        tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
> +        tcg_gen_andi_tl(tmp, cpu_tmp0, 0xffff);
> +        tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff);
> +        tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
> +        tcg_temp_free(tmp);
> +        //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> +        //tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
> +        //tcg_gen_st16_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
> +        break;
> +    case 1:
> +        // TODO optimize
> +        tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
> +        tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
> +#ifdef TARGET_X86_64
> +        tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0xffffffff);

tcg_gen_ext32u_tl(cpu_tmp0, cpu_tmp0) would automatically be removed
at translation time if !TARGET_X86_64, so the #ifdef would not be
needed.

> +#endif
> +        tcg_gen_mov_tl(cpu_regs[reg], cpu_tmp0);
> +        //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> +        //tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
> +        //#ifdef TARGET_X86_64
> +        //tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0xffffffff);
> +        //#endif
> +        //tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> +        break;
> +#ifdef TARGET_X86_64
> +    case 2:
> +        tcg_gen_addi_tl(cpu_regs[reg], cpu_regs[reg], val);
> +        //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> +        //tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
> +        //tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> +        break;
> +#endif
> +    }
>  }
>
> -static inline void gen_op_addl_A0_im(int32_t val)
> +static inline void gen_op_add_reg_T0(int size, int reg)
>  {
> -    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
> +    TCGv tmp;
> +
> +    switch(size) {
> +    case 0:
> +        // TODO optimize
> +        tmp = tcg_temp_new();
> +        tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
> +        tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
> +        tcg_gen_andi_tl(tmp, cpu_tmp0, 0xffff);
> +        tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff);
> +        tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
> +        tcg_temp_free(tmp);
> +        //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> +        //tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
> +        //tcg_gen_st16_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
> +        break;
> +    case 1:
> +        // TODO optimize
> +        tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
> +        tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
>  #ifdef TARGET_X86_64
> -    tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
> +        tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0xffffffff);

Same here: ext32u would remove the need for the #ifdef.
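With ext32u, case 1 of gen_op_add_reg_im()/gen_op_add_reg_T0() could
then shrink to something like (untested, no #ifdef needed):

      tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T[0]);
      tcg_gen_ext32u_tl(cpu_regs[reg], cpu_tmp0);   /* plain mov on i386 */

(with tcg_gen_addi_tl and val for the immediate variant).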
> +#endif
> +        tcg_gen_mov_tl(cpu_regs[reg], cpu_tmp0);
> +        //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> +        //tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
> +        //#ifdef TARGET_X86_64
> +        //tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0xffffffff);
> +        //#endif
> +        //tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> +        break;
> +#ifdef TARGET_X86_64
> +    case 2:
> +        tcg_gen_add_tl(cpu_regs[reg], cpu_regs[reg], cpu_T[0]);
> +        //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> +        //tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
> +        //tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> +        break;
>  #endif
> +    }
>  }
>
> -#ifdef TARGET_X86_64
> -static inline void gen_op_addq_A0_im(int64_t val)
> +static inline void gen_op_addl_A0_reg_sN(int shift, int reg)
>  {
> -    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
> +    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
> +    if (shift != 0)
> +        tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
> +    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
> +#ifdef TARGET_X86_64
> +    tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
> +#endif
> +
> +    //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> +    //if (shift != 0)
> +    //    tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
> +    //tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
> +    //#ifdef TARGET_X86_64
> +    //tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
> +    //#endif
>  }
> +
> +#else
> +
> +static inline void gen_op_mov_reg_v(int ot, int reg, TCGv t0)
> +{
> +    switch(ot) {
> +    case OT_BYTE:
> +        if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
> +            tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_B_OFFSET);
> +        } else {
> +            tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
> +        }
> +        break;
> +    case OT_WORD:
> +        tcg_gen_st16_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
> +        break;
> +#ifdef TARGET_X86_64
> +    case OT_LONG:
> +        tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
> +        /* high part of register set to zero */
> +        tcg_gen_movi_tl(cpu_tmp0, 0);
> +        tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
> +        break;
> +    default:
> +    case OT_QUAD:
> +        tcg_gen_st_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
> +        break;
> +#else
> +    default:
> +    case OT_LONG:
> +        tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
> +        break;
>  #endif
> -
> -static void gen_add_A0_im(DisasContext *s, int val)
> +    }
> +}
> +
> +static inline void gen_op_mov_reg_A0(int size, int reg)
>  {
> +    switch(size) {
> +    case 0:
> +        tcg_gen_st16_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
> +        break;
>  #ifdef TARGET_X86_64
> -    if (CODE64(s))
> -        gen_op_addq_A0_im(val);
> -    else
> +    case 1:
> +        tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
> +        /* high part of register set to zero */
> +        tcg_gen_movi_tl(cpu_tmp0, 0);
> +        tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
> +        break;
> +    default:
> +    case 2:
> +        tcg_gen_st_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]));
> +        break;
> +#else
> +    default:
> +    case 1:
> +        tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
> +        break;
>  #endif
> -    gen_op_addl_A0_im(val);
> +    }
>  }
>
> -static inline void gen_op_addl_T0_T1(void)
> +static inline void gen_op_mov_v_reg(int ot, TCGv t0, int reg)
>  {
> -    tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
> +    switch(ot) {
> +    case OT_BYTE:
> +        if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
> +            goto std_case;
> +        } else {
> +            tcg_gen_ld8u_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
> +        }
> +        break;
> +    default:
> +    std_case:
> +        tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
> +        break;
> +    }
>  }
>
> -static inline void gen_op_jmp_T0(void)
> +static inline void gen_op_movl_A0_reg(int reg)
>  {
> -    tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUState, eip));
> +    tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
>  }
>
>  static inline void gen_op_add_reg_im(int size, int reg, int32_t val)
> @@ -452,15 +636,10 @@ static inline void gen_op_add_reg_T0(int size, int reg)
>      }
>  }
>
> -static inline void gen_op_set_cc_op(int32_t val)
> -{
> -    tcg_gen_movi_i32(cpu_cc_op, val);
> -}
> -
>  static inline void gen_op_addl_A0_reg_sN(int shift, int reg)
>  {
>      tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> -    if (shift != 0)
> +    if (shift != 0)
>          tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
>      tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
>  #ifdef TARGET_X86_64
> @@ -468,6 +647,63 @@ static inline void gen_op_addl_A0_reg_sN(int shift, int reg)
>  #endif
>  }
>
> +#endif
> +
> +static inline void gen_op_mov_reg_T0(int ot, int reg)
> +{
> +    gen_op_mov_reg_v(ot, reg, cpu_T[0]);
> +}
> +
> +static inline void gen_op_mov_reg_T1(int ot, int reg)
> +{
> +    gen_op_mov_reg_v(ot, reg, cpu_T[1]);
> +}
> +
> +static inline void gen_op_mov_TN_reg(int ot, int t_index, int reg)
> +{
> +    gen_op_mov_v_reg(ot, cpu_T[t_index], reg);
> +}
> +
> +static inline void gen_op_addl_A0_im(int32_t val)
> +{
> +    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
> +#ifdef TARGET_X86_64
> +    tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
> +#endif
> +}
> +
> +#ifdef TARGET_X86_64
> +static inline void gen_op_addq_A0_im(int64_t val)
> +{
> +    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
> +}
> +#endif
> +
> +static void gen_add_A0_im(DisasContext *s, int val)
> +{
> +#ifdef TARGET_X86_64
> +    if (CODE64(s))
> +        gen_op_addq_A0_im(val);
> +    else
> +#endif
> +        gen_op_addl_A0_im(val);
> +}
> +
> +static inline void gen_op_addl_T0_T1(void)
> +{
> +    tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
> +}
> +
> +static inline void gen_op_jmp_T0(void)
> +{
> +    tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUState, eip));
> +}
> +
> +static inline void gen_op_set_cc_op(int32_t val)
> +{
> +    tcg_gen_movi_i32(cpu_cc_op, val);
> +}
> +
>  static inline void gen_op_movl_A0_seg(int reg)
>  {
>      tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, segs[reg].base) + REG_L_OFFSET);
> @@ -496,13 +732,21 @@ static inline void gen_op_addq_A0_seg(int reg)
>
>  static inline void gen_op_movq_A0_reg(int reg)
>  {
> +#ifdef USE_REGS
> +    tcg_gen_mov_tl(cpu_A0, cpu_regs[reg]);
> +#else
>      tcg_gen_ld_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]));
> +#endif
>  }
>
>  static inline void gen_op_addq_A0_reg_sN(int shift, int reg)
>  {
> +#ifdef USE_REGS
> +    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
> +#else
>      tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> -    if (shift != 0)
> +#endif
> +    if (shift != 0)
>          tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
>      tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
>  }
> @@ -701,14 +945,22 @@ static void gen_exts(int ot, TCGv reg)
>
>  static inline void gen_op_jnz_ecx(int size, int label1)
>  {
> +#ifdef USE_REGS
> +    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
> +#else
>      tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[R_ECX]));
> +#endif
>      gen_extu(size + 1, cpu_tmp0);
>      tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, label1);
>  }
>
>  static inline void gen_op_jz_ecx(int size, int label1)
>  {
> +#ifdef USE_REGS
> +    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
> +#else
>      tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[R_ECX]));
> +#endif
>      gen_extu(size + 1, cpu_tmp0);
>      tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
>  }
> @@ -4834,7 +5086,11 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
>              rm = 0; /* avoid warning */
>          }
>          label1 = gen_new_label();
> +#ifdef USE_REGS
> +        tcg_gen_mov_tl(t2, cpu_regs[R_EAX]);
> +#else
>          tcg_gen_ld_tl(t2, cpu_env, offsetof(CPUState, regs[R_EAX]));
> +#endif
>          tcg_gen_sub_tl(t2, t2, t0);
>          gen_extu(ot, t2);
>          tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1);
> @@ -5409,7 +5665,11 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
>                  val = ldub_code(s->pc++);
>                  tcg_gen_movi_tl(cpu_T3, val);
>              } else {
> +#ifdef USE_REGS
> +                tcg_gen_mov_tl(cpu_T3, cpu_regs[R_ECX]);
> +#else
>                  tcg_gen_ld_tl(cpu_T3, cpu_env, offsetof(CPUState, regs[R_ECX]));
> +#endif
>              }
>              gen_shiftd_rm_T1_T3(s, ot, opreg, op);
>              break;
> @@ -6317,10 +6577,18 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
>                  /* XXX: specific Intel behaviour ? */
>                  l1 = gen_new_label();
>                  gen_jcc1(s, s->cc_op, b ^ 1, l1);
> +#ifdef USE_REGS
> +                tcg_gen_mov_tl(cpu_regs[reg], t0);
> +#else
>                  tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
> +#endif
>                  gen_set_label(l1);
> +#ifdef USE_REGS
> +                tcg_gen_ext32u_tl(cpu_regs[reg], cpu_regs[reg]);
> +#else
>                  tcg_gen_movi_tl(cpu_tmp0, 0);
>                  tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
> +#endif
>              } else
>  #endif
>              {
> @@ -7588,6 +7856,60 @@ void optimize_flags_init(void)
>      cpu_cc_tmp = tcg_global_mem_new(TCG_AREG0, offsetof(CPUState, cc_tmp),
>                                      "cc_tmp");
>
> +#ifdef USE_REGS
> +#ifdef TARGET_X86_64
> +    cpu_regs[R_EAX] = tcg_global_mem_new_i64(TCG_AREG0,
> +                                             offsetof(CPUState, regs[R_EAX]), "rax");
> +    cpu_regs[R_ECX] = tcg_global_mem_new_i64(TCG_AREG0,
> +                                             offsetof(CPUState, regs[R_ECX]), "rcx");
> +    cpu_regs[R_EDX] = tcg_global_mem_new_i64(TCG_AREG0,
> +                                             offsetof(CPUState, regs[R_EDX]), "rdx");
> +    cpu_regs[R_EBX] = tcg_global_mem_new_i64(TCG_AREG0,
> +                                             offsetof(CPUState, regs[R_EBX]), "rbx");
> +    cpu_regs[R_ESP] = tcg_global_mem_new_i64(TCG_AREG0,
> +                                             offsetof(CPUState, regs[R_ESP]), "rsp");
> +    cpu_regs[R_EBP] = tcg_global_mem_new_i64(TCG_AREG0,
> +                                             offsetof(CPUState, regs[R_EBP]), "rbp");
> +    cpu_regs[R_ESI] = tcg_global_mem_new_i64(TCG_AREG0,
> +                                             offsetof(CPUState, regs[R_ESI]), "rsi");
> +    cpu_regs[R_EDI] = tcg_global_mem_new_i64(TCG_AREG0,
> +                                             offsetof(CPUState, regs[R_EDI]), "rdi");
> +    cpu_regs[8] = tcg_global_mem_new_i64(TCG_AREG0,
> +                                         offsetof(CPUState, regs[8]), "r8");
> +    cpu_regs[9] = tcg_global_mem_new_i64(TCG_AREG0,
> +                                         offsetof(CPUState, regs[9]), "r9");
> +    cpu_regs[10] = tcg_global_mem_new_i64(TCG_AREG0,
> +                                          offsetof(CPUState, regs[10]), "r10");
> +    cpu_regs[11] = tcg_global_mem_new_i64(TCG_AREG0,
> +                                          offsetof(CPUState, regs[11]), "r11");
> +    cpu_regs[12] = tcg_global_mem_new_i64(TCG_AREG0,
> +                                          offsetof(CPUState, regs[12]), "r12");
> +    cpu_regs[13] = tcg_global_mem_new_i64(TCG_AREG0,
> +                                          offsetof(CPUState, regs[13]), "r13");
> +    cpu_regs[14] = tcg_global_mem_new_i64(TCG_AREG0,
> +                                          offsetof(CPUState, regs[14]), "r14");
> +    cpu_regs[15] = tcg_global_mem_new_i64(TCG_AREG0,
> +                                          offsetof(CPUState, regs[15]), "r15");
> +#else
> +    cpu_regs[R_EAX] = tcg_global_mem_new_i32(TCG_AREG0,
> +                                             offsetof(CPUState, regs[R_EAX]), "eax");
> +    cpu_regs[R_ECX] = tcg_global_mem_new_i32(TCG_AREG0,
> +                                             offsetof(CPUState, regs[R_ECX]), "ecx");
> +    cpu_regs[R_EDX] = tcg_global_mem_new_i32(TCG_AREG0,
> +                                             offsetof(CPUState, regs[R_EDX]), "edx");
> +    cpu_regs[R_EBX] = tcg_global_mem_new_i32(TCG_AREG0,
> +                                             offsetof(CPUState, regs[R_EBX]), "ebx");
> +    cpu_regs[R_ESP] = tcg_global_mem_new_i32(TCG_AREG0,
> +                                             offsetof(CPUState, regs[R_ESP]), "esp");
> +    cpu_regs[R_EBP] = tcg_global_mem_new_i32(TCG_AREG0,
> +                                             offsetof(CPUState, regs[R_EBP]), "ebp");
> +    cpu_regs[R_ESI] = tcg_global_mem_new_i32(TCG_AREG0,
> +                                             offsetof(CPUState, regs[R_ESI]), "esi");
> +    cpu_regs[R_EDI] = tcg_global_mem_new_i32(TCG_AREG0,
> +                                             offsetof(CPUState, regs[R_EDI]), "edi");
> +#endif
> +#endif
> +
>      /* register helpers */
>  #define GEN_HELPER 2
>  #include "helper.h"

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net