From: Aurelien Jarno <aurelien@aurel32.net>
To: Laurent Desnogues <laurent.desnogues@gmail.com>
Cc: qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH][RFC] x86: use globals for CPU registers
Date: Sun, 27 Sep 2009 00:41:56 +0200
Message-ID: <20090926224156.GA22663@hall.aurel32.net>
In-Reply-To: <761ea48b0909131400i33efc212nce026adb75a4f5d2@mail.gmail.com>
On Sun, Sep 13, 2009 at 11:00:08PM +0200, Laurent Desnogues wrote:
> Hello,
>
> this patch is a proposal to use globals for the 8 or 16 CPU
> registers on i386 and x86_64.
>
> I measured the improvement under the following conditions:
>
> - Machine: i7 920
> - Software: Fedora11 x86_64 gcc 4.4.1
> - Benchmark: SPEC2000 gcc with expr.i input
> - User mode
> - i386 and x86_64 hosts and targets, with and without the patch
> (8 combinations)
>
> The results are:
>
> qemu-i386_on-i386 15.82user 0.05system 0:15.91elapsed
> qemu-i386_on-i386-reg 15.40user 0.02system 0:15.43elapsed
> qemu-i386_on-x86_64 15.65user 0.05system 0:15.71elapsed
> qemu-i386_on-x86_64-reg 15.11user 0.03system 0:15.15elapsed
> qemu-x86_64_on-i386 mmap: No such device or address
> qemu-x86_64_on-i386-reg mmap: No such device or address
> qemu-x86_64_on-x86_64 18.42user 0.07system 0:18.49elapsed
> qemu-x86_64_on-x86_64-reg 13.22user 0.06system 0:13.31elapsed
>
> Given my lack of knowledge of system QEMU, I will leave it to
> someone else to measure the speedup.
I'll try to provide benchmarks later.
> A previous version of this patch, which only handled the i386
> target, was tested by Malc, who measured a speedup running OpenSSL
> on his G4. It was also sent to Fabrice, who asked me to send it to
> the mailing list.
>
> The usage of globals is controlled by USE_REGS so that reviewers
> can quickly test the benefit (or the lack of it).
>
> Comments are welcome (except about the obvious presence of //
> comments, which are only temporary). I need to optimize a few
> things once I'm sure the temporaries (cpu_tmp0, ...) are not used
> outside of the modified functions. The x86_64 part was coded in a
> hurry and is perhaps buggy.
>
It basically looks good. Please find my comments inline.
> Laurent
>
> Signed-off-by: Laurent Desnogues <laurent.desnogues@gmail.com>
> diff --git a/target-i386/translate.c b/target-i386/translate.c
> index 335fc08..dc2fcde 100644
> --- a/target-i386/translate.c
> +++ b/target-i386/translate.c
> @@ -58,10 +58,15 @@
>
> //#define MACRO_TEST 1
>
> +#define USE_REGS
> +
> /* global register indexes */
> static TCGv_ptr cpu_env;
> static TCGv cpu_A0, cpu_cc_src, cpu_cc_dst, cpu_cc_tmp;
> static TCGv_i32 cpu_cc_op;
> +#ifdef USE_REGS
> +static TCGv cpu_regs[CPU_NB_REGS];
> +#endif
> /* local temps */
> static TCGv cpu_T[2], cpu_T3;
> /* local register indexes (only used inside old micro ops) */
> @@ -269,70 +274,95 @@ static inline void gen_op_andl_A0_ffff(void)
> #define REG_LH_OFFSET 4
> #endif
>
> +#ifdef USE_REGS
> +#ifdef TARGET_X86_64
> +/* #warning NYI */
> +#endif
> +
> static inline void gen_op_mov_reg_v(int ot, int reg, TCGv t0)
> {
> + TCGv tmp;
> +
> switch(ot) {
> case OT_BYTE:
> + tmp = tcg_temp_new();
> + tcg_gen_andi_tl(tmp, t0, 0xff);
tcg_gen_ext8u_tl(tmp, t0); ?
> if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
> - tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_B_OFFSET);
> + tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xff);
> + tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
> + //tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_B_OFFSET);
> } else {
> - tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
> + tcg_gen_shli_tl(tmp, tmp, 8);
> + tcg_gen_andi_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], ~0xff00);
> + tcg_gen_or_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], tmp);
> + //tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
> }
> + tcg_temp_free(tmp);
> break;
> case OT_WORD:
> - tcg_gen_st16_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
> + tmp = tcg_temp_new();
> + tcg_gen_andi_tl(tmp, t0, 0xffff);
tcg_gen_ext16u_tl(tmp, t0); ?
> + tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff);
> + tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
> + tcg_temp_free(tmp);
> + //tcg_gen_st16_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
> break;
> #ifdef TARGET_X86_64
> case OT_LONG:
> - tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
> /* high part of register set to zero */
> - tcg_gen_movi_tl(cpu_tmp0, 0);
> - tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
> + tcg_gen_ext32u_tl(cpu_regs[reg], t0);
> + //tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
> + /* high part of register set to zero */
> + //tcg_gen_movi_tl(cpu_tmp0, 0);
> + //tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
> break;
> default:
> case OT_QUAD:
> - tcg_gen_st_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
> + tcg_gen_mov_tl(cpu_regs[reg], t0);
> + //tcg_gen_st_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
> break;
> #else
> default:
> case OT_LONG:
> - tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
> + tcg_gen_mov_tl(cpu_regs[reg], t0);
> + //tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
> break;
> #endif
tcg_gen_ext32u_tl(cpu_regs[reg], t0) is equivalent to
tcg_gen_mov_tl(cpu_regs[reg], t0) when TARGET_LONG_BITS == 32, i.e.
when !TARGET_X86_64. This means the OT_LONG case can now be made
common, with the #ifdef only around OT_QUAD.
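Something like this (untested sketch; the default label then goes
with OT_LONG on both targets):

    default:
    case OT_LONG:
        /* On x86_64 this also zeroes the high half of the register;
           on i386, ext32u is equivalent to a mov. */
        tcg_gen_ext32u_tl(cpu_regs[reg], t0);
        break;
#ifdef TARGET_X86_64
    case OT_QUAD:
        tcg_gen_mov_tl(cpu_regs[reg], t0);
        break;
#endif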
> }
> }
>
> -static inline void gen_op_mov_reg_T0(int ot, int reg)
> -{
> - gen_op_mov_reg_v(ot, reg, cpu_T[0]);
> -}
> -
> -static inline void gen_op_mov_reg_T1(int ot, int reg)
> -{
> - gen_op_mov_reg_v(ot, reg, cpu_T[1]);
> -}
> -
> static inline void gen_op_mov_reg_A0(int size, int reg)
> {
> + TCGv tmp;
> +
> switch(size) {
> case 0:
> - tcg_gen_st16_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
> + tmp = tcg_temp_new();
> + tcg_gen_andi_tl(tmp, cpu_A0, 0xffff);
tcg_gen_ext16u_tl(tmp, cpu_A0); ?
> + tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff);
> + tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
> + tcg_temp_free(tmp);
> + //tcg_gen_st16_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
> break;
> #ifdef TARGET_X86_64
> case 1:
> - tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
> /* high part of register set to zero */
> - tcg_gen_movi_tl(cpu_tmp0, 0);
> - tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
> + tcg_gen_ext32u_tl(cpu_regs[reg], cpu_A0);
> + //tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
> + /* high part of register set to zero */
> + //tcg_gen_movi_tl(cpu_tmp0, 0);
> + //tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
> break;
> default:
> case 2:
> - tcg_gen_st_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]));
> + tcg_gen_mov_tl(cpu_regs[reg], cpu_A0);
> + //tcg_gen_st_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]));
> break;
> #else
> default:
> case 1:
> - tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
> + tcg_gen_mov_tl(cpu_regs[reg], cpu_A0);
> + //tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
> break;
> #endif
Same comment as above: the case 1 and case 2 paths can be
restructured the same way, so that the 32-bit code is shared between
i386 and x86_64.
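For instance (untested, mirroring the sketch above):

    default:
    case 1:
        /* a mov on i386, a zero-extension on x86_64 */
        tcg_gen_ext32u_tl(cpu_regs[reg], cpu_A0);
        break;
#ifdef TARGET_X86_64
    case 2:
        tcg_gen_mov_tl(cpu_regs[reg], cpu_A0);
        break;
#endif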
> }
> @@ -345,59 +375,213 @@ static inline void gen_op_mov_v_reg(int ot, TCGv t0, int reg)
> if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
> goto std_case;
> } else {
> - tcg_gen_ld8u_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
> + tcg_gen_shri_tl(t0, cpu_regs[reg - 4], 8);
> + tcg_gen_andi_tl(t0, t0, 0xff);
tcg_gen_ext8u_tl(t0, t0) ?
> + //tcg_gen_ld8u_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
> }
> break;
> default:
> std_case:
> - tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
> + tcg_gen_mov_tl(t0, cpu_regs[reg]);
> + //tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
> break;
> }
> }
>
> -static inline void gen_op_mov_TN_reg(int ot, int t_index, int reg)
> +static inline void gen_op_movl_A0_reg(int reg)
> {
> - gen_op_mov_v_reg(ot, cpu_T[t_index], reg);
> + tcg_gen_mov_tl(cpu_A0, cpu_regs[reg]);
> + //tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
> }
>
> -static inline void gen_op_movl_A0_reg(int reg)
> +static inline void gen_op_add_reg_im(int size, int reg, int32_t val)
> {
> - tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
> + TCGv tmp;
> +
> + switch(size) {
> + case 0:
> + // TODO optimize
> + tmp = tcg_temp_new();
> + tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
> + tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
> + tcg_gen_andi_tl(tmp, cpu_tmp0, 0xffff);
> + tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff);
> + tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
> + tcg_temp_free(tmp);
> + //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> + //tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
> + //tcg_gen_st16_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
> + break;
> + case 1:
> + // TODO optimize
> + tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
> + tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
> +#ifdef TARGET_X86_64
> + tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0xffffffff);
tcg_gen_ext32u_tl(cpu_tmp0, cpu_tmp0) would automatically be
optimized away at compile time when !TARGET_X86_64, so the #ifdef
could be dropped.
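The whole case 1 sequence could then be reduced to something like
(untested):

    case 1:
        tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val);
        /* a plain mov on i386, a zero-extension on x86_64 */
        tcg_gen_ext32u_tl(cpu_regs[reg], cpu_tmp0);
        break;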
> +#endif
> + tcg_gen_mov_tl(cpu_regs[reg], cpu_tmp0);
> + //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> + //tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
> + //#ifdef TARGET_X86_64
> + //tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0xffffffff);
> + //#endif
> + //tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> + break;
> +#ifdef TARGET_X86_64
> + case 2:
> + tcg_gen_addi_tl(cpu_regs[reg], cpu_regs[reg], val);
> + //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> + //tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
> + //tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> + break;
> +#endif
> + }
> }
>
> -static inline void gen_op_addl_A0_im(int32_t val)
> +static inline void gen_op_add_reg_T0(int size, int reg)
> {
> - tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
> + TCGv tmp;
> +
> + switch(size) {
> + case 0:
> + // TODO optimize
> + tmp = tcg_temp_new();
> + tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
> + tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
> + tcg_gen_andi_tl(tmp, cpu_tmp0, 0xffff);
> + tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff);
> + tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
> + tcg_temp_free(tmp);
> + //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> + //tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
> + //tcg_gen_st16_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
> + break;
> + case 1:
> + // TODO optimize
> + tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
> + tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
> #ifdef TARGET_X86_64
> - tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
> + tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0xffffffff);
same here
> +#endif
> + tcg_gen_mov_tl(cpu_regs[reg], cpu_tmp0);
> + //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> + //tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
> + //#ifdef TARGET_X86_64
> + //tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0xffffffff);
> + //#endif
> + //tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> + break;
> +#ifdef TARGET_X86_64
> + case 2:
> + tcg_gen_add_tl(cpu_regs[reg], cpu_regs[reg], cpu_T[0]);
> + //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> + //tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
> + //tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> + break;
> #endif
> + }
> }
>
> -#ifdef TARGET_X86_64
> -static inline void gen_op_addq_A0_im(int64_t val)
> +static inline void gen_op_addl_A0_reg_sN(int shift, int reg)
> {
> - tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
> + tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
> + if (shift != 0)
> + tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
> + tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
> +#ifdef TARGET_X86_64
> + tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
> +#endif
> +
> + //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> + //if (shift != 0)
> + // tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
> + //tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
> + //#ifdef TARGET_X86_64
> + //tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
> + //#endif
> }
> +
> +#else
> +
> +static inline void gen_op_mov_reg_v(int ot, int reg, TCGv t0)
> +{
> + switch(ot) {
> + case OT_BYTE:
> + if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
> + tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_B_OFFSET);
> + } else {
> + tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
> + }
> + break;
> + case OT_WORD:
> + tcg_gen_st16_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
> + break;
> +#ifdef TARGET_X86_64
> + case OT_LONG:
> + tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
> + /* high part of register set to zero */
> + tcg_gen_movi_tl(cpu_tmp0, 0);
> + tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
> + break;
> + default:
> + case OT_QUAD:
> + tcg_gen_st_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
> + break;
> +#else
> + default:
> + case OT_LONG:
> + tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
> + break;
> #endif
> -
> -static void gen_add_A0_im(DisasContext *s, int val)
> + }
> +}
> +
> +static inline void gen_op_mov_reg_A0(int size, int reg)
> {
> + switch(size) {
> + case 0:
> + tcg_gen_st16_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
> + break;
> #ifdef TARGET_X86_64
> - if (CODE64(s))
> - gen_op_addq_A0_im(val);
> - else
> + case 1:
> + tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
> + /* high part of register set to zero */
> + tcg_gen_movi_tl(cpu_tmp0, 0);
> + tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
> + break;
> + default:
> + case 2:
> + tcg_gen_st_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]));
> + break;
> +#else
> + default:
> + case 1:
> + tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
> + break;
> #endif
> - gen_op_addl_A0_im(val);
> + }
> }
>
> -static inline void gen_op_addl_T0_T1(void)
> +static inline void gen_op_mov_v_reg(int ot, TCGv t0, int reg)
> {
> - tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
> + switch(ot) {
> + case OT_BYTE:
> + if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
> + goto std_case;
> + } else {
> + tcg_gen_ld8u_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
> + }
> + break;
> + default:
> + std_case:
> + tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
> + break;
> + }
> }
>
> -static inline void gen_op_jmp_T0(void)
> +static inline void gen_op_movl_A0_reg(int reg)
> {
> - tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUState, eip));
> + tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
> }
>
> static inline void gen_op_add_reg_im(int size, int reg, int32_t val)
> @@ -452,15 +636,10 @@ static inline void gen_op_add_reg_T0(int size, int reg)
> }
> }
>
> -static inline void gen_op_set_cc_op(int32_t val)
> -{
> - tcg_gen_movi_i32(cpu_cc_op, val);
> -}
> -
> static inline void gen_op_addl_A0_reg_sN(int shift, int reg)
> {
> tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> - if (shift != 0)
> + if (shift != 0)
> tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
> tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
> #ifdef TARGET_X86_64
> @@ -468,6 +647,63 @@ static inline void gen_op_addl_A0_reg_sN(int shift, int reg)
> #endif
> }
>
> +#endif
> +
> +static inline void gen_op_mov_reg_T0(int ot, int reg)
> +{
> + gen_op_mov_reg_v(ot, reg, cpu_T[0]);
> +}
> +
> +static inline void gen_op_mov_reg_T1(int ot, int reg)
> +{
> + gen_op_mov_reg_v(ot, reg, cpu_T[1]);
> +}
> +
> +static inline void gen_op_mov_TN_reg(int ot, int t_index, int reg)
> +{
> + gen_op_mov_v_reg(ot, cpu_T[t_index], reg);
> +}
> +
> +static inline void gen_op_addl_A0_im(int32_t val)
> +{
> + tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
> +#ifdef TARGET_X86_64
> + tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
> +#endif
> +}
> +
> +#ifdef TARGET_X86_64
> +static inline void gen_op_addq_A0_im(int64_t val)
> +{
> + tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
> +}
> +#endif
> +
> +static void gen_add_A0_im(DisasContext *s, int val)
> +{
> +#ifdef TARGET_X86_64
> + if (CODE64(s))
> + gen_op_addq_A0_im(val);
> + else
> +#endif
> + gen_op_addl_A0_im(val);
> +}
> +
> +static inline void gen_op_addl_T0_T1(void)
> +{
> + tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
> +}
> +
> +static inline void gen_op_jmp_T0(void)
> +{
> + tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUState, eip));
> +}
> +
> +static inline void gen_op_set_cc_op(int32_t val)
> +{
> + tcg_gen_movi_i32(cpu_cc_op, val);
> +}
> +
> static inline void gen_op_movl_A0_seg(int reg)
> {
> tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, segs[reg].base) + REG_L_OFFSET);
> @@ -496,13 +732,21 @@ static inline void gen_op_addq_A0_seg(int reg)
>
> static inline void gen_op_movq_A0_reg(int reg)
> {
> +#ifdef USE_REGS
> + tcg_gen_mov_tl(cpu_A0, cpu_regs[reg]);
> +#else
> tcg_gen_ld_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]));
> +#endif
> }
>
> static inline void gen_op_addq_A0_reg_sN(int shift, int reg)
> {
> +#ifdef USE_REGS
> + tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
> +#else
> tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> - if (shift != 0)
> +#endif
> + if (shift != 0)
> tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
> tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
> }
> @@ -701,14 +945,22 @@ static void gen_exts(int ot, TCGv reg)
>
> static inline void gen_op_jnz_ecx(int size, int label1)
> {
> +#ifdef USE_REGS
> + tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
> +#else
> tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[R_ECX]));
> +#endif
> gen_extu(size + 1, cpu_tmp0);
> tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, label1);
> }
>
> static inline void gen_op_jz_ecx(int size, int label1)
> {
> +#ifdef USE_REGS
> + tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
> +#else
> tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[R_ECX]));
> +#endif
> gen_extu(size + 1, cpu_tmp0);
> tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
> }
> @@ -4834,7 +5086,11 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
> rm = 0; /* avoid warning */
> }
> label1 = gen_new_label();
> +#ifdef USE_REGS
> + tcg_gen_mov_tl(t2, cpu_regs[R_EAX]);
> +#else
> tcg_gen_ld_tl(t2, cpu_env, offsetof(CPUState, regs[R_EAX]));
> +#endif
> tcg_gen_sub_tl(t2, t2, t0);
> gen_extu(ot, t2);
> tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1);
> @@ -5409,7 +5665,11 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
> val = ldub_code(s->pc++);
> tcg_gen_movi_tl(cpu_T3, val);
> } else {
> +#ifdef USE_REGS
> + tcg_gen_mov_tl(cpu_T3, cpu_regs[R_ECX]);
> +#else
> tcg_gen_ld_tl(cpu_T3, cpu_env, offsetof(CPUState, regs[R_ECX]));
> +#endif
> }
> gen_shiftd_rm_T1_T3(s, ot, opreg, op);
> break;
> @@ -6317,10 +6577,18 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
> /* XXX: specific Intel behaviour ? */
> l1 = gen_new_label();
> gen_jcc1(s, s->cc_op, b ^ 1, l1);
> +#ifdef USE_REGS
> + tcg_gen_mov_tl(cpu_regs[reg], t0);
> +#else
> tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
> +#endif
> gen_set_label(l1);
> +#ifdef USE_REGS
> + tcg_gen_ext32u_tl(cpu_regs[reg], cpu_regs[reg]);
> +#else
> tcg_gen_movi_tl(cpu_tmp0, 0);
> tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
> +#endif
> } else
> #endif
> {
> @@ -7588,6 +7856,60 @@ void optimize_flags_init(void)
> cpu_cc_tmp = tcg_global_mem_new(TCG_AREG0, offsetof(CPUState, cc_tmp),
> "cc_tmp");
>
> +#ifdef USE_REGS
> +#ifdef TARGET_X86_64
> + cpu_regs[R_EAX] = tcg_global_mem_new_i64(TCG_AREG0,
> + offsetof(CPUState, regs[R_EAX]), "rax");
> + cpu_regs[R_ECX] = tcg_global_mem_new_i64(TCG_AREG0,
> + offsetof(CPUState, regs[R_ECX]), "rcx");
> + cpu_regs[R_EDX] = tcg_global_mem_new_i64(TCG_AREG0,
> + offsetof(CPUState, regs[R_EDX]), "rdx");
> + cpu_regs[R_EBX] = tcg_global_mem_new_i64(TCG_AREG0,
> + offsetof(CPUState, regs[R_EBX]), "rbx");
> + cpu_regs[R_ESP] = tcg_global_mem_new_i64(TCG_AREG0,
> + offsetof(CPUState, regs[R_ESP]), "rsp");
> + cpu_regs[R_EBP] = tcg_global_mem_new_i64(TCG_AREG0,
> + offsetof(CPUState, regs[R_EBP]), "rbp");
> + cpu_regs[R_ESI] = tcg_global_mem_new_i64(TCG_AREG0,
> + offsetof(CPUState, regs[R_ESI]), "rsi");
> + cpu_regs[R_EDI] = tcg_global_mem_new_i64(TCG_AREG0,
> + offsetof(CPUState, regs[R_EDI]), "rdi");
> + cpu_regs[8] = tcg_global_mem_new_i64(TCG_AREG0,
> + offsetof(CPUState, regs[8]), "r8");
> + cpu_regs[9] = tcg_global_mem_new_i64(TCG_AREG0,
> + offsetof(CPUState, regs[9]), "r9");
> + cpu_regs[10] = tcg_global_mem_new_i64(TCG_AREG0,
> + offsetof(CPUState, regs[10]), "r10");
> + cpu_regs[11] = tcg_global_mem_new_i64(TCG_AREG0,
> + offsetof(CPUState, regs[11]), "r11");
> + cpu_regs[12] = tcg_global_mem_new_i64(TCG_AREG0,
> + offsetof(CPUState, regs[12]), "r12");
> + cpu_regs[13] = tcg_global_mem_new_i64(TCG_AREG0,
> + offsetof(CPUState, regs[13]), "r13");
> + cpu_regs[14] = tcg_global_mem_new_i64(TCG_AREG0,
> + offsetof(CPUState, regs[14]), "r14");
> + cpu_regs[15] = tcg_global_mem_new_i64(TCG_AREG0,
> + offsetof(CPUState, regs[15]), "r15");
> +#else
> + cpu_regs[R_EAX] = tcg_global_mem_new_i32(TCG_AREG0,
> + offsetof(CPUState, regs[R_EAX]), "eax");
> + cpu_regs[R_ECX] = tcg_global_mem_new_i32(TCG_AREG0,
> + offsetof(CPUState, regs[R_ECX]), "ecx");
> + cpu_regs[R_EDX] = tcg_global_mem_new_i32(TCG_AREG0,
> + offsetof(CPUState, regs[R_EDX]), "edx");
> + cpu_regs[R_EBX] = tcg_global_mem_new_i32(TCG_AREG0,
> + offsetof(CPUState, regs[R_EBX]), "ebx");
> + cpu_regs[R_ESP] = tcg_global_mem_new_i32(TCG_AREG0,
> + offsetof(CPUState, regs[R_ESP]), "esp");
> + cpu_regs[R_EBP] = tcg_global_mem_new_i32(TCG_AREG0,
> + offsetof(CPUState, regs[R_EBP]), "ebp");
> + cpu_regs[R_ESI] = tcg_global_mem_new_i32(TCG_AREG0,
> + offsetof(CPUState, regs[R_ESI]), "esi");
> + cpu_regs[R_EDI] = tcg_global_mem_new_i32(TCG_AREG0,
> + offsetof(CPUState, regs[R_EDI]), "edi");
> +#endif
> +#endif
> +
> /* register helpers */
> #define GEN_HELPER 2
> #include "helper.h"
--
Aurelien Jarno GPG: 1024D/F1BCDB73
aurelien@aurel32.net http://www.aurel32.net