From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([208.118.235.92]:52450) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1TEQYP-0001lP-1P for qemu-devel@nongnu.org; Wed, 19 Sep 2012 16:07:33 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1TEQYL-0006kX-ON for qemu-devel@nongnu.org; Wed, 19 Sep 2012 16:07:24 -0400 Received: from hall.aurel32.net ([88.191.126.93]:40081) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1TEQYL-0006kA-Aq for qemu-devel@nongnu.org; Wed, 19 Sep 2012 16:07:21 -0400 Date: Wed, 19 Sep 2012 22:07:18 +0200 From: Aurelien Jarno Message-ID: <20120919200718.GA4457@ohm.aurel32.net> References: <1348034655-2946-1-git-send-email-rth@twiddle.net> MIME-Version: 1.0 Content-Type: text/plain; charset=iso-8859-15 Content-Disposition: inline In-Reply-To: <1348034655-2946-1-git-send-email-rth@twiddle.net> Subject: Re: [Qemu-devel] [PATCH v2] target-mips: Use TCG registers for the FPU. List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Richard Henderson Cc: qemu-devel@nongnu.org On Tue, Sep 18, 2012 at 11:04:15PM -0700, Richard Henderson wrote: > With normal FP, this doesn't have much effect on the generated code, > because most of the FP operations are not CONST/PURE, and so we spill > registers at about the same frequency as the explicit load/stores. > > But with Loongson multimedia instructions, which are all integral and > whose helpers are in fact CONST+PURE, this greatly improves the code. > > Signed-off-by: Richard Henderson > --- > > As requested, only generating 64-bit fp registers now. The generated > code looks quite good on i386. It could be a tad better for x86_64, > but it's still better than it was. > Thanks for this new version, the decrease in speed is only about 1% now, and not noticeable with my new version of the register allocator. It looks fine to me. 
Reviewed-by: Aurelien Jarno > > target-mips/translate.c | 96 +++++++++++++++++++++++++++---------------------- > 1 file changed, 54 insertions(+), 42 deletions(-) > > diff --git a/target-mips/translate.c b/target-mips/translate.c > index 52eeb2b..4ae15aa 100644 > --- a/target-mips/translate.c > +++ b/target-mips/translate.c > @@ -478,6 +478,7 @@ static TCGv cpu_HI[MIPS_DSP_ACC], cpu_LO[MIPS_DSP_ACC], cpu_ACX[MIPS_DSP_ACC]; > static TCGv cpu_dspctrl, btarget, bcond; > static TCGv_i32 hflags; > static TCGv_i32 fpu_fcr0, fpu_fcr31; > +static TCGv_i64 fpu_f64[32]; > > static uint32_t gen_opc_hflags[OPC_BUF_SIZE]; > > @@ -545,26 +546,31 @@ enum { > BS_EXCP = 3, /* We reached an exception condition */ > }; > > -static const char *regnames[] = > - { "r0", "at", "v0", "v1", "a0", "a1", "a2", "a3", > - "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", > - "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", > - "t8", "t9", "k0", "k1", "gp", "sp", "s8", "ra", }; > +static const char * const regnames[] = { > + "r0", "at", "v0", "v1", "a0", "a1", "a2", "a3", > + "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", > + "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", > + "t8", "t9", "k0", "k1", "gp", "sp", "s8", "ra", > +}; > > -static const char *regnames_HI[] = > - { "HI0", "HI1", "HI2", "HI3", }; > +static const char * const regnames_HI[] = { > + "HI0", "HI1", "HI2", "HI3", > +}; > > -static const char *regnames_LO[] = > - { "LO0", "LO1", "LO2", "LO3", }; > +static const char * const regnames_LO[] = { > + "LO0", "LO1", "LO2", "LO3", > +}; > > -static const char *regnames_ACX[] = > - { "ACX0", "ACX1", "ACX2", "ACX3", }; > +static const char * const regnames_ACX[] = { > + "ACX0", "ACX1", "ACX2", "ACX3", > +}; > > -static const char *fregnames[] = > - { "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", > - "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", > - "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", > - "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", }; > +static 
const char * const fregnames[] = { > + "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", > + "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", > + "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", > + "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", > +}; > > #ifdef MIPS_DEBUG_DISAS > #define MIPS_DEBUG(fmt, ...) \ > @@ -658,54 +664,54 @@ static inline void gen_store_srsgpr (int from, int to) > } > > /* Floating point register moves. */ > -static inline void gen_load_fpr32 (TCGv_i32 t, int reg) > +static void gen_load_fpr32(TCGv_i32 t, int reg) > { > - tcg_gen_ld_i32(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].w[FP_ENDIAN_IDX])); > + tcg_gen_trunc_i64_i32(t, fpu_f64[reg]); > } > > -static inline void gen_store_fpr32 (TCGv_i32 t, int reg) > +static void gen_store_fpr32(TCGv_i32 t, int reg) > { > - tcg_gen_st_i32(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].w[FP_ENDIAN_IDX])); > + TCGv_i64 t64 = tcg_temp_new_i64(); > + tcg_gen_extu_i32_i64(t64, t); > + tcg_gen_deposit_i64(fpu_f64[reg], fpu_f64[reg], t64, 0, 32); > + tcg_temp_free_i64(t64); > } > > -static inline void gen_load_fpr32h (TCGv_i32 t, int reg) > +static void gen_load_fpr32h(TCGv_i32 t, int reg) > { > - tcg_gen_ld_i32(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].w[!FP_ENDIAN_IDX])); > + TCGv_i64 t64 = tcg_temp_new_i64(); > + tcg_gen_shri_i64(t64, fpu_f64[reg], 32); > + tcg_gen_trunc_i64_i32(t, t64); > + tcg_temp_free_i64(t64); > } > > -static inline void gen_store_fpr32h (TCGv_i32 t, int reg) > +static void gen_store_fpr32h(TCGv_i32 t, int reg) > { > - tcg_gen_st_i32(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].w[!FP_ENDIAN_IDX])); > + TCGv_i64 t64 = tcg_temp_new_i64(); > + tcg_gen_extu_i32_i64(t64, t); > + tcg_gen_deposit_i64(fpu_f64[reg], fpu_f64[reg], t64, 32, 32); > + tcg_temp_free_i64(t64); > } > > -static inline void gen_load_fpr64 (DisasContext *ctx, TCGv_i64 t, int reg) > +static void gen_load_fpr64(DisasContext *ctx, TCGv_i64 t, int reg) > { > 
if (ctx->hflags & MIPS_HFLAG_F64) { > - tcg_gen_ld_i64(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].d)); > + tcg_gen_mov_i64(t, fpu_f64[reg]); > } else { > - TCGv_i32 t0 = tcg_temp_new_i32(); > - TCGv_i32 t1 = tcg_temp_new_i32(); > - gen_load_fpr32(t0, reg & ~1); > - gen_load_fpr32(t1, reg | 1); > - tcg_gen_concat_i32_i64(t, t0, t1); > - tcg_temp_free_i32(t0); > - tcg_temp_free_i32(t1); > + tcg_gen_concat32_i64(t, fpu_f64[reg & ~1], fpu_f64[reg | 1]); > } > } > > -static inline void gen_store_fpr64 (DisasContext *ctx, TCGv_i64 t, int reg) > +static void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg) > { > if (ctx->hflags & MIPS_HFLAG_F64) { > - tcg_gen_st_i64(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].d)); > + tcg_gen_mov_i64(fpu_f64[reg], t); > } else { > - TCGv_i64 t0 = tcg_temp_new_i64(); > - TCGv_i32 t1 = tcg_temp_new_i32(); > - tcg_gen_trunc_i64_i32(t1, t); > - gen_store_fpr32(t1, reg & ~1); > + TCGv_i64 t0; > + tcg_gen_deposit_i64(fpu_f64[reg & ~1], fpu_f64[reg & ~1], t, 0, 32); > + t0 = tcg_temp_new_i64(); > tcg_gen_shri_i64(t0, t, 32); > - tcg_gen_trunc_i64_i32(t1, t0); > - gen_store_fpr32(t1, reg | 1); > - tcg_temp_free_i32(t1); > + tcg_gen_deposit_i64(fpu_f64[reg | 1], fpu_f64[reg | 1], t0, 0, 32); > tcg_temp_free_i64(t0); > } > } > @@ -12688,6 +12694,12 @@ static void mips_tcg_init(void) > cpu_gpr[i] = tcg_global_mem_new(TCG_AREG0, > offsetof(CPUMIPSState, active_tc.gpr[i]), > regnames[i]); > + > + for (i = 0; i < 32; i++) { > + int off = offsetof(CPUMIPSState, active_fpu.fpr[i]); > + fpu_f64[i] = tcg_global_mem_new_i64(TCG_AREG0, off, fregnames[i]); > + } > + > cpu_PC = tcg_global_mem_new(TCG_AREG0, > offsetof(CPUMIPSState, active_tc.PC), "PC"); > for (i = 0; i < MIPS_DSP_ACC; i++) { > -- > 1.7.11.4 > > > -- Aurelien Jarno GPG: 1024D/F1BCDB73 aurelien@aurel32.net http://www.aurel32.net