qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Aurelien Jarno <aurelien@aurel32.net>
To: Richard Henderson <rth@twiddle.net>
Cc: qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH v2] target-mips: Use TCG registers for the FPU.
Date: Wed, 19 Sep 2012 22:07:18 +0200	[thread overview]
Message-ID: <20120919200718.GA4457@ohm.aurel32.net> (raw)
In-Reply-To: <1348034655-2946-1-git-send-email-rth@twiddle.net>

On Tue, Sep 18, 2012 at 11:04:15PM -0700, Richard Henderson wrote:
> With normal FP, this doesn't have much affect on the generated code,
> because most of the FP operations are not CONST/PURE, and so we spill
> registers in about the same frequency as the explicit load/stores.
> 
> But with Loongson multimedia instructions, which are all integral and
> whose helpers are in fact CONST+PURE, this greatly improves the code.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
> 
> As requested, only generating 64-bit fp registers now.  The generated
> code looks quite good on i386.  It could be a tad better for x86_64,
> but it's still better than it was.
> 

Thanks for this new version, the decrease in speed is only about 1% now, and
not noticeable with my new version of the register allocator. It looks
fine to me.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>

> 
>  target-mips/translate.c | 96 +++++++++++++++++++++++++++----------------------
>  1 file changed, 54 insertions(+), 42 deletions(-)
> 
> diff --git a/target-mips/translate.c b/target-mips/translate.c
> index 52eeb2b..4ae15aa 100644
> --- a/target-mips/translate.c
> +++ b/target-mips/translate.c
> @@ -478,6 +478,7 @@ static TCGv cpu_HI[MIPS_DSP_ACC], cpu_LO[MIPS_DSP_ACC], cpu_ACX[MIPS_DSP_ACC];
>  static TCGv cpu_dspctrl, btarget, bcond;
>  static TCGv_i32 hflags;
>  static TCGv_i32 fpu_fcr0, fpu_fcr31;
> +static TCGv_i64 fpu_f64[32];
>  
>  static uint32_t gen_opc_hflags[OPC_BUF_SIZE];
>  
> @@ -545,26 +546,31 @@ enum {
>      BS_EXCP     = 3, /* We reached an exception condition */
>  };
>  
> -static const char *regnames[] =
> -    { "r0", "at", "v0", "v1", "a0", "a1", "a2", "a3",
> -      "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7",
> -      "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
> -      "t8", "t9", "k0", "k1", "gp", "sp", "s8", "ra", };
> +static const char * const regnames[] = {
> +    "r0", "at", "v0", "v1", "a0", "a1", "a2", "a3",
> +    "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7",
> +    "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
> +    "t8", "t9", "k0", "k1", "gp", "sp", "s8", "ra",
> +};
>  
> -static const char *regnames_HI[] =
> -    { "HI0", "HI1", "HI2", "HI3", };
> +static const char * const regnames_HI[] = {
> +    "HI0", "HI1", "HI2", "HI3",
> +};
>  
> -static const char *regnames_LO[] =
> -    { "LO0", "LO1", "LO2", "LO3", };
> +static const char * const regnames_LO[] = {
> +    "LO0", "LO1", "LO2", "LO3",
> +};
>  
> -static const char *regnames_ACX[] =
> -    { "ACX0", "ACX1", "ACX2", "ACX3", };
> +static const char * const regnames_ACX[] = {
> +    "ACX0", "ACX1", "ACX2", "ACX3",
> +};
>  
> -static const char *fregnames[] =
> -    { "f0",  "f1",  "f2",  "f3",  "f4",  "f5",  "f6",  "f7",
> -      "f8",  "f9",  "f10", "f11", "f12", "f13", "f14", "f15",
> -      "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
> -      "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", };
> +static const char * const fregnames[] = {
> +    "f0",  "f1",  "f2",  "f3",  "f4",  "f5",  "f6",  "f7",
> +    "f8",  "f9",  "f10", "f11", "f12", "f13", "f14", "f15",
> +    "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
> +    "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31",
> +};
>  
>  #ifdef MIPS_DEBUG_DISAS
>  #define MIPS_DEBUG(fmt, ...)                         \
> @@ -658,54 +664,54 @@ static inline void gen_store_srsgpr (int from, int to)
>  }
>  
>  /* Floating point register moves. */
> -static inline void gen_load_fpr32 (TCGv_i32 t, int reg)
> +static void gen_load_fpr32(TCGv_i32 t, int reg)
>  {
> -    tcg_gen_ld_i32(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].w[FP_ENDIAN_IDX]));
> +    tcg_gen_trunc_i64_i32(t, fpu_f64[reg]);
>  }
>  
> -static inline void gen_store_fpr32 (TCGv_i32 t, int reg)
> +static void gen_store_fpr32(TCGv_i32 t, int reg)
>  {
> -    tcg_gen_st_i32(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].w[FP_ENDIAN_IDX]));
> +    TCGv_i64 t64 = tcg_temp_new_i64();
> +    tcg_gen_extu_i32_i64(t64, t);
> +    tcg_gen_deposit_i64(fpu_f64[reg], fpu_f64[reg], t64, 0, 32);
> +    tcg_temp_free_i64(t64);
>  }
>  
> -static inline void gen_load_fpr32h (TCGv_i32 t, int reg)
> +static void gen_load_fpr32h(TCGv_i32 t, int reg)
>  {
> -    tcg_gen_ld_i32(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].w[!FP_ENDIAN_IDX]));
> +    TCGv_i64 t64 = tcg_temp_new_i64();
> +    tcg_gen_shri_i64(t64, fpu_f64[reg], 32);
> +    tcg_gen_trunc_i64_i32(t, t64);
> +    tcg_temp_free_i64(t64);
>  }
>  
> -static inline void gen_store_fpr32h (TCGv_i32 t, int reg)
> +static void gen_store_fpr32h(TCGv_i32 t, int reg)
>  {
> -    tcg_gen_st_i32(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].w[!FP_ENDIAN_IDX]));
> +    TCGv_i64 t64 = tcg_temp_new_i64();
> +    tcg_gen_extu_i32_i64(t64, t);
> +    tcg_gen_deposit_i64(fpu_f64[reg], fpu_f64[reg], t64, 32, 32);
> +    tcg_temp_free_i64(t64);
>  }
>  
> -static inline void gen_load_fpr64 (DisasContext *ctx, TCGv_i64 t, int reg)
> +static void gen_load_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
>  {
>      if (ctx->hflags & MIPS_HFLAG_F64) {
> -        tcg_gen_ld_i64(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].d));
> +        tcg_gen_mov_i64(t, fpu_f64[reg]);
>      } else {
> -        TCGv_i32 t0 = tcg_temp_new_i32();
> -        TCGv_i32 t1 = tcg_temp_new_i32();
> -        gen_load_fpr32(t0, reg & ~1);
> -        gen_load_fpr32(t1, reg | 1);
> -        tcg_gen_concat_i32_i64(t, t0, t1);
> -        tcg_temp_free_i32(t0);
> -        tcg_temp_free_i32(t1);
> +        tcg_gen_concat32_i64(t, fpu_f64[reg & ~1], fpu_f64[reg | 1]);
>      }
>  }
>  
> -static inline void gen_store_fpr64 (DisasContext *ctx, TCGv_i64 t, int reg)
> +static void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
>  {
>      if (ctx->hflags & MIPS_HFLAG_F64) {
> -        tcg_gen_st_i64(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].d));
> +        tcg_gen_mov_i64(fpu_f64[reg], t);
>      } else {
> -        TCGv_i64 t0 = tcg_temp_new_i64();
> -        TCGv_i32 t1 = tcg_temp_new_i32();
> -        tcg_gen_trunc_i64_i32(t1, t);
> -        gen_store_fpr32(t1, reg & ~1);
> +        TCGv_i64 t0;
> +        tcg_gen_deposit_i64(fpu_f64[reg & ~1], fpu_f64[reg & ~1], t, 0, 32);
> +        t0 = tcg_temp_new_i64();
>          tcg_gen_shri_i64(t0, t, 32);
> -        tcg_gen_trunc_i64_i32(t1, t0);
> -        gen_store_fpr32(t1, reg | 1);
> -        tcg_temp_free_i32(t1);
> +        tcg_gen_deposit_i64(fpu_f64[reg | 1], fpu_f64[reg | 1], t0, 0, 32);
>          tcg_temp_free_i64(t0);
>      }
>  }
> @@ -12688,6 +12694,12 @@ static void mips_tcg_init(void)
>          cpu_gpr[i] = tcg_global_mem_new(TCG_AREG0,
>                                          offsetof(CPUMIPSState, active_tc.gpr[i]),
>                                          regnames[i]);
> +
> +    for (i = 0; i < 32; i++) {
> +        int off = offsetof(CPUMIPSState, active_fpu.fpr[i]);
> +        fpu_f64[i] = tcg_global_mem_new_i64(TCG_AREG0, off, fregnames[i]);
> +    }
> +
>      cpu_PC = tcg_global_mem_new(TCG_AREG0,
>                                  offsetof(CPUMIPSState, active_tc.PC), "PC");
>      for (i = 0; i < MIPS_DSP_ACC; i++) {
> -- 
> 1.7.11.4
> 
> 
> 

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

  reply	other threads:[~2012-09-19 20:07 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-09-19  6:04 [Qemu-devel] [PATCH v2] target-mips: Use TCG registers for the FPU Richard Henderson
2012-09-19 20:07 ` Aurelien Jarno [this message]
2012-10-05 14:20 ` Richard Henderson
2012-10-05 17:03   ` Aurelien Jarno
2012-10-29 14:34     ` Aurelien Jarno

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20120919200718.GA4457@ohm.aurel32.net \
    --to=aurelien@aurel32.net \
    --cc=qemu-devel@nongnu.org \
    --cc=rth@twiddle.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).