All of lore.kernel.org
 help / color / mirror / Atom feed
From: Aurelien Jarno <aurelien@aurel32.net>
To: Richard Henderson <rth@twiddle.net>
Cc: qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH v2] target-mips: Use TCG registers for the FPU.
Date: Wed, 19 Sep 2012 22:07:18 +0200	[thread overview]
Message-ID: <20120919200718.GA4457@ohm.aurel32.net> (raw)
In-Reply-To: <1348034655-2946-1-git-send-email-rth@twiddle.net>

On Tue, Sep 18, 2012 at 11:04:15PM -0700, Richard Henderson wrote:
> With normal FP, this doesn't have much effect on the generated code,
> because most of the FP operations are not CONST/PURE, and so we spill
> registers at about the same frequency as the explicit load/stores.
> 
> But with Loongson multimedia instructions, which are all integral and
> whose helpers are in fact CONST+PURE, this greatly improves the code.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
> 
> As requested, only generating 64-bit fp registers now.  The generated
> code looks quite good on i386.  It could be a tad better for x86_64,
> but it's still better than it was.
> 

Thanks for this new version. The decrease in speed is only about 1% now, and
it is not noticeable with my new version of the register allocator. It looks
fine to me.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>

> 
>  target-mips/translate.c | 96 +++++++++++++++++++++++++++----------------------
>  1 file changed, 54 insertions(+), 42 deletions(-)
> 
> diff --git a/target-mips/translate.c b/target-mips/translate.c
> index 52eeb2b..4ae15aa 100644
> --- a/target-mips/translate.c
> +++ b/target-mips/translate.c
> @@ -478,6 +478,7 @@ static TCGv cpu_HI[MIPS_DSP_ACC], cpu_LO[MIPS_DSP_ACC], cpu_ACX[MIPS_DSP_ACC];
>  static TCGv cpu_dspctrl, btarget, bcond;
>  static TCGv_i32 hflags;
>  static TCGv_i32 fpu_fcr0, fpu_fcr31;
> +static TCGv_i64 fpu_f64[32];
>  
>  static uint32_t gen_opc_hflags[OPC_BUF_SIZE];
>  
> @@ -545,26 +546,31 @@ enum {
>      BS_EXCP     = 3, /* We reached an exception condition */
>  };
>  
> -static const char *regnames[] =
> -    { "r0", "at", "v0", "v1", "a0", "a1", "a2", "a3",
> -      "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7",
> -      "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
> -      "t8", "t9", "k0", "k1", "gp", "sp", "s8", "ra", };
> +static const char * const regnames[] = {
> +    "r0", "at", "v0", "v1", "a0", "a1", "a2", "a3",
> +    "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7",
> +    "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
> +    "t8", "t9", "k0", "k1", "gp", "sp", "s8", "ra",
> +};
>  
> -static const char *regnames_HI[] =
> -    { "HI0", "HI1", "HI2", "HI3", };
> +static const char * const regnames_HI[] = {
> +    "HI0", "HI1", "HI2", "HI3",
> +};
>  
> -static const char *regnames_LO[] =
> -    { "LO0", "LO1", "LO2", "LO3", };
> +static const char * const regnames_LO[] = {
> +    "LO0", "LO1", "LO2", "LO3",
> +};
>  
> -static const char *regnames_ACX[] =
> -    { "ACX0", "ACX1", "ACX2", "ACX3", };
> +static const char * const regnames_ACX[] = {
> +    "ACX0", "ACX1", "ACX2", "ACX3",
> +};
>  
> -static const char *fregnames[] =
> -    { "f0",  "f1",  "f2",  "f3",  "f4",  "f5",  "f6",  "f7",
> -      "f8",  "f9",  "f10", "f11", "f12", "f13", "f14", "f15",
> -      "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
> -      "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", };
> +static const char * const fregnames[] = {
> +    "f0",  "f1",  "f2",  "f3",  "f4",  "f5",  "f6",  "f7",
> +    "f8",  "f9",  "f10", "f11", "f12", "f13", "f14", "f15",
> +    "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
> +    "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31",
> +};
>  
>  #ifdef MIPS_DEBUG_DISAS
>  #define MIPS_DEBUG(fmt, ...)                         \
> @@ -658,54 +664,54 @@ static inline void gen_store_srsgpr (int from, int to)
>  }
>  
>  /* Floating point register moves. */
> -static inline void gen_load_fpr32 (TCGv_i32 t, int reg)
> +static void gen_load_fpr32(TCGv_i32 t, int reg)
>  {
> -    tcg_gen_ld_i32(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].w[FP_ENDIAN_IDX]));
> +    tcg_gen_trunc_i64_i32(t, fpu_f64[reg]);
>  }
>  
> -static inline void gen_store_fpr32 (TCGv_i32 t, int reg)
> +static void gen_store_fpr32(TCGv_i32 t, int reg)
>  {
> -    tcg_gen_st_i32(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].w[FP_ENDIAN_IDX]));
> +    TCGv_i64 t64 = tcg_temp_new_i64();
> +    tcg_gen_extu_i32_i64(t64, t);
> +    tcg_gen_deposit_i64(fpu_f64[reg], fpu_f64[reg], t64, 0, 32);
> +    tcg_temp_free_i64(t64);
>  }
>  
> -static inline void gen_load_fpr32h (TCGv_i32 t, int reg)
> +static void gen_load_fpr32h(TCGv_i32 t, int reg)
>  {
> -    tcg_gen_ld_i32(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].w[!FP_ENDIAN_IDX]));
> +    TCGv_i64 t64 = tcg_temp_new_i64();
> +    tcg_gen_shri_i64(t64, fpu_f64[reg], 32);
> +    tcg_gen_trunc_i64_i32(t, t64);
> +    tcg_temp_free_i64(t64);
>  }
>  
> -static inline void gen_store_fpr32h (TCGv_i32 t, int reg)
> +static void gen_store_fpr32h(TCGv_i32 t, int reg)
>  {
> -    tcg_gen_st_i32(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].w[!FP_ENDIAN_IDX]));
> +    TCGv_i64 t64 = tcg_temp_new_i64();
> +    tcg_gen_extu_i32_i64(t64, t);
> +    tcg_gen_deposit_i64(fpu_f64[reg], fpu_f64[reg], t64, 32, 32);
> +    tcg_temp_free_i64(t64);
>  }
>  
> -static inline void gen_load_fpr64 (DisasContext *ctx, TCGv_i64 t, int reg)
> +static void gen_load_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
>  {
>      if (ctx->hflags & MIPS_HFLAG_F64) {
> -        tcg_gen_ld_i64(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].d));
> +        tcg_gen_mov_i64(t, fpu_f64[reg]);
>      } else {
> -        TCGv_i32 t0 = tcg_temp_new_i32();
> -        TCGv_i32 t1 = tcg_temp_new_i32();
> -        gen_load_fpr32(t0, reg & ~1);
> -        gen_load_fpr32(t1, reg | 1);
> -        tcg_gen_concat_i32_i64(t, t0, t1);
> -        tcg_temp_free_i32(t0);
> -        tcg_temp_free_i32(t1);
> +        tcg_gen_concat32_i64(t, fpu_f64[reg & ~1], fpu_f64[reg | 1]);
>      }
>  }
>  
> -static inline void gen_store_fpr64 (DisasContext *ctx, TCGv_i64 t, int reg)
> +static void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
>  {
>      if (ctx->hflags & MIPS_HFLAG_F64) {
> -        tcg_gen_st_i64(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].d));
> +        tcg_gen_mov_i64(fpu_f64[reg], t);
>      } else {
> -        TCGv_i64 t0 = tcg_temp_new_i64();
> -        TCGv_i32 t1 = tcg_temp_new_i32();
> -        tcg_gen_trunc_i64_i32(t1, t);
> -        gen_store_fpr32(t1, reg & ~1);
> +        TCGv_i64 t0;
> +        tcg_gen_deposit_i64(fpu_f64[reg & ~1], fpu_f64[reg & ~1], t, 0, 32);
> +        t0 = tcg_temp_new_i64();
>          tcg_gen_shri_i64(t0, t, 32);
> -        tcg_gen_trunc_i64_i32(t1, t0);
> -        gen_store_fpr32(t1, reg | 1);
> -        tcg_temp_free_i32(t1);
> +        tcg_gen_deposit_i64(fpu_f64[reg | 1], fpu_f64[reg | 1], t0, 0, 32);
>          tcg_temp_free_i64(t0);
>      }
>  }
> @@ -12688,6 +12694,12 @@ static void mips_tcg_init(void)
>          cpu_gpr[i] = tcg_global_mem_new(TCG_AREG0,
>                                          offsetof(CPUMIPSState, active_tc.gpr[i]),
>                                          regnames[i]);
> +
> +    for (i = 0; i < 32; i++) {
> +        int off = offsetof(CPUMIPSState, active_fpu.fpr[i]);
> +        fpu_f64[i] = tcg_global_mem_new_i64(TCG_AREG0, off, fregnames[i]);
> +    }
> +
>      cpu_PC = tcg_global_mem_new(TCG_AREG0,
>                                  offsetof(CPUMIPSState, active_tc.PC), "PC");
>      for (i = 0; i < MIPS_DSP_ACC; i++) {
> -- 
> 1.7.11.4
> 
> 
> 

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

  reply	other threads:[~2012-09-19 20:07 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-09-19  6:04 [Qemu-devel] [PATCH v2] target-mips: Use TCG registers for the FPU Richard Henderson
2012-09-19 20:07 ` Aurelien Jarno [this message]
2012-10-05 14:20 ` Richard Henderson
2012-10-05 17:03   ` Aurelien Jarno
2012-10-29 14:34     ` Aurelien Jarno

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20120919200718.GA4457@ohm.aurel32.net \
    --to=aurelien@aurel32.net \
    --cc=qemu-devel@nongnu.org \
    --cc=rth@twiddle.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.