All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tom Musta <tommusta@gmail.com>
To: Paolo Bonzini <pbonzini@redhat.com>, qemu-devel@nongnu.org
Cc: dgibson@redhat.com, qemu-ppc@nongnu.org,
	Richard Henderson <rth@twiddle.net>
Subject: Re: [Qemu-devel] [PATCH 15/17] ppc: store CR registers in 32 1-bit registers
Date: Thu, 04 Sep 2014 13:27:03 -0500	[thread overview]
Message-ID: <5408AEF7.7030000@gmail.com> (raw)
In-Reply-To: <1409246113-6519-16-git-send-email-pbonzini@redhat.com>

On 8/28/2014 12:15 PM, Paolo Bonzini wrote:
> This makes comparisons much smaller and faster.  The speedup is
> approximately 10% on user-mode emulation on x86 host, 3-4% on PPC.
> 
> Note that CRF_* constants are flipped to match PowerPC's big
> bit-endianness.  Previously, the CR register was effectively stored
> in mixed endianness, so now there is less indirection going on.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

There are some issues with this patch -- it doesn't compile due to some typing issues.  There are also some functional issues.  Some details are below.

(nit) Also, it doesn't pass checkpatch.pl.

> ---
>  linux-user/main.c       |   4 +-
>  target-ppc/cpu.h        |  33 ++++--
>  target-ppc/fpu_helper.c |  39 ++----
>  target-ppc/helper.h     |   6 -
>  target-ppc/int_helper.c |   2 +-
>  target-ppc/machine.c    |   9 ++
>  target-ppc/translate.c  | 307 +++++++++++++++++++++++++-----------------------
>  7 files changed, 204 insertions(+), 196 deletions(-)
> 
> diff --git a/linux-user/main.c b/linux-user/main.c
> index 152c031..b403f24 100644
> --- a/linux-user/main.c
> +++ b/linux-user/main.c
> @@ -1929,7 +1929,7 @@ void cpu_loop(CPUPPCState *env)
>               * PPC ABI uses overflow flag in cr0 to signal an error
>               * in syscalls.
>               */
> -            env->crf[0] &= ~0x1;
> +            env->cr[CRF_SO] = 0;
>              ret = do_syscall(env, env->gpr[0], env->gpr[3], env->gpr[4],
>                               env->gpr[5], env->gpr[6], env->gpr[7],
>                               env->gpr[8], 0, 0);
> @@ -1939,7 +1939,7 @@ void cpu_loop(CPUPPCState *env)
>                  break;
>              }
>              if (ret > (target_ulong)(-515)) {
> -                env->crf[0] |= 0x1;
> +                env->cr[CRF_SO] = 1;
>                  ret = -ret;
>              }
>              env->gpr[3] = ret;
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index 05c29b2..67510e8 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -939,7 +939,7 @@ struct CPUPPCState {
>      /* CTR */
>      target_ulong ctr;
>      /* condition register */
> -    uint32_t crf[8];
> +    uint32_t cr[32];
>  #if defined(TARGET_PPC64)
>      /* CFAR */
>      target_ulong cfar;
> @@ -1058,6 +1058,9 @@ struct CPUPPCState {
>      uint64_t dtl_addr, dtl_size;
>  #endif /* TARGET_PPC64 */
>  
> +    /* condition register, for migration compatibility */
> +    uint32_t crf[8];
> +
>      int error_code;
>      uint32_t pending_interrupts;
>  #if !defined(CONFIG_USER_ONLY)
> @@ -1200,12 +1203,20 @@ void store_fpscr(CPUPPCState *env, uint64_t arg, uint32_t mask);
>  
>  static inline uint32_t ppc_get_crf(const CPUPPCState *env, int i)
>  {
> -    return env->crf[i];
> +    uint32_t r;
> +    r = env->cr[i * 4];
> +    r = (r << 1) | (env->cr[i * 4 + 1]);
> +    r = (r << 1) | (env->cr[i * 4 + 2]);
> +    r = (r << 1) | (env->cr[i * 4 + 3]);
> +    return r;
>  }
>  
>  static inline void ppc_set_crf(CPUPPCState *env, int i, uint32_t val)
>  {
> -    env->crf[i] = val;
> +    env->cr[i * 4 + 0] = (val & 0x08) != 0;
> +    env->cr[i * 4 + 1] = (val & 0x04) != 0;
> +    env->cr[i * 4 + 2] = (val & 0x02) != 0;
> +    env->cr[i * 4 + 3] = (val & 0x01) != 0;
>  }
>  
>  static inline uint64_t ppc_dump_gpr(CPUPPCState *env, int gprn)
> @@ -1256,14 +1267,14 @@ static inline int cpu_mmu_index (CPUPPCState *env)
>  
>  /*****************************************************************************/
>  /* CRF definitions */
> -#define CRF_LT        3
> -#define CRF_GT        2
> -#define CRF_EQ        1
> -#define CRF_SO        0
> -#define CRF_CH        (1 << CRF_LT)
> -#define CRF_CL        (1 << CRF_GT)
> -#define CRF_CH_OR_CL  (1 << CRF_EQ)
> -#define CRF_CH_AND_CL (1 << CRF_SO)
> +#define CRF_LT        0
> +#define CRF_GT        1
> +#define CRF_EQ        2
> +#define CRF_SO        3
> +#define CRF_CH        CRF_LT
> +#define CRF_CL        CRF_GT
> +#define CRF_CH_OR_CL  CRF_EQ
> +#define CRF_CH_AND_CL CRF_SO
>  
>  /* XER definitions */
>  #define XER_SO  31
> diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
> index 1ccbcf3..9574ebe 100644
> --- a/target-ppc/fpu_helper.c
> +++ b/target-ppc/fpu_helper.c
> @@ -1098,8 +1098,8 @@ void helper_fcmpu(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
>      }
>  
>      env->fpscr &= ~(0x0F << FPSCR_FPRF);
> -    env->fpscr |= (0x01 << FPSCR_FPRF) << ret;
> -    ppc_set_crf(env, crfD, 1 << ret);
> +    env->fpscr |= (0x08 << FPSCR_FPRF) >> ret;
> +    ppc_set_crf(env, crfD, 0x08 >> ret);
>  
>      if (unlikely(ret == CRF_SO
>                   && (float64_is_signaling_nan(farg1.d) ||
> @@ -1130,8 +1130,8 @@ void helper_fcmpo(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
>      }
>  
>      env->fpscr &= ~(0x0F << FPSCR_FPRF);
> -    env->fpscr |= (0x01 << FPSCR_FPRF) << ret;
> -    ppc_set_crf(env, crfD, 1 << ret);
> +    env->fpscr |= (0x08 << FPSCR_FPRF) >> ret;
> +    ppc_set_crf(env, crfD, 0x08 >> ret);
>  
>      if (unlikely(ret == CRF_SO)) {
>          if (float64_is_signaling_nan(farg1.d) ||
> @@ -1403,7 +1403,7 @@ static inline uint32_t efscmplt(CPUPPCState *env, uint32_t op1, uint32_t op2)
>  
>      u1.l = op1;
>      u2.l = op2;
> -    return float32_lt(u1.f, u2.f, &env->vec_status) ? 4 : 0;
> +    return float32_lt(u1.f, u2.f, &env->vec_status);
>  }
>  
>  static inline uint32_t efscmpgt(CPUPPCState *env, uint32_t op1, uint32_t op2)
> @@ -1412,7 +1412,7 @@ static inline uint32_t efscmpgt(CPUPPCState *env, uint32_t op1, uint32_t op2)
>  
>      u1.l = op1;
>      u2.l = op2;
> -    return float32_le(u1.f, u2.f, &env->vec_status) ? 0 : 4;
> +    return !float32_le(u1.f, u2.f, &env->vec_status);
>  }
>  
>  static inline uint32_t efscmpeq(CPUPPCState *env, uint32_t op1, uint32_t op2)
> @@ -1421,7 +1421,7 @@ static inline uint32_t efscmpeq(CPUPPCState *env, uint32_t op1, uint32_t op2)
>  
>      u1.l = op1;
>      u2.l = op2;
> -    return float32_eq(u1.f, u2.f, &env->vec_status) ? 4 : 0;
> +    return float32_eq(u1.f, u2.f, &env->vec_status);
>  }
>  
>  static inline uint32_t efststlt(CPUPPCState *env, uint32_t op1, uint32_t op2)
> @@ -1465,25 +1465,6 @@ static inline uint32_t evcmp_merge(int t0, int t1)
>      return (t0 << 3) | (t1 << 2) | ((t0 | t1) << 1) | (t0 & t1);
>  }
>  
> -#define HELPER_VECTOR_SPE_CMP(name)                                     \
> -    uint32_t helper_ev##name(CPUPPCState *env, uint64_t op1, uint64_t op2) \
> -    {                                                                   \
> -        return evcmp_merge(e##name(env, op1 >> 32, op2 >> 32),          \
> -                           e##name(env, op1, op2));                     \
> -    }
> -/* evfststlt */
> -HELPER_VECTOR_SPE_CMP(fststlt);
> -/* evfststgt */
> -HELPER_VECTOR_SPE_CMP(fststgt);
> -/* evfststeq */
> -HELPER_VECTOR_SPE_CMP(fststeq);
> -/* evfscmplt */
> -HELPER_VECTOR_SPE_CMP(fscmplt);
> -/* evfscmpgt */
> -HELPER_VECTOR_SPE_CMP(fscmpgt);
> -/* evfscmpeq */
> -HELPER_VECTOR_SPE_CMP(fscmpeq);
> -
>  /* Double-precision floating-point conversion */
>  uint64_t helper_efdcfsi(CPUPPCState *env, uint32_t val)
>  {
> @@ -1725,7 +1706,7 @@ uint32_t helper_efdtstlt(CPUPPCState *env, uint64_t op1, uint64_t op2)
>  
>      u1.ll = op1;
>      u2.ll = op2;
> -    return float64_lt(u1.d, u2.d, &env->vec_status) ? 4 : 0;
> +    return float64_lt(u1.d, u2.d, &env->vec_status);
>  }
>  
>  uint32_t helper_efdtstgt(CPUPPCState *env, uint64_t op1, uint64_t op2)
> @@ -1734,7 +1715,7 @@ uint32_t helper_efdtstgt(CPUPPCState *env, uint64_t op1, uint64_t op2)
>  
>      u1.ll = op1;
>      u2.ll = op2;
> -    return float64_le(u1.d, u2.d, &env->vec_status) ? 0 : 4;
> +    return !float64_le(u1.d, u2.d, &env->vec_status);
>  }
>  
>  uint32_t helper_efdtsteq(CPUPPCState *env, uint64_t op1, uint64_t op2)
> @@ -1743,7 +1724,7 @@ uint32_t helper_efdtsteq(CPUPPCState *env, uint64_t op1, uint64_t op2)
>  
>      u1.ll = op1;
>      u2.ll = op2;
> -    return float64_eq_quiet(u1.d, u2.d, &env->vec_status) ? 4 : 0;
> +    return float64_eq_quiet(u1.d, u2.d, &env->vec_status);
>  }
>  
>  uint32_t helper_efdcmplt(CPUPPCState *env, uint64_t op1, uint64_t op2)
> diff --git a/target-ppc/helper.h b/target-ppc/helper.h
> index 5342f13..8d6a92b 100644
> --- a/target-ppc/helper.h
> +++ b/target-ppc/helper.h
> @@ -493,12 +493,6 @@ DEF_HELPER_3(efststeq, i32, env, i32, i32)
>  DEF_HELPER_3(efscmplt, i32, env, i32, i32)
>  DEF_HELPER_3(efscmpgt, i32, env, i32, i32)
>  DEF_HELPER_3(efscmpeq, i32, env, i32, i32)
> -DEF_HELPER_3(evfststlt, i32, env, i64, i64)
> -DEF_HELPER_3(evfststgt, i32, env, i64, i64)
> -DEF_HELPER_3(evfststeq, i32, env, i64, i64)
> -DEF_HELPER_3(evfscmplt, i32, env, i64, i64)
> -DEF_HELPER_3(evfscmpgt, i32, env, i64, i64)
> -DEF_HELPER_3(evfscmpeq, i32, env, i64, i64)
>  DEF_HELPER_2(efdcfsi, i64, env, i32)
>  DEF_HELPER_2(efdcfsid, i64, env, i64)
>  DEF_HELPER_2(efdcfui, i64, env, i32)
> diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
> index 2287064..d3ace6a 100644
> --- a/target-ppc/int_helper.c
> +++ b/target-ppc/int_helper.c
> @@ -2602,7 +2602,7 @@ target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
>   done:
>      env->xer = (env->xer & ~0x7F) | i;
>      if (update_Rc) {
> -        env->crf[0] |= xer_so;
> +        env->cr[CRF_SO] = xer_so;
>      }
>      return i;
>  }
> diff --git a/target-ppc/machine.c b/target-ppc/machine.c
> index c801b82..9fa309a 100644
> --- a/target-ppc/machine.c
> +++ b/target-ppc/machine.c
> @@ -132,6 +132,10 @@ static void cpu_pre_save(void *opaque)
>      CPUPPCState *env = &cpu->env;
>      int i;
>  
> +    for (i = 0; i < 8; i++) {
> +        env->crf[i] = ppc_get_crf(env, i);
> +    }
> +
>      env->spr[SPR_LR] = env->lr;
>      env->spr[SPR_CTR] = env->ctr;
>      env->spr[SPR_XER] = env->xer;
> @@ -165,6 +169,11 @@ static int cpu_post_load(void *opaque, int version_id)
>       * software has to take care of running QEMU in a compatible mode.
>       */
>      env->spr[SPR_PVR] = env->spr_cb[SPR_PVR].default_value;
> +
> +    for (i = 0; i < 8; i++) {
> +        ppc_set_crf(env, i, env->crf[i]);
> +    }
> +
>      env->lr = env->spr[SPR_LR];
>      env->ctr = env->spr[SPR_CTR];
>      env->xer = env->spr[SPR_XER];
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index 1ed6a8f..dd19b39 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -53,13 +53,13 @@ static char cpu_reg_names[10*3 + 22*4 /* GPR */
>      + 10*4 + 22*5 /* FPR */
>      + 2*(10*6 + 22*7) /* AVRh, AVRl */
>      + 10*5 + 22*6 /* VSR */
> -    + 8*5 /* CRF */];
> +    + 32*8 /* CR */];
>  static TCGv cpu_gpr[32];
>  static TCGv cpu_gprh[32];
>  static TCGv_i64 cpu_fpr[32];
>  static TCGv_i64 cpu_avrh[32], cpu_avrl[32];
>  static TCGv_i64 cpu_vsr[32];
> -static TCGv_i32 cpu_crf[8];
> +static TCGv_i32 cpu_cr[32];
>  static TCGv cpu_nip;
>  static TCGv cpu_msr;
>  static TCGv cpu_ctr;
> @@ -89,12 +89,13 @@ void ppc_translate_init(void)
>      p = cpu_reg_names;
>      cpu_reg_names_size = sizeof(cpu_reg_names);
>  
> -    for (i = 0; i < 8; i++) {
> -        snprintf(p, cpu_reg_names_size, "crf%d", i);
> -        cpu_crf[i] = tcg_global_mem_new_i32(TCG_AREG0,
> -                                            offsetof(CPUPPCState, crf[i]), p);
> -        p += 5;
> -        cpu_reg_names_size -= 5;
> +    for (i = 0; i < 32; i++) {
> +        static const char names[] = "lt\0gt\0eq\0so";
> +        snprintf(p, cpu_reg_names_size, "cr%d[%s]", i >> 2, names + (i & 3) * 3);
> +        cpu_cr[i] = tcg_global_mem_new_i32(TCG_AREG0,
> +                                           offsetof(CPUPPCState, cr[i]), p);
> +        p += 8;
> +        cpu_reg_names_size -= 8;
>      }
>  
>      for (i = 0; i < 32; i++) {
> @@ -251,17 +252,30 @@ static inline void gen_reset_fpstatus(void)
>  
>  static inline void gen_op_mfcr(TCGv dest, int first_cr, int shift)
>  {
> -    tcg_gen_shli_i32(dest, cpu_crf[first_cr >> 2], shift);
> +    TCGv_i32 t0 = tcg_temp_new_i32();
> +
> +    tcg_gen_shli_i32(dest, cpu_cr[first_cr + 3], shift);
> +    tcg_gen_shli_i32(t0, cpu_cr[first_cr + 2], shift + 1);
> +    tcg_gen_or_i32(dest, dest, t0);
> +    tcg_gen_shli_i32(t0, cpu_cr[first_cr + 1], shift + 2);
> +    tcg_gen_or_i32(dest, dest, t0);
> +    tcg_gen_shli_i32(t0, cpu_cr[first_cr], shift + 3);

This leaks t0.

>  }
>  
>  static inline void gen_op_mtcr(int first_cr, TCGv src, int shift)
>  {
>      if (shift) {
> -        tcg_gen_shri_i32(cpu_crf[first_cr >> 2], src, shift);
> -        tcg_gen_andi_i32(cpu_crf[first_cr >> 2], cpu_crf[first_cr >> 2], 0x0F);
> +        tcg_gen_shri_i32(cpu_cr[first_cr + 3], src, shift);
> +        tcg_gen_andi_i32(cpu_cr[first_cr + 3], cpu_cr[first_cr + 3], 1);
>      } else {
> -        tcg_gen_andi_i32(cpu_crf[first_cr >> 2], src, 0x0F);
> +        tcg_gen_andi_i32(cpu_cr[first_cr + 3], src, 1);
>      }
> +    tcg_gen_shri_i32(cpu_cr[first_cr + 2], src, shift + 1);
> +    tcg_gen_andi_i32(cpu_cr[first_cr + 2], cpu_cr[first_cr + 2], 1);
> +    tcg_gen_shri_i32(cpu_cr[first_cr + 1], src, shift + 2);
> +    tcg_gen_andi_i32(cpu_cr[first_cr + 1], cpu_cr[first_cr + 1], 1);
> +    tcg_gen_shri_i32(cpu_cr[first_cr], src, shift + 3);
> +    tcg_gen_andi_i32(cpu_cr[first_cr], cpu_cr[first_cr], 1);
>  }
>  
>  static inline void gen_compute_fprf(TCGv_i64 arg, int set_fprf, int set_rc)
> @@ -675,27 +689,19 @@ static bool is_user_mode(DisasContext *ctx)
>  static inline void gen_op_cmp(TCGv arg0, TCGv arg1, int s, int crf)
>  {
>      TCGv t0 = tcg_temp_new();
> -    TCGv_i32 t1 = tcg_temp_new_i32();
>  
> -    tcg_gen_trunc_tl_i32(cpu_crf[crf], cpu_so);
> +    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_SO], cpu_so);

This looks correct to me but is causing problems.  The above statement seems to get dropped in the generated asm ... at least on a PPC host:

IN:
0x00000000100005b4:  cmpw    cr3,r30,r29

OUT: [size=160]
0x6041ad30:  lwz     r14,-4(r27)
0x6041ad34:  cmpwi   cr7,r14,0
0x6041ad38:  bne-    cr7,0x6041adbc
0x6041ad3c:  ld      r14,240(r27)   <<< r30
0x6041ad40:  ld      r15,232(r27)   <<< r29
0x6041ad44:  cmpw    cr7,r14,r15    <<< this is the TCG_COND_LTx code
0x6041ad48:  li      r16,1
0x6041ad4c:  li      r0,0
0x6041ad50:  isel    r16,r16,r0,28
0x6041ad54:  stw     r16,576(r27)   <<< store cpu_cr[LT]
0x6041ad58:  cmpw    cr7,r14,r15
0x6041ad5c:  li      r16,1
0x6041ad60:  li      r0,0
0x6041ad64:  isel    r16,r16,r0,29
0x6041ad68:  stw     r16,580(r27)   <<< store cpu_cr[GT]
0x6041ad6c:  cmplw   cr7,r14,r15
0x6041ad70:  li      r14,1
0x6041ad74:  li      r0,0
0x6041ad78:  isel    r14,r14,r0,30
0x6041ad7c:  stw     r14,584(r27)   <<< store cpu_cr[EQ]
0x6041ad80:  .long 0x0
0x6041ad84:  .long 0x0

Richard:  any ideas or hints on how to proceed?
>  
>      tcg_gen_setcond_tl((s ? TCG_COND_LT: TCG_COND_LTU), t0, arg0, arg1);
> -    tcg_gen_trunc_tl_i32(t1, t0);
> -    tcg_gen_shli_i32(t1, t1, CRF_LT);
> -    tcg_gen_or_i32(cpu_crf[crf], cpu_crf[crf], t1);
> +    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_LT], t0);
>  
>      tcg_gen_setcond_tl((s ? TCG_COND_GT: TCG_COND_GTU), t0, arg0, arg1);
> -    tcg_gen_trunc_tl_i32(t1, t0);
> -    tcg_gen_shli_i32(t1, t1, CRF_GT);
> -    tcg_gen_or_i32(cpu_crf[crf], cpu_crf[crf], t1);
> +    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_GT], t0);
>  
>      tcg_gen_setcond_tl(TCG_COND_EQ, t0, arg0, arg1);
> -    tcg_gen_trunc_tl_i32(t1, t0);
> -    tcg_gen_shli_i32(t1, t1, CRF_EQ);
> -    tcg_gen_or_i32(cpu_crf[crf], cpu_crf[crf], t1);
> +    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_EQ], t0);
>  
>      tcg_temp_free(t0);
> -    tcg_temp_free_i32(t1);
>  }
>  
>  static inline void gen_op_cmpi(TCGv arg0, target_ulong arg1, int s, int crf)
> @@ -707,17 +713,22 @@ static inline void gen_op_cmpi(TCGv arg0, target_ulong arg1, int s, int crf)
>  
>  static inline void gen_op_cmp32(TCGv arg0, TCGv arg1, int s, int crf)
>  {
> -    TCGv t0, t1;
> +    TCGv_i32 t0, t1;
> +
>      t0 = tcg_temp_new();
>      t1 = tcg_temp_new();

Needs to be tcg_temp_new_i32() ....

> -    if (s) {
> -        tcg_gen_ext32s_tl(t0, arg0);
> -        tcg_gen_ext32s_tl(t1, arg1);
> -    } else {
> -        tcg_gen_ext32u_tl(t0, arg0);
> -        tcg_gen_ext32u_tl(t1, arg1);
> -    }
> -    gen_op_cmp(t0, t1, s, crf);
> +    tcg_gen_trunc_tl_i32(t0, arg0);
> +    tcg_gen_trunc_tl_i32(t1, arg1);
> +
> +    tcg_gen_setcond_i32((s ? TCG_COND_LT: TCG_COND_LTU), 
> +                        cpu_cr[crf * 4 + CRF_LT], t0, t1);
> +
> +    tcg_gen_setcond_i32((s ? TCG_COND_GT: TCG_COND_GTU), 
> +                        cpu_cr[crf * 4 + CRF_GT], t0, t1);
> +
> +    tcg_gen_setcond_i32(TCG_COND_EQ, 
> +                        cpu_cr[crf * 4 + CRF_EQ], t0, t1);
> +
>      tcg_temp_free(t1);
>      tcg_temp_free(t0);

... and tcg_temp_free_i32()

>  }
> @@ -790,15 +801,10 @@ static void gen_cmpli(DisasContext *ctx)
>  static void gen_isel(DisasContext *ctx)
>  {
>      uint32_t bi = rC(ctx->opcode);
> -    uint32_t mask;
> -    TCGv_i32 t0;
>      TCGv t1, true_op, zero;
>  
> -    mask = 0x08 >> (bi & 0x03);
> -    t0 = tcg_temp_new_i32();
> -    tcg_gen_andi_i32(t0, cpu_crf[bi >> 2], mask);
>      t1 = tcg_temp_new();
> -    tcg_gen_extu_i32_tl(t1, t0);
> +    tcg_gen_extu_i32_tl(t1, cpu_cr[bi]);
>      zero = tcg_const_tl(0);
>      if (rA(ctx->opcode) == 0)
>          true_op = zero;




> @@ -2288,21 +2294,29 @@ GEN_FLOAT_B(rim, 0x08, 0x0F, 1, PPC_FLOAT_EXT);
>  
>  static void gen_ftdiv(DisasContext *ctx)
>  {
> +    TCGv_i32 crf;
>      if (unlikely(!ctx->fpu_enabled)) {
>          gen_exception(ctx, POWERPC_EXCP_FPU);
>          return;
>      }
> -    gen_helper_ftdiv(cpu_crf[crfD(ctx->opcode)], cpu_fpr[rA(ctx->opcode)],
> +    crf = tcg_temp_new_i32();
> +    gen_helper_ftdiv(crf, cpu_fpr[rA(ctx->opcode)],
>                       cpu_fpr[rB(ctx->opcode)]);
> +    gen_op_mtcr(crfD(ctx->opcode) << 2, crf, 0);
> +    tcg_temp_free_i32(crf);
>  }
>  
>  static void gen_ftsqrt(DisasContext *ctx)
>  {
> +    TCGv_i32 crf;
>      if (unlikely(!ctx->fpu_enabled)) {
>          gen_exception(ctx, POWERPC_EXCP_FPU);
>          return;
>      }
> -    gen_helper_ftsqrt(cpu_crf[crfD(ctx->opcode)], cpu_fpr[rB(ctx->opcode)]);
> +    crf = tcg_temp_new_i32();
> +    gen_helper_ftsqrt(crf, cpu_fpr[rB(ctx->opcode)]);
> +    gen_op_mtcr(crfD(ctx->opcode) << 2, crf, 0);
> +    tcg_temp_free_i32(crf);
>  }
>  
>  
> @@ -3300,10 +3314,13 @@ static void gen_conditional_store(DisasContext *ctx, TCGv EA,
>  {
>      int l1;
>  
> -    tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
> +    tcg_gen_trunc_tl_i32(cpu_cr[CRF_SO], cpu_so);
> +    tcg_gen_movi_i32(cpu_cr[CRF_LT], 0);
> +    tcg_gen_movi_i32(cpu_cr[CRF_EQ], 0);
> +    tcg_gen_movi_i32(cpu_cr[CRF_GT], 0);
>      l1 = gen_new_label();
>      tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, l1);
> -    tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 1 << CRF_EQ);
> +    tcg_gen_movi_i32(cpu_cr[CRF_EQ], 1);
>  #if defined(TARGET_PPC64)
>      if (size == 8) {
>          gen_qemu_st64(ctx, cpu_gpr[reg], EA);
> @@ -3870,17 +3887,11 @@ static inline void gen_bcond(DisasContext *ctx, int type)
>      if ((bo & 0x10) == 0) {
>          /* Test CR */
>          uint32_t bi = BI(ctx->opcode);
> -        uint32_t mask = 0x08 >> (bi & 0x03);
> -        TCGv_i32 temp = tcg_temp_new_i32();
> -
>          if (bo & 0x8) {
> -            tcg_gen_andi_i32(temp, cpu_crf[bi >> 2], mask);
> -            tcg_gen_brcondi_i32(TCG_COND_EQ, temp, 0, l1);
> +            tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_cr[bi], 0, l1);
>          } else {
> -            tcg_gen_andi_i32(temp, cpu_crf[bi >> 2], mask);
> -            tcg_gen_brcondi_i32(TCG_COND_NE, temp, 0, l1);
> +            tcg_gen_brcondi_i32(TCG_COND_NE, cpu_cr[bi], 0, l1);
>          }
> -        tcg_temp_free_i32(temp);
>      }
>      gen_update_cfar(ctx, ctx->nip);
>      if (type == BCOND_IM) {
> @@ -3929,35 +3940,11 @@ static void gen_bctar(DisasContext *ctx)
>  }
>  
>  /***                      Condition register logical                       ***/
> -#define GEN_CRLOGIC(name, tcg_op, opc)                                        \
> -static void glue(gen_, name)(DisasContext *ctx)                                       \
> -{                                                                             \
> -    uint8_t bitmask;                                                          \
> -    int sh;                                                                   \
> -    TCGv_i32 t0, t1;                                                          \
> -    sh = (crbD(ctx->opcode) & 0x03) - (crbA(ctx->opcode) & 0x03);             \
> -    t0 = tcg_temp_new_i32();                                                  \
> -    if (sh > 0)                                                               \
> -        tcg_gen_shri_i32(t0, cpu_crf[crbA(ctx->opcode) >> 2], sh);            \
> -    else if (sh < 0)                                                          \
> -        tcg_gen_shli_i32(t0, cpu_crf[crbA(ctx->opcode) >> 2], -sh);           \
> -    else                                                                      \
> -        tcg_gen_mov_i32(t0, cpu_crf[crbA(ctx->opcode) >> 2]);                 \
> -    t1 = tcg_temp_new_i32();                                                  \
> -    sh = (crbD(ctx->opcode) & 0x03) - (crbB(ctx->opcode) & 0x03);             \
> -    if (sh > 0)                                                               \
> -        tcg_gen_shri_i32(t1, cpu_crf[crbB(ctx->opcode) >> 2], sh);            \
> -    else if (sh < 0)                                                          \
> -        tcg_gen_shli_i32(t1, cpu_crf[crbB(ctx->opcode) >> 2], -sh);           \
> -    else                                                                      \
> -        tcg_gen_mov_i32(t1, cpu_crf[crbB(ctx->opcode) >> 2]);                 \
> -    tcg_op(t0, t0, t1);                                                       \
> -    bitmask = 0x08 >> (crbD(ctx->opcode) & 0x03);                             \
> -    tcg_gen_andi_i32(t0, t0, bitmask);                                        \
> -    tcg_gen_andi_i32(t1, cpu_crf[crbD(ctx->opcode) >> 2], ~bitmask);          \
> -    tcg_gen_or_i32(cpu_crf[crbD(ctx->opcode) >> 2], t0, t1);                  \
> -    tcg_temp_free_i32(t0);                                                    \
> -    tcg_temp_free_i32(t1);                                                    \
> +#define GEN_CRLOGIC(name, tcg_op, opc)                                         \
> +static void glue(gen_, name)(DisasContext *ctx)                                \
> +{                                                                              \
> +    tcg_op(cpu_cr[crbD(ctx->opcode)], cpu_cr[crbA(ctx->opcode)],               \
> +           cpu_cr[crbB(ctx->opcode)]);                                         \
>  }
>  

This is a very nice cleanup ... but it oversteers just a little.  For some CR logical instructions, the generated code can produce non-zero bits in the i32 cr variable in places other than the LSB.
For example, consider crnand, which produces the following on a PPC host:

IN:
0x0000000010000578:  crnand  4*cr7+so,4*cr7+lt,4*cr7+eq

OUT: [size=112]
0x6041a630:  lwz     r14,-4(r27)
0x6041a634:  cmpwi   cr7,r14,0
0x6041a638:  bne-    cr7,0x6041a68c
0x6041a63c:  lwz     r14,640(r27)
0x6041a640:  lwz     r15,648(r27)
0x6041a644:  nand    r14,r14,r15
0x6041a648:  andi.   r14,r14,1
0x6041a64c:  stw     r14,652(r27)
0x6041a650:  .long 0x0
0x6041a654:  .long 0x0
0x6041a658:  .long 0x0
0x6041a65c:  .long 0x0

The host nand operation will always produce an i32 value that has 1s in bits 0-30, since those bits are presumably zero in both input operands.  A brute-force fix would be to add a tcg_gen_andi_i32(D,D,1) to your macro.  But I think this is required only for a subset of the
instructions (crnand, crnor, creqv, crorc).

>  /* crand */
> @@ -3980,7 +3967,11 @@ GEN_CRLOGIC(crxor, tcg_gen_xor_i32, 0x06);
>  /* mcrf */
>  static void gen_mcrf(DisasContext *ctx)
>  {
> -    tcg_gen_mov_i32(cpu_crf[crfD(ctx->opcode)], cpu_crf[crfS(ctx->opcode)]);
> +    int i;
> +    for (i = 0; i < 4; i++) {
> +        tcg_gen_mov_i32(cpu_cr[crfD(ctx->opcode) * 4 + i],
> +                        cpu_cr[crfS(ctx->opcode) * 4 + i]);
> +    }
>  }
>  
>  /***                           System linkage                              ***/
> @@ -4133,20 +4124,12 @@ static void gen_write_xer(TCGv src)
>  /* mcrxr */
>  static void gen_mcrxr(DisasContext *ctx)
>  {
> -    TCGv_i32 t0 = tcg_temp_new_i32();
> -    TCGv_i32 t1 = tcg_temp_new_i32();
> -    TCGv_i32 dst = cpu_crf[crfD(ctx->opcode)];
> -
> -    tcg_gen_trunc_tl_i32(t0, cpu_so);
> -    tcg_gen_trunc_tl_i32(t1, cpu_ov);
> -    tcg_gen_trunc_tl_i32(dst, cpu_ca);
> -    tcg_gen_shli_i32(t0, t0, 3);
> -    tcg_gen_shli_i32(t1, t1, 2);
> -    tcg_gen_shli_i32(dst, dst, 1);
> -    tcg_gen_or_i32(dst, dst, t0);
> -    tcg_gen_or_i32(dst, dst, t1);
> -    tcg_temp_free_i32(t0);
> -    tcg_temp_free_i32(t1);
> +    int crf = crfD(ctx->opcode);
> +
> +    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_LT], cpu_so);
> +    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_GT], cpu_ov);
> +    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_EQ], cpu_ca);
> +    tcg_gen_movi_i32(cpu_cr[crf * 4 + CRF_SO], 0);
>  
>      tcg_gen_movi_tl(cpu_so, 0);
>      tcg_gen_movi_tl(cpu_ov, 0);
> @@ -6320,11 +6303,13 @@ static void gen_tlbsx_40x(DisasContext *ctx)
>      gen_helper_4xx_tlbsx(cpu_gpr[rD(ctx->opcode)], cpu_env, t0);
>      tcg_temp_free(t0);
>      if (Rc(ctx->opcode)) {
> -        int l1 = gen_new_label();
> -        tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
> -        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_gpr[rD(ctx->opcode)], -1, l1);
> -        tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 0x02);
> -        gen_set_label(l1);
> +        t0 = tcg_temp_new();
> +        tcg_gen_trunc_tl_i32(cpu_cr[CRF_SO], cpu_so);
> +        tcg_gen_movi_i32(cpu_cr[CRF_LT], 0);
> +        tcg_gen_movi_i32(cpu_cr[CRF_GT], 0);
> +        tcg_gen_setcondi_tl(TCG_COND_EQ, t0, cpu_gpr[rD(ctx->opcode)], -1);
> +        tcg_gen_trunc_tl_i32(cpu_cr[CRF_EQ], t0);
> +        tcg_temp_free(t0);
>      }
>  #endif
>  }
> @@ -6401,11 +6386,13 @@ static void gen_tlbsx_440(DisasContext *ctx)
>      gen_helper_440_tlbsx(cpu_gpr[rD(ctx->opcode)], cpu_env, t0);
>      tcg_temp_free(t0);
>      if (Rc(ctx->opcode)) {
> -        int l1 = gen_new_label();
> -        tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
> -        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_gpr[rD(ctx->opcode)], -1, l1);
> -        tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 0x02);
> -        gen_set_label(l1);
> +        t0 = tcg_temp_new();
> +        tcg_gen_trunc_tl_i32(cpu_cr[CRF_SO], cpu_so);
> +        tcg_gen_movi_i32(cpu_cr[CRF_LT], 0);
> +        tcg_gen_movi_i32(cpu_cr[CRF_GT], 0);
> +        tcg_gen_setcondi_tl(TCG_COND_EQ, t0, cpu_gpr[rD(ctx->opcode)], -1);
> +        tcg_gen_trunc_tl_i32(cpu_cr[CRF_EQ], t0);
> +        tcg_temp_free(t0);
>      }
>  #endif
>  }
> @@ -7371,7 +7358,7 @@ GEN_VXFORM(vpmsumd, 4, 19)
>  static void gen_##op(DisasContext *ctx)             \
>  {                                                   \
>      TCGv_ptr ra, rb, rd;                            \
> -    TCGv_i32 ps;                                    \
> +    TCGv_i32 ps, crf;                               \
>                                                      \
>      if (unlikely(!ctx->altivec_enabled)) {          \
>          gen_exception(ctx, POWERPC_EXCP_VPU);       \
> @@ -7383,13 +7370,16 @@ static void gen_##op(DisasContext *ctx)             \
>      rd = gen_avr_ptr(rD(ctx->opcode));              \
>                                                      \
>      ps = tcg_const_i32((ctx->opcode & 0x200) != 0); \
> +    crf = tcg_temp_new_i32();                       \
>                                                      \
> -    gen_helper_##op(cpu_crf[6], rd, ra, rb, ps);    \
> +    gen_helper_##op(crf, rd, ra, rb, ps);           \
> +    gen_op_mtcr(6 << 2, crf, 0);                    \
>                                                      \
>      tcg_temp_free_ptr(ra);                          \
>      tcg_temp_free_ptr(rb);                          \
>      tcg_temp_free_ptr(rd);                          \
>      tcg_temp_free_i32(ps);                          \
> +    tcg_temp_free_ptr(crf);                         \

tcg_temp_free_i32() ?

>  }
>  
>  GEN_BCD(bcdadd)
> @@ -8217,6 +8207,7 @@ static void gen_##name(DisasContext *ctx)        \
>  static void gen_##name(DisasContext *ctx)         \
>  {                                                 \
>      TCGv_ptr ra, rb;                              \
> +    TCGv_i32 tmp;                                 \
>      if (unlikely(!ctx->fpu_enabled)) {            \
>          gen_exception(ctx, POWERPC_EXCP_FPU);     \
>          return;                                   \
> @@ -8224,8 +8215,10 @@ static void gen_##name(DisasContext *ctx)         \
>      gen_update_nip(ctx, ctx->nip - 4);            \
>      ra = gen_fprp_ptr(rA(ctx->opcode));           \
>      rb = gen_fprp_ptr(rB(ctx->opcode));           \
> -    gen_helper_##name(cpu_crf[crfD(ctx->opcode)], \
> -                      cpu_env, ra, rb);           \
> +    tmp = tcg_temp_new_i32();                     \
> +    gen_helper_##name(tmp, cpu_env, ra, rb);      \
> +    gen_op_mtcr(crfD(ctx->opcode) << 2, tmp, 0);  \
> +    tcg_temp_free_i32(tmp);                       \
>      tcg_temp_free_ptr(ra);                        \
>      tcg_temp_free_ptr(rb);                        \
>  }
> @@ -8234,7 +8227,7 @@ static void gen_##name(DisasContext *ctx)         \
>  static void gen_##name(DisasContext *ctx)         \
>  {                                                 \
>      TCGv_ptr ra;                                  \
> -    TCGv_i32 dcm;                                 \
> +    TCGv_i32 dcm, tmp;                            \
>      if (unlikely(!ctx->fpu_enabled)) {            \
>          gen_exception(ctx, POWERPC_EXCP_FPU);     \
>          return;                                   \
> @@ -8242,8 +8235,10 @@ static void gen_##name(DisasContext *ctx)         \
>      gen_update_nip(ctx, ctx->nip - 4);            \
>      ra = gen_fprp_ptr(rA(ctx->opcode));           \
>      dcm = tcg_const_i32(DCM(ctx->opcode));        \
> -    gen_helper_##name(cpu_crf[crfD(ctx->opcode)], \
> -                      cpu_env, ra, dcm);          \
> +    tmp = tcg_temp_new_i32();                     \
> +    gen_helper_##name(tmp, cpu_env, ra, dcm);     \
> +    gen_op_mtcr(crfD(ctx->opcode) << 2, tmp, 0);  \
> +    tcg_temp_free_i32(tmp);                       \
>      tcg_temp_free_ptr(ra);                        \
>      tcg_temp_free_i32(dcm);                       \
>  }
> @@ -8668,37 +8663,32 @@ GEN_SPEOP_ARITH_IMM2(evsubifw, tcg_gen_subi_i32);
>  #define GEN_SPEOP_COMP(name, tcg_cond)                                        \
>  static inline void gen_##name(DisasContext *ctx)                              \
>  {                                                                             \
> +    TCGv tmp = tcg_temp_new();                                                \
> +                                                                              \
>      if (unlikely(!ctx->spe_enabled)) {                                        \
>          gen_exception(ctx, POWERPC_EXCP_SPEU);                                \
>          return;                                                               \
>      }                                                                         \
> -    int l1 = gen_new_label();                                                 \
> -    int l2 = gen_new_label();                                                 \
> -    int l3 = gen_new_label();                                                 \
> -    int l4 = gen_new_label();                                                 \
>                                                                                \
>      tcg_gen_ext32s_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]);    \
>      tcg_gen_ext32s_tl(cpu_gpr[rB(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]);    \
>      tcg_gen_ext32s_tl(cpu_gprh[rA(ctx->opcode)], cpu_gprh[rA(ctx->opcode)]);  \
>      tcg_gen_ext32s_tl(cpu_gprh[rB(ctx->opcode)], cpu_gprh[rB(ctx->opcode)]);  \
>                                                                                \
> -    tcg_gen_brcond_tl(tcg_cond, cpu_gpr[rA(ctx->opcode)],                     \
> -                       cpu_gpr[rB(ctx->opcode)], l1);                         \
> -    tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)], 0);                          \
> -    tcg_gen_br(l2);                                                           \
> -    gen_set_label(l1);                                                        \
> -    tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)],                              \
> -                     CRF_CL | CRF_CH_OR_CL | CRF_CH_AND_CL);                  \
> -    gen_set_label(l2);                                                        \
> -    tcg_gen_brcond_tl(tcg_cond, cpu_gprh[rA(ctx->opcode)],                    \
> -                       cpu_gprh[rB(ctx->opcode)], l3);                        \
> -    tcg_gen_andi_i32(cpu_crf[crfD(ctx->opcode)], cpu_crf[crfD(ctx->opcode)],  \
> -                     ~(CRF_CH | CRF_CH_AND_CL));                              \
> -    tcg_gen_br(l4);                                                           \
> -    gen_set_label(l3);                                                        \
> -    tcg_gen_ori_i32(cpu_crf[crfD(ctx->opcode)], cpu_crf[crfD(ctx->opcode)],   \
> -                    CRF_CH | CRF_CH_OR_CL);                                   \
> -    gen_set_label(l4);                                                        \
> +    tcg_gen_setcond_tl(tcg_cond, tmp,                                         \
> +                       cpu_gpr[rA(ctx->opcode)],                              \
> +                       cpu_gpr[rB(ctx->opcode)]);                             \
> +    tcg_gen_trunc_tl_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL], tmp);        \
> +    tcg_gen_setcond_tl(tcg_cond, tmp,                                         \
> +                       cpu_gprh[rA(ctx->opcode)],                             \
> +                       cpu_gprh[rB(ctx->opcode)]);                            \
> +    tcg_gen_trunc_tl_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH], tmp);        \
> +    tcg_gen_or_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH_OR_CL],              \
> +                   cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH],                    \
> +                   cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL]);                   \
> +    tcg_gen_and_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH_AND_CL],            \
> +                    cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH],                   \
> +                    cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL]);                  \
>  }
>  GEN_SPEOP_COMP(evcmpgtu, TCG_COND_GTU);
>  GEN_SPEOP_COMP(evcmpgts, TCG_COND_GT);
> @@ -8769,22 +8759,20 @@ static inline void gen_evsel(DisasContext *ctx)
>      int l2 = gen_new_label();
>      int l3 = gen_new_label();
>      int l4 = gen_new_label();
> -    TCGv_i32 t0 = tcg_temp_local_new_i32();
> -    tcg_gen_andi_i32(t0, cpu_crf[ctx->opcode & 0x07], 1 << 3);
> -    tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l1);
> +
> +    tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_cr[(ctx->opcode & 0x07) * 4], 0, l1);
>      tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rA(ctx->opcode)]);
>      tcg_gen_br(l2);
>      gen_set_label(l1);
>      tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rB(ctx->opcode)]);
>      gen_set_label(l2);
> -    tcg_gen_andi_i32(t0, cpu_crf[ctx->opcode & 0x07], 1 << 2);
> -    tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l3);
> +
> +    tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_cr[(ctx->opcode & 0x07) * 4 + 1], 0, l3);
>      tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]);
>      tcg_gen_br(l4);
>      gen_set_label(l3);
>      tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]);
>      gen_set_label(l4);
> -    tcg_temp_free_i32(t0);
>  }
>  
>  static void gen_evsel0(DisasContext *ctx)
> @@ -9366,9 +9354,12 @@ static inline void gen_##name(DisasContext *ctx)                              \
>      t0 = tcg_temp_new_i32();                                                  \
>      t1 = tcg_temp_new_i32();                                                  \
>                                                                                \
> +    tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_LT], 0);              \
> +    tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_GT], 0);              \
> +    tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_SO], 0);              \
>      tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]);                       \
>      tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]);                       \
> -    gen_helper_##name(cpu_crf[crfD(ctx->opcode)], cpu_env, t0, t1);           \
> +    gen_helper_##name(cpu_cr[crfD(ctx->opcode) * 4 + CRF_EQ], cpu_env, t0, t1); \
>                                                                                \
>      tcg_temp_free_i32(t0);                                                    \
>      tcg_temp_free_i32(t1);                                                    \
> @@ -9385,10 +9376,32 @@ static inline void gen_##name(DisasContext *ctx)                              \
>      t1 = tcg_temp_new_i64();                                                  \
>      gen_load_gpr64(t0, rA(ctx->opcode));                                      \
>      gen_load_gpr64(t1, rB(ctx->opcode));                                      \
> -    gen_helper_##name(cpu_crf[crfD(ctx->opcode)], cpu_env, t0, t1);           \
> +    tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_LT], 0);              \
> +    tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_GT], 0);              \
> +    tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_SO], 0);              \
> +    gen_helper_##name(cpu_cr[crfD(ctx->opcode) * 4 + CRF_EQ], cpu_env,        \
> +                      t0, t1);                                                \
>      tcg_temp_free_i64(t0);                                                    \
>      tcg_temp_free_i64(t1);                                                    \
>  }
> +#define GEN_SPEFPUOP_COMP_V64(name, helper)                                   \
> +static inline void gen_##name(DisasContext *ctx)                              \
> +{                                                                             \
> +    if (unlikely(!ctx->spe_enabled)) {                                        \
> +        gen_exception(ctx, POWERPC_EXCP_SPEU);                                \
> +        return;                                                               \
> +    }                                                                         \
> +    gen_helper_##helper(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL], cpu_env,      \
> +                        cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]);  \
> +    gen_helper_##helper(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH], cpu_env,      \
> +                        cpu_gprh[rA(ctx->opcode)], cpu_gprh[rB(ctx->opcode)]);\

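This doesn't compile for 64-bit targets: the helpers declare i32 types for their GPR arguments, but the GPRs are passed here directly as target-width (TCGv) values without first being truncated to i32.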

> +    tcg_gen_or_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH_OR_CL],              \
> +                   cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH],                    \
> +                   cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL]);                   \
> +    tcg_gen_and_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH_AND_CL],            \
> +                    cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH],                   \
> +                    cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL]);                  \
> +}
>  
>  /* Single precision floating-point vectors operations */
>  /* Arithmetic */
> @@ -9443,12 +9456,12 @@ GEN_SPEFPUOP_CONV_64_64(evfsctuiz);
>  GEN_SPEFPUOP_CONV_64_64(evfsctsiz);
>  
>  /* Comparison */
> -GEN_SPEFPUOP_COMP_64(evfscmpgt);
> -GEN_SPEFPUOP_COMP_64(evfscmplt);
> -GEN_SPEFPUOP_COMP_64(evfscmpeq);
> -GEN_SPEFPUOP_COMP_64(evfststgt);
> -GEN_SPEFPUOP_COMP_64(evfststlt);
> -GEN_SPEFPUOP_COMP_64(evfststeq);
> +GEN_SPEFPUOP_COMP_V64(evfscmpgt, efscmpgt);
> +GEN_SPEFPUOP_COMP_V64(evfscmplt, efscmplt);
> +GEN_SPEFPUOP_COMP_V64(evfscmpeq, efscmpeq);
> +GEN_SPEFPUOP_COMP_V64(evfststgt, efststgt);
> +GEN_SPEFPUOP_COMP_V64(evfststlt, efststlt);
> +GEN_SPEFPUOP_COMP_V64(evfststeq, efststeq);
>  
>  /* Opcodes definitions */
>  GEN_SPE(evfsadd,   evfssub,   0x00, 0x0A, 0x00000000, 0x00000000, PPC_SPE_SINGLE); //
> 

  reply	other threads:[~2014-09-04 18:27 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-08-28 17:14 [Qemu-devel] [RFT/RFH PATCH 00/16] PPC speedup patches for TCG Paolo Bonzini
2014-08-28 17:14 ` [Qemu-devel] [PATCH 01/17] ppc: do not look at the MMU index Paolo Bonzini
2014-08-28 17:14 ` [Qemu-devel] [PATCH 02/17] ppc: avoid excessive TLB flushing Paolo Bonzini
2014-08-28 17:30   ` Peter Maydell
2014-08-28 19:35     ` Paolo Bonzini
2014-09-05  6:00       ` David Gibson
2014-09-05  7:10   ` [Qemu-devel] [Qemu-ppc] " Alexander Graf
2014-09-05 12:11     ` Paolo Bonzini
2014-09-09 16:42       ` Paolo Bonzini
2014-09-09 20:51         ` Alexander Graf
2014-08-28 17:14 ` [Qemu-devel] [PATCH 03/17] ppc: fix monitor access to CR Paolo Bonzini
2014-09-03 18:21   ` Tom Musta
2014-09-05  7:10     ` [Qemu-devel] [Qemu-ppc] " Alexander Graf
2014-08-28 17:15 ` [Qemu-devel] [PATCH 04/17] ppc: use ARRAY_SIZE in gdbstub.c Paolo Bonzini
2014-09-03 18:21   ` Tom Musta
2014-08-28 17:15 ` [Qemu-devel] [PATCH 05/17] ppc: use CRF_* in fpu_helper.c Paolo Bonzini
2014-09-03 18:21   ` Tom Musta
2014-08-28 17:15 ` [Qemu-devel] [PATCH 06/17] ppc: use CRF_* in int_helper.c Paolo Bonzini
2014-09-03 18:28   ` Tom Musta
2014-09-05  7:12     ` [Qemu-devel] [Qemu-ppc] " Alexander Graf
2014-08-28 17:15 ` [Qemu-devel] [PATCH 07/17] ppc: fix result of DLMZB when no zero bytes are found Paolo Bonzini
2014-09-03 18:28   ` Tom Musta
2014-09-05  7:26     ` [Qemu-devel] [Qemu-ppc] " Alexander Graf
2014-08-28 17:15 ` [Qemu-devel] [PATCH 08/17] ppc: introduce helpers for mfocrf/mtocrf Paolo Bonzini
2014-09-03 18:28   ` Tom Musta
2014-08-28 17:15 ` [Qemu-devel] [PATCH 09/17] ppc: reorganize gen_compute_fprf Paolo Bonzini
2014-09-03 18:29   ` Tom Musta
2014-08-28 17:15 ` [Qemu-devel] [PATCH 10/17] ppc: introduce gen_op_mfcr/gen_op_mtcr Paolo Bonzini
2014-09-03 18:58   ` Tom Musta
2014-08-28 17:15 ` [Qemu-devel] [PATCH 11/17] ppc: rename gen_set_cr6_from_fpscr Paolo Bonzini
2014-09-03 19:41   ` Tom Musta
2014-09-05  7:27     ` [Qemu-devel] [Qemu-ppc] " Alexander Graf
2014-08-28 17:15 ` [Qemu-devel] [PATCH 12/17] ppc: use movcond for isel Paolo Bonzini
2014-08-29 18:30   ` Richard Henderson
2014-09-03 19:41   ` Tom Musta
2014-09-15 13:39     ` Paolo Bonzini
2014-08-28 17:15 ` [Qemu-devel] [PATCH 13/17] ppc: compute mask from BI using right shift Paolo Bonzini
2014-09-03 20:59   ` Tom Musta
2014-09-05  7:29     ` [Qemu-devel] [Qemu-ppc] " Alexander Graf
2014-08-28 17:15 ` [Qemu-devel] [PATCH 14/17] ppc: introduce ppc_get_crf and ppc_set_crf Paolo Bonzini
2014-09-04 18:26   ` Tom Musta
2014-08-28 17:15 ` [Qemu-devel] [PATCH 15/17] ppc: store CR registers in 32 1-bit registers Paolo Bonzini
2014-09-04 18:27   ` Tom Musta [this message]
2014-09-09 15:44     ` Paolo Bonzini
2014-09-09 16:41       ` Paolo Bonzini
2014-09-09 16:03     ` Richard Henderson
2014-09-09 16:26       ` Paolo Bonzini
2014-08-28 17:15 ` [Qemu-devel] [PATCH 16/17] ppc: inline ppc_get_crf/ppc_set_crf when clearer Paolo Bonzini
2014-08-28 17:15 ` [Qemu-devel] [PATCH 17/17] ppc: dump all 32 CR bits Paolo Bonzini
2014-08-28 18:05 ` [Qemu-devel] [RFT/RFH PATCH 00/16] PPC speedup patches for TCG Tom Musta

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5408AEF7.7030000@gmail.com \
    --to=tommusta@gmail.com \
    --cc=dgibson@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-ppc@nongnu.org \
    --cc=rth@twiddle.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.