qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v2] target-arm: Implement ARMv8 VSEL instruction.
@ 2013-10-03 12:51 Will Newton
  2013-10-03 12:59 ` Peter Maydell
  2013-10-03 14:34 ` Richard Henderson
  0 siblings, 2 replies; 9+ messages in thread
From: Will Newton @ 2013-10-03 12:51 UTC (permalink / raw)
  To: qemu-devel; +Cc: patches


This adds support for the VSEL floating point selection instruction
which was added in ARMv8. It is based on the previous patch[1] from
Mans Rullgard, but attempts to address the feedback given on that patch.

[1] http://lists.nongnu.org/archive/html/qemu-devel/2013-06/msg03117.html

Signed-off-by: Will Newton <will.newton@linaro.org>
---
 target-arm/translate.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 105 insertions(+)

Changes in v2:
 - Integrate vsel decoding into disas_vfp_insn

diff --git a/target-arm/translate.c b/target-arm/translate.c
index 998bde2..5e49334 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -2880,6 +2880,98 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
                 rm = VFP_SREG_M(insn);
             }

+            if ((insn & 0x0f800e50) == 0x0e000a00) {
+                /* vsel */
+                uint32_t cc = (insn >> 20) & 3;
+                TCGv_i32 tmp, zero;
+
+                /* ARMv8 VFP.  */
+                if (!arm_feature(env, ARM_FEATURE_V8))
+                    return 1;
+
+                zero = tcg_const_tl(0);
+
+                if (dp) {
+                    TCGv_i64 ftmp1, ftmp2, ftmp3;
+
+                    ftmp1 = tcg_temp_new_i64();
+                    ftmp2 = tcg_temp_new_i64();
+                    ftmp3 = tcg_temp_new_i64();
+                    tcg_gen_ld_f64(ftmp1, cpu_env, vfp_reg_offset(dp, rn));
+                    tcg_gen_ld_f64(ftmp2, cpu_env, vfp_reg_offset(dp, rm));
+                    switch (cc) {
+                    case 0: /* eq: Z */
+                        tcg_gen_movcond_i64(TCG_COND_EQ, ftmp3, cpu_ZF, zero,
+                                            ftmp1, ftmp2);
+                        break;
+                    case 1: /* vs: V */
+                        tcg_gen_movcond_i64(TCG_COND_LT, ftmp3, cpu_VF, zero,
+                                            ftmp1, ftmp2);
+                        break;
+                    case 2: /* ge: N == V -> N ^ V == 0 */
+                        tmp = tcg_temp_new_i32();
+                        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
+                        tcg_gen_movcond_i64(TCG_COND_GE, ftmp3, tmp, zero,
+                                            ftmp1, ftmp2);
+                        tcg_temp_free_i32(tmp);
+                        break;
+                    case 3: /* gt: !Z && N == V */
+                        tcg_gen_movcond_i64(TCG_COND_NE, ftmp3, cpu_ZF, zero,
+                                            ftmp1, ftmp2);
+                        tmp = tcg_temp_new_i32();
+                        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
+                        tcg_gen_movcond_i64(TCG_COND_GE, ftmp3, tmp, zero,
+                                            ftmp3, ftmp2);
+                        tcg_temp_free_i32(tmp);
+                        break;
+                    }
+                    tcg_gen_st_f64(ftmp3, cpu_env, vfp_reg_offset(dp, rd));
+                    tcg_temp_free_i64(ftmp1);
+                    tcg_temp_free_i64(ftmp2);
+                    tcg_temp_free_i64(ftmp3);
+                } else {
+                    TCGv_i32 ftmp1, ftmp2, ftmp3;
+
+                    ftmp1 = tcg_temp_new_i32();
+                    ftmp2 = tcg_temp_new_i32();
+                    ftmp3 = tcg_temp_new_i32();
+                    tcg_gen_ld_f32(ftmp1, cpu_env, vfp_reg_offset(dp, rn));
+                    tcg_gen_ld_f32(ftmp2, cpu_env, vfp_reg_offset(dp, rm));
+                    switch (cc) {
+                    case 0: /* eq: Z */
+                        tcg_gen_movcond_i32(TCG_COND_EQ, ftmp3, cpu_ZF, zero,
+                                            ftmp1, ftmp2);
+                        break;
+                    case 1: /* vs: V */
+                        tcg_gen_movcond_i32(TCG_COND_LT, ftmp3, cpu_VF, zero,
+                                            ftmp1, ftmp2);
+                        break;
+                    case 2: /* ge: N == V -> N ^ V == 0 */
+                        tmp = tcg_temp_new_i32();
+                        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
+                        tcg_gen_movcond_i32(TCG_COND_GE, ftmp3, tmp, zero,
+                                            ftmp1, ftmp2);
+                        tcg_temp_free_i32(tmp);
+                        break;
+                    case 3: /* gt: !Z && N == V */
+                        tcg_gen_movcond_i32(TCG_COND_NE, ftmp3, cpu_ZF, zero,
+                                            ftmp1, ftmp2);
+                        tmp = tcg_temp_new_i32();
+                        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
+                        tcg_gen_movcond_i32(TCG_COND_GE, ftmp3, tmp, zero,
+                                            ftmp3, ftmp2);
+                        tcg_temp_free_i32(tmp);
+                        break;
+                    }
+                    tcg_gen_st_f32(ftmp3, cpu_env, vfp_reg_offset(dp, rd));
+                    tcg_temp_free_i32(ftmp1);
+                    tcg_temp_free_i32(ftmp2);
+                    tcg_temp_free_i32(ftmp3);
+                }
+
+                return 0;
+            }
+
             veclen = s->vec_len;
             if (op == 15 && rn > 3)
                 veclen = 0;
@@ -6756,6 +6848,13 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
                 goto illegal_op;
             return;
         }
+	if ((insn & 0x0f800e50) == 0x0e000a00) {
+	    /* ARMv8 VFP.  */
+	    ARCH(8);
+
+	    if (disas_vfp_insn(env, s, insn))
+		goto illegal_op;
+	}
         if (((insn & 0x0f30f000) == 0x0510f000) ||
             ((insn & 0x0f30f010) == 0x0710f000)) {
             if ((insn & (1 << 22)) == 0) {
@@ -8768,6 +8867,12 @@ static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw
             insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
             if (disas_neon_data_insn(env, s, insn))
                 goto illegal_op;
+	} else if ((insn & 0x0f800e50) == 0x0e000a00) {
+	    /* ARMv8 VFP.  */
+	    ARCH(8);
+
+	    if (disas_vfp_insn(env, s, insn))
+		goto illegal_op;
         } else {
             if (insn & (1 << 28))
                 goto illegal_op;
-- 
1.8.1.4

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [Qemu-devel] [PATCH v2] target-arm: Implement ARMv8 VSEL instruction.
  2013-10-03 12:51 [Qemu-devel] [PATCH v2] target-arm: Implement ARMv8 VSEL instruction Will Newton
@ 2013-10-03 12:59 ` Peter Maydell
  2013-10-03 14:31   ` Will Newton
  2013-10-03 14:34 ` Richard Henderson
  1 sibling, 1 reply; 9+ messages in thread
From: Peter Maydell @ 2013-10-03 12:59 UTC (permalink / raw)
  To: Will Newton; +Cc: QEMU Developers, Patch Tracking

On 3 October 2013 21:51, Will Newton <will.newton@linaro.org> wrote:
>
> This adds support for the VSEL floating point selection instruction
> which was added in ARMv8. It is based on the previous patch[1] from
> Mans Rullgard, but attempts to address the feedback given on that patch.
>
> [1] http://lists.nongnu.org/archive/html/qemu-devel/2013-06/msg03117.html

This sort of commentary about previous patch versions should go below
the '---', not in the commit message.

>
> Signed-off-by: Will Newton <will.newton@linaro.org>
> ---
>  target-arm/translate.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 105 insertions(+)
>
> Changes in v2:
>  - Integrate vsel decoding into disas_vfp_insn
>
> diff --git a/target-arm/translate.c b/target-arm/translate.c
> index 998bde2..5e49334 100644
> --- a/target-arm/translate.c
> +++ b/target-arm/translate.c
> @@ -2880,6 +2880,98 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
>                  rm = VFP_SREG_M(insn);
>              }
>
> +            if ((insn & 0x0f800e50) == 0x0e000a00) {
> +                /* vsel */
> +                uint32_t cc = (insn >> 20) & 3;
> +                TCGv_i32 tmp, zero;
> +
> +                /* ARMv8 VFP.  */
> +                if (!arm_feature(env, ARM_FEATURE_V8))
> +                    return 1;

scripts/checkpatch.pl will tell you that omitting the braces
is a coding style violation.

> +
> +                zero = tcg_const_tl(0);
> +
> +                if (dp) {
> +                    TCGv_i64 ftmp1, ftmp2, ftmp3;
> +
> +                    ftmp1 = tcg_temp_new_i64();
> +                    ftmp2 = tcg_temp_new_i64();
> +                    ftmp3 = tcg_temp_new_i64();
> +                    tcg_gen_ld_f64(ftmp1, cpu_env, vfp_reg_offset(dp, rn));
> +                    tcg_gen_ld_f64(ftmp2, cpu_env, vfp_reg_offset(dp, rm));
> +                    switch (cc) {
> +                    case 0: /* eq: Z */
> +                        tcg_gen_movcond_i64(TCG_COND_EQ, ftmp3, cpu_ZF, zero,
> +                                            ftmp1, ftmp2);
> +                        break;
> +                    case 1: /* vs: V */
> +                        tcg_gen_movcond_i64(TCG_COND_LT, ftmp3, cpu_VF, zero,
> +                                            ftmp1, ftmp2);
> +                        break;
> +                    case 2: /* ge: N == V -> N ^ V == 0 */
> +                        tmp = tcg_temp_new_i32();
> +                        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
> +                        tcg_gen_movcond_i64(TCG_COND_GE, ftmp3, tmp, zero,
> +                                            ftmp1, ftmp2);
> +                        tcg_temp_free_i32(tmp);
> +                        break;
> +                    case 3: /* gt: !Z && N == V */
> +                        tcg_gen_movcond_i64(TCG_COND_NE, ftmp3, cpu_ZF, zero,
> +                                            ftmp1, ftmp2);
> +                        tmp = tcg_temp_new_i32();
> +                        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
> +                        tcg_gen_movcond_i64(TCG_COND_GE, ftmp3, tmp, zero,
> +                                            ftmp3, ftmp2);
> +                        tcg_temp_free_i32(tmp);
> +                        break;
> +                    }
> +                    tcg_gen_st_f64(ftmp3, cpu_env, vfp_reg_offset(dp, rd));
> +                    tcg_temp_free_i64(ftmp1);
> +                    tcg_temp_free_i64(ftmp2);
> +                    tcg_temp_free_i64(ftmp3);
> +                } else {
> +                    TCGv_i32 ftmp1, ftmp2, ftmp3;
> +
> +                    ftmp1 = tcg_temp_new_i32();
> +                    ftmp2 = tcg_temp_new_i32();
> +                    ftmp3 = tcg_temp_new_i32();
> +                    tcg_gen_ld_f32(ftmp1, cpu_env, vfp_reg_offset(dp, rn));
> +                    tcg_gen_ld_f32(ftmp2, cpu_env, vfp_reg_offset(dp, rm));
> +                    switch (cc) {
> +                    case 0: /* eq: Z */
> +                        tcg_gen_movcond_i32(TCG_COND_EQ, ftmp3, cpu_ZF, zero,
> +                                            ftmp1, ftmp2);
> +                        break;
> +                    case 1: /* vs: V */
> +                        tcg_gen_movcond_i32(TCG_COND_LT, ftmp3, cpu_VF, zero,
> +                                            ftmp1, ftmp2);
> +                        break;
> +                    case 2: /* ge: N == V -> N ^ V == 0 */
> +                        tmp = tcg_temp_new_i32();
> +                        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
> +                        tcg_gen_movcond_i32(TCG_COND_GE, ftmp3, tmp, zero,
> +                                            ftmp1, ftmp2);
> +                        tcg_temp_free_i32(tmp);
> +                        break;
> +                    case 3: /* gt: !Z && N == V */
> +                        tcg_gen_movcond_i32(TCG_COND_NE, ftmp3, cpu_ZF, zero,
> +                                            ftmp1, ftmp2);
> +                        tmp = tcg_temp_new_i32();
> +                        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
> +                        tcg_gen_movcond_i32(TCG_COND_GE, ftmp3, tmp, zero,
> +                                            ftmp3, ftmp2);
> +                        tcg_temp_free_i32(tmp);
> +                        break;
> +                    }
> +                    tcg_gen_st_f32(ftmp3, cpu_env, vfp_reg_offset(dp, rd));
> +                    tcg_temp_free_i32(ftmp1);
> +                    tcg_temp_free_i32(ftmp2);
> +                    tcg_temp_free_i32(ftmp3);
> +                }
> +
> +                return 0;
> +            }
> +
>              veclen = s->vec_len;
>              if (op == 15 && rn > 3)
>                  veclen = 0;
> @@ -6756,6 +6848,13 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
>                  goto illegal_op;
>              return;
>          }
> +       if ((insn & 0x0f800e50) == 0x0e000a00) {
> +           /* ARMv8 VFP.  */
> +           ARCH(8);
> +
> +           if (disas_vfp_insn(env, s, insn))
> +               goto illegal_op;
> +       }

This isn't what I meant. If our decoding matches up with the ARM ARM
then this instruction pattern should already fall into disas_vfp_insn(),
and we shouldn't need an extra check and call. (If it's not correct then
we should adjust our decode so it does.)

thanks
-- PMM

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Qemu-devel] [PATCH v2] target-arm: Implement ARMv8 VSEL instruction.
  2013-10-03 12:59 ` Peter Maydell
@ 2013-10-03 14:31   ` Will Newton
  2013-10-03 14:37     ` Peter Maydell
  0 siblings, 1 reply; 9+ messages in thread
From: Will Newton @ 2013-10-03 14:31 UTC (permalink / raw)
  To: Peter Maydell; +Cc: QEMU Developers, Patch Tracking

On 3 October 2013 13:59, Peter Maydell <peter.maydell@linaro.org> wrote:
> On 3 October 2013 21:51, Will Newton <will.newton@linaro.org> wrote:
>>
>> This adds support for the VSEL floating point selection instruction
>> which was added in ARMv8. It is based on the previous patch[1] from
>> Mans Rullgard, but attempts to address the feedback given on that patch.
>>
>> [1] http://lists.nongnu.org/archive/html/qemu-devel/2013-06/msg03117.html
>
> This sort of commentary about previous patch versions should go below
> the '---', not in the commit message.
>
>>
>> Signed-off-by: Will Newton <will.newton@linaro.org>
>> ---
>>  target-arm/translate.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 105 insertions(+)
>>
>> Changes in v2:
>>  - Integrate vsel decoding into disas_vfp_insn
>>
>> diff --git a/target-arm/translate.c b/target-arm/translate.c
>> index 998bde2..5e49334 100644
>> --- a/target-arm/translate.c
>> +++ b/target-arm/translate.c
>> @@ -2880,6 +2880,98 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
>>                  rm = VFP_SREG_M(insn);
>>              }
>>
>> +            if ((insn & 0x0f800e50) == 0x0e000a00) {
>> +                /* vsel */
>> +                uint32_t cc = (insn >> 20) & 3;
>> +                TCGv_i32 tmp, zero;
>> +
>> +                /* ARMv8 VFP.  */
>> +                if (!arm_feature(env, ARM_FEATURE_V8))
>> +                    return 1;
>
> scripts/checkpatch.pl will tell you that omitting the braces
> is a coding style violation.

Ok, I'll fix that.

>> +
>> +                zero = tcg_const_tl(0);
>> +
>> +                if (dp) {
>> +                    TCGv_i64 ftmp1, ftmp2, ftmp3;
>> +
>> +                    ftmp1 = tcg_temp_new_i64();
>> +                    ftmp2 = tcg_temp_new_i64();
>> +                    ftmp3 = tcg_temp_new_i64();
>> +                    tcg_gen_ld_f64(ftmp1, cpu_env, vfp_reg_offset(dp, rn));
>> +                    tcg_gen_ld_f64(ftmp2, cpu_env, vfp_reg_offset(dp, rm));
>> +                    switch (cc) {
>> +                    case 0: /* eq: Z */
>> +                        tcg_gen_movcond_i64(TCG_COND_EQ, ftmp3, cpu_ZF, zero,
>> +                                            ftmp1, ftmp2);
>> +                        break;
>> +                    case 1: /* vs: V */
>> +                        tcg_gen_movcond_i64(TCG_COND_LT, ftmp3, cpu_VF, zero,
>> +                                            ftmp1, ftmp2);
>> +                        break;
>> +                    case 2: /* ge: N == V -> N ^ V == 0 */
>> +                        tmp = tcg_temp_new_i32();
>> +                        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
>> +                        tcg_gen_movcond_i64(TCG_COND_GE, ftmp3, tmp, zero,
>> +                                            ftmp1, ftmp2);
>> +                        tcg_temp_free_i32(tmp);
>> +                        break;
>> +                    case 3: /* gt: !Z && N == V */
>> +                        tcg_gen_movcond_i64(TCG_COND_NE, ftmp3, cpu_ZF, zero,
>> +                                            ftmp1, ftmp2);
>> +                        tmp = tcg_temp_new_i32();
>> +                        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
>> +                        tcg_gen_movcond_i64(TCG_COND_GE, ftmp3, tmp, zero,
>> +                                            ftmp3, ftmp2);
>> +                        tcg_temp_free_i32(tmp);
>> +                        break;
>> +                    }
>> +                    tcg_gen_st_f64(ftmp3, cpu_env, vfp_reg_offset(dp, rd));
>> +                    tcg_temp_free_i64(ftmp1);
>> +                    tcg_temp_free_i64(ftmp2);
>> +                    tcg_temp_free_i64(ftmp3);
>> +                } else {
>> +                    TCGv_i32 ftmp1, ftmp2, ftmp3;
>> +
>> +                    ftmp1 = tcg_temp_new_i32();
>> +                    ftmp2 = tcg_temp_new_i32();
>> +                    ftmp3 = tcg_temp_new_i32();
>> +                    tcg_gen_ld_f32(ftmp1, cpu_env, vfp_reg_offset(dp, rn));
>> +                    tcg_gen_ld_f32(ftmp2, cpu_env, vfp_reg_offset(dp, rm));
>> +                    switch (cc) {
>> +                    case 0: /* eq: Z */
>> +                        tcg_gen_movcond_i32(TCG_COND_EQ, ftmp3, cpu_ZF, zero,
>> +                                            ftmp1, ftmp2);
>> +                        break;
>> +                    case 1: /* vs: V */
>> +                        tcg_gen_movcond_i32(TCG_COND_LT, ftmp3, cpu_VF, zero,
>> +                                            ftmp1, ftmp2);
>> +                        break;
>> +                    case 2: /* ge: N == V -> N ^ V == 0 */
>> +                        tmp = tcg_temp_new_i32();
>> +                        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
>> +                        tcg_gen_movcond_i32(TCG_COND_GE, ftmp3, tmp, zero,
>> +                                            ftmp1, ftmp2);
>> +                        tcg_temp_free_i32(tmp);
>> +                        break;
>> +                    case 3: /* gt: !Z && N == V */
>> +                        tcg_gen_movcond_i32(TCG_COND_NE, ftmp3, cpu_ZF, zero,
>> +                                            ftmp1, ftmp2);
>> +                        tmp = tcg_temp_new_i32();
>> +                        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
>> +                        tcg_gen_movcond_i32(TCG_COND_GE, ftmp3, tmp, zero,
>> +                                            ftmp3, ftmp2);
>> +                        tcg_temp_free_i32(tmp);
>> +                        break;
>> +                    }
>> +                    tcg_gen_st_f32(ftmp3, cpu_env, vfp_reg_offset(dp, rd));
>> +                    tcg_temp_free_i32(ftmp1);
>> +                    tcg_temp_free_i32(ftmp2);
>> +                    tcg_temp_free_i32(ftmp3);
>> +                }
>> +
>> +                return 0;
>> +            }
>> +
>>              veclen = s->vec_len;
>>              if (op == 15 && rn > 3)
>>                  veclen = 0;
>> @@ -6756,6 +6848,13 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
>>                  goto illegal_op;
>>              return;
>>          }
>> +       if ((insn & 0x0f800e50) == 0x0e000a00) {
>> +           /* ARMv8 VFP.  */
>> +           ARCH(8);
>> +
>> +           if (disas_vfp_insn(env, s, insn))
>> +               goto illegal_op;
>> +       }
>
> This isn't what I meant. If our decoding matches up with the ARM ARM
> then this instruction pattern should already fall into disas_vfp_insn(),
> and we shouldn't need an extra check and call. (If it's not correct then
> we should adjust our decode so it does.)

I'll respin the patch pulling the calls to disas_vfp_insn up a level
which I think you alluded to in the original review. It still needs an
additional call to disas_vfp_insn in the ARM case as condition code ==
0xf is dealt with separately from the others. Let me know if this is
not what you were looking for.

Thanks,

-- 
Will Newton
Toolchain Working Group, Linaro

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Qemu-devel] [PATCH v2] target-arm: Implement ARMv8 VSEL instruction.
  2013-10-03 12:51 [Qemu-devel] [PATCH v2] target-arm: Implement ARMv8 VSEL instruction Will Newton
  2013-10-03 12:59 ` Peter Maydell
@ 2013-10-03 14:34 ` Richard Henderson
  2013-10-03 15:10   ` Will Newton
  1 sibling, 1 reply; 9+ messages in thread
From: Richard Henderson @ 2013-10-03 14:34 UTC (permalink / raw)
  To: Will Newton; +Cc: qemu-devel, patches

On 10/03/2013 05:51 AM, Will Newton wrote:
> +                    case 0: /* eq: Z */
> +                        tcg_gen_movcond_i64(TCG_COND_EQ, ftmp3, cpu_ZF, zero,
> +                                            ftmp1, ftmp2);
> +                        break;

Does this compile when configured with --enable-debug?

It shouldn't, since movcond_i64 takes 5 _i64 variables,
and your comparison variables are _i32.


r~

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Qemu-devel] [PATCH v2] target-arm: Implement ARMv8 VSEL instruction.
  2013-10-03 14:31   ` Will Newton
@ 2013-10-03 14:37     ` Peter Maydell
  2013-10-15 10:31       ` Peter Maydell
  0 siblings, 1 reply; 9+ messages in thread
From: Peter Maydell @ 2013-10-03 14:37 UTC (permalink / raw)
  To: Will Newton; +Cc: QEMU Developers, Patch Tracking

On 3 October 2013 23:31, Will Newton <will.newton@linaro.org> wrote:
> On 3 October 2013 13:59, Peter Maydell <peter.maydell@linaro.org> wrote:
>> This isn't what I meant. If our decoding matches up with the ARM ARM
>> then this instruction pattern should already fall into disas_vfp_insn(),
>> and we shouldn't need an extra check and call. (If it's not correct then
>> we should adjust our decode so it does.)
>
> I'll respin the patch pulling the calls to disas_vfp_insn up a level
> which I think you alluded to in the original review. It still needs an
> additional call to disas_vfp_insn in the ARM case as condition code ==
> 0xf is dealt with separately from the others. Let me know if this is
> not what you were looking for.

Ah, that means the ARM ARM table is incorrect, because it implies
that VSEL is conditional (which it definitely isn't). I need to look
at where the new insns are in the T32/A32 encodings in more
detail, then, which I don't have time for just at the moment.

Pulling the disas_vfp_insn calls out of disas_coproc is a good
idea anyway, though (it should be a separate patch to the one
which adds VSEL).

-- PMM

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Qemu-devel] [PATCH v2] target-arm: Implement ARMv8 VSEL instruction.
  2013-10-03 14:34 ` Richard Henderson
@ 2013-10-03 15:10   ` Will Newton
  2013-10-03 15:28     ` Richard Henderson
  0 siblings, 1 reply; 9+ messages in thread
From: Will Newton @ 2013-10-03 15:10 UTC (permalink / raw)
  To: Richard Henderson; +Cc: QEMU Developers, Patch Tracking

On 3 October 2013 15:34, Richard Henderson <rth@twiddle.net> wrote:
> On 10/03/2013 05:51 AM, Will Newton wrote:
>> +                    case 0: /* eq: Z */
>> +                        tcg_gen_movcond_i64(TCG_COND_EQ, ftmp3, cpu_ZF, zero,
>> +                                            ftmp1, ftmp2);
>> +                        break;
>
> Does this compile when configured with --enable-debug?
>
> It shouldn't, since movcond_i64 takes 5 _i64 variables,
> and your comparison variables are _i32.

No, thanks for picking that up. I was wondering if that was valid and
the code seemed to work. What's the best way to work around the
problem? Just extend everything up to 64bits?

-- 
Will Newton
Toolchain Working Group, Linaro

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Qemu-devel] [PATCH v2] target-arm: Implement ARMv8 VSEL instruction.
  2013-10-03 15:10   ` Will Newton
@ 2013-10-03 15:28     ` Richard Henderson
  2013-10-03 18:39       ` Richard Henderson
  0 siblings, 1 reply; 9+ messages in thread
From: Richard Henderson @ 2013-10-03 15:28 UTC (permalink / raw)
  To: Will Newton; +Cc: QEMU Developers, Patch Tracking

On 10/03/2013 08:10 AM, Will Newton wrote:
> No, thanks for picking that up. I was wondering if that was valid and
> the code seemed to work. What's the best way to work around the
> problem? Just extend everything up to 64bits?

For the simple conditions, yes.  For the more complex ones,
you might want to do the computation in 32-bit and extend
the result.


r~

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Qemu-devel] [PATCH v2] target-arm: Implement ARMv8 VSEL instruction.
  2013-10-03 15:28     ` Richard Henderson
@ 2013-10-03 18:39       ` Richard Henderson
  0 siblings, 0 replies; 9+ messages in thread
From: Richard Henderson @ 2013-10-03 18:39 UTC (permalink / raw)
  To: Will Newton; +Cc: QEMU Developers, Patch Tracking

On 10/03/2013 10:28 AM, Richard Henderson wrote:
> For the simple conditions, yes.  For the more complex ones,
> you might want to do the computation in 32-bit and extend
> the result.

Alternately, compute the condition with setcond_i32 and
only extend that result to 64 bits.  That means doing
something different for GT with its compound condition.  Maybe

  xor_i32     tmp, vf, nf
  setcond_i32 tmp, tmp, zero, ge
  movcond_i32 tmp, zf, zero, tmp, zero, ne  (tmp = z ? tmp : 0)


r~

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Qemu-devel] [PATCH v2] target-arm: Implement ARMv8 VSEL instruction.
  2013-10-03 14:37     ` Peter Maydell
@ 2013-10-15 10:31       ` Peter Maydell
  0 siblings, 0 replies; 9+ messages in thread
From: Peter Maydell @ 2013-10-15 10:31 UTC (permalink / raw)
  To: Will Newton; +Cc: QEMU Developers, Patch Tracking

On 3 October 2013 15:37, Peter Maydell <peter.maydell@linaro.org> wrote:
> Ah, that means the ARM ARM table is incorrect, because it implies
> that VSEL is conditional (which it definitely isn't). I need to look
> at where the new insns are in the T32/A32 encodings in more
> detail, then, which I don't have time for just at the moment.

Yes, these are in what would be the CDP2 space in both T32
and A32. So, quick sketch of what I think we should do:
 * move the disas_vfp_insn() calls outside disas_coproc_insn()
   (and in the thumb decode case, to before the "if bit 28 set
   then goto illegal_op" check)
   (basically what you have in this patch is fine)
 * add a call to disas_vfp_insn() in the unconditional code
   (what you have there in this patch is fine, but remember that
   QEMU coding style mandates braces; use scripts/checkpatch.pl.)
 * in disas_vfp_insn(), just after the "is vfp disabled?" check, add:

 if (extract32(insn, 28, 4) == 0xf) {
    /* Encodings with T=1 (Thumb) or unconditional (ARM):
     * only used in v8 and above
     */
    return 1;
 }

That all goes into patch 1 of 2, which is just doing refactoring
and makes no changes in behaviour.

 * then in patch 2 of the series, actually add the VSEL
   support, by replacing that 'return 1' with
   'return disas_vfp_v8_insn(env, s, insn);'
   and implementing that function with the VSEL support.
   [It seems better to me to have this separate rather than
   fully integrated into the existing logic of disas_vfp_insn
   because we know that no new insn is ever going to use the
   legacy/deprecated vfp vector support. And the function is
   already 800 lines long...]

thanks
-- PMM

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2013-10-15 10:31 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-10-03 12:51 [Qemu-devel] [PATCH v2] target-arm: Implement ARMv8 VSEL instruction Will Newton
2013-10-03 12:59 ` Peter Maydell
2013-10-03 14:31   ` Will Newton
2013-10-03 14:37     ` Peter Maydell
2013-10-15 10:31       ` Peter Maydell
2013-10-03 14:34 ` Richard Henderson
2013-10-03 15:10   ` Will Newton
2013-10-03 15:28     ` Richard Henderson
2013-10-03 18:39       ` Richard Henderson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).