All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Alex Bennée" <alex.bennee@linaro.org>
To: Richard Henderson <richard.henderson@linaro.org>
Cc: qemu-devel@nongnu.org, qemu-arm@nongnu.org
Subject: Re: [Qemu-arm] [PATCH v1 06/12] target/arm: Decode aa32 armv8.1 three same
Date: Mon, 13 Nov 2017 16:55:06 +0000	[thread overview]
Message-ID: <87shdi9l05.fsf@linaro.org> (raw)
In-Reply-To: <20171004184325.24157-7-richard.henderson@linaro.org>


Richard Henderson <richard.henderson@linaro.org> writes:

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/arm/translate.c | 83 ++++++++++++++++++++++++++++++++++++++------------
>  1 file changed, 64 insertions(+), 19 deletions(-)
>
> diff --git a/target/arm/translate.c b/target/arm/translate.c
> index ab1a12a1b8..0cd58710b3 100644
> --- a/target/arm/translate.c
> +++ b/target/arm/translate.c
> @@ -25,6 +25,7 @@
>  #include "disas/disas.h"
>  #include "exec/exec-all.h"
>  #include "tcg-op.h"
> +#include "tcg-op-gvec.h"
>  #include "qemu/log.h"
>  #include "qemu/bitops.h"
>  #include "arm_ldst.h"
> @@ -5334,9 +5335,9 @@ static void gen_neon_narrow_op(int op, int u, int size,
>  #define NEON_3R_VPMAX 20
>  #define NEON_3R_VPMIN 21
>  #define NEON_3R_VQDMULH_VQRDMULH 22
> -#define NEON_3R_VPADD 23
> +#define NEON_3R_VPADD_VQRDMLAH 23
>  #define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
> -#define NEON_3R_VFM 25 /* VFMA, VFMS : float fused multiply-add */
> +#define NEON_3R_VFM_VQRDMLSH 25 /* VFMA, VFMS : float fused multiply-add */
>  #define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
>  #define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
>  #define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
> @@ -5368,9 +5369,9 @@ static const uint8_t neon_3r_sizes[] = {
>      [NEON_3R_VPMAX] = 0x7,
>      [NEON_3R_VPMIN] = 0x7,
>      [NEON_3R_VQDMULH_VQRDMULH] = 0x6,
> -    [NEON_3R_VPADD] = 0x7,
> +    [NEON_3R_VPADD_VQRDMLAH] = 0x7,
>      [NEON_3R_SHA] = 0xf, /* size field encodes op type */
> -    [NEON_3R_VFM] = 0x5, /* size bit 1 encodes op */
> +    [NEON_3R_VFM_VQRDMLSH] = 0x7, /* For VFM, size bit 1 encodes op */
>      [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
>      [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
>      [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
> @@ -5556,6 +5557,7 @@ static const uint8_t neon_2rm_sizes[] = {
>
>  static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
>  {
> +    void (*fn_gvec_ptr)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
>      int op;
>      int q;
>      int rd, rn, rm;
> @@ -5600,12 +5602,12 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
>          if (q && ((rd | rn | rm) & 1)) {
>              return 1;
>          }
> -        /*
> -         * The SHA-1/SHA-256 3-register instructions require special treatment
> -         * here, as their size field is overloaded as an op type selector, and
> -         * they all consume their input in a single pass.
> -         */
> -        if (op == NEON_3R_SHA) {
> +        switch (op) {
> +        case NEON_3R_SHA:
> +            /* The SHA-1/SHA-256 3-register instructions require special
> +             * treatment here, as their size field is overloaded as an
> +             * op type selector, and they all consume their input in a
> +             * single pass.  */
>              if (!q) {
>                  return 1;
>              }
> @@ -5642,6 +5644,53 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
>              tcg_temp_free_i32(tmp2);
>              tcg_temp_free_i32(tmp3);
>              return 0;
> +
> +        case NEON_3R_VPADD_VQRDMLAH:
> +            if (!u) {
> +                break;  /* VPADD */
> +            }
> +            /* VQRDMLAH */
> +            switch (size) {
> +            case 1:
> +                fn_gvec_ptr = gen_helper_gvec_qrdmlah_s16;
> +                break;
> +            case 2:
> +                fn_gvec_ptr = gen_helper_gvec_qrdmlah_s32;
> +                break;
> +            default:
> +                return 1;
> +            }
> +        do_vqrdmlx:
> +            if (arm_dc_feature(s, ARM_FEATURE_V8_1_SIMD)) {
> +                int opr_sz = (1 + q) * 8;
> +                tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
> +                                   vfp_reg_offset(1, rn),
> +                                   vfp_reg_offset(1, rm), cpu_env,
> +                                   opr_sz, opr_sz, 0, fn_gvec_ptr);
> +                return 0;
> +            }
> +            return 1;
> +
> +        case NEON_3R_VFM_VQRDMLSH:
> +            if (!u) {
> +                /* VFM, VFMS */
> +                if ((5 & (1 << size)) == 0) {
> +                    return 1;
> +                }
> +                break;
> +            }
> +            /* VQRDMLSH */
> +            switch (size) {
> +            case 1:
> +                fn_gvec_ptr = gen_helper_gvec_qrdmlsh_s16;
> +                break;
> +            case 2:
> +                fn_gvec_ptr = gen_helper_gvec_qrdmlsh_s32;
> +                break;
> +            default:
> +                return 1;
> +            }
> +            goto do_vqrdmlx;

Could we not take the opportunity to re-factor out the common bit rather
than make this mega function even more byzantine?

>          }
>          if (size == 3 && op != NEON_3R_LOGIC) {
>              /* 64-bit element instructions. */
> @@ -5727,11 +5776,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
>                  rm = rtmp;
>              }
>              break;
> -        case NEON_3R_VPADD:
> -            if (u) {
> -                return 1;
> -            }
> -            /* Fall through */
> +        case NEON_3R_VPADD_VQRDMLAH:
>          case NEON_3R_VPMAX:
>          case NEON_3R_VPMIN:
>              pairwise = 1;
> @@ -5765,8 +5810,8 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
>                  return 1;
>              }
>              break;
> -        case NEON_3R_VFM:
> -            if (!arm_dc_feature(s, ARM_FEATURE_VFP4) || u) {
> +        case NEON_3R_VFM_VQRDMLSH:
> +            if (!arm_dc_feature(s, ARM_FEATURE_VFP4)) {
>                  return 1;
>              }
>              break;
> @@ -5963,7 +6008,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
>                  }
>              }
>              break;
> -        case NEON_3R_VPADD:
> +        case NEON_3R_VPADD_VQRDMLAH:
>              switch (size) {
>              case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break;
>              case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break;
> @@ -6062,7 +6107,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
>                }
>              }
>              break;
> -        case NEON_3R_VFM:
> +        case NEON_3R_VFM_VQRDMLSH:
>          {
>              /* VFMA, VFMS: fused multiply-add */
>              TCGv_ptr fpstatus = get_fpstatus_ptr(1);


--
Alex Bennée

WARNING: multiple messages have this Message-ID (diff)
From: "Alex Bennée" <alex.bennee@linaro.org>
To: Richard Henderson <richard.henderson@linaro.org>
Cc: qemu-devel@nongnu.org, qemu-arm@nongnu.org
Subject: Re: [Qemu-devel] [Qemu-arm] [PATCH v1 06/12] target/arm: Decode aa32 armv8.1 three same
Date: Mon, 13 Nov 2017 16:55:06 +0000	[thread overview]
Message-ID: <87shdi9l05.fsf@linaro.org> (raw)
In-Reply-To: <20171004184325.24157-7-richard.henderson@linaro.org>


Richard Henderson <richard.henderson@linaro.org> writes:

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/arm/translate.c | 83 ++++++++++++++++++++++++++++++++++++++------------
>  1 file changed, 64 insertions(+), 19 deletions(-)
>
> diff --git a/target/arm/translate.c b/target/arm/translate.c
> index ab1a12a1b8..0cd58710b3 100644
> --- a/target/arm/translate.c
> +++ b/target/arm/translate.c
> @@ -25,6 +25,7 @@
>  #include "disas/disas.h"
>  #include "exec/exec-all.h"
>  #include "tcg-op.h"
> +#include "tcg-op-gvec.h"
>  #include "qemu/log.h"
>  #include "qemu/bitops.h"
>  #include "arm_ldst.h"
> @@ -5334,9 +5335,9 @@ static void gen_neon_narrow_op(int op, int u, int size,
>  #define NEON_3R_VPMAX 20
>  #define NEON_3R_VPMIN 21
>  #define NEON_3R_VQDMULH_VQRDMULH 22
> -#define NEON_3R_VPADD 23
> +#define NEON_3R_VPADD_VQRDMLAH 23
>  #define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
> -#define NEON_3R_VFM 25 /* VFMA, VFMS : float fused multiply-add */
> +#define NEON_3R_VFM_VQRDMLSH 25 /* VFMA, VFMS : float fused multiply-add */
>  #define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
>  #define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
>  #define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
> @@ -5368,9 +5369,9 @@ static const uint8_t neon_3r_sizes[] = {
>      [NEON_3R_VPMAX] = 0x7,
>      [NEON_3R_VPMIN] = 0x7,
>      [NEON_3R_VQDMULH_VQRDMULH] = 0x6,
> -    [NEON_3R_VPADD] = 0x7,
> +    [NEON_3R_VPADD_VQRDMLAH] = 0x7,
>      [NEON_3R_SHA] = 0xf, /* size field encodes op type */
> -    [NEON_3R_VFM] = 0x5, /* size bit 1 encodes op */
> +    [NEON_3R_VFM_VQRDMLSH] = 0x7, /* For VFM, size bit 1 encodes op */
>      [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
>      [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
>      [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
> @@ -5556,6 +5557,7 @@ static const uint8_t neon_2rm_sizes[] = {
>
>  static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
>  {
> +    void (*fn_gvec_ptr)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
>      int op;
>      int q;
>      int rd, rn, rm;
> @@ -5600,12 +5602,12 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
>          if (q && ((rd | rn | rm) & 1)) {
>              return 1;
>          }
> -        /*
> -         * The SHA-1/SHA-256 3-register instructions require special treatment
> -         * here, as their size field is overloaded as an op type selector, and
> -         * they all consume their input in a single pass.
> -         */
> -        if (op == NEON_3R_SHA) {
> +        switch (op) {
> +        case NEON_3R_SHA:
> +            /* The SHA-1/SHA-256 3-register instructions require special
> +             * treatment here, as their size field is overloaded as an
> +             * op type selector, and they all consume their input in a
> +             * single pass.  */
>              if (!q) {
>                  return 1;
>              }
> @@ -5642,6 +5644,53 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
>              tcg_temp_free_i32(tmp2);
>              tcg_temp_free_i32(tmp3);
>              return 0;
> +
> +        case NEON_3R_VPADD_VQRDMLAH:
> +            if (!u) {
> +                break;  /* VPADD */
> +            }
> +            /* VQRDMLAH */
> +            switch (size) {
> +            case 1:
> +                fn_gvec_ptr = gen_helper_gvec_qrdmlah_s16;
> +                break;
> +            case 2:
> +                fn_gvec_ptr = gen_helper_gvec_qrdmlah_s32;
> +                break;
> +            default:
> +                return 1;
> +            }
> +        do_vqrdmlx:
> +            if (arm_dc_feature(s, ARM_FEATURE_V8_1_SIMD)) {
> +                int opr_sz = (1 + q) * 8;
> +                tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
> +                                   vfp_reg_offset(1, rn),
> +                                   vfp_reg_offset(1, rm), cpu_env,
> +                                   opr_sz, opr_sz, 0, fn_gvec_ptr);
> +                return 0;
> +            }
> +            return 1;
> +
> +        case NEON_3R_VFM_VQRDMLSH:
> +            if (!u) {
> +                /* VFM, VFMS */
> +                if ((5 & (1 << size)) == 0) {
> +                    return 1;
> +                }
> +                break;
> +            }
> +            /* VQRDMLSH */
> +            switch (size) {
> +            case 1:
> +                fn_gvec_ptr = gen_helper_gvec_qrdmlsh_s16;
> +                break;
> +            case 2:
> +                fn_gvec_ptr = gen_helper_gvec_qrdmlsh_s32;
> +                break;
> +            default:
> +                return 1;
> +            }
> +            goto do_vqrdmlx;

Could we not take the opportunity to re-factor out the common bit rather
than make this mega function even more byzantine?

>          }
>          if (size == 3 && op != NEON_3R_LOGIC) {
>              /* 64-bit element instructions. */
> @@ -5727,11 +5776,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
>                  rm = rtmp;
>              }
>              break;
> -        case NEON_3R_VPADD:
> -            if (u) {
> -                return 1;
> -            }
> -            /* Fall through */
> +        case NEON_3R_VPADD_VQRDMLAH:
>          case NEON_3R_VPMAX:
>          case NEON_3R_VPMIN:
>              pairwise = 1;
> @@ -5765,8 +5810,8 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
>                  return 1;
>              }
>              break;
> -        case NEON_3R_VFM:
> -            if (!arm_dc_feature(s, ARM_FEATURE_VFP4) || u) {
> +        case NEON_3R_VFM_VQRDMLSH:
> +            if (!arm_dc_feature(s, ARM_FEATURE_VFP4)) {
>                  return 1;
>              }
>              break;
> @@ -5963,7 +6008,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
>                  }
>              }
>              break;
> -        case NEON_3R_VPADD:
> +        case NEON_3R_VPADD_VQRDMLAH:
>              switch (size) {
>              case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break;
>              case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break;
> @@ -6062,7 +6107,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
>                }
>              }
>              break;
> -        case NEON_3R_VFM:
> +        case NEON_3R_VFM_VQRDMLSH:
>          {
>              /* VFMA, VFMS: fused multiply-add */
>              TCGv_ptr fpstatus = get_fpstatus_ptr(1);


--
Alex Bennée

  reply	other threads:[~2017-11-13 16:55 UTC|newest]

Thread overview: 55+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-10-04 18:43 [Qemu-arm] [PATCH v1 00/12] ARM v8.1 simd + v8.3 complex insns Richard Henderson
2017-10-04 18:43 ` [Qemu-devel] " Richard Henderson
2017-10-04 18:43 ` [Qemu-arm] [PATCH v1 01/12] HACK: use objdump disas Richard Henderson
2017-10-04 18:43   ` [Qemu-devel] " Richard Henderson
2017-11-13 11:33   ` [Qemu-arm] " Alex Bennée
2017-11-13 11:33     ` [Qemu-devel] " Alex Bennée
2017-11-14  8:38     ` Richard Henderson
2017-11-14  8:38       ` [Qemu-devel] " Richard Henderson
2017-10-04 18:43 ` [Qemu-arm] [PATCH v1 02/12] target/arm: Add ARM_FEATURE_V8_1_SIMD Richard Henderson
2017-10-04 18:43   ` [Qemu-devel] " Richard Henderson
2017-11-13 11:34   ` [Qemu-arm] " Alex Bennée
2017-11-13 11:34     ` [Qemu-devel] " Alex Bennée
2017-10-04 18:43 ` [Qemu-arm] [PATCH v1 03/12] target/arm: Decode aa64 armv8.1 scalar three same extra Richard Henderson
2017-10-04 18:43   ` [Qemu-devel] " Richard Henderson
2017-11-13 16:30   ` [Qemu-arm] " Alex Bennée
2017-11-13 16:30     ` [Qemu-devel] " Alex Bennée
2017-11-13 16:42     ` Peter Maydell
2017-11-14  8:44     ` Richard Henderson
2017-11-14  8:44       ` [Qemu-devel] " Richard Henderson
2017-10-04 18:43 ` [Qemu-devel] [PATCH v1 04/12] target/arm: Decode aa64 armv8.1 " Richard Henderson
2017-11-13 16:41   ` Alex Bennée
2017-10-04 18:43 ` [Qemu-arm] [PATCH v1 05/12] target/arm: Decode aa64 armv8.1 scalar/vector x indexed element Richard Henderson
2017-10-04 18:43   ` [Qemu-devel] " Richard Henderson
2017-11-13 16:44   ` [Qemu-arm] " Alex Bennée
2017-11-13 16:44     ` [Qemu-devel] " Alex Bennée
2017-10-04 18:43 ` [Qemu-arm] [PATCH v1 06/12] target/arm: Decode aa32 armv8.1 three same Richard Henderson
2017-10-04 18:43   ` [Qemu-devel] " Richard Henderson
2017-11-13 16:55   ` Alex Bennée [this message]
2017-11-13 16:55     ` [Qemu-devel] [Qemu-arm] " Alex Bennée
2017-11-14  8:46     ` Richard Henderson
2017-11-14  8:46       ` [Qemu-devel] " Richard Henderson
2017-11-14 10:06       ` Alex Bennée
2017-11-14 10:06         ` [Qemu-devel] " Alex Bennée
2017-11-14 10:46         ` Richard Henderson
2017-11-14 10:46           ` [Qemu-devel] " Richard Henderson
2017-10-04 18:43 ` [Qemu-devel] [PATCH v1 07/12] target/arm: Decode aa32 armv8.1 two reg and a scalar Richard Henderson
2017-11-13 17:05   ` Alex Bennée
2017-11-22 13:12     ` Richard Henderson
2017-10-04 18:43 ` [Qemu-arm] [PATCH v1 08/12] target/arm: Add ARM_FEATURE_V8_FCMA Richard Henderson
2017-10-04 18:43   ` [Qemu-devel] " Richard Henderson
2017-11-13 17:06   ` [Qemu-arm] " Alex Bennée
2017-11-13 17:06     ` [Qemu-devel] " Alex Bennée
2017-10-04 18:43 ` [Qemu-devel] [PATCH v1 09/12] target/arm: Decode aa64 armv8.3 fcadd Richard Henderson
2017-11-13 17:12   ` Alex Bennée
2017-10-04 18:43 ` [Qemu-devel] [PATCH v1 10/12] target/arm: Decode aa64 armv8.3 fcmla Richard Henderson
2017-10-04 18:43 ` [Qemu-devel] [PATCH v1 11/12] target/arm: Decode aa32 armv8.3 3-same Richard Henderson
2017-10-04 18:43 ` [Qemu-devel] [PATCH v1 12/12] target/arm: Decode aa32 armv8.3 2-reg-index Richard Henderson
2017-10-04 18:58 ` [Qemu-devel] [PATCH v1 00/12] ARM v8.1 simd + v8.3 complex insns no-reply
2017-10-04 18:58   ` no-reply
2017-10-04 18:58 ` [Qemu-arm] " no-reply
2017-10-04 18:58   ` no-reply
2017-10-04 18:58 ` [Qemu-arm] " no-reply
2017-10-04 18:58   ` no-reply
2017-11-13 17:16 ` [Qemu-arm] " Alex Bennée
2017-11-13 17:16   ` [Qemu-devel] " Alex Bennée

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87shdi9l05.fsf@linaro.org \
    --to=alex.bennee@linaro.org \
    --cc=qemu-arm@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.