Re: [Qemu-devel] [PATCH v11 17/20] target/arm: Use vector infrastructure for aa64 multiplies

All of lore.kernel.org
 help / color / mirror / Atom feed

From: "Alex Bennée" <alex.bennee@linaro.org>
To: Richard Henderson <richard.henderson@linaro.org>
Cc: qemu-devel@nongnu.org, peter.maydell@linaro.org
Subject: Re: [Qemu-devel] [PATCH v11 17/20] target/arm: Use vector infrastructure for aa64 multiplies
Date: Tue, 06 Feb 2018 11:11:14 +0000	[thread overview]
Message-ID: <87inbawey5.fsf@linaro.org> (raw)
In-Reply-To: <20180126045742.5487-18-richard.henderson@linaro.org>


Richard Henderson <richard.henderson@linaro.org> writes:

> Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

> ---
>  target/arm/translate-a64.c | 154 +++++++++++++++++++++++++++++++++++++--------
>  1 file changed, 129 insertions(+), 25 deletions(-)
>
> diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
> index c928c4787c..64a2c2df59 100644
> --- a/target/arm/translate-a64.c
> +++ b/target/arm/translate-a64.c
> @@ -9753,6 +9753,66 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
>      }
>  }
>
> +static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
> +{
> +    gen_helper_neon_mul_u8(a, a, b);
> +    gen_helper_neon_add_u8(d, d, a);
> +}
> +
> +static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
> +{
> +    gen_helper_neon_mul_u16(a, a, b);
> +    gen_helper_neon_add_u16(d, d, a);
> +}
> +
> +static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
> +{
> +    tcg_gen_mul_i32(a, a, b);
> +    tcg_gen_add_i32(d, d, a);
> +}
> +
> +static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
> +{
> +    tcg_gen_mul_i64(a, a, b);
> +    tcg_gen_add_i64(d, d, a);
> +}
> +
> +static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
> +{
> +    tcg_gen_mul_vec(vece, a, a, b);
> +    tcg_gen_add_vec(vece, d, d, a);
> +}
> +
> +static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
> +{
> +    gen_helper_neon_mul_u8(a, a, b);
> +    gen_helper_neon_sub_u8(d, d, a);
> +}
> +
> +static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
> +{
> +    gen_helper_neon_mul_u16(a, a, b);
> +    gen_helper_neon_sub_u16(d, d, a);
> +}
> +
> +static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
> +{
> +    tcg_gen_mul_i32(a, a, b);
> +    tcg_gen_sub_i32(d, d, a);
> +}
> +
> +static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
> +{
> +    tcg_gen_mul_i64(a, a, b);
> +    tcg_gen_sub_i64(d, d, a);
> +}
> +
> +static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
> +{
> +    tcg_gen_mul_vec(vece, a, a, b);
> +    tcg_gen_sub_vec(vece, d, d, a);
> +}
> +
>  /* Integer op subgroup of C3.6.16. */
>  static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
>  {
> @@ -9771,6 +9831,52 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
>            .prefer_i64 = TCG_TARGET_REG_BITS == 64,
>            .vece = MO_64 },
>      };
> +    static const GVecGen3 mla_op[4] = {
> +        { .fni4 = gen_mla8_i32,
> +          .fniv = gen_mla_vec,
> +          .opc = INDEX_op_mul_vec,
> +          .load_dest = true,
> +          .vece = MO_8 },
> +        { .fni4 = gen_mla16_i32,
> +          .fniv = gen_mla_vec,
> +          .opc = INDEX_op_mul_vec,
> +          .load_dest = true,
> +          .vece = MO_16 },
> +        { .fni4 = gen_mla32_i32,
> +          .fniv = gen_mla_vec,
> +          .opc = INDEX_op_mul_vec,
> +          .load_dest = true,
> +          .vece = MO_32 },
> +        { .fni8 = gen_mla64_i64,
> +          .fniv = gen_mla_vec,
> +          .opc = INDEX_op_mul_vec,
> +          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
> +          .load_dest = true,
> +          .vece = MO_64 },
> +    };
> +    static const GVecGen3 mls_op[4] = {
> +        { .fni4 = gen_mls8_i32,
> +          .fniv = gen_mls_vec,
> +          .opc = INDEX_op_mul_vec,
> +          .load_dest = true,
> +          .vece = MO_8 },
> +        { .fni4 = gen_mls16_i32,
> +          .fniv = gen_mls_vec,
> +          .opc = INDEX_op_mul_vec,
> +          .load_dest = true,
> +          .vece = MO_16 },
> +        { .fni4 = gen_mls32_i32,
> +          .fniv = gen_mls_vec,
> +          .opc = INDEX_op_mul_vec,
> +          .load_dest = true,
> +          .vece = MO_32 },
> +        { .fni8 = gen_mls64_i64,
> +          .fniv = gen_mls_vec,
> +          .opc = INDEX_op_mul_vec,
> +          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
> +          .load_dest = true,
> +          .vece = MO_64 },
> +    };
>
>      int is_q = extract32(insn, 30, 1);
>      int u = extract32(insn, 29, 1);
> @@ -9828,6 +9934,19 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
>              gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size);
>          }
>          return;
> +    case 0x13: /* MUL, PMUL */
> +        if (!u) { /* MUL */
> +            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size);
> +            return;
> +        }
> +        break;
> +    case 0x12: /* MLA, MLS */
> +        if (u) {
> +            gen_gvec_op3(s, is_q, rd, rn, rm, &mls_op[size]);
> +        } else {
> +            gen_gvec_op3(s, is_q, rd, rn, rm, &mla_op[size]);
> +        }
> +        return;
>      case 0x11:
>          if (!u) { /* CMTST */
>              gen_gvec_op3(s, is_q, rd, rn, rm, &cmtst_op[size]);
> @@ -10002,23 +10121,10 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
>                  break;
>              }
>              case 0x13: /* MUL, PMUL */
> -                if (u) {
> -                    /* PMUL */
> -                    assert(size == 0);
> -                    genfn = gen_helper_neon_mul_p8;
> -                    break;
> -                }
> -                /* fall through : MUL */
> -            case 0x12: /* MLA, MLS */
> -            {
> -                static NeonGenTwoOpFn * const fns[3] = {
> -                    gen_helper_neon_mul_u8,
> -                    gen_helper_neon_mul_u16,
> -                    tcg_gen_mul_i32,
> -                };
> -                genfn = fns[size];
> +                assert(u); /* PMUL */
> +                assert(size == 0);
> +                genfn = gen_helper_neon_mul_p8;
>                  break;
> -            }
>              case 0x16: /* SQDMULH, SQRDMULH */
>              {
>                  static NeonGenTwoOpEnvFn * const fns[2][2] = {
> @@ -10039,18 +10145,16 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
>                  genfn(tcg_res, tcg_op1, tcg_op2);
>              }
>
> -            if (opcode == 0xf || opcode == 0x12) {
> -                /* SABA, UABA, MLA, MLS: accumulating ops */
> -                static NeonGenTwoOpFn * const fns[3][2] = {
> -                    { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
> -                    { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
> -                    { tcg_gen_add_i32, tcg_gen_sub_i32 },
> +            if (opcode == 0xf) {
> +                /* SABA, UABA: accumulating ops */
> +                static NeonGenTwoOpFn * const fns[3] = {
> +                    gen_helper_neon_add_u8,
> +                    gen_helper_neon_add_u16,
> +                    tcg_gen_add_i32,
>                  };
> -                bool is_sub = (opcode == 0x12 && u); /* MLS */
>
> -                genfn = fns[size][is_sub];
>                  read_vec_element_i32(s, tcg_op1, rd, pass, MO_32);
> -                genfn(tcg_res, tcg_op1, tcg_res);
> +                fns[size](tcg_res, tcg_op1, tcg_res);
>              }
>
>              write_vec_element_i32(s, tcg_res, rd, pass, MO_32);


--
Alex Bennée

next prev parent reply	other threads:[~2018-02-06 11:11 UTC|newest]

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-01-26  4:57 [Qemu-devel] [PATCH v11 00/20] tcg: generic vector operations Richard Henderson
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 01/20] tcg: Allow multiple word entries into the constant pool Richard Henderson
2018-02-06  8:51   ` Alex Bennée
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 02/20] tcg: Add types and basic operations for host vectors Richard Henderson
2018-02-06  8:53   ` Alex Bennée
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 03/20] tcg: Standardize integral arguments to expanders Richard Henderson
2018-02-06  8:57   ` Alex Bennée
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 04/20] tcg: Add generic vector expanders Richard Henderson
2018-02-06 10:59   ` Alex Bennée
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 05/20] tcg: Add generic vector ops for constant shifts Richard Henderson
2018-02-06 11:00   ` Alex Bennée
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 06/20] tcg: Add generic vector ops for comparisons Richard Henderson
2018-02-06 11:01   ` Alex Bennée
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 07/20] tcg: Add generic vector ops for multiplication Richard Henderson
2018-02-06 11:02   ` Alex Bennée
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 08/20] tcg: Add generic helpers for saturating arithmetic Richard Henderson
2018-02-06 11:03   ` Alex Bennée
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 09/20] tcg: Add generic vector helpers with a scalar operand Richard Henderson
2018-02-06 11:04   ` Alex Bennée
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 10/20] tcg/optimize: Handle vector opcodes during optimize Richard Henderson
2018-02-06 11:07   ` Alex Bennée
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 11/20] target/arm: Align vector registers Richard Henderson
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 12/20] target/arm: Use vector infrastructure for aa64 add/sub/logic Richard Henderson
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 13/20] target/arm: Use vector infrastructure for aa64 mov/not/neg Richard Henderson
2018-02-06 11:08   ` Alex Bennée
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 14/20] target/arm: Use vector infrastructure for aa64 dup/movi Richard Henderson
2018-02-06 11:09   ` Alex Bennée
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 15/20] target/arm: Use vector infrastructure for aa64 constant shifts Richard Henderson
2018-02-05 11:14   ` Peter Maydell
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 16/20] target/arm: Use vector infrastructure for aa64 compares Richard Henderson
2018-02-06 11:10   ` Alex Bennée
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 17/20] target/arm: Use vector infrastructure for aa64 multiplies Richard Henderson
2018-02-06 11:11   ` Alex Bennée [this message]
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 18/20] target/arm: Use vector infrastructure for aa64 orr/bic immediate Richard Henderson
2018-02-06 11:13   ` Alex Bennée
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 19/20] tcg/i386: Add vector operations Richard Henderson
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 20/20] tcg/aarch64: " Richard Henderson
2018-02-06 11:15   ` Alex Bennée
2018-01-26 17:25 ` [Qemu-devel] [PATCH v11 00/20] tcg: generic " no-reply
2018-02-06 11:24 ` Alex Bennée
2018-02-06 12:07   ` Philippe Mathieu-Daudé
2018-02-06 12:36     ` Alex Bennée
2018-02-06 16:24 ` Alex Bennée
2018-02-06 20:57   ` Alex Bennée

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87inbawey5.fsf@linaro.org \
    --to=alex.bennee@linaro.org \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.