From: "Alex Bennée" <alex.bennee@linaro.org>
To: Richard Henderson <richard.henderson@linaro.org>
Cc: qemu-devel@nongnu.org, f4bug@amsat.org
Subject: Re: [Qemu-devel] [PATCH v3 4/6] target/arm: Use vector infrastructure for aa64 add/sub/logic
Date: Wed, 27 Sep 2017 00:12:59 +0100 [thread overview]
Message-ID: <87o9pxcav8.fsf@linaro.org> (raw)
In-Reply-To: <20170916023417.14599-5-richard.henderson@linaro.org>
Richard Henderson <richard.henderson@linaro.org> writes:
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
> ---
> target/arm/translate-a64.c | 216 ++++++++++++++++++++++++++++++---------------
> 1 file changed, 143 insertions(+), 73 deletions(-)
>
> diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
> index a3984c9a0d..4759cc9829 100644
> --- a/target/arm/translate-a64.c
> +++ b/target/arm/translate-a64.c
> @@ -21,6 +21,7 @@
> #include "cpu.h"
> #include "exec/exec-all.h"
> #include "tcg-op.h"
> +#include "tcg-op-gvec.h"
> #include "qemu/log.h"
> #include "arm_ldst.h"
> #include "translate.h"
> @@ -82,6 +83,7 @@ typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
> typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
> typedef void CryptoTwoOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32);
> typedef void CryptoThreeOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
> +typedef void GVecGenTwoFn(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t);
>
> /* initialize TCG globals. */
> void a64_translate_init(void)
> @@ -537,6 +539,21 @@ static inline int vec_reg_offset(DisasContext *s, int regno,
> return offs;
> }
>
> +/* Return the offset into CPUARMState of the "whole" vector register Qn. */
> +static inline int vec_full_reg_offset(DisasContext *s, int regno)
> +{
> + assert_fp_access_checked(s);
> + return offsetof(CPUARMState, vfp.regs[regno * 2]);
> +}
> +
> +/* Return the byte size of the "whole" vector register, VL / 8. */
> +static inline int vec_full_reg_size(DisasContext *s)
> +{
> + /* FIXME SVE: We should put the composite ZCR_EL* value into tb->flags.
> + In the meantime this is just the AdvSIMD length of 128. */
> + return 128 / 8;
> +}
> +
> /* Return the offset into CPUARMState of a slice (from
> * the least significant end) of FP register Qn (ie
> * Dn, Sn, Hn or Bn).
> @@ -9036,85 +9053,125 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
> }
> }
>
> +static void gen_bsl_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
> +{
> + tcg_gen_xor_i64(rn, rn, rm);
> + tcg_gen_and_i64(rn, rn, rd);
> + tcg_gen_xor_i64(rd, rm, rn);
> +}
> +
> +static void gen_bit_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
> +{
> + tcg_gen_xor_i64(rn, rn, rd);
> + tcg_gen_and_i64(rn, rn, rm);
> + tcg_gen_xor_i64(rd, rd, rn);
> +}
> +
> +static void gen_bif_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
> +{
> + tcg_gen_xor_i64(rn, rn, rd);
> + tcg_gen_andc_i64(rn, rn, rm);
> + tcg_gen_xor_i64(rd, rd, rn);
> +}
> +
> +static void gen_bsl_vec(TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
> +{
> + tcg_gen_xor_vec(rn, rn, rm);
> + tcg_gen_and_vec(rn, rn, rd);
> + tcg_gen_xor_vec(rd, rm, rn);
> +}
> +
> +static void gen_bit_vec(TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
> +{
> + tcg_gen_xor_vec(rn, rn, rd);
> + tcg_gen_and_vec(rn, rn, rm);
> + tcg_gen_xor_vec(rd, rd, rn);
> +}
> +
> +static void gen_bif_vec(TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
> +{
> + tcg_gen_xor_vec(rn, rn, rd);
> + tcg_gen_andc_vec(rn, rn, rm);
> + tcg_gen_xor_vec(rd, rd, rn);
> +}
> +
> /* Logic op (opcode == 3) subgroup of C3.6.16. */
> static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
> {
> + static const GVecGen3 bsl_op = {
> + .fni8 = gen_bsl_i64,
> + .fniv = gen_bsl_vec,
> + .prefer_i64 = TCG_TARGET_REG_BITS == 64,
> + .load_dest = true
> + };
> + static const GVecGen3 bit_op = {
> + .fni8 = gen_bit_i64,
> + .fniv = gen_bit_vec,
> + .prefer_i64 = TCG_TARGET_REG_BITS == 64,
> + .load_dest = true
> + };
> + static const GVecGen3 bif_op = {
> + .fni8 = gen_bif_i64,
> + .fniv = gen_bif_vec,
> + .prefer_i64 = TCG_TARGET_REG_BITS == 64,
> + .load_dest = true
> + };
> +
> int rd = extract32(insn, 0, 5);
> int rn = extract32(insn, 5, 5);
> int rm = extract32(insn, 16, 5);
> int size = extract32(insn, 22, 2);
> bool is_u = extract32(insn, 29, 1);
> bool is_q = extract32(insn, 30, 1);
> - TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
> - int pass;
> + GVecGenTwoFn *gvec_fn;
> + const GVecGen3 *gvec_op;
>
> if (!fp_access_check(s)) {
> return;
> }
>
> - tcg_op1 = tcg_temp_new_i64();
> - tcg_op2 = tcg_temp_new_i64();
> - tcg_res[0] = tcg_temp_new_i64();
> - tcg_res[1] = tcg_temp_new_i64();
> -
> - for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
> - read_vec_element(s, tcg_op1, rn, pass, MO_64);
> - read_vec_element(s, tcg_op2, rm, pass, MO_64);
> -
> - if (!is_u) {
> - switch (size) {
> - case 0: /* AND */
> - tcg_gen_and_i64(tcg_res[pass], tcg_op1, tcg_op2);
> - break;
> - case 1: /* BIC */
> - tcg_gen_andc_i64(tcg_res[pass], tcg_op1, tcg_op2);
> - break;
> - case 2: /* ORR */
> - tcg_gen_or_i64(tcg_res[pass], tcg_op1, tcg_op2);
> - break;
> - case 3: /* ORN */
> - tcg_gen_orc_i64(tcg_res[pass], tcg_op1, tcg_op2);
> - break;
> - }
> - } else {
> - if (size != 0) {
> - /* B* ops need res loaded to operate on */
> - read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
> - }
> -
> - switch (size) {
> - case 0: /* EOR */
> - tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2);
> - break;
> - case 1: /* BSL bitwise select */
> - tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_op2);
> - tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_res[pass]);
> - tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op1);
> - break;
> - case 2: /* BIT, bitwise insert if true */
> - tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
> - tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_op2);
> - tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
> - break;
> - case 3: /* BIF, bitwise insert if false */
> - tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
> - tcg_gen_andc_i64(tcg_op1, tcg_op1, tcg_op2);
> - tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
> - break;
> - }
> - }
> - }
> + switch (size + 4 * is_u) {
> + case 0: /* AND */
> + gvec_fn = tcg_gen_gvec_and;
> + goto do_fn;
> + case 1: /* BIC */
> + gvec_fn = tcg_gen_gvec_andc;
> + goto do_fn;
> + case 2: /* ORR */
> + gvec_fn = tcg_gen_gvec_or;
> + goto do_fn;
> + case 3: /* ORN */
> + gvec_fn = tcg_gen_gvec_orc;
> + goto do_fn;
> + case 4: /* EOR */
> + gvec_fn = tcg_gen_gvec_xor;
> + goto do_fn;
> + do_fn:
> + gvec_fn(vec_full_reg_offset(s, rd),
> + vec_full_reg_offset(s, rn),
> + vec_full_reg_offset(s, rm),
> + is_q ? 16 : 8, vec_full_reg_size(s));
> + return;
> +
> + case 5: /* BSL bitwise select */
> + gvec_op = &bsl_op;
> + goto do_op;
> + case 6: /* BIT, bitwise insert if true */
> + gvec_op = &bit_op;
> + goto do_op;
> + case 7: /* BIF, bitwise insert if false */
> + gvec_op = &bif_op;
> + goto do_op;
> + do_op:
> + tcg_gen_gvec_3(vec_full_reg_offset(s, rd),
> + vec_full_reg_offset(s, rn),
> + vec_full_reg_offset(s, rm),
> + is_q ? 16 : 8, vec_full_reg_size(s), gvec_op);
> + return;
>
> - write_vec_element(s, tcg_res[0], rd, 0, MO_64);
> - if (!is_q) {
> - tcg_gen_movi_i64(tcg_res[1], 0);
> + default:
> + g_assert_not_reached();
> }
> - write_vec_element(s, tcg_res[1], rd, 1, MO_64);
> -
> - tcg_temp_free_i64(tcg_op1);
> - tcg_temp_free_i64(tcg_op2);
> - tcg_temp_free_i64(tcg_res[0]);
> - tcg_temp_free_i64(tcg_res[1]);
> }
>
> /* Helper functions for 32 bit comparisons */
> @@ -9375,6 +9432,7 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
> int rn = extract32(insn, 5, 5);
> int rd = extract32(insn, 0, 5);
> int pass;
> + GVecGenTwoFn *gvec_op;
>
> switch (opcode) {
> case 0x13: /* MUL, PMUL */
> @@ -9414,6 +9472,28 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
> return;
> }
>
> + switch (opcode) {
> + case 0x10: /* ADD, SUB */
> + {
> + static GVecGenTwoFn * const fns[4][2] = {
> + { tcg_gen_gvec_add8, tcg_gen_gvec_sub8 },
> + { tcg_gen_gvec_add16, tcg_gen_gvec_sub16 },
> + { tcg_gen_gvec_add32, tcg_gen_gvec_sub32 },
> + { tcg_gen_gvec_add64, tcg_gen_gvec_sub64 },
> + };
> + gvec_op = fns[size][u];
> + goto do_gvec;
> + }
> + break;
> +
> + do_gvec:
> + gvec_op(vec_full_reg_offset(s, rd),
> + vec_full_reg_offset(s, rn),
> + vec_full_reg_offset(s, rm),
> + is_q ? 16 : 8, vec_full_reg_size(s));
> + return;
> + }
> +
> if (size == 3) {
> assert(is_q);
> for (pass = 0; pass < 2; pass++) {
> @@ -9586,16 +9666,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
> genfn = fns[size][u];
> break;
> }
> - case 0x10: /* ADD, SUB */
> - {
> - static NeonGenTwoOpFn * const fns[3][2] = {
> - { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
> - { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
> - { tcg_gen_add_i32, tcg_gen_sub_i32 },
> - };
> - genfn = fns[size][u];
> - break;
> - }
> case 0x11: /* CMTST, CMEQ */
> {
> static NeonGenTwoOpFn * const fns[3][2] = {
--
Alex Bennée
next prev parent reply other threads:[~2017-09-26 23:13 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-09-16 2:34 [Qemu-devel] [PATCH v3 0/6] TCG vectorization and example conversion Richard Henderson
2017-09-16 2:34 ` [Qemu-devel] [PATCH v3 1/6] tcg: Add types and operations for host vectors Richard Henderson
2017-09-26 19:28 ` Alex Bennée
2017-09-27 16:18 ` Richard Henderson
2017-09-16 2:34 ` [Qemu-devel] [PATCH v3 2/6] tcg: Add vector expanders Richard Henderson
2017-09-26 22:31 ` Alex Bennée
2017-09-16 2:34 ` [Qemu-devel] [PATCH v3 3/6] target/arm: Align vector registers Richard Henderson
2017-09-26 22:33 ` Alex Bennée
2017-09-16 2:34 ` [Qemu-devel] [PATCH v3 4/6] target/arm: Use vector infrastructure for aa64 add/sub/logic Richard Henderson
2017-09-26 23:12 ` Alex Bennée [this message]
2017-09-16 2:34 ` [Qemu-devel] [PATCH v3 5/6] tcg/i386: Add vector operations Richard Henderson
2017-09-16 2:34 ` [Qemu-devel] [PATCH v3 6/6] tcg/aarch64: " Richard Henderson
2017-09-16 2:35 ` [Qemu-devel] [PATCH v3 0/6] TCG vectorization and example conversion Richard Henderson
2017-09-26 22:58 ` no-reply
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=87o9pxcav8.fsf@linaro.org \
--to=alex.bennee@linaro.org \
--cc=f4bug@amsat.org \
--cc=qemu-devel@nongnu.org \
--cc=richard.henderson@linaro.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.