From: Peter Maydell <peter.maydell@linaro.org>
To: qemu-devel@nongnu.org
Subject: [Qemu-devel] [PULL 21/42] arm/translate-a64: add FP16 pairwise ops simd_three_reg_same_fp16
Date: Thu, 1 Mar 2018 11:23:42 +0000 [thread overview]
Message-ID: <20180301112403.12487-22-peter.maydell@linaro.org> (raw)
In-Reply-To: <20180301112403.12487-1-peter.maydell@linaro.org>
From: Alex Bennée <alex.bennee@linaro.org>
This includes FMAXNMP, FADDP, FMAXP, FMINNMP, FMINP.
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20180227143852.11175-14-alex.bennee@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target/arm/translate-a64.c | 208 +++++++++++++++++++++++++++++----------------
1 file changed, 133 insertions(+), 75 deletions(-)
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 217e73ef58..e96e6cdd15 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -10247,6 +10247,7 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
int datasize, elements;
int pass;
TCGv_ptr fpst;
+ bool pairwise = false;
if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
unallocated_encoding(s);
@@ -10272,91 +10273,148 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
datasize = is_q ? 128 : 64;
elements = datasize / 16;
+ switch (fpopcode) {
+ case 0x10: /* FMAXNMP */
+ case 0x12: /* FADDP */
+ case 0x16: /* FMAXP */
+ case 0x18: /* FMINNMP */
+ case 0x1e: /* FMINP */
+ pairwise = true;
+ break;
+ }
+
fpst = get_fpstatus_ptr(true);
- for (pass = 0; pass < elements; pass++) {
+ if (pairwise) {
+ int maxpass = is_q ? 8 : 4;
TCGv_i32 tcg_op1 = tcg_temp_new_i32();
TCGv_i32 tcg_op2 = tcg_temp_new_i32();
- TCGv_i32 tcg_res = tcg_temp_new_i32();
+ TCGv_i32 tcg_res[8];
- read_vec_element_i32(s, tcg_op1, rn, pass, MO_16);
- read_vec_element_i32(s, tcg_op2, rm, pass, MO_16);
+ for (pass = 0; pass < maxpass; pass++) {
+ int passreg = pass < (maxpass / 2) ? rn : rm;
+ int passelt = (pass << 1) & (maxpass - 1);
- switch (fpopcode) {
- case 0x0: /* FMAXNM */
- gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x1: /* FMLA */
- read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
- gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
- fpst);
- break;
- case 0x2: /* FADD */
- gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x3: /* FMULX */
- gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x4: /* FCMEQ */
- gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x6: /* FMAX */
- gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x7: /* FRECPS */
- gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x8: /* FMINNM */
- gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x9: /* FMLS */
- /* As usual for ARM, separate negation for fused multiply-add */
- tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
- read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
- gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
- fpst);
- break;
- case 0xa: /* FSUB */
- gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0xe: /* FMIN */
- gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0xf: /* FRSQRTS */
- gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x13: /* FMUL */
- gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x14: /* FCMGE */
- gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x15: /* FACGE */
- gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x17: /* FDIV */
- gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x1a: /* FABD */
- gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
- tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
- break;
- case 0x1c: /* FCMGT */
- gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x1d: /* FACGT */
- gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- default:
- fprintf(stderr, "%s: insn %#04x, fpop %#2x @ %#" PRIx64 "\n",
- __func__, insn, fpopcode, s->pc);
- g_assert_not_reached();
+ read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_16);
+ read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_16);
+ tcg_res[pass] = tcg_temp_new_i32();
+
+ switch (fpopcode) {
+ case 0x10: /* FMAXNMP */
+ gen_helper_advsimd_maxnumh(tcg_res[pass], tcg_op1, tcg_op2,
+ fpst);
+ break;
+ case 0x12: /* FADDP */
+ gen_helper_advsimd_addh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x16: /* FMAXP */
+ gen_helper_advsimd_maxh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x18: /* FMINNMP */
+ gen_helper_advsimd_minnumh(tcg_res[pass], tcg_op1, tcg_op2,
+ fpst);
+ break;
+ case 0x1e: /* FMINP */
+ gen_helper_advsimd_minh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ }
+
+ for (pass = 0; pass < maxpass; pass++) {
+ write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16);
+ tcg_temp_free_i32(tcg_res[pass]);
}
- write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
- tcg_temp_free_i32(tcg_res);
tcg_temp_free_i32(tcg_op1);
tcg_temp_free_i32(tcg_op2);
+
+ } else {
+ for (pass = 0; pass < elements; pass++) {
+ TCGv_i32 tcg_op1 = tcg_temp_new_i32();
+ TCGv_i32 tcg_op2 = tcg_temp_new_i32();
+ TCGv_i32 tcg_res = tcg_temp_new_i32();
+
+ read_vec_element_i32(s, tcg_op1, rn, pass, MO_16);
+ read_vec_element_i32(s, tcg_op2, rm, pass, MO_16);
+
+ switch (fpopcode) {
+ case 0x0: /* FMAXNM */
+ gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x1: /* FMLA */
+ read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
+ gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
+ fpst);
+ break;
+ case 0x2: /* FADD */
+ gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x3: /* FMULX */
+ gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x4: /* FCMEQ */
+ gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x6: /* FMAX */
+ gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x7: /* FRECPS */
+ gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x8: /* FMINNM */
+ gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x9: /* FMLS */
+ /* As usual for ARM, separate negation for fused multiply-add */
+ tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
+ read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
+ gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
+ fpst);
+ break;
+ case 0xa: /* FSUB */
+ gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0xe: /* FMIN */
+ gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0xf: /* FRSQRTS */
+ gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x13: /* FMUL */
+ gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x14: /* FCMGE */
+ gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x15: /* FACGE */
+ gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x17: /* FDIV */
+ gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x1a: /* FABD */
+ gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
+ tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
+ break;
+ case 0x1c: /* FCMGT */
+ gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x1d: /* FACGT */
+ gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ default:
+ fprintf(stderr, "%s: insn %#04x, fpop %#2x @ %#" PRIx64 "\n",
+ __func__, insn, fpopcode, s->pc);
+ g_assert_not_reached();
+ }
+
+ write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
+ tcg_temp_free_i32(tcg_res);
+ tcg_temp_free_i32(tcg_op1);
+ tcg_temp_free_i32(tcg_op2);
+ }
}
tcg_temp_free_ptr(fpst);
--
2.16.2
next prev parent reply other threads:[~2018-03-01 11:24 UTC|newest]
Thread overview: 48+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-03-01 11:23 [Qemu-devel] [PULL 00/42] target-arm queue Peter Maydell
2018-03-01 11:23 ` [Qemu-devel] [PULL 01/42] hw: register: Run post_write hook on reset Peter Maydell
2018-03-01 11:23 ` [Qemu-devel] [PULL 02/42] xilinx_spips: Enable only two slaves when reading/writing with stripe Peter Maydell
2018-03-01 11:23 ` [Qemu-devel] [PULL 03/42] xilinx_spips: Use 8 dummy cycles with the QIOR/QIOR4 commands Peter Maydell
2018-03-01 11:23 ` [Qemu-devel] [PULL 04/42] i2c: Fix some brace style issues Peter Maydell
2018-03-01 11:23 ` [Qemu-devel] [PULL 05/42] i2c: Move the bus class to i2c.h Peter Maydell
2018-03-01 11:23 ` [Qemu-devel] [PULL 06/42] hw/i2c-ddc: Do not fail writes Peter Maydell
2018-03-01 11:23 ` [Qemu-devel] [PULL 07/42] hw/sii9022: Add support for Silicon Image SII9022 Peter Maydell
2018-03-01 11:23 ` [Qemu-devel] [PULL 08/42] arm/vexpress: Add proper display connector emulation Peter Maydell
2018-03-01 11:23 ` [Qemu-devel] [PULL 09/42] include/exec/helper-head.h: support f16 in helper calls Peter Maydell
2018-03-01 11:23 ` [Qemu-devel] [PULL 10/42] target/arm/cpu64: introduce ARM_V8_FP16 feature bit Peter Maydell
2018-03-01 11:23 ` [Qemu-devel] [PULL 11/42] target/arm/cpu.h: update comment for half-precision values Peter Maydell
2018-03-01 11:23 ` [Qemu-devel] [PULL 12/42] target/arm/cpu.h: add additional float_status flags Peter Maydell
2018-03-01 11:23 ` [Qemu-devel] [PULL 13/42] target/arm/helper: pass explicit fpst to set_rmode Peter Maydell
2018-03-01 11:23 ` [Qemu-devel] [PULL 14/42] arm/translate-a64: implement half-precision F(MIN|MAX)(V|NMV) Peter Maydell
2018-03-01 11:23 ` [Qemu-devel] [PULL 15/42] arm/translate-a64: handle_3same_64 comment fix Peter Maydell
2018-03-01 11:23 ` [Qemu-devel] [PULL 16/42] arm/translate-a64: initial decode for simd_three_reg_same_fp16 Peter Maydell
2018-03-01 11:23 ` [Qemu-devel] [PULL 17/42] arm/translate-a64: add FP16 FADD/FABD/FSUB/FMUL/FDIV to simd_three_reg_same_fp16 Peter Maydell
2018-03-01 11:23 ` [Qemu-devel] [PULL 18/42] arm/translate-a64: add FP16 F[A]C[EQ/GE/GT] " Peter Maydell
2018-03-01 11:23 ` [Qemu-devel] [PULL 19/42] arm/translate-a64: add FP16 FMULA/X/S " Peter Maydell
2018-03-01 11:23 ` [Qemu-devel] [PULL 20/42] arm/translate-a64: add FP16 FR[ECP/SQRT]S " Peter Maydell
2018-03-01 11:23 ` Peter Maydell [this message]
2018-03-01 11:23 ` [Qemu-devel] [PULL 22/42] arm/translate-a64: add FP16 FMULX/MLS/FMLA to simd_indexed Peter Maydell
2018-03-01 11:23 ` [Qemu-devel] [PULL 23/42] arm/translate-a64: add FP16 x2 ops for simd_indexed Peter Maydell
2018-03-01 11:23 ` [Qemu-devel] [PULL 24/42] arm/translate-a64: initial decode for simd_two_reg_misc_fp16 Peter Maydell
2018-03-01 11:23 ` [Qemu-devel] [PULL 25/42] arm/translate-a64: add FP16 FPRINTx to simd_two_reg_misc_fp16 Peter Maydell
2018-03-01 11:23 ` [Qemu-devel] [PULL 26/42] arm/translate-a64: add FCVTxx " Peter Maydell
2018-03-01 11:23 ` [Qemu-devel] [PULL 27/42] arm/translate-a64: add FP16 FCMxx (zero) " Peter Maydell
2018-03-01 11:23 ` [Qemu-devel] [PULL 28/42] arm/translate-a64: add FP16 SCVTF/UCVFT " Peter Maydell
2018-03-01 11:23 ` [Qemu-devel] [PULL 29/42] arm/translate-a64: add FP16 FNEG/FABS " Peter Maydell
2018-03-01 11:23 ` [Qemu-devel] [PULL 30/42] arm/helper.c: re-factor recpe and add recepe_f16 Peter Maydell
2018-03-01 11:23 ` [Qemu-devel] [PULL 31/42] arm/translate-a64: add FP16 FRECPE Peter Maydell
2018-03-01 11:23 ` [Qemu-devel] [PULL 32/42] arm/translate-a64: add FP16 FRCPX to simd_two_reg_misc_fp16 Peter Maydell
2018-03-01 11:23 ` [Qemu-devel] [PULL 33/42] arm/translate-a64: add FP16 FSQRT " Peter Maydell
2018-03-01 11:23 ` [Qemu-devel] [PULL 34/42] arm/helper.c: re-factor rsqrte and add rsqrte_f16 Peter Maydell
2018-03-01 11:23 ` [Qemu-devel] [PULL 35/42] arm/translate-a64: add FP16 FRSQRTE to simd_two_reg_misc_fp16 Peter Maydell
2018-03-01 11:23 ` [Qemu-devel] [PULL 36/42] arm/translate-a64: add FP16 FMOV to simd_mod_imm Peter Maydell
2018-03-01 11:23 ` [Qemu-devel] [PULL 37/42] arm/translate-a64: add all FP16 ops in simd_scalar_pairwise Peter Maydell
2018-03-01 11:23 ` [Qemu-devel] [PULL 38/42] arm/translate-a64: implement simd_scalar_three_reg_same_fp16 Peter Maydell
2018-03-01 11:24 ` [Qemu-devel] [PULL 39/42] arm/translate-a64: add all single op FP16 to handle_fp_1src_half Peter Maydell
2018-03-01 11:24 ` [Qemu-devel] [PULL 40/42] target/arm: Enable ARM_V8_FP16 feature bit for the AArch64 "any" CPU Peter Maydell
2018-03-01 11:24 ` [Qemu-devel] [PULL 41/42] linux-user: Report AArch64 FP16 support via hwcap bits Peter Maydell
2018-03-01 11:24 ` [Qemu-devel] [PULL 42/42] MAINTAINERS: Update my email address Peter Maydell
2018-03-01 13:00 ` [Qemu-devel] [PULL 00/42] target-arm queue no-reply
2018-03-01 14:45 ` Peter Maydell
2018-03-02 0:52 ` Fam Zheng
2018-03-01 13:01 ` no-reply
2018-03-01 17:08 ` Peter Maydell
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180301112403.12487-22-peter.maydell@linaro.org \
--to=peter.maydell@linaro.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).