* [Qemu-devel] [PATCH 1/6] target-arm: A64: Add opcode comments to disas_simd_three_reg_diff
2014-02-16 18:21 [Qemu-devel] [PATCH 0/6] A64: finish Neon 3-reg-diff category, add LDTR/STTR Peter Maydell
@ 2014-02-16 18:21 ` Peter Maydell
2014-02-16 18:21 ` [Qemu-devel] [PATCH 2/6] target-arm: A64: Add most remaining three-reg-diff widening ops Peter Maydell
` (5 subsequent siblings)
6 siblings, 0 replies; 10+ messages in thread
From: Peter Maydell @ 2014-02-16 18:21 UTC (permalink / raw)
To: qemu-devel
Cc: Peter Crosthwaite, patches, Michael Matz, Alexander Graf,
Claudio Fontana, Dirk Mueller, Will Newton, Laurent Desnogues,
Alex Bennée, kvmarm, Christoffer Dall, Richard Henderson
The opcode switch in disas_simd_three_reg_diff() is missing the
customary comments indicating which cases correspond to which
instructions. Add them.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target-arm/translate-a64.c | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 3de9cad..d4e7a20 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -7091,24 +7091,24 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
/* 128 x 128 -> 64 */
unsupported_encoding(s, insn);
break;
- case 9:
- case 11:
- case 13:
- case 14:
+ case 9: /* SQDMLAL, SQDMLAL2 */
+ case 11: /* SQDMLSL, SQDMLSL2 */
+ case 13: /* SQDMULL, SQDMULL2 */
+ case 14: /* PMULL, PMULL2 */
if (is_u) {
unallocated_encoding(s);
return;
}
/* fall through */
- case 0:
- case 2:
+ case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
+ case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
unsupported_encoding(s, insn);
break;
- case 5:
- case 7:
- case 8:
- case 10:
- case 12:
+ case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
+ case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
+ case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
+ case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
+ case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
/* 64 x 64 -> 128 */
if (size == 3) {
unallocated_encoding(s);
--
1.8.5
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [Qemu-devel] [PATCH 2/6] target-arm: A64: Add most remaining three-reg-diff widening ops
2014-02-16 18:21 [Qemu-devel] [PATCH 0/6] A64: finish Neon 3-reg-diff category, add LDTR/STTR Peter Maydell
2014-02-16 18:21 ` [Qemu-devel] [PATCH 1/6] target-arm: A64: Add opcode comments to disas_simd_three_reg_diff Peter Maydell
@ 2014-02-16 18:21 ` Peter Maydell
2014-02-16 18:21 ` [Qemu-devel] [PATCH 3/6] target-arm: A64: Implement the wide 3-reg-different operations Peter Maydell
` (4 subsequent siblings)
6 siblings, 0 replies; 10+ messages in thread
From: Peter Maydell @ 2014-02-16 18:21 UTC (permalink / raw)
To: qemu-devel
Cc: Peter Crosthwaite, patches, Michael Matz, Alexander Graf,
Claudio Fontana, Dirk Mueller, Will Newton, Laurent Desnogues,
Alex Bennée, kvmarm, Christoffer Dall, Richard Henderson
Add the remainder of the 64x64->128 operations in the three-reg-diff
category except for PMULL, PMULL2.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target-arm/translate-a64.c | 109 ++++++++++++++++++++++++++++++++++++---------
1 file changed, 88 insertions(+), 21 deletions(-)
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index d4e7a20..809fbdb 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -75,8 +75,10 @@ typedef struct AArch64DecodeTable {
/* Function prototype for gen_ functions for calling Neon helpers */
typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
+typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
+typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
@@ -6879,6 +6881,24 @@ static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
}
}
+/* Generate code to do a "long" addition or subtraction, ie one done in
+ * TCGv_i64 on vector lanes twice the width specified by size.
+ */
+static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
+ TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
+{
+ static NeonGenTwo64OpFn * const fns[3][2] = {
+ { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
+ { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
+ { tcg_gen_add_i64, tcg_gen_sub_i64 },
+ };
+ NeonGenTwo64OpFn *genfn;
+ assert(size < 3);
+
+ genfn = fns[size][is_sub];
+ genfn(tcg_res, tcg_op1, tcg_op2);
+}
+
static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
int opcode, int rd, int rn, int rm)
{
@@ -6934,6 +6954,12 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
}
switch (opcode) {
+ case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
+ tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
+ break;
+ case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
+ tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
+ break;
case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
{
@@ -6954,15 +6980,31 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
break;
+ case 9: /* SQDMLAL, SQDMLAL2 */
+ case 11: /* SQDMLSL, SQDMLSL2 */
+ case 13: /* SQDMULL, SQDMULL2 */
+ tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
+ gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
+ tcg_passres, tcg_passres);
+ break;
default:
g_assert_not_reached();
}
- if (accop > 0) {
+ if (opcode == 9 || opcode == 11) {
+ /* saturating accumulate ops */
+ if (accop < 0) {
+ tcg_gen_neg_i64(tcg_passres, tcg_passres);
+ }
+ gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
+ tcg_res[pass], tcg_passres);
+ } else if (accop > 0) {
tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
- tcg_temp_free_i64(tcg_passres);
} else if (accop < 0) {
tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
+ }
+
+ if (accop != 0) {
tcg_temp_free_i64(tcg_passres);
}
@@ -6987,6 +7029,23 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
}
switch (opcode) {
+ case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
+ case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
+ {
+ TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
+ static NeonGenWidenFn * const widenfns[2][2] = {
+ { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
+ { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
+ };
+ NeonGenWidenFn *widenfn = widenfns[size][is_u];
+
+ widenfn(tcg_op2_64, tcg_op2);
+ widenfn(tcg_passres, tcg_op1);
+ gen_neon_addl(size, (opcode == 2), tcg_passres,
+ tcg_passres, tcg_op2_64);
+ tcg_temp_free_i64(tcg_op2_64);
+ break;
+ }
case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
if (size == 0) {
@@ -7020,28 +7079,32 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
}
}
break;
+ case 9: /* SQDMLAL, SQDMLAL2 */
+ case 11: /* SQDMLSL, SQDMLSL2 */
+ case 13: /* SQDMULL, SQDMULL2 */
+ assert(size == 1);
+ gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
+ gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
+ tcg_passres, tcg_passres);
+ break;
default:
g_assert_not_reached();
}
tcg_temp_free_i32(tcg_op1);
tcg_temp_free_i32(tcg_op2);
- if (accop > 0) {
- if (size == 0) {
- gen_helper_neon_addl_u16(tcg_res[pass], tcg_res[pass],
- tcg_passres);
- } else {
- gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
- tcg_passres);
- }
- tcg_temp_free_i64(tcg_passres);
- } else if (accop < 0) {
- if (size == 0) {
- gen_helper_neon_subl_u16(tcg_res[pass], tcg_res[pass],
- tcg_passres);
+ if (accop != 0) {
+ if (opcode == 9 || opcode == 11) {
+ /* saturating accumulate ops */
+ if (accop < 0) {
+ gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
+ }
+ gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
+ tcg_res[pass],
+ tcg_passres);
} else {
- gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
- tcg_passres);
+ gen_neon_addl(size, (accop < 0), tcg_res[pass],
+ tcg_res[pass], tcg_passres);
}
tcg_temp_free_i64(tcg_passres);
}
@@ -7091,19 +7154,23 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
/* 128 x 128 -> 64 */
unsupported_encoding(s, insn);
break;
+ case 14: /* PMULL, PMULL2 */
+ if (is_u || size == 1 || size == 2) {
+ unallocated_encoding(s);
+ return;
+ }
+ unsupported_encoding(s, insn);
+ break;
case 9: /* SQDMLAL, SQDMLAL2 */
case 11: /* SQDMLSL, SQDMLSL2 */
case 13: /* SQDMULL, SQDMULL2 */
- case 14: /* PMULL, PMULL2 */
- if (is_u) {
+ if (is_u || size == 0) {
unallocated_encoding(s);
return;
}
/* fall through */
case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
- unsupported_encoding(s, insn);
- break;
case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
--
1.8.5
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [Qemu-devel] [PATCH 3/6] target-arm: A64: Implement the wide 3-reg-different operations
2014-02-16 18:21 [Qemu-devel] [PATCH 0/6] A64: finish Neon 3-reg-diff category, add LDTR/STTR Peter Maydell
2014-02-16 18:21 ` [Qemu-devel] [PATCH 1/6] target-arm: A64: Add opcode comments to disas_simd_three_reg_diff Peter Maydell
2014-02-16 18:21 ` [Qemu-devel] [PATCH 2/6] target-arm: A64: Add most remaining three-reg-diff widening ops Peter Maydell
@ 2014-02-16 18:21 ` Peter Maydell
2014-02-16 18:21 ` [Qemu-devel] [PATCH 4/6] target-arm: A64: Implement narrowing three-reg-diff operations Peter Maydell
` (3 subsequent siblings)
6 siblings, 0 replies; 10+ messages in thread
From: Peter Maydell @ 2014-02-16 18:21 UTC (permalink / raw)
To: qemu-devel
Cc: Peter Crosthwaite, patches, Michael Matz, Alexander Graf,
Claudio Fontana, Dirk Mueller, Will Newton, Laurent Desnogues,
Alex Bennée, kvmarm, Christoffer Dall, Richard Henderson
Implement the wide three-reg-different operations:
SADDW, UADDW, SSUBW and USUBW.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target-arm/translate-a64.c | 41 ++++++++++++++++++++++++++++++++++++++++-
1 file changed, 40 insertions(+), 1 deletion(-)
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 809fbdb..511f15e 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -7117,6 +7117,41 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
tcg_temp_free_i64(tcg_res[1]);
}
+static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
+ int opcode, int rd, int rn, int rm)
+{
+ TCGv_i64 tcg_res[2];
+ int part = is_q ? 2 : 0;
+ int pass;
+
+ for (pass = 0; pass < 2; pass++) {
+ TCGv_i64 tcg_op1 = tcg_temp_new_i64();
+ TCGv_i32 tcg_op2 = tcg_temp_new_i32();
+ TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
+ static NeonGenWidenFn * const widenfns[3][2] = {
+ { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
+ { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
+ { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
+ };
+ NeonGenWidenFn *widenfn = widenfns[size][is_u];
+
+ read_vec_element(s, tcg_op1, rn, pass, MO_64);
+ read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
+ widenfn(tcg_op2_wide, tcg_op2);
+ tcg_temp_free_i32(tcg_op2);
+ tcg_res[pass] = tcg_temp_new_i64();
+ gen_neon_addl(size, (opcode == 3),
+ tcg_res[pass], tcg_op1, tcg_op2_wide);
+ tcg_temp_free_i64(tcg_op1);
+ tcg_temp_free_i64(tcg_op2_wide);
+ }
+
+ for (pass = 0; pass < 2; pass++) {
+ write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
+ tcg_temp_free_i64(tcg_res[pass]);
+ }
+}
+
/* C3.6.15 AdvSIMD three different
* 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
* +---+---+---+-----------+------+---+------+--------+-----+------+------+
@@ -7147,7 +7182,11 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
/* 64 x 128 -> 128 */
- unsupported_encoding(s, insn);
+ if (size == 3) {
+ unallocated_encoding(s);
+ return;
+ }
+ handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
break;
case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
--
1.8.5
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [Qemu-devel] [PATCH 4/6] target-arm: A64: Implement narrowing three-reg-diff operations
2014-02-16 18:21 [Qemu-devel] [PATCH 0/6] A64: finish Neon 3-reg-diff category, add LDTR/STTR Peter Maydell
` (2 preceding siblings ...)
2014-02-16 18:21 ` [Qemu-devel] [PATCH 3/6] target-arm: A64: Implement the wide 3-reg-different operations Peter Maydell
@ 2014-02-16 18:21 ` Peter Maydell
2014-02-16 18:21 ` [Qemu-devel] [PATCH 5/6] target-arm: A64: Implement PMULL instruction Peter Maydell
` (2 subsequent siblings)
6 siblings, 0 replies; 10+ messages in thread
From: Peter Maydell @ 2014-02-16 18:21 UTC (permalink / raw)
To: qemu-devel
Cc: Peter Crosthwaite, patches, Michael Matz, Alexander Graf,
Claudio Fontana, Dirk Mueller, Will Newton, Laurent Desnogues,
Alex Bennée, kvmarm, Christoffer Dall, Richard Henderson
Implement the narrowing three-reg-diff operations: ADDHN,
RADDHN, SUBHN and RSUBHN.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target-arm/translate-a64.c | 60 +++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 59 insertions(+), 1 deletion(-)
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 511f15e..cb630d8 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -7152,6 +7152,60 @@ static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
}
}
+static void do_narrow_high_u32(TCGv_i32 res, TCGv_i64 in)
+{
+ tcg_gen_shri_i64(in, in, 32);
+ tcg_gen_trunc_i64_i32(res, in);
+}
+
+static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
+{
+ tcg_gen_addi_i64(in, in, 1U << 31);
+ do_narrow_high_u32(res, in);
+}
+
+static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
+ int opcode, int rd, int rn, int rm)
+{
+ TCGv_i32 tcg_res[2];
+ int part = is_q ? 2 : 0;
+ int pass;
+
+ for (pass = 0; pass < 2; pass++) {
+ TCGv_i64 tcg_op1 = tcg_temp_new_i64();
+ TCGv_i64 tcg_op2 = tcg_temp_new_i64();
+ TCGv_i64 tcg_wideres = tcg_temp_new_i64();
+ static NeonGenNarrowFn * const narrowfns[3][2] = {
+ { gen_helper_neon_narrow_high_u8,
+ gen_helper_neon_narrow_round_high_u8 },
+ { gen_helper_neon_narrow_high_u16,
+ gen_helper_neon_narrow_round_high_u16 },
+ { do_narrow_high_u32, do_narrow_round_high_u32 },
+ };
+ NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
+
+ read_vec_element(s, tcg_op1, rn, pass, MO_64);
+ read_vec_element(s, tcg_op2, rm, pass, MO_64);
+
+ gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
+
+ tcg_temp_free_i64(tcg_op1);
+ tcg_temp_free_i64(tcg_op2);
+
+ tcg_res[pass] = tcg_temp_new_i32();
+ gennarrow(tcg_res[pass], tcg_wideres);
+ tcg_temp_free_i64(tcg_wideres);
+ }
+
+ for (pass = 0; pass < 2; pass++) {
+ write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
+ tcg_temp_free_i32(tcg_res[pass]);
+ }
+ if (!is_q) {
+ clear_vec_high(s, rd);
+ }
+}
+
/* C3.6.15 AdvSIMD three different
* 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
* +---+---+---+-----------+------+---+------+--------+-----+------+------+
@@ -7191,7 +7245,11 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
/* 128 x 128 -> 64 */
- unsupported_encoding(s, insn);
+ if (size == 3) {
+ unallocated_encoding(s);
+ return;
+ }
+ handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
break;
case 14: /* PMULL, PMULL2 */
if (is_u || size == 1 || size == 2) {
--
1.8.5
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [Qemu-devel] [PATCH 5/6] target-arm: A64: Implement PMULL instruction
2014-02-16 18:21 [Qemu-devel] [PATCH 0/6] A64: finish Neon 3-reg-diff category, add LDTR/STTR Peter Maydell
` (3 preceding siblings ...)
2014-02-16 18:21 ` [Qemu-devel] [PATCH 4/6] target-arm: A64: Implement narrowing three-reg-diff operations Peter Maydell
@ 2014-02-16 18:21 ` Peter Maydell
2014-02-17 16:29 ` Richard Henderson
2014-02-16 18:21 ` [Qemu-devel] [PATCH 6/6] target-arm: A64: Implement unprivileged load/store Peter Maydell
2014-02-17 16:30 ` [Qemu-devel] [PATCH 0/6] A64: finish Neon 3-reg-diff category, add LDTR/STTR Richard Henderson
6 siblings, 1 reply; 10+ messages in thread
From: Peter Maydell @ 2014-02-16 18:21 UTC (permalink / raw)
To: qemu-devel
Cc: Peter Crosthwaite, patches, Michael Matz, Alexander Graf,
Claudio Fontana, Dirk Mueller, Will Newton, Laurent Desnogues,
Alex Bennée, kvmarm, Christoffer Dall, Richard Henderson
Implement the PMULL instruction; this is the last unimplemented insn
in the three-reg-diff group.
Note that PMULL with size 3 is considered part of the AES part
of the crypto extensions (see the ID_AA64ISAR0_EL1 register definition
in the v8 ARM ARM), so it isn't necessary to burn an extra feature
bit on it, even though we're using more feature bits than a single
"crypto extension present/not present" toggle.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target-arm/helper-a64.c | 30 ++++++++++++++++++++++++++++++
target-arm/helper-a64.h | 2 ++
target-arm/translate-a64.c | 41 +++++++++++++++++++++++++++++++++++++++--
target-arm/translate.c | 1 +
target-arm/translate.h | 6 ++++++
5 files changed, 78 insertions(+), 2 deletions(-)
diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c
index c2ce33e..84dd2cd 100644
--- a/target-arm/helper-a64.c
+++ b/target-arm/helper-a64.c
@@ -180,6 +180,36 @@ uint64_t HELPER(simd_tbl)(CPUARMState *env, uint64_t result, uint64_t indices,
return result;
}
+/* Helper function for 64 bit polynomial multiply case:
+ * perform PolynomialMult(op1, op2) and return either the top or
+ * bottom half of the 128 bit result.
+ */
+uint64_t HELPER(neon_pmull_64_lo)(CPUARMState *env, uint64_t op1, uint64_t op2)
+{
+ int bitnum;
+ uint64_t res = 0;
+
+ for (bitnum = 0; bitnum < 64; bitnum++) {
+ if (op1 & (1ULL << bitnum)) {
+ res ^= op2 << bitnum;
+ }
+ }
+ return res;
+}
+uint64_t HELPER(neon_pmull_64_hi)(CPUARMState *env, uint64_t op1, uint64_t op2)
+{
+ int bitnum;
+ uint64_t res = 0;
+
+ /* bit 0 of op1 can't influence the high 64 bits at all */
+ for (bitnum = 1; bitnum < 64; bitnum++) {
+ if (op1 & (1ULL << bitnum)) {
+ res ^= op2 >> (64 - bitnum);
+ }
+ }
+ return res;
+}
+
/* 64bit/double versions of the neon float compare functions */
uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp)
{
diff --git a/target-arm/helper-a64.h b/target-arm/helper-a64.h
index ab9933c..95e4220 100644
--- a/target-arm/helper-a64.h
+++ b/target-arm/helper-a64.h
@@ -27,6 +27,8 @@ DEF_HELPER_3(vfp_cmpes_a64, i64, f32, f32, ptr)
DEF_HELPER_3(vfp_cmpd_a64, i64, f64, f64, ptr)
DEF_HELPER_3(vfp_cmped_a64, i64, f64, f64, ptr)
DEF_HELPER_FLAGS_5(simd_tbl, TCG_CALL_NO_RWG_SE, i64, env, i64, i64, i32, i32)
+DEF_HELPER_FLAGS_3(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, env, i64, i64)
DEF_HELPER_FLAGS_3(vfp_mulxs, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
DEF_HELPER_FLAGS_3(vfp_mulxd, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
DEF_HELPER_FLAGS_3(neon_ceq_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr)
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index cb630d8..1ceaa8a 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -7087,6 +7087,10 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
tcg_passres, tcg_passres);
break;
+ case 14: /* PMULL */
+ assert(size == 0);
+ gen_helper_neon_mull_p8(tcg_passres, tcg_op1, tcg_op2);
+ break;
default:
g_assert_not_reached();
}
@@ -7206,6 +7210,30 @@ static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
}
}
+static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm)
+{
+ /* PMULL of 64 x 64 -> 128 is an odd special case because it
+ * is the only three-reg-diff instruction which produces a
+ * 128-bit wide result from a single operation. However since
+ * it's possible to calculate the two halves more or less
+ * separately we just use two helper calls.
+ */
+ TCGv_i64 tcg_op1 = tcg_temp_new_i64();
+ TCGv_i64 tcg_op2 = tcg_temp_new_i64();
+ TCGv_i64 tcg_res = tcg_temp_new_i64();
+
+ read_vec_element(s, tcg_op1, rn, is_q, MO_64);
+ read_vec_element(s, tcg_op2, rm, is_q, MO_64);
+ gen_helper_neon_pmull_64_lo(tcg_res, cpu_env, tcg_op1, tcg_op2);
+ write_vec_element(s, tcg_res, rd, 0, MO_64);
+ gen_helper_neon_pmull_64_hi(tcg_res, cpu_env, tcg_op1, tcg_op2);
+ write_vec_element(s, tcg_res, rd, 1, MO_64);
+
+ tcg_temp_free_i64(tcg_op1);
+ tcg_temp_free_i64(tcg_op2);
+ tcg_temp_free_i64(tcg_res);
+}
+
/* C3.6.15 AdvSIMD three different
* 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
* +---+---+---+-----------+------+---+------+--------+-----+------+------+
@@ -7256,8 +7284,15 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
unallocated_encoding(s);
return;
}
- unsupported_encoding(s, insn);
- break;
+ if (size == 3) {
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)) {
+ unallocated_encoding(s);
+ return;
+ }
+ handle_pmull_64(s, is_q, rd, rn, rm);
+ return;
+ }
+ goto is_widening;
case 9: /* SQDMLAL, SQDMLAL2 */
case 11: /* SQDMLSL, SQDMLSL2 */
case 13: /* SQDMULL, SQDMULL2 */
@@ -7278,6 +7313,7 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
unallocated_encoding(s);
return;
}
+ is_widening:
handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
break;
default:
@@ -9008,6 +9044,7 @@ void gen_intermediate_code_internal_a64(ARMCPU *cpu,
dc->vec_stride = 0;
dc->cp_regs = cpu->cp_regs;
dc->current_pl = arm_current_pl(env);
+ dc->features = env->features;
init_tmp_a64_array(dc);
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 6ccf0ba..d340243 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -10595,6 +10595,7 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
dc->vec_stride = ARM_TBFLAG_VECSTRIDE(tb->flags);
dc->cp_regs = cpu->cp_regs;
dc->current_pl = arm_current_pl(env);
+ dc->features = env->features;
cpu_F0s = tcg_temp_new_i32();
cpu_F1s = tcg_temp_new_i32();
diff --git a/target-arm/translate.h b/target-arm/translate.h
index 67da699..889a031 100644
--- a/target-arm/translate.h
+++ b/target-arm/translate.h
@@ -26,6 +26,7 @@ typedef struct DisasContext {
int aarch64;
int current_pl;
GHashTable *cp_regs;
+ uint64_t features; /* CPU features bits */
#define TMP_A64_MAX 16
int tmp_a64_count;
TCGv_i64 tmp_a64[TMP_A64_MAX];
@@ -33,6 +34,11 @@ typedef struct DisasContext {
extern TCGv_ptr cpu_env;
+static inline int arm_dc_feature(DisasContext *dc, int feature)
+{
+ return (dc->features & (1ULL << feature)) != 0;
+}
+
/* target-specific extra values for is_jmp */
/* These instructions trap after executing, so the A32/T32 decoder must
* defer them until after the conditional execution state has been updated.
--
1.8.5
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [Qemu-devel] [PATCH 5/6] target-arm: A64: Implement PMULL instruction
2014-02-16 18:21 ` [Qemu-devel] [PATCH 5/6] target-arm: A64: Implement PMULL instruction Peter Maydell
@ 2014-02-17 16:29 ` Richard Henderson
2014-02-17 22:59 ` Peter Maydell
0 siblings, 1 reply; 10+ messages in thread
From: Richard Henderson @ 2014-02-17 16:29 UTC (permalink / raw)
To: Peter Maydell, qemu-devel
Cc: Peter Crosthwaite, patches, Michael Matz, Alexander Graf,
Claudio Fontana, Dirk Mueller, Will Newton, Laurent Desnogues,
Alex Bennée, kvmarm, Christoffer Dall
On 02/16/2014 12:21 PM, Peter Maydell wrote:
> +/* Helper function for 64 bit polynomial multiply case:
> + * perform PolynomialMult(op1, op2) and return either the top or
> + * bottom half of the 128 bit result.
> + */
> +uint64_t HELPER(neon_pmull_64_lo)(CPUARMState *env, uint64_t op1, uint64_t op2)
> +{
> + int bitnum;
> + uint64_t res = 0;
> +
> + for (bitnum = 0; bitnum < 64; bitnum++) {
> + if (op1 & (1ULL << bitnum)) {
> + res ^= op2 << bitnum;
> + }
> + }
> + return res;
> +}
These functions don't use env. Why are you passing it?
r~
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [Qemu-devel] [PATCH 5/6] target-arm: A64: Implement PMULL instruction
2014-02-17 16:29 ` Richard Henderson
@ 2014-02-17 22:59 ` Peter Maydell
0 siblings, 0 replies; 10+ messages in thread
From: Peter Maydell @ 2014-02-17 22:59 UTC (permalink / raw)
To: Richard Henderson
Cc: Peter Crosthwaite, Patch Tracking, Michael Matz, Alexander Graf,
QEMU Developers, Claudio Fontana, Dirk Mueller, Will Newton,
Laurent Desnogues, Alex Bennée, kvmarm@lists.cs.columbia.edu,
Christoffer Dall
On 17 February 2014 16:29, Richard Henderson <rth@twiddle.net> wrote:
> On 02/16/2014 12:21 PM, Peter Maydell wrote:
>> +/* Helper function for 64 bit polynomial multiply case:
>> + * perform PolynomialMult(op1, op2) and return either the top or
>> + * bottom half of the 128 bit result.
>> + */
>> +uint64_t HELPER(neon_pmull_64_lo)(CPUARMState *env, uint64_t op1, uint64_t op2)
>> +{
>> + int bitnum;
>> + uint64_t res = 0;
>> +
>> + for (bitnum = 0; bitnum < 64; bitnum++) {
>> + if (op1 & (1ULL << bitnum)) {
>> + res ^= op2 << bitnum;
>> + }
>> + }
>> + return res;
>> +}
>
> These functions don't use env. Why are you passing it?
Oops, that's leftover from my initial approach to the problem, which
used a single helper to calculate the full 128 bit result (and which
thus needed env so it could write to env->vfp.regs, since C and TCG
don't support multiple return values or 128 bit returns). I'll remove it.
thanks
-- PMM
^ permalink raw reply [flat|nested] 10+ messages in thread
* [Qemu-devel] [PATCH 6/6] target-arm: A64: Implement unprivileged load/store
2014-02-16 18:21 [Qemu-devel] [PATCH 0/6] A64: finish Neon 3-reg-diff category, add LDTR/STTR Peter Maydell
` (4 preceding siblings ...)
2014-02-16 18:21 ` [Qemu-devel] [PATCH 5/6] target-arm: A64: Implement PMULL instruction Peter Maydell
@ 2014-02-16 18:21 ` Peter Maydell
2014-02-17 16:30 ` [Qemu-devel] [PATCH 0/6] A64: finish Neon 3-reg-diff category, add LDTR/STTR Richard Henderson
6 siblings, 0 replies; 10+ messages in thread
From: Peter Maydell @ 2014-02-16 18:21 UTC (permalink / raw)
To: qemu-devel
Cc: Peter Crosthwaite, patches, Michael Matz, Alexander Graf,
Claudio Fontana, Dirk Mueller, Will Newton, Laurent Desnogues,
Alex Bennée, kvmarm, Christoffer Dall, Richard Henderson
Implement the unprivileged load and store instructions.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target-arm/translate-a64.c | 69 +++++++++++++++++++++++++---------------------
1 file changed, 37 insertions(+), 32 deletions(-)
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 1ceaa8a..8f1da24 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -618,20 +618,26 @@ static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
*/
/*
- * Store from GPR register to memory
+ * Store from GPR register to memory.
*/
+static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
+ TCGv_i64 tcg_addr, int size, int memidx)
+{
+ g_assert(size <= 3);
+ tcg_gen_qemu_st_i64(source, tcg_addr, memidx, MO_TE + size);
+}
+
static void do_gpr_st(DisasContext *s, TCGv_i64 source,
TCGv_i64 tcg_addr, int size)
{
- g_assert(size <= 3);
- tcg_gen_qemu_st_i64(source, tcg_addr, get_mem_index(s), MO_TE + size);
+ do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s));
}
/*
* Load from memory to GPR register
*/
-static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
- int size, bool is_signed, bool extend)
+static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
+ int size, bool is_signed, bool extend, int memidx)
{
TCGMemOp memop = MO_TE + size;
@@ -641,7 +647,7 @@ static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
memop += MO_SIGN;
}
- tcg_gen_qemu_ld_i64(dest, tcg_addr, get_mem_index(s), memop);
+ tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
if (extend && is_signed) {
g_assert(size < 3);
@@ -649,6 +655,13 @@ static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
}
}
+static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
+ int size, bool is_signed, bool extend)
+{
+ do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
+ get_mem_index(s));
+}
+
/*
* Store from FP register to memory
*/
@@ -1824,6 +1837,7 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn)
* +----+-------+---+-----+-----+---+--------+-----+------+------+
*
* idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
+ 10 -> unprivileged
* V = 0 -> non-vector
* size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
* opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
@@ -1839,6 +1853,7 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn)
bool is_signed = false;
bool is_store = false;
bool is_extended = false;
+ bool is_unpriv = (idx == 2);
bool is_vector = extract32(insn, 26, 1);
bool post_index;
bool writeback;
@@ -1847,7 +1862,7 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn)
if (is_vector) {
size |= (opc & 2) << 1;
- if (size > 4) {
+ if (size > 4 || is_unpriv) {
unallocated_encoding(s);
return;
}
@@ -1855,6 +1870,10 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn)
} else {
if (size == 3 && opc == 2) {
/* PRFM - prefetch */
+ if (is_unpriv) {
+ unallocated_encoding(s);
+ return;
+ }
return;
}
if (opc == 3 && size > 1) {
@@ -1868,6 +1887,7 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn)
switch (idx) {
case 0:
+ case 2:
post_index = false;
writeback = false;
break;
@@ -1879,9 +1899,6 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn)
post_index = false;
writeback = true;
break;
- case 2:
- g_assert(false);
- break;
}
if (rn == 31) {
@@ -1901,10 +1918,13 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn)
}
} else {
TCGv_i64 tcg_rt = cpu_reg(s, rt);
+ int memidx = is_unpriv ? 1 : get_mem_index(s);
+
if (is_store) {
- do_gpr_st(s, tcg_rt, tcg_addr, size);
+ do_gpr_st_memidx(s, tcg_rt, tcg_addr, size, memidx);
} else {
- do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
+ do_gpr_ld_memidx(s, tcg_rt, tcg_addr, size,
+ is_signed, is_extended, memidx);
}
}
@@ -2084,25 +2104,6 @@ static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn)
}
}
-/* Load/store register (immediate forms) */
-static void disas_ldst_reg_imm(DisasContext *s, uint32_t insn)
-{
- switch (extract32(insn, 10, 2)) {
- case 0: case 1: case 3:
- /* Load/store register (unscaled immediate) */
- /* Load/store immediate pre/post-indexed */
- disas_ldst_reg_imm9(s, insn);
- break;
- case 2:
- /* Load/store register unprivileged */
- unsupported_encoding(s, insn);
- break;
- default:
- unallocated_encoding(s);
- break;
- }
-}
-
/* Load/store register (all forms) */
static void disas_ldst_reg(DisasContext *s, uint32_t insn)
{
@@ -2111,7 +2112,11 @@ static void disas_ldst_reg(DisasContext *s, uint32_t insn)
if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
disas_ldst_reg_roffset(s, insn);
} else {
- disas_ldst_reg_imm(s, insn);
+ /* Load/store register (unscaled immediate)
+ * Load/store immediate pre/post-indexed
+ * Load/store register unprivileged
+ */
+ disas_ldst_reg_imm9(s, insn);
}
break;
case 1:
--
1.8.5
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [Qemu-devel] [PATCH 0/6] A64: finish Neon 3-reg-diff category, add LDTR/STTR
2014-02-16 18:21 [Qemu-devel] [PATCH 0/6] A64: finish Neon 3-reg-diff category, add LDTR/STTR Peter Maydell
` (5 preceding siblings ...)
2014-02-16 18:21 ` [Qemu-devel] [PATCH 6/6] target-arm: A64: Implement unprivileged load/store Peter Maydell
@ 2014-02-17 16:30 ` Richard Henderson
6 siblings, 0 replies; 10+ messages in thread
From: Richard Henderson @ 2014-02-17 16:30 UTC (permalink / raw)
To: Peter Maydell, qemu-devel
Cc: Peter Crosthwaite, patches, Michael Matz, Alexander Graf,
Claudio Fontana, Dirk Mueller, Will Newton, Laurent Desnogues,
Alex Bennée, kvmarm, Christoffer Dall
On 02/16/2014 12:21 PM, Peter Maydell wrote:
> Another patchset of random filling in the gaps in our Neon
> coverage. Patches 1 through 5 complete our handling of the
> three-reg-different category of Neon instructions.
>
> Patch 6 implements the "unprivileged load/store" LDTR/STTR
> instructions. (In fact AArch64 Linux doesn't use these, and they're
> pretty pointless in user mode, but they're easy enough to implement.)
>
> I haven't put in any of the fp_access_check() calls because
> I'm assuming these will pass code review before the system
> emulation patchset does; I'll update the latter to add the
> extra checks as necessary at that point.
>
> thanks
> -- PMM
Aside from a nit in 5/6,
Reviewed-by: Richard Henderson <rth@twiddle.net>
r~
^ permalink raw reply [flat|nested] 10+ messages in thread