* [PATCH v3 0/2] target/arm: vector tail cleanups
@ 2020-05-19 21:24 Richard Henderson
2020-05-19 21:24 ` [PATCH v3 1/2] target/arm: Use tcg_gen_gvec_mov for clear_vec_high Richard Henderson
` (2 more replies)
0 siblings, 3 replies; 4+ messages in thread
From: Richard Henderson @ 2020-05-19 21:24 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell
Version 3 fixes the reported bug in EXT.
I should make sure I have fixed the bug wherein RISU prints a
mismatch and still exits with success, which hid this problem
in the scrollback.
r~
Richard Henderson (2):
target/arm: Use tcg_gen_gvec_mov for clear_vec_high
target/arm: Use clear_vec_high more effectively
target/arm/translate-a64.c | 63 ++++++++++++++++++++------------------
1 file changed, 34 insertions(+), 29 deletions(-)
--
2.20.1
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH v3 1/2] target/arm: Use tcg_gen_gvec_mov for clear_vec_high
2020-05-19 21:24 [PATCH v3 0/2] target/arm: vector tail cleanups Richard Henderson
@ 2020-05-19 21:24 ` Richard Henderson
2020-05-19 21:24 ` [PATCH v3 2/2] target/arm: Use clear_vec_high more effectively Richard Henderson
2020-05-21 16:15 ` [PATCH v3 0/2] target/arm: vector tail cleanups Peter Maydell
2 siblings, 0 replies; 4+ messages in thread
From: Richard Henderson @ 2020-05-19 21:24 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Alex Bennée
The 8-byte store for the end of a !is_q operation can be
merged with the other stores. Use a no-op vector move
to trigger the expand_clr portion of tcg_gen_gvec_mov.
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/translate-a64.c | 10 ++--------
1 file changed, 2 insertions(+), 8 deletions(-)
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 991e451644..4f6edb2892 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -496,14 +496,8 @@ static void clear_vec_high(DisasContext *s, bool is_q, int rd)
unsigned ofs = fp_reg_offset(s, rd, MO_64);
unsigned vsz = vec_full_reg_size(s);
- if (!is_q) {
- TCGv_i64 tcg_zero = tcg_const_i64(0);
- tcg_gen_st_i64(tcg_zero, cpu_env, ofs + 8);
- tcg_temp_free_i64(tcg_zero);
- }
- if (vsz > 16) {
- tcg_gen_gvec_dup_imm(MO_64, ofs + 16, vsz - 16, vsz - 16, 0);
- }
+ /* Nop move, with side effect of clearing the tail. */
+ tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
}
void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
--
2.20.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH v3 2/2] target/arm: Use clear_vec_high more effectively
2020-05-19 21:24 [PATCH v3 0/2] target/arm: vector tail cleanups Richard Henderson
2020-05-19 21:24 ` [PATCH v3 1/2] target/arm: Use tcg_gen_gvec_mov for clear_vec_high Richard Henderson
@ 2020-05-19 21:24 ` Richard Henderson
2020-05-21 16:15 ` [PATCH v3 0/2] target/arm: vector tail cleanups Peter Maydell
2 siblings, 0 replies; 4+ messages in thread
From: Richard Henderson @ 2020-05-19 21:24 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, Alex Bennée
Do not explicitly store zero to the NEON high part
when we can pass !is_q to clear_vec_high.
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/translate-a64.c | 53 +++++++++++++++++++++++---------------
1 file changed, 32 insertions(+), 21 deletions(-)
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 4f6edb2892..874f3eb4f9 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -900,11 +900,10 @@ static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
{
/* This always zero-extends and writes to a full 128 bit wide vector */
TCGv_i64 tmplo = tcg_temp_new_i64();
- TCGv_i64 tmphi;
+ TCGv_i64 tmphi = NULL;
if (size < 4) {
MemOp memop = s->be_data + size;
- tmphi = tcg_const_i64(0);
tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
} else {
bool be = s->be_data == MO_BE;
@@ -922,12 +921,13 @@ static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
}
tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
- tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
-
tcg_temp_free_i64(tmplo);
- tcg_temp_free_i64(tmphi);
- clear_vec_high(s, true, destidx);
+ if (tmphi) {
+ tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
+ tcg_temp_free_i64(tmphi);
+ }
+ clear_vec_high(s, tmphi != NULL, destidx);
}
/*
@@ -6934,7 +6934,6 @@ static void disas_simd_ext(DisasContext *s, uint32_t insn)
read_vec_element(s, tcg_resh, rm, 0, MO_64);
do_ext64(s, tcg_resh, tcg_resl, pos);
}
- tcg_gen_movi_i64(tcg_resh, 0);
} else {
TCGv_i64 tcg_hh;
typedef struct {
@@ -6964,9 +6963,11 @@ static void disas_simd_ext(DisasContext *s, uint32_t insn)
write_vec_element(s, tcg_resl, rd, 0, MO_64);
tcg_temp_free_i64(tcg_resl);
- write_vec_element(s, tcg_resh, rd, 1, MO_64);
+ if (is_q) {
+ write_vec_element(s, tcg_resh, rd, 1, MO_64);
+ }
tcg_temp_free_i64(tcg_resh);
- clear_vec_high(s, true, rd);
+ clear_vec_high(s, is_q, rd);
}
/* TBL/TBX
@@ -7003,17 +7004,21 @@ static void disas_simd_tb(DisasContext *s, uint32_t insn)
* the input.
*/
tcg_resl = tcg_temp_new_i64();
- tcg_resh = tcg_temp_new_i64();
+ tcg_resh = NULL;
if (is_tblx) {
read_vec_element(s, tcg_resl, rd, 0, MO_64);
} else {
tcg_gen_movi_i64(tcg_resl, 0);
}
- if (is_tblx && is_q) {
- read_vec_element(s, tcg_resh, rd, 1, MO_64);
- } else {
- tcg_gen_movi_i64(tcg_resh, 0);
+
+ if (is_q) {
+ tcg_resh = tcg_temp_new_i64();
+ if (is_tblx) {
+ read_vec_element(s, tcg_resh, rd, 1, MO_64);
+ } else {
+ tcg_gen_movi_i64(tcg_resh, 0);
+ }
}
tcg_idx = tcg_temp_new_i64();
@@ -7033,9 +7038,12 @@ static void disas_simd_tb(DisasContext *s, uint32_t insn)
write_vec_element(s, tcg_resl, rd, 0, MO_64);
tcg_temp_free_i64(tcg_resl);
- write_vec_element(s, tcg_resh, rd, 1, MO_64);
- tcg_temp_free_i64(tcg_resh);
- clear_vec_high(s, true, rd);
+
+ if (is_q) {
+ write_vec_element(s, tcg_resh, rd, 1, MO_64);
+ tcg_temp_free_i64(tcg_resh);
+ }
+ clear_vec_high(s, is_q, rd);
}
/* ZIP/UZP/TRN
@@ -7072,7 +7080,7 @@ static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
}
tcg_resl = tcg_const_i64(0);
- tcg_resh = tcg_const_i64(0);
+ tcg_resh = is_q ? tcg_const_i64(0) : NULL;
tcg_res = tcg_temp_new_i64();
for (i = 0; i < elements; i++) {
@@ -7123,9 +7131,12 @@ static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
write_vec_element(s, tcg_resl, rd, 0, MO_64);
tcg_temp_free_i64(tcg_resl);
- write_vec_element(s, tcg_resh, rd, 1, MO_64);
- tcg_temp_free_i64(tcg_resh);
- clear_vec_high(s, true, rd);
+
+ if (is_q) {
+ write_vec_element(s, tcg_resh, rd, 1, MO_64);
+ tcg_temp_free_i64(tcg_resh);
+ }
+ clear_vec_high(s, is_q, rd);
}
/*
--
2.20.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH v3 0/2] target/arm: vector tail cleanups
2020-05-19 21:24 [PATCH v3 0/2] target/arm: vector tail cleanups Richard Henderson
2020-05-19 21:24 ` [PATCH v3 1/2] target/arm: Use tcg_gen_gvec_mov for clear_vec_high Richard Henderson
2020-05-19 21:24 ` [PATCH v3 2/2] target/arm: Use clear_vec_high more effectively Richard Henderson
@ 2020-05-21 16:15 ` Peter Maydell
2 siblings, 0 replies; 4+ messages in thread
From: Peter Maydell @ 2020-05-21 16:15 UTC (permalink / raw)
To: Richard Henderson; +Cc: QEMU Developers
On Tue, 19 May 2020 at 22:24, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Version 3 fixes the reported bug in EXT.
>
> I should make sure I have fixed the bug wherein RISU prints a
> mismatch and still exits with success, which hid this problem
> in the scrollback.
Applied to target-arm.next, thanks.
-- PMM
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2020-05-21 16:16 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-05-19 21:24 [PATCH v3 0/2] target/arm: vector tail cleanups Richard Henderson
2020-05-19 21:24 ` [PATCH v3 1/2] target/arm: Use tcg_gen_gvec_mov for clear_vec_high Richard Henderson
2020-05-19 21:24 ` [PATCH v3 2/2] target/arm: Use clear_vec_high more effectively Richard Henderson
2020-05-21 16:15 ` [PATCH v3 0/2] target/arm: vector tail cleanups Peter Maydell
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).