From: Peter Maydell <peter.maydell@linaro.org>
To: qemu-devel@nongnu.org
Subject: [Qemu-devel] [PULL 25/45] target/arm: Promote consecutive memory ops for aa64
Date: Fri, 19 Oct 2018 17:57:15 +0100 [thread overview]
Message-ID: <20181019165735.22511-26-peter.maydell@linaro.org> (raw)
In-Reply-To: <20181019165735.22511-1-peter.maydell@linaro.org>
From: Richard Henderson <richard.henderson@linaro.org>
For a sequence of loads or stores from a single register,
little-endian operations can be promoted to an 8-byte op.
This can reduce the number of operations by a factor of 8.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20181011205206.3552-5-richard.henderson@linaro.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target/arm/translate-a64.c | 66 +++++++++++++++++++++++---------------
1 file changed, 40 insertions(+), 26 deletions(-)
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 39ac45c0080..f1bd9d7633a 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -1200,25 +1200,23 @@ static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
/* Store from vector register to memory */
static void do_vec_st(DisasContext *s, int srcidx, int element,
- TCGv_i64 tcg_addr, int size)
+ TCGv_i64 tcg_addr, int size, TCGMemOp endian)
{
- TCGMemOp memop = s->be_data + size;
TCGv_i64 tcg_tmp = tcg_temp_new_i64();
read_vec_element(s, tcg_tmp, srcidx, element, size);
- tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
+ tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), endian | size);
tcg_temp_free_i64(tcg_tmp);
}
/* Load from memory to vector register */
static void do_vec_ld(DisasContext *s, int destidx, int element,
- TCGv_i64 tcg_addr, int size)
+ TCGv_i64 tcg_addr, int size, TCGMemOp endian)
{
- TCGMemOp memop = s->be_data + size;
TCGv_i64 tcg_tmp = tcg_temp_new_i64();
- tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
+ tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), endian | size);
write_vec_element(s, tcg_tmp, destidx, element, size);
tcg_temp_free_i64(tcg_tmp);
@@ -3013,9 +3011,10 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
bool is_postidx = extract32(insn, 23, 1);
bool is_q = extract32(insn, 30, 1);
TCGv_i64 tcg_addr, tcg_rn, tcg_ebytes;
+ TCGMemOp endian = s->be_data;
- int ebytes = 1 << size;
- int elements = (is_q ? 128 : 64) / (8 << size);
+ int ebytes; /* bytes per element */
+ int elements; /* elements per vector */
int rpt; /* num iterations */
int selem; /* structure elements */
int r;
@@ -3074,6 +3073,20 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
gen_check_sp_alignment(s);
}
+ /* For our purposes, bytes are always little-endian. */
+ if (size == 0) {
+ endian = MO_LE;
+ }
+
+ /* Consecutive little-endian elements from a single register
+ * can be promoted to a larger little-endian operation.
+ */
+ if (selem == 1 && endian == MO_LE) {
+ size = 3;
+ }
+ ebytes = 1 << size;
+ elements = (is_q ? 16 : 8) / ebytes;
+
tcg_rn = cpu_reg_sp(s, rn);
tcg_addr = tcg_temp_new_i64();
tcg_gen_mov_i64(tcg_addr, tcg_rn);
@@ -3082,32 +3095,33 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
for (r = 0; r < rpt; r++) {
int e;
for (e = 0; e < elements; e++) {
- int tt = (rt + r) % 32;
int xs;
for (xs = 0; xs < selem; xs++) {
+ int tt = (rt + r + xs) % 32;
if (is_store) {
- do_vec_st(s, tt, e, tcg_addr, size);
+ do_vec_st(s, tt, e, tcg_addr, size, endian);
} else {
- do_vec_ld(s, tt, e, tcg_addr, size);
-
- /* For non-quad operations, setting a slice of the low
- * 64 bits of the register clears the high 64 bits (in
- * the ARM ARM pseudocode this is implicit in the fact
- * that 'rval' is a 64 bit wide variable).
- * For quad operations, we might still need to zero the
- * high bits of SVE. We optimize by noticing that we only
- * need to do this the first time we touch a register.
- */
- if (e == 0 && (r == 0 || xs == selem - 1)) {
- clear_vec_high(s, is_q, tt);
- }
+ do_vec_ld(s, tt, e, tcg_addr, size, endian);
}
tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_ebytes);
- tt = (tt + 1) % 32;
}
}
}
+ if (!is_store) {
+ /* For non-quad operations, setting a slice of the low
+ * 64 bits of the register clears the high 64 bits (in
+ * the ARM ARM pseudocode this is implicit in the fact
+ * that 'rval' is a 64 bit wide variable).
+ * For quad operations, we might still need to zero the
+ * high bits of SVE.
+ */
+ for (r = 0; r < rpt * selem; r++) {
+ int tt = (rt + r) % 32;
+ clear_vec_high(s, is_q, tt);
+ }
+ }
+
if (is_postidx) {
int rm = extract32(insn, 16, 5);
if (rm == 31) {
@@ -3228,9 +3242,9 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
} else {
/* Load/store one element per register */
if (is_load) {
- do_vec_ld(s, rt, index, tcg_addr, scale);
+ do_vec_ld(s, rt, index, tcg_addr, scale, s->be_data);
} else {
- do_vec_st(s, rt, index, tcg_addr, scale);
+ do_vec_st(s, rt, index, tcg_addr, scale, s->be_data);
}
}
tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_ebytes);
--
2.19.1
next prev parent reply other threads:[~2018-10-19 17:00 UTC|newest]
Thread overview: 47+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-10-19 16:56 [Qemu-devel] [PULL 00/45] target-arm queue Peter Maydell
2018-10-19 16:56 ` [Qemu-devel] [PULL 01/45] ssi-sd: Make devices picking up backends unavailable with -device Peter Maydell
2018-10-19 16:56 ` [Qemu-devel] [PULL 02/45] target/arm: Add support for VCPU event states Peter Maydell
2018-10-19 16:56 ` [Qemu-devel] [PULL 03/45] target/arm: Move some system registers into a substructure Peter Maydell
2018-10-19 16:56 ` [Qemu-devel] [PULL 04/45] target/arm: V8M should not imply V7VE Peter Maydell
2018-10-19 16:56 ` [Qemu-devel] [PULL 05/45] target/arm: Convert v8 extensions from feature bits to isar tests Peter Maydell
2018-10-19 16:56 ` [Qemu-devel] [PULL 06/45] target/arm: Convert division from feature bits to isar0 tests Peter Maydell
2018-10-19 16:56 ` [Qemu-devel] [PULL 07/45] target/arm: Convert jazelle from feature bit to isar1 test Peter Maydell
2018-10-19 16:56 ` [Qemu-devel] [PULL 08/45] target/arm: Convert t32ee from feature bit to isar3 test Peter Maydell
2018-10-19 16:56 ` [Qemu-devel] [PULL 09/45] target/arm: Convert sve from feature bit to aa64pfr0 test Peter Maydell
2018-10-19 16:57 ` [Qemu-devel] [PULL 10/45] target/arm: Convert v8.2-fp16 " Peter Maydell
2018-10-19 16:57 ` [Qemu-devel] [PULL 11/45] target/arm: Improve debug logging of AArch32 exception return Peter Maydell
2018-10-19 16:57 ` [Qemu-devel] [PULL 12/45] target/arm: Make switch_mode() file-local Peter Maydell
2018-10-19 16:57 ` [Qemu-devel] [PULL 13/45] target/arm: Implement HCR.FB Peter Maydell
2018-10-19 16:57 ` [Qemu-devel] [PULL 14/45] target/arm: Implement HCR.DC Peter Maydell
2018-10-19 16:57 ` [Qemu-devel] [PULL 15/45] target/arm: ISR_EL1 bits track virtual interrupts if IMO/FMO set Peter Maydell
2018-10-19 16:57 ` [Qemu-devel] [PULL 16/45] target/arm: Implement HCR.VI and VF Peter Maydell
2018-10-19 16:57 ` [Qemu-devel] [PULL 17/45] target/arm: Implement HCR.PTW Peter Maydell
2018-10-19 16:57 ` [Qemu-devel] [PULL 18/45] target/arm: New utility function to extract EC from syndrome Peter Maydell
2018-10-19 16:57 ` [Qemu-devel] [PULL 19/45] target/arm: Get IL bit correct for v7 syndrome values Peter Maydell
2018-10-19 16:57 ` [Qemu-devel] [PULL 20/45] target/arm: Report correct syndrome for FP/SIMD traps to Hyp mode Peter Maydell
2018-10-19 16:57 ` [Qemu-devel] [PULL 21/45] hw/arm/boot: Increase compliance with kernel arm64 boot protocol Peter Maydell
2018-10-19 16:57 ` [Qemu-devel] [PULL 22/45] target/arm: Hoist address increment for vector memory ops Peter Maydell
2018-10-19 16:57 ` [Qemu-devel] [PULL 23/45] target/arm: Don't call tcg_clear_temp_count Peter Maydell
2018-10-19 16:57 ` [Qemu-devel] [PULL 24/45] target/arm: Use tcg_gen_gvec_dup_i64 for LD[1-4]R Peter Maydell
2018-10-19 16:57 ` Peter Maydell [this message]
2018-10-19 16:57 ` [Qemu-devel] [PULL 26/45] target/arm: Mark some arrays const Peter Maydell
2018-10-19 16:57 ` [Qemu-devel] [PULL 27/45] target/arm: Use gvec for NEON VDUP Peter Maydell
2018-10-19 16:57 ` [Qemu-devel] [PULL 28/45] target/arm: Use gvec for NEON VMOV, VMVN, VBIC & VORR (immediate) Peter Maydell
2018-10-19 16:57 ` [Qemu-devel] [PULL 29/45] target/arm: Use gvec for NEON_3R_LOGIC insns Peter Maydell
2018-10-19 16:57 ` [Qemu-devel] [PULL 30/45] target/arm: Use gvec for NEON_3R_VADD_VSUB insns Peter Maydell
2018-10-19 16:57 ` [Qemu-devel] [PULL 31/45] target/arm: Use gvec for NEON_2RM_VMN, NEON_2RM_VNEG Peter Maydell
2018-10-19 16:57 ` [Qemu-devel] [PULL 32/45] target/arm: Use gvec for NEON_3R_VMUL Peter Maydell
2018-10-19 16:57 ` [Qemu-devel] [PULL 33/45] target/arm: Use gvec for VSHR, VSHL Peter Maydell
2018-10-19 16:57 ` [Qemu-devel] [PULL 34/45] target/arm: Use gvec for VSRA Peter Maydell
2018-10-19 16:57 ` [Qemu-devel] [PULL 35/45] target/arm: Use gvec for VSRI, VSLI Peter Maydell
2018-10-19 16:57 ` [Qemu-devel] [PULL 36/45] target/arm: Use gvec for NEON_3R_VML Peter Maydell
2018-10-19 16:57 ` [Qemu-devel] [PULL 37/45] target/arm: Use gvec for NEON_3R_VTST_VCEQ, NEON_3R_VCGT, NEON_3R_VCGE Peter Maydell
2018-10-19 16:57 ` [Qemu-devel] [PULL 38/45] target/arm: Use gvec for NEON VLD all lanes Peter Maydell
2018-10-19 16:57 ` [Qemu-devel] [PULL 39/45] target/arm: Reorg NEON VLD/VST all elements Peter Maydell
2018-11-02 16:31 ` Laurent Vivier
2018-10-19 16:57 ` [Qemu-devel] [PULL 40/45] target/arm: Promote consecutive memory ops for aa32 Peter Maydell
2018-10-19 16:57 ` [Qemu-devel] [PULL 41/45] target/arm: Reorg NEON VLD/VST single element to one lane Peter Maydell
2018-10-19 16:57 ` [Qemu-devel] [PULL 42/45] net: cadence_gem: Announce availability of priority queues Peter Maydell
2018-10-19 16:57 ` [Qemu-devel] [PULL 43/45] net: cadence_gem: Announce 64bit addressing support Peter Maydell
2018-10-19 16:57 ` [Qemu-devel] [PULL 44/45] target/arm: Remove writefn from TTBR0_EL3 Peter Maydell
2018-10-19 16:57 ` [Qemu-devel] [PULL 45/45] target/arm: Only flush tlb if ASID changes Peter Maydell
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20181019165735.22511-26-peter.maydell@linaro.org \
--to=peter.maydell@linaro.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).