From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: peter.maydell@linaro.org, qemu-arm@nongnu.org
Subject: [PATCH v3 17/18] target/arm: Reuse sve_probe_page for gather loads
Date: Tue, 21 Apr 2020 21:33:08 -0700
Message-ID: <20200422043309.18430-18-richard.henderson@linaro.org>
In-Reply-To: <20200422043309.18430-1-richard.henderson@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/sve_helper.c | 208 +++++++++++++++++++++-------------------
1 file changed, 109 insertions(+), 99 deletions(-)
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index f4cdeecdcb..fffde4b6ec 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -5120,130 +5120,140 @@ static target_ulong off_zd_d(void *reg, intptr_t reg_ofs)
return *(uint64_t *)(reg + reg_ofs);
}
-static void sve_ld1_zs(CPUARMState *env, void *vd, void *vg, void *vm,
- target_ulong base, uint32_t desc, uintptr_t ra,
- zreg_off_fn *off_fn, sve_ldst1_tlb_fn *tlb_fn)
+static inline QEMU_ALWAYS_INLINE
+void sve_ld1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
+ target_ulong base, uint32_t desc, uintptr_t retaddr,
+ int esize, int msize, zreg_off_fn *off_fn,
+ sve_ldst1_host_fn *host_fn,
+ sve_ldst1_tlb_fn *tlb_fn)
{
const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
- intptr_t i, oprsz = simd_oprsz(desc);
- ARMVectorReg scratch = { };
+ const int mmu_idx = cpu_mmu_index(env, false);
+ const intptr_t reg_max = simd_oprsz(desc);
+ ARMVectorReg scratch;
+ intptr_t reg_off;
+ SVEHostPage info, info2;
- for (i = 0; i < oprsz; ) {
- uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
+ memset(&scratch, 0, reg_max);
+ reg_off = 0;
+ do {
+ uint64_t pg = vg[reg_off >> 6];
do {
if (likely(pg & 1)) {
- target_ulong off = off_fn(vm, i);
- tlb_fn(env, &scratch, i, base + (off << scale), ra);
+ target_ulong addr = base + (off_fn(vm, reg_off) << scale);
+ target_ulong in_page = -(addr | TARGET_PAGE_MASK);
+
+ sve_probe_page(&info, false, env, addr, 0, MMU_DATA_LOAD,
+ mmu_idx, retaddr);
+
+ if (likely(in_page >= msize)) {
+ if (unlikely(info.flags & TLB_WATCHPOINT)) {
+ cpu_check_watchpoint(env_cpu(env), addr, msize,
+ info.attrs, BP_MEM_READ, retaddr);
+ }
+ /* TODO: MTE check */
+ host_fn(&scratch, reg_off, info.host);
+ } else {
+ /* Element crosses the page boundary. */
+ sve_probe_page(&info2, false, env, addr + in_page, 0,
+ MMU_DATA_LOAD, mmu_idx, retaddr);
+ if (unlikely((info.flags | info2.flags) & TLB_WATCHPOINT)) {
+ cpu_check_watchpoint(env_cpu(env), addr,
+ msize, info.attrs,
+ BP_MEM_READ, retaddr);
+ }
+ /* TODO: MTE check */
+ tlb_fn(env, &scratch, reg_off, addr, retaddr);
+ }
}
- i += 4, pg >>= 4;
- } while (i & 15);
- }
+ reg_off += esize;
+ pg >>= esize;
+ } while (reg_off & 63);
+ } while (reg_off < reg_max);
/* Wait until all exceptions have been raised to write back. */
- memcpy(vd, &scratch, oprsz);
+ memcpy(vd, &scratch, reg_max);
}
-static void sve_ld1_zd(CPUARMState *env, void *vd, void *vg, void *vm,
- target_ulong base, uint32_t desc, uintptr_t ra,
- zreg_off_fn *off_fn, sve_ldst1_tlb_fn *tlb_fn)
-{
- const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
- intptr_t i, oprsz = simd_oprsz(desc) / 8;
- ARMVectorReg scratch = { };
-
- for (i = 0; i < oprsz; i++) {
- uint8_t pg = *(uint8_t *)(vg + H1(i));
- if (likely(pg & 1)) {
- target_ulong off = off_fn(vm, i * 8);
- tlb_fn(env, &scratch, i * 8, base + (off << scale), ra);
- }
- }
-
- /* Wait until all exceptions have been raised to write back. */
- memcpy(vd, &scratch, oprsz * 8);
+#define DO_LD1_ZPZ_S(MEM, OFS, MSZ) \
+void HELPER(sve_ld##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \
+ void *vm, target_ulong base, uint32_t desc) \
+{ \
+ sve_ld1_z(env, vd, vg, vm, base, desc, GETPC(), 4, 1 << MSZ, \
+ off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \
}
-#define DO_LD1_ZPZ_S(MEM, OFS) \
-void QEMU_FLATTEN HELPER(sve_ld##MEM##_##OFS) \
- (CPUARMState *env, void *vd, void *vg, void *vm, \
- target_ulong base, uint32_t desc) \
-{ \
- sve_ld1_zs(env, vd, vg, vm, base, desc, GETPC(), \
- off_##OFS##_s, sve_ld1##MEM##_tlb); \
+#define DO_LD1_ZPZ_D(MEM, OFS, MSZ) \
+void HELPER(sve_ld##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \
+ void *vm, target_ulong base, uint32_t desc) \
+{ \
+ sve_ld1_z(env, vd, vg, vm, base, desc, GETPC(), 8, 1 << MSZ, \
+ off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \
}
-#define DO_LD1_ZPZ_D(MEM, OFS) \
-void QEMU_FLATTEN HELPER(sve_ld##MEM##_##OFS) \
- (CPUARMState *env, void *vd, void *vg, void *vm, \
- target_ulong base, uint32_t desc) \
-{ \
- sve_ld1_zd(env, vd, vg, vm, base, desc, GETPC(), \
- off_##OFS##_d, sve_ld1##MEM##_tlb); \
-}
+DO_LD1_ZPZ_S(bsu, zsu, MO_8)
+DO_LD1_ZPZ_S(bsu, zss, MO_8)
+DO_LD1_ZPZ_D(bdu, zsu, MO_8)
+DO_LD1_ZPZ_D(bdu, zss, MO_8)
+DO_LD1_ZPZ_D(bdu, zd, MO_8)
-DO_LD1_ZPZ_S(bsu, zsu)
-DO_LD1_ZPZ_S(bsu, zss)
-DO_LD1_ZPZ_D(bdu, zsu)
-DO_LD1_ZPZ_D(bdu, zss)
-DO_LD1_ZPZ_D(bdu, zd)
+DO_LD1_ZPZ_S(bss, zsu, MO_8)
+DO_LD1_ZPZ_S(bss, zss, MO_8)
+DO_LD1_ZPZ_D(bds, zsu, MO_8)
+DO_LD1_ZPZ_D(bds, zss, MO_8)
+DO_LD1_ZPZ_D(bds, zd, MO_8)
-DO_LD1_ZPZ_S(bss, zsu)
-DO_LD1_ZPZ_S(bss, zss)
-DO_LD1_ZPZ_D(bds, zsu)
-DO_LD1_ZPZ_D(bds, zss)
-DO_LD1_ZPZ_D(bds, zd)
+DO_LD1_ZPZ_S(hsu_le, zsu, MO_16)
+DO_LD1_ZPZ_S(hsu_le, zss, MO_16)
+DO_LD1_ZPZ_D(hdu_le, zsu, MO_16)
+DO_LD1_ZPZ_D(hdu_le, zss, MO_16)
+DO_LD1_ZPZ_D(hdu_le, zd, MO_16)
-DO_LD1_ZPZ_S(hsu_le, zsu)
-DO_LD1_ZPZ_S(hsu_le, zss)
-DO_LD1_ZPZ_D(hdu_le, zsu)
-DO_LD1_ZPZ_D(hdu_le, zss)
-DO_LD1_ZPZ_D(hdu_le, zd)
+DO_LD1_ZPZ_S(hsu_be, zsu, MO_16)
+DO_LD1_ZPZ_S(hsu_be, zss, MO_16)
+DO_LD1_ZPZ_D(hdu_be, zsu, MO_16)
+DO_LD1_ZPZ_D(hdu_be, zss, MO_16)
+DO_LD1_ZPZ_D(hdu_be, zd, MO_16)
-DO_LD1_ZPZ_S(hsu_be, zsu)
-DO_LD1_ZPZ_S(hsu_be, zss)
-DO_LD1_ZPZ_D(hdu_be, zsu)
-DO_LD1_ZPZ_D(hdu_be, zss)
-DO_LD1_ZPZ_D(hdu_be, zd)
+DO_LD1_ZPZ_S(hss_le, zsu, MO_16)
+DO_LD1_ZPZ_S(hss_le, zss, MO_16)
+DO_LD1_ZPZ_D(hds_le, zsu, MO_16)
+DO_LD1_ZPZ_D(hds_le, zss, MO_16)
+DO_LD1_ZPZ_D(hds_le, zd, MO_16)
-DO_LD1_ZPZ_S(hss_le, zsu)
-DO_LD1_ZPZ_S(hss_le, zss)
-DO_LD1_ZPZ_D(hds_le, zsu)
-DO_LD1_ZPZ_D(hds_le, zss)
-DO_LD1_ZPZ_D(hds_le, zd)
+DO_LD1_ZPZ_S(hss_be, zsu, MO_16)
+DO_LD1_ZPZ_S(hss_be, zss, MO_16)
+DO_LD1_ZPZ_D(hds_be, zsu, MO_16)
+DO_LD1_ZPZ_D(hds_be, zss, MO_16)
+DO_LD1_ZPZ_D(hds_be, zd, MO_16)
-DO_LD1_ZPZ_S(hss_be, zsu)
-DO_LD1_ZPZ_S(hss_be, zss)
-DO_LD1_ZPZ_D(hds_be, zsu)
-DO_LD1_ZPZ_D(hds_be, zss)
-DO_LD1_ZPZ_D(hds_be, zd)
+DO_LD1_ZPZ_S(ss_le, zsu, MO_32)
+DO_LD1_ZPZ_S(ss_le, zss, MO_32)
+DO_LD1_ZPZ_D(sdu_le, zsu, MO_32)
+DO_LD1_ZPZ_D(sdu_le, zss, MO_32)
+DO_LD1_ZPZ_D(sdu_le, zd, MO_32)
-DO_LD1_ZPZ_S(ss_le, zsu)
-DO_LD1_ZPZ_S(ss_le, zss)
-DO_LD1_ZPZ_D(sdu_le, zsu)
-DO_LD1_ZPZ_D(sdu_le, zss)
-DO_LD1_ZPZ_D(sdu_le, zd)
+DO_LD1_ZPZ_S(ss_be, zsu, MO_32)
+DO_LD1_ZPZ_S(ss_be, zss, MO_32)
+DO_LD1_ZPZ_D(sdu_be, zsu, MO_32)
+DO_LD1_ZPZ_D(sdu_be, zss, MO_32)
+DO_LD1_ZPZ_D(sdu_be, zd, MO_32)
-DO_LD1_ZPZ_S(ss_be, zsu)
-DO_LD1_ZPZ_S(ss_be, zss)
-DO_LD1_ZPZ_D(sdu_be, zsu)
-DO_LD1_ZPZ_D(sdu_be, zss)
-DO_LD1_ZPZ_D(sdu_be, zd)
+DO_LD1_ZPZ_D(sds_le, zsu, MO_32)
+DO_LD1_ZPZ_D(sds_le, zss, MO_32)
+DO_LD1_ZPZ_D(sds_le, zd, MO_32)
-DO_LD1_ZPZ_D(sds_le, zsu)
-DO_LD1_ZPZ_D(sds_le, zss)
-DO_LD1_ZPZ_D(sds_le, zd)
+DO_LD1_ZPZ_D(sds_be, zsu, MO_32)
+DO_LD1_ZPZ_D(sds_be, zss, MO_32)
+DO_LD1_ZPZ_D(sds_be, zd, MO_32)
-DO_LD1_ZPZ_D(sds_be, zsu)
-DO_LD1_ZPZ_D(sds_be, zss)
-DO_LD1_ZPZ_D(sds_be, zd)
+DO_LD1_ZPZ_D(dd_le, zsu, MO_64)
+DO_LD1_ZPZ_D(dd_le, zss, MO_64)
+DO_LD1_ZPZ_D(dd_le, zd, MO_64)
-DO_LD1_ZPZ_D(dd_le, zsu)
-DO_LD1_ZPZ_D(dd_le, zss)
-DO_LD1_ZPZ_D(dd_le, zd)
-
-DO_LD1_ZPZ_D(dd_be, zsu)
-DO_LD1_ZPZ_D(dd_be, zss)
-DO_LD1_ZPZ_D(dd_be, zd)
+DO_LD1_ZPZ_D(dd_be, zsu, MO_64)
+DO_LD1_ZPZ_D(dd_be, zss, MO_64)
+DO_LD1_ZPZ_D(dd_be, zd, MO_64)
#undef DO_LD1_ZPZ_S
#undef DO_LD1_ZPZ_D
--
2.20.1
next prev parent reply other threads:[~2020-04-22 4:47 UTC|newest]
Thread overview: 34+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-04-22 4:32 [PATCH v3 00/18] target/arm: sve load/store improvements Richard Henderson
2020-04-22 4:32 ` [PATCH v3 01/18] exec: Add block comments for watchpoint routines Richard Henderson
2020-04-27 9:27 ` Peter Maydell
2020-04-22 4:32 ` [PATCH v3 02/18] exec: Fix cpu_watchpoint_address_matches address length Richard Henderson
2020-04-27 9:28 ` Peter Maydell
2020-04-22 4:32 ` [PATCH v3 03/18] accel/tcg: Add block comment for probe_access Richard Henderson
2020-04-22 4:32 ` [PATCH v3 04/18] accel/tcg: Add probe_access_flags Richard Henderson
2020-04-27 10:48 ` Peter Maydell
2020-04-27 16:00 ` Richard Henderson
2020-05-04 9:39 ` Peter Maydell
2020-04-22 4:32 ` [PATCH v3 05/18] accel/tcg: Add endian-specific cpu_{ld, st}* operations Richard Henderson
2020-04-27 9:46 ` Peter Maydell
2020-04-22 4:32 ` [PATCH v3 06/18] target/arm: Use cpu_*_data_ra for sve_ldst_tlb_fn Richard Henderson
2020-04-27 10:51 ` Peter Maydell
2020-04-22 4:32 ` [PATCH v3 07/18] target/arm: Drop manual handling of set/clear_helper_retaddr Richard Henderson
2020-04-22 4:32 ` [PATCH v3 08/18] target/arm: Add sve infrastructure for page lookup Richard Henderson
2020-04-27 11:00 ` Peter Maydell
2020-04-22 4:33 ` [PATCH v3 09/18] target/arm: Adjust interface of sve_ld1_host_fn Richard Henderson
2020-04-22 4:33 ` [PATCH v3 10/18] target/arm: Use SVEContLdSt in sve_ld1_r Richard Henderson
2020-04-22 4:33 ` [PATCH v3 11/18] target/arm: Handle watchpoints " Richard Henderson
2020-04-22 4:33 ` [PATCH v3 12/18] target/arm: Use SVEContLdSt for multi-register contiguous loads Richard Henderson
2020-04-22 4:33 ` [PATCH v3 13/18] target/arm: Update contiguous first-fault and no-fault loads Richard Henderson
2020-04-27 11:03 ` Peter Maydell
2020-04-27 16:16 ` Richard Henderson
2020-04-27 16:32 ` Peter Maydell
2020-04-27 16:45 ` Richard Henderson
2020-04-27 18:38 ` Peter Maydell
2020-04-28 15:02 ` Richard Henderson
2020-04-22 4:33 ` [PATCH v3 14/18] target/arm: Use SVEContLdSt for contiguous stores Richard Henderson
2020-04-22 4:33 ` [PATCH v3 15/18] target/arm: Reuse sve_probe_page for gather first-fault loads Richard Henderson
2020-04-22 4:33 ` [PATCH v3 16/18] target/arm: Reuse sve_probe_page for scatter stores Richard Henderson
2020-04-22 4:33 ` Richard Henderson [this message]
2020-04-22 4:33 ` [PATCH v3 18/18] target/arm: Remove sve_memopidx Richard Henderson
2020-04-22 5:37 ` [PATCH v3 00/18] target/arm: sve load/store improvements no-reply
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200422043309.18430-18-richard.henderson@linaro.org \
--to=richard.henderson@linaro.org \
--cc=peter.maydell@linaro.org \
--cc=qemu-arm@nongnu.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).