From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Subject: [Qemu-devel] [PATCH v4 38/39] tcg/arm: Use LDRD to load tlb mask+table
Date: Tue, 4 Jun 2019 15:33:50 -0500 [thread overview]
Message-ID: <20190604203351.27778-39-richard.henderson@linaro.org> (raw)
In-Reply-To: <20190604203351.27778-1-richard.henderson@linaro.org>
This changes the code generated for the tlb lookup from, e.g.,
ldr ip, [r6, #-0x10]
ldr r2, [r6, #-0xc]
and ip, ip, r4, lsr #8
ldrd r0, r1, [r2, ip]!
ldr r2, [r2, #0x18]
to
ldrd r0, r1, [r6, #-0x10]
and r0, r0, r4, lsr #8
ldrd r2, r3, [r1, r0]!
ldr r1, [r1, #0x18]
for armv7 hosts. Rearranging the register allocation in
order to avoid overlap between the two ldrd pairs causes
the patch to be larger than it ordinarily would be.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
v3: Add QEMU_BUILD_BUG_ON for mask/table ordering; comment fixes.
---
tcg/arm/tcg-target.inc.c | 92 +++++++++++++++++++++++-----------------
1 file changed, 53 insertions(+), 39 deletions(-)
diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
index b066e30f0e..276e843627 100644
--- a/tcg/arm/tcg-target.inc.c
+++ b/tcg/arm/tcg-target.inc.c
@@ -267,6 +267,7 @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
#endif
break;
@@ -1224,6 +1225,10 @@ static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -256);
+/* These offsets are built into the LDRD below. */
+QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
+QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 4);
+
/* Load and compare a TLB entry, leaving the flags set. Returns the register
containing the addend of the tlb entry. Clobbers R0, R1, R2, TMP. */
@@ -1238,47 +1243,54 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
unsigned s_bits = opc & MO_SIZE;
unsigned a_bits = get_alignment_bits(opc);
- /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
- tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP, TCG_AREG0, mask_off);
- tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R2, TCG_AREG0, table_off);
-
- /* Extract the tlb index from the address into TMP. */
- tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, addrlo,
- SHIFT_IMM_LSR(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
-
/*
- * Add the tlb_table pointer, creating the CPUTLBEntry address in R2.
- * Load the tlb comparator into R0/R1 and the fast path addend into R2.
+ * We don't support inline unaligned accesses, but we can easily
+ * support overalignment checks.
*/
- if (cmp_off == 0) {
- if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
- tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R0, TCG_REG_R2, TCG_REG_TMP);
- } else {
- tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R0, TCG_REG_R2, TCG_REG_TMP);
- }
- } else {
- tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
- TCG_REG_R2, TCG_REG_R2, TCG_REG_TMP, 0);
- if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
- tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
- } else {
- tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
- }
- }
- if (!use_armv6_instructions && TARGET_LONG_BITS == 64) {
- tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2, cmp_off + 4);
- }
-
- /* Load the tlb addend. */
- tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R2,
- offsetof(CPUTLBEntry, addend));
-
- /* Check alignment. We don't support inline unaligned acceses,
- but we can easily support overalignment checks. */
if (a_bits < s_bits) {
a_bits = s_bits;
}
+ /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {r0,r1}. */
+ if (use_armv6_instructions) {
+ tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off);
+ } else {
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R0, TCG_AREG0, mask_off);
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R1, TCG_AREG0, table_off);
+ }
+
+ /* Extract the tlb index from the address into R0. */
+ tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addrlo,
+ SHIFT_IMM_LSR(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
+
+ /*
+ * Add the tlb_table pointer, creating the CPUTLBEntry address in R1.
+ * Load the tlb comparator into R2/R3 and the fast path addend into R1.
+ */
+ if (cmp_off == 0) {
+ if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
+ tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
+ } else {
+ tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
+ }
+ } else {
+ tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
+ TCG_REG_R1, TCG_REG_R1, TCG_REG_R0, 0);
+ if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
+ tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
+ } else {
+ tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
+ }
+ }
+ if (!use_armv6_instructions && TARGET_LONG_BITS == 64) {
+ tcg_out_ld32_12(s, COND_AL, TCG_REG_R3, TCG_REG_R1, cmp_off + 4);
+ }
+
+ /* Load the tlb addend. */
+ tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R1,
+ offsetof(CPUTLBEntry, addend));
+
+ /* Check alignment, check comparators. */
if (use_armv7_instructions) {
tcg_target_ulong mask = ~(TARGET_PAGE_MASK | ((1 << a_bits) - 1));
int rot = encode_imm(mask);
@@ -1291,22 +1303,24 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
addrlo, TCG_REG_TMP, 0);
}
- tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R0, TCG_REG_TMP, 0);
+ tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, 0);
} else {
if (a_bits) {
tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo,
(1 << a_bits) - 1);
}
+ tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, addrlo,
+ SHIFT_IMM_LSR(TARGET_PAGE_BITS));
tcg_out_dat_reg(s, (a_bits ? COND_EQ : COND_AL), ARITH_CMP,
- 0, TCG_REG_R0, TCG_REG_TMP,
+ 0, TCG_REG_R2, TCG_REG_TMP,
SHIFT_IMM_LSL(TARGET_PAGE_BITS));
}
if (TARGET_LONG_BITS == 64) {
- tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R1, addrhi, 0);
+ tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0);
}
- return TCG_REG_R2;
+ return TCG_REG_R1;
}
/* Record the context of a call to the out of line helper code for the slow
--
2.17.1
next prev parent reply other threads:[~2019-06-04 21:08 UTC|newest]
Thread overview: 45+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-06-04 20:33 [Qemu-devel] [PATCH v4 00/39] tcg: Move the softmmu tlb to CPUNegativeOffsetState Richard Henderson
2019-06-04 20:33 ` [Qemu-devel] [PATCH v4 01/39] tcg: Fold CPUTLBWindow into CPUTLBDesc Richard Henderson
2019-06-04 20:33 ` [Qemu-devel] [PATCH v4 02/39] tcg: Split out target/arch/cpu-param.h Richard Henderson
2019-06-04 20:33 ` [Qemu-devel] [PATCH v4 03/39] tcg: Create struct CPUTLB Richard Henderson
2019-06-04 20:33 ` [Qemu-devel] [PATCH v4 04/39] cpu: Define CPUArchState with typedef Richard Henderson
2019-06-04 20:33 ` [Qemu-devel] [PATCH v4 05/39] cpu: Define ArchCPU Richard Henderson
2019-06-04 20:33 ` [Qemu-devel] [PATCH v4 06/39] cpu: Replace ENV_GET_CPU with env_cpu Richard Henderson
2019-06-04 20:33 ` [Qemu-devel] [PATCH v4 07/39] cpu: Introduce env_archcpu Richard Henderson
2019-06-04 20:33 ` [Qemu-devel] [PATCH v4 08/39] target/alpha: Use env_cpu, env_archcpu Richard Henderson
2019-06-04 20:33 ` [Qemu-devel] [PATCH v4 09/39] target/arm: " Richard Henderson
2019-06-04 20:33 ` [Qemu-devel] [PATCH v4 10/39] target/cris: Reindent mmu.c Richard Henderson
2019-06-04 20:33 ` [Qemu-devel] [PATCH v4 11/39] target/cris: Reindent op_helper.c Richard Henderson
2019-06-04 20:33 ` [Qemu-devel] [PATCH v4 12/39] target/cris: Use env_cpu, env_archcpu Richard Henderson
2019-06-04 20:33 ` [Qemu-devel] [PATCH v4 13/39] target/hppa: " Richard Henderson
2019-06-04 20:33 ` [Qemu-devel] [PATCH v4 14/39] target/i386: " Richard Henderson
2019-06-04 20:33 ` [Qemu-devel] [PATCH v4 15/39] target/lm32: " Richard Henderson
2019-06-04 20:33 ` [Qemu-devel] [PATCH v4 16/39] target/m68k: " Richard Henderson
2019-06-05 11:15 ` Laurent Vivier
2019-06-04 20:33 ` [Qemu-devel] [PATCH v4 17/39] target/microblaze: " Richard Henderson
2019-06-04 20:33 ` [Qemu-devel] [PATCH v4 18/39] target/mips: " Richard Henderson
2019-06-04 20:33 ` [Qemu-devel] [PATCH v4 19/39] target/moxie: " Richard Henderson
2019-06-04 20:33 ` [Qemu-devel] [PATCH v4 20/39] target/nios2: " Richard Henderson
2019-06-04 20:33 ` [Qemu-devel] [PATCH v4 21/39] target/openrisc: " Richard Henderson
2019-06-04 20:33 ` [Qemu-devel] [PATCH v4 22/39] target/ppc: " Richard Henderson
2019-06-04 20:33 ` [Qemu-devel] [PATCH v4 23/39] target/riscv: " Richard Henderson
2019-06-04 20:33 ` [Qemu-devel] [PATCH v4 24/39] target/s390x: " Richard Henderson
2019-06-04 20:33 ` [Qemu-devel] [PATCH v4 25/39] target/sh4: " Richard Henderson
2019-06-04 20:33 ` [Qemu-devel] [PATCH v4 26/39] target/sparc: " Richard Henderson
2019-06-04 20:33 ` [Qemu-devel] [PATCH v4 27/39] target/tilegx: Use env_cpu Richard Henderson
2019-06-04 20:33 ` [Qemu-devel] [PATCH v4 28/39] target/tricore: " Richard Henderson
2019-06-04 20:33 ` [Qemu-devel] [PATCH v4 29/39] target/unicore32: Use env_cpu, env_archcpu Richard Henderson
2019-06-04 20:33 ` [Qemu-devel] [PATCH v4 30/39] target/xtensa: " Richard Henderson
2019-06-04 20:33 ` [Qemu-devel] [PATCH v4 31/39] cpu: Move ENV_OFFSET to exec/gen-icount.h Richard Henderson
2019-06-04 20:33 ` [Qemu-devel] [PATCH v4 32/39] cpu: Introduce cpu_set_cpustate_pointers Richard Henderson
2019-06-04 20:33 ` [Qemu-devel] [PATCH v4 33/39] cpu: Introduce CPUNegativeOffsetState Richard Henderson
2019-06-04 20:33 ` [Qemu-devel] [PATCH v4 34/39] cpu: Move icount_decr to CPUNegativeOffsetState Richard Henderson
2019-06-04 20:33 ` [Qemu-devel] [PATCH v4 35/39] cpu: Move the softmmu tlb " Richard Henderson
2019-06-04 20:33 ` [Qemu-devel] [PATCH v4 36/39] cpu: Remove CPU_COMMON Richard Henderson
2019-06-04 20:33 ` [Qemu-devel] [PATCH v4 37/39] tcg/aarch64: Use LDP to load tlb mask+table Richard Henderson
2019-06-04 20:33 ` Richard Henderson [this message]
2019-06-07 10:24 ` [Qemu-devel] [PATCH v4 38/39] tcg/arm: Use LDRD " Peter Maydell
2019-06-04 20:33 ` [Qemu-devel] [PATCH v4 39/39] tcg/arm: Remove mostly unreachable tlb special case Richard Henderson
2019-06-04 22:28 ` [Qemu-devel] [PATCH v4 00/39] tcg: Move the softmmu tlb to CPUNegativeOffsetState no-reply
2019-06-04 22:48 ` no-reply
2019-06-04 22:54 ` no-reply
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190604203351.27778-39-richard.henderson@linaro.org \
--to=richard.henderson@linaro.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as for the URLs of the NNTP newsgroup(s).