* [Qemu-devel] [PATCH v3 for 1.3 1/3] tcg/arm: fix TLB access in qemu-ld/st ops
2012-11-05 7:29 [Qemu-devel] [PATCH v3 for 1.3 0/3] tcg/arm: misc fixes Aurelien Jarno
@ 2012-11-05 7:29 ` Aurelien Jarno
2012-11-05 7:29 ` [Qemu-devel] [PATCH v3 for 1.3 2/3] tcg/arm: fix cross-endian qemu_st16 Aurelien Jarno
2012-11-05 7:29 ` [Qemu-devel] [PATCH v3 for 1.3 3/3] target-openrisc: remove conflicting definitions from cpu.h Aurelien Jarno
2 siblings, 0 replies; 4+ messages in thread
From: Aurelien Jarno @ 2012-11-05 7:29 UTC (permalink / raw)
To: qemu-devel; +Cc: Peter Maydell, qemu-stable, Aurelien Jarno
The TCG arm backend considers likely that the offset to the TLB
entries does not exceed 12 bits for mem_index = 0. In practice this is
not true for at least the MIPS target.
The current patch fixes that by loading the bits 23-12 with a separate
instruction, and using loads with address writeback, independently of
the value of mem_idx. In total this allow a 24-bit offset, which is a
lot more than needed.
Cc: Andrzej Zaborowski <balrogg@gmail.com>
Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: qemu-stable@nongnu.org
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
tcg/arm/tcg-target.c | 78 +++++++++++++++++++++++++++-----------------------
1 file changed, 42 insertions(+), 36 deletions(-)
diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index e790bf0..9550102 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -639,6 +639,22 @@ static inline void tcg_out_ld32_12(TCGContext *s, int cond,
(rn << 16) | (rd << 12) | ((-im) & 0xfff));
}
+/* Offset pre-increment with base writeback. */
+static inline void tcg_out_ld32_12wb(TCGContext *s, int cond,
+ int rd, int rn, tcg_target_long im)
+{
+ /* ldr with writeback and both register equals is UNPREDICTABLE */
+ assert(rd != rn);
+
+ if (im >= 0) {
+ tcg_out32(s, (cond << 28) | 0x05b00000 |
+ (rn << 16) | (rd << 12) | (im & 0xfff));
+ } else {
+ tcg_out32(s, (cond << 28) | 0x05300000 |
+ (rn << 16) | (rd << 12) | ((-im) & 0xfff));
+ }
+}
+
static inline void tcg_out_st32_12(TCGContext *s, int cond,
int rd, int rn, tcg_target_long im)
{
@@ -1071,7 +1087,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
{
int addr_reg, data_reg, data_reg2, bswap;
#ifdef CONFIG_SOFTMMU
- int mem_index, s_bits;
+ int mem_index, s_bits, tlb_offset;
TCGReg argreg;
# if TARGET_LONG_BITS == 64
int addr_reg2;
@@ -1111,19 +1127,15 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
TCG_REG_R0, TCG_REG_R8, CPU_TLB_SIZE - 1);
tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_AREG0,
TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));
- /* In the
- * ldr r1 [r0, #(offsetof(CPUArchState, tlb_table[mem_index][0].addr_read))]
- * below, the offset is likely to exceed 12 bits if mem_index != 0 and
- * not exceed otherwise, so use an
- * add r0, r0, #(mem_index * sizeof *CPUArchState.tlb_table)
- * before.
- */
- if (mem_index)
+ /* We assume that the offset is contained within 20 bits. */
+ tlb_offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_read);
+ assert(tlb_offset & ~0xfffff == 0);
+ if (tlb_offset > 0xfff) {
tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_REG_R0,
- (mem_index << (TLB_SHIFT & 1)) |
- ((16 - (TLB_SHIFT >> 1)) << 8));
- tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R0,
- offsetof(CPUArchState, tlb_table[0][0].addr_read));
+ 0xa00 | (tlb_offset >> 12));
+ tlb_offset &= 0xfff;
+ }
+ tcg_out_ld32_12wb(s, COND_AL, TCG_REG_R1, TCG_REG_R0, tlb_offset);
tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R1,
TCG_REG_R8, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
/* Check alignment. */
@@ -1131,15 +1143,14 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
tcg_out_dat_imm(s, COND_EQ, ARITH_TST,
0, addr_reg, (1 << s_bits) - 1);
# if TARGET_LONG_BITS == 64
- /* XXX: possibly we could use a block data load or writeback in
- * the first access. */
- tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
- offsetof(CPUArchState, tlb_table[0][0].addr_read) + 4);
+ /* XXX: possibly we could use a block data load in the first access. */
+ tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0, 4);
tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
TCG_REG_R1, addr_reg2, SHIFT_IMM_LSL(0));
# endif
tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
- offsetof(CPUArchState, tlb_table[0][0].addend));
+ offsetof(CPUTLBEntry, addend)
+ - offsetof(CPUTLBEntry, addr_read));
switch (opc) {
case 0:
@@ -1288,7 +1299,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
{
int addr_reg, data_reg, data_reg2, bswap;
#ifdef CONFIG_SOFTMMU
- int mem_index, s_bits;
+ int mem_index, s_bits, tlb_offset;
TCGReg argreg;
# if TARGET_LONG_BITS == 64
int addr_reg2;
@@ -1325,19 +1336,15 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
TCG_REG_R0, TCG_REG_R8, CPU_TLB_SIZE - 1);
tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R0,
TCG_AREG0, TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));
- /* In the
- * ldr r1 [r0, #(offsetof(CPUArchState, tlb_table[mem_index][0].addr_write))]
- * below, the offset is likely to exceed 12 bits if mem_index != 0 and
- * not exceed otherwise, so use an
- * add r0, r0, #(mem_index * sizeof *CPUArchState.tlb_table)
- * before.
- */
- if (mem_index)
+ /* We assume that the offset is contained within 20 bits. */
+ tlb_offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
+ assert(tlb_offset & ~0xfffff == 0);
+ if (tlb_offset > 0xfff) {
tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_REG_R0,
- (mem_index << (TLB_SHIFT & 1)) |
- ((16 - (TLB_SHIFT >> 1)) << 8));
- tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R0,
- offsetof(CPUArchState, tlb_table[0][0].addr_write));
+ 0xa00 | (tlb_offset >> 12));
+ tlb_offset &= 0xfff;
+ }
+ tcg_out_ld32_12wb(s, COND_AL, TCG_REG_R1, TCG_REG_R0, tlb_offset);
tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R1,
TCG_REG_R8, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
/* Check alignment. */
@@ -1345,15 +1352,14 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
tcg_out_dat_imm(s, COND_EQ, ARITH_TST,
0, addr_reg, (1 << s_bits) - 1);
# if TARGET_LONG_BITS == 64
- /* XXX: possibly we could use a block data load or writeback in
- * the first access. */
- tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
- offsetof(CPUArchState, tlb_table[0][0].addr_write) + 4);
+ /* XXX: possibly we could use a block data load in the first access. */
+ tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0, 4);
tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
TCG_REG_R1, addr_reg2, SHIFT_IMM_LSL(0));
# endif
tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
- offsetof(CPUArchState, tlb_table[0][0].addend));
+ offsetof(CPUTLBEntry, addend)
+ - offsetof(CPUTLBEntry, addr_write));
switch (opc) {
case 0:
--
1.7.10.4
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [Qemu-devel] [PATCH v3 for 1.3 2/3] tcg/arm: fix cross-endian qemu_st16
2012-11-05 7:29 [Qemu-devel] [PATCH v3 for 1.3 0/3] tcg/arm: misc fixes Aurelien Jarno
2012-11-05 7:29 ` [Qemu-devel] [PATCH v3 for 1.3 1/3] tcg/arm: fix TLB access in qemu-ld/st ops Aurelien Jarno
@ 2012-11-05 7:29 ` Aurelien Jarno
2012-11-05 7:29 ` [Qemu-devel] [PATCH v3 for 1.3 3/3] target-openrisc: remove conflicting definitions from cpu.h Aurelien Jarno
2 siblings, 0 replies; 4+ messages in thread
From: Aurelien Jarno @ 2012-11-05 7:29 UTC (permalink / raw)
To: qemu-devel; +Cc: Peter Maydell, qemu-stable, Aurelien Jarno
The bswap16 TCG opcode assumes that the high bytes of the temp equal
to 0 before calling it. The ARM backend implementation takes this
assumption to slightly optimize the generated code.
The same implementation is called for implementing the cross-endian
qemu_st16 opcode, where this assumption is not true anymore. One way to
fix that would be to zero the high bytes before calling it. Given the
store instruction just ignore them, it is possible to provide a slightly
more optimized version. With ARMv6+ the rev16 instruction does the work
correctly. For lower ARM versions the patch provides a version which
behaves correctly with non-zero high bytes, but fill them with junk.
Cc: Andrzej Zaborowski <balrogg@gmail.com>
Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: qemu-stable@nongnu.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
tcg/arm/tcg-target.c | 20 ++++++++++++++++++--
1 file changed, 18 insertions(+), 2 deletions(-)
diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index 9550102..47612fe 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -611,6 +611,22 @@ static inline void tcg_out_bswap16(TCGContext *s, int cond, int rd, int rn)
}
}
+/* swap the two low bytes assuming that the two high input bytes and the
+ two high output bit can hold any value. */
+static inline void tcg_out_bswap16st(TCGContext *s, int cond, int rd, int rn)
+{
+ if (use_armv6_instructions) {
+ /* rev16 */
+ tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
+ } else {
+ tcg_out_dat_reg(s, cond, ARITH_MOV,
+ TCG_REG_R8, 0, rn, SHIFT_IMM_LSR(8));
+ tcg_out_dat_imm(s, cond, ARITH_AND, TCG_REG_R8, TCG_REG_R8, 0xff);
+ tcg_out_dat_reg(s, cond, ARITH_ORR,
+ rd, TCG_REG_R8, rn, SHIFT_IMM_LSL(8));
+ }
+}
+
static inline void tcg_out_bswap32(TCGContext *s, int cond, int rd, int rn)
{
if (use_armv6_instructions) {
@@ -1367,7 +1383,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
break;
case 1:
if (bswap) {
- tcg_out_bswap16(s, COND_EQ, TCG_REG_R0, data_reg);
+ tcg_out_bswap16st(s, COND_EQ, TCG_REG_R0, data_reg);
tcg_out_st16_r(s, COND_EQ, TCG_REG_R0, addr_reg, TCG_REG_R1);
} else {
tcg_out_st16_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
@@ -1453,7 +1469,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
break;
case 1:
if (bswap) {
- tcg_out_bswap16(s, COND_AL, TCG_REG_R0, data_reg);
+ tcg_out_bswap16st(s, COND_AL, TCG_REG_R0, data_reg);
tcg_out_st16_8(s, COND_AL, TCG_REG_R0, addr_reg, 0);
} else {
tcg_out_st16_8(s, COND_AL, data_reg, addr_reg, 0);
--
1.7.10.4
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [Qemu-devel] [PATCH v3 for 1.3 3/3] target-openrisc: remove conflicting definitions from cpu.h
2012-11-05 7:29 [Qemu-devel] [PATCH v3 for 1.3 0/3] tcg/arm: misc fixes Aurelien Jarno
2012-11-05 7:29 ` [Qemu-devel] [PATCH v3 for 1.3 1/3] tcg/arm: fix TLB access in qemu-ld/st ops Aurelien Jarno
2012-11-05 7:29 ` [Qemu-devel] [PATCH v3 for 1.3 2/3] tcg/arm: fix cross-endian qemu_st16 Aurelien Jarno
@ 2012-11-05 7:29 ` Aurelien Jarno
2 siblings, 0 replies; 4+ messages in thread
From: Aurelien Jarno @ 2012-11-05 7:29 UTC (permalink / raw)
To: qemu-devel; +Cc: Jia Liu, Aurelien Jarno, qemu-stable
On an ARM host, the registers definitions from cpu.h clash
with /usr/include/sys/ucontext.h. As there are unused, just remove
them.
Cc: Jia Liu <proljc@gmail.com>
Cc: qemu-stable@nongnu.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
target-openrisc/cpu.h | 18 ------------------
1 file changed, 18 deletions(-)
diff --git a/target-openrisc/cpu.h b/target-openrisc/cpu.h
index d42ffb0..ebb5ad3 100644
--- a/target-openrisc/cpu.h
+++ b/target-openrisc/cpu.h
@@ -89,24 +89,6 @@ enum {
/* Interrupt */
#define NR_IRQS 32
-/* Registers */
-enum {
- R0 = 0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10,
- R11, R12, R13, R14, R15, R16, R17, R18, R19, R20,
- R21, R22, R23, R24, R25, R26, R27, R28, R29, R30,
- R31
-};
-
-/* Register aliases */
-enum {
- R_ZERO = R0,
- R_SP = R1,
- R_FP = R2,
- R_LR = R9,
- R_RV = R11,
- R_RVH = R12
-};
-
/* Unit presece register */
enum {
UPR_UP = (1 << 0),
--
1.7.10.4
^ permalink raw reply related [flat|nested] 4+ messages in thread