* [Qemu-devel] [PATCH 1/2] tcg/i386: remove suboptimal register shifting
@ 2012-10-20 17:28 Aurelien Jarno
2012-10-20 17:28 ` [Qemu-devel] [PATCH 2/2] tcg/i386: remove ld/st third argument register constraint Aurelien Jarno
2012-10-21 6:41 ` [Qemu-devel] [PATCH 1/2] tcg/i386: remove suboptimal register shifting Richard Henderson
0 siblings, 2 replies; 4+ messages in thread
From: Aurelien Jarno @ 2012-10-20 17:28 UTC (permalink / raw)
To: qemu-devel; +Cc: Aurelien Jarno
Now that CONFIG_TCG_PASS_AREG0 has been removed, it's easier to generate
optimal code for the load/store functions.
First swap the two registers used in tcg_out_tlb_load() so that the
address ends up in the second register instead of the first one. Adjust
tcg_out_qemu_ld() and tcg_out_qemu_st() to respectively call
tcg_out_qemu_ld_direct() and tcg_out_qemu_st_direct() with the correct
registers. Then replace the register shifting by direct load of the
arguments.
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
tcg/i386/tcg-target.c | 73 +++++++++++++++++++++----------------------------
1 file changed, 31 insertions(+), 42 deletions(-)
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 4952c05..4c59e33 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -1016,12 +1016,12 @@ static const void *qemu_st_helpers[4] = {
LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
positions of the displacements of forward jumps to the TLB miss case.
- First argument register is loaded with the low part of the address.
+ Second argument register is loaded with the low part of the address.
In the TLB hit case, it has been adjusted as indicated by the TLB
and so is a host address. In the TLB miss case, it continues to
hold a guest address.
- Second argument register is clobbered. */
+ First argument register is clobbered. */
static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
int mem_index, int s_bits,
@@ -1039,25 +1039,25 @@ static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
rexw = P_REXW;
}
- tcg_out_mov(s, type, r1, addrlo);
tcg_out_mov(s, type, r0, addrlo);
+ tcg_out_mov(s, type, r1, addrlo);
- tcg_out_shifti(s, SHIFT_SHR + rexw, r1,
+ tcg_out_shifti(s, SHIFT_SHR + rexw, r0,
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
- tgen_arithi(s, ARITH_AND + rexw, r0,
- TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
tgen_arithi(s, ARITH_AND + rexw, r1,
+ TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
+ tgen_arithi(s, ARITH_AND + rexw, r0,
(CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
- tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, r1, TCG_AREG0, r1, 0,
+ tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, r0, TCG_AREG0, r0, 0,
offsetof(CPUArchState, tlb_table[mem_index][0])
+ which);
- /* cmp 0(r1), r0 */
- tcg_out_modrm_offset(s, OPC_CMP_GvEv + rexw, r0, r1, 0);
+ /* cmp 0(r0), r1 */
+ tcg_out_modrm_offset(s, OPC_CMP_GvEv + rexw, r1, r0, 0);
- tcg_out_mov(s, type, r0, addrlo);
+ tcg_out_mov(s, type, r1, addrlo);
/* jne label1 */
tcg_out8(s, OPC_JCC_short + JCC_JNE);
@@ -1065,8 +1065,8 @@ static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
s->code_ptr++;
if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
- /* cmp 4(r1), addrhi */
- tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r1, 4);
+ /* cmp 4(r0), addrhi */
+ tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r0, 4);
/* jne label1 */
tcg_out8(s, OPC_JCC_short + JCC_JNE);
@@ -1076,8 +1076,8 @@ static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
/* TLB Hit. */
- /* add addend(r1), r0 */
- tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r0, r1,
+ /* add addend(r0), r1 */
+ tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r1, r0,
offsetof(CPUTLBEntry, addend) - which);
}
#endif
@@ -1169,9 +1169,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
int addrlo_idx;
#if defined(CONFIG_SOFTMMU)
int mem_index, s_bits;
-#if TCG_TARGET_REG_BITS == 64
- int arg_idx;
-#else
+#if TCG_TARGET_REG_BITS == 32
int stack_adjust;
#endif
uint8_t *label_ptr[3];
@@ -1192,7 +1190,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
label_ptr, offsetof(CPUTLBEntry, addr_read));
/* TLB Hit. */
- tcg_out_qemu_ld_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, opc);
+ tcg_out_qemu_ld_direct(s, data_reg, data_reg2, TCG_REG_L1, 0, opc);
/* jmp label2 */
tcg_out8(s, OPC_JMP_short);
@@ -1220,15 +1218,9 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
tcg_out_push(s, TCG_AREG0);
stack_adjust += 4;
#else
- /* The first argument is already loaded with addrlo. */
- arg_idx = 1;
- tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx],
- mem_index);
- /* XXX/FIXME: suboptimal */
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3], TCG_REG_L2);
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2], TCG_REG_L1);
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1], TCG_REG_L0);
tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0);
+ /* The second argument is already loaded with addrlo. */
+ tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], mem_index);
#endif
tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);
@@ -1294,9 +1286,9 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
use the ADDR32 prefix. For now, do nothing. */
if (offset != GUEST_BASE) {
- tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE);
- tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base);
- base = TCG_REG_L0;
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
+ tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
+ base = TCG_REG_L1;
offset = 0;
}
}
@@ -1317,8 +1309,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
/* ??? Ideally we wouldn't need a scratch register. For user-only,
we could perform the bswap twice to restore the original value
instead of moving to the scratch. But as it is, the L constraint
- means that TCG_REG_L1 is definitely free here. */
- const int scratch = TCG_REG_L1;
+ means that TCG_REG_L0 is definitely free here. */
+ const int scratch = TCG_REG_L0;
switch (sizeop) {
case 0:
@@ -1391,7 +1383,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
label_ptr, offsetof(CPUTLBEntry, addr_write));
/* TLB Hit. */
- tcg_out_qemu_st_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, opc);
+ tcg_out_qemu_st_direct(s, data_reg, data_reg2, TCG_REG_L1, 0, opc);
/* jmp label2 */
tcg_out8(s, OPC_JMP_short);
@@ -1425,15 +1417,12 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
tcg_out_push(s, TCG_AREG0);
stack_adjust += 4;
#else
+ tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0);
+ /* The second argument is already loaded with addrlo. */
tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
- TCG_REG_L1, data_reg);
- tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_L2, mem_index);
+ tcg_target_call_iarg_regs[2], data_reg);
+ tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], mem_index);
stack_adjust = 0;
- /* XXX/FIXME: suboptimal */
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3], TCG_REG_L2);
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2], TCG_REG_L1);
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1], TCG_REG_L0);
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0);
#endif
tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);
@@ -1460,9 +1449,9 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
use the ADDR32 prefix. For now, do nothing. */
if (offset != GUEST_BASE) {
- tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE);
- tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base);
- base = TCG_REG_L0;
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
+ tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
+ base = TCG_REG_L1;
offset = 0;
}
}
--
1.7.10.4
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [Qemu-devel] [PATCH 2/2] tcg/i386: remove ld/st third argument register constraint
2012-10-20 17:28 [Qemu-devel] [PATCH 1/2] tcg/i386: remove suboptimal register shifting Aurelien Jarno
@ 2012-10-20 17:28 ` Aurelien Jarno
2012-10-21 6:42 ` Richard Henderson
2012-10-21 6:41 ` [Qemu-devel] [PATCH 1/2] tcg/i386: remove suboptimal register shifting Richard Henderson
1 sibling, 1 reply; 4+ messages in thread
From: Aurelien Jarno @ 2012-10-20 17:28 UTC (permalink / raw)
To: qemu-devel; +Cc: Aurelien Jarno
On x86_64, remove the constraint on the third argument register which
is not needed:
- For loads, the helper arguments are env, addr, mem_idx. The addr
value should not be in the first two argument registers, as they are
used in tcg_out_tlb_load().
- For stores, the helper arguments are env, addr, data, mem_idx.
The addr and data values should not be in the first two argument
registers, as they are used in tcg_out_tlb_load(). The data value
may, however, be in the third argument register, in which case it
is already loaded at the right location.
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
tcg/i386/tcg-target.c | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 4c59e33..9c8f69a 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -92,7 +92,6 @@ static const int tcg_target_call_oarg_regs[] = {
#if TCG_TARGET_REG_BITS == 64
# define TCG_REG_L0 tcg_target_call_iarg_regs[0]
# define TCG_REG_L1 tcg_target_call_iarg_regs[1]
-# define TCG_REG_L2 tcg_target_call_iarg_regs[2]
#else
# define TCG_REG_L0 TCG_REG_EAX
# define TCG_REG_L1 TCG_REG_EDX
@@ -181,14 +180,11 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
ct->ct |= TCG_CT_REG;
#if TCG_TARGET_REG_BITS == 64
tcg_regset_set32(ct->u.regs, 0, 0xffff);
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_L2);
#else
tcg_regset_set32(ct->u.regs, 0, 0xff);
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
#endif
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
break;
case 'e':
--
1.7.10.4
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [Qemu-devel] [PATCH 2/2] tcg/i386: remove ld/st third argument register constraint
2012-10-20 17:28 ` [Qemu-devel] [PATCH 2/2] tcg/i386: remove ld/st third argument register constraint Aurelien Jarno
@ 2012-10-21 6:42 ` Richard Henderson
0 siblings, 0 replies; 4+ messages in thread
From: Richard Henderson @ 2012-10-21 6:42 UTC (permalink / raw)
To: Aurelien Jarno; +Cc: qemu-devel
On 2012-10-21 03:28, Aurelien Jarno wrote:
> On x86_64, remove the constraint on the third argument register which
> is not needed:
> - For loads the helper arguments are env, addr, mem_idx. The addr
> value should not be in the two first argument registers as they are
> used in tcg_out_tlb_load().
> - For stores the helper arguments are env, addr, data, mem_idx.
> The addr and data values should not be in the two first argument
> registers as they are used in tcg_out_tlb_load(). The data value
> should also not be in the two first argument registers, but could
> be in the third argument register in which case it would be already
> loaded at the right location.
>
> Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Reviewed-by: Richard Henderson <rth@twiddle.net>
r~
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [Qemu-devel] [PATCH 1/2] tcg/i386: remove suboptimal register shifting
2012-10-20 17:28 [Qemu-devel] [PATCH 1/2] tcg/i386: remove suboptimal register shifting Aurelien Jarno
2012-10-20 17:28 ` [Qemu-devel] [PATCH 2/2] tcg/i386: remove ld/st third argument register constraint Aurelien Jarno
@ 2012-10-21 6:41 ` Richard Henderson
1 sibling, 0 replies; 4+ messages in thread
From: Richard Henderson @ 2012-10-21 6:41 UTC (permalink / raw)
To: Aurelien Jarno; +Cc: qemu-devel
On 2012-10-21 03:28, Aurelien Jarno wrote:
> Now that CONFIG_TCG_PASS_AREG0 has been removed, it's easier to get
> an optimal code for the load/store functions.
>
> First swap the two registers used in tcg_out_tlb_load() so that the
> address end-up in the second register instead of the first one. Adjust
> tcg_out_qemu_ld() and tcg_out_qemu_st() to respectively call
> tcg_out_qemu_ld_direct() and tcg_out_qemu_st_direct() with the correct
> registers. Then replace the register shifting by direct load of the
> arguments.
>
> Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Reviewed-by: Richard Henderson <rth@twiddle.net>
r~
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2012-10-21 6:42 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-10-20 17:28 [Qemu-devel] [PATCH 1/2] tcg/i386: remove suboptimal register shifting Aurelien Jarno
2012-10-20 17:28 ` [Qemu-devel] [PATCH 2/2] tcg/i386: remove ld/st third argument register constraint Aurelien Jarno
2012-10-21 6:42 ` Richard Henderson
2012-10-21 6:41 ` [Qemu-devel] [PATCH 1/2] tcg/i386: remove suboptimal register shifting Richard Henderson
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).