* [Qemu-devel] [PATCH v2] tcg-i386: Use %gs prefixes for x86_64 GUEST_BASE
@ 2012-10-22 2:11 Richard Henderson
2012-10-22 5:59 ` Aurelien Jarno
0 siblings, 1 reply; 8+ messages in thread
From: Richard Henderson @ 2012-10-22 2:11 UTC (permalink / raw)
To: qemu-devel; +Cc: Aurelien Jarno
When we allocate a reserved_va for the guest, the kernel will likely
choose an address well above 4G. At which point we must use a pair
of movabsq+addq to form the host address. If we have OS support,
set up a segment register to point to guest_base instead.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/i386/tcg-target.c | 151 +++++++++++++++++++++++++++++++-------------------
1 file changed, 95 insertions(+), 56 deletions(-)
I replaced the ADDR32 comment. I did not use the arch_prctl entry
point present in libc, because no header supplies the prototype.
This is true even of mainline glibc.
r~
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 4952c05..4f7a235 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -236,11 +236,13 @@ static inline int tcg_target_const_match(tcg_target_long val,
# define P_REXW 0x800 /* Set REX.W = 1 */
# define P_REXB_R 0x1000 /* REG field as byte register */
# define P_REXB_RM 0x2000 /* R/M field as byte register */
+# define P_GS 0x4000 /* gs segment override */
#else
# define P_ADDR32 0
# define P_REXW 0
# define P_REXB_R 0
# define P_REXB_RM 0
+# define P_GS 0
#endif
#define OPC_ARITH_EvIz (0x81)
@@ -356,6 +358,9 @@ static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
{
int rex;
+ if (opc & P_GS) {
+ tcg_out8(s, 0x65);
+ }
if (opc & P_DATA16) {
/* We should never be asking for both 16 and 64-bit operation. */
assert((opc & P_REXW) == 0);
@@ -1080,10 +1085,25 @@ static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r0, r1,
offsetof(CPUTLBEntry, addend) - which);
}
-#endif
+#elif defined(__x86_64__) && defined(__linux__)
+# include <sys/syscall.h>
+# include <asm/prctl.h>
+
+static int guest_base_flags;
+static inline void setup_guest_base_seg(void)
+{
+ if (syscall(__NR_arch_prctl, ARCH_SET_GS, GUEST_BASE) == 0) {
+ guest_base_flags = P_GS;
+ }
+}
+#else
+# define guest_base_flags 0
+static inline void setup_guest_base_seg(void) { }
+#endif /* SOFTMMU */
static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
- int base, tcg_target_long ofs, int sizeop)
+ int base, tcg_target_long ofs, int seg,
+ int sizeop)
{
#ifdef TARGET_WORDS_BIGENDIAN
const int bswap = 1;
@@ -1092,28 +1112,29 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
#endif
switch (sizeop) {
case 0:
- tcg_out_modrm_offset(s, OPC_MOVZBL, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVZBL + seg, datalo, base, ofs);
break;
case 0 | 4:
- tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW + seg, datalo, base, ofs);
break;
case 1:
- tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
if (bswap) {
tcg_out_rolw_8(s, datalo);
}
break;
case 1 | 4:
if (bswap) {
- tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
tcg_out_rolw_8(s, datalo);
tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
} else {
- tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW + seg,
+ datalo, base, ofs);
}
break;
case 2:
- tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
if (bswap) {
tcg_out_bswap32(s, datalo);
}
@@ -1121,17 +1142,18 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
#if TCG_TARGET_REG_BITS == 64
case 2 | 4:
if (bswap) {
- tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
tcg_out_bswap32(s, datalo);
tcg_out_ext32s(s, datalo, datalo);
} else {
- tcg_out_modrm_offset(s, OPC_MOVSLQ, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVSLQ + seg, datalo, base, ofs);
}
break;
#endif
case 3:
if (TCG_TARGET_REG_BITS == 64) {
- tcg_out_ld(s, TCG_TYPE_I64, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_GvEv + P_REXW + seg,
+ datalo, base, ofs);
if (bswap) {
tcg_out_bswap64(s, datalo);
}
@@ -1142,11 +1164,15 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
datahi = t;
}
if (base != datalo) {
- tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
- tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
+ tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
+ datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
+ datahi, base, ofs + 4);
} else {
- tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
- tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
+ datahi, base, ofs + 4);
+ tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
+ datalo, base, ofs);
}
if (bswap) {
tcg_out_bswap32(s, datalo);
@@ -1192,7 +1218,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
label_ptr, offsetof(CPUTLBEntry, addr_read));
/* TLB Hit. */
- tcg_out_qemu_ld_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, opc);
+ tcg_out_qemu_ld_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, 0, opc);
/* jmp label2 */
tcg_out8(s, OPC_JMP_short);
@@ -1285,29 +1311,31 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
{
int32_t offset = GUEST_BASE;
int base = args[addrlo_idx];
-
- if (TCG_TARGET_REG_BITS == 64) {
- /* ??? We assume all operations have left us with register
- contents that are zero extended. So far this appears to
- be true. If we want to enforce this, we can either do
- an explicit zero-extension here, or (if GUEST_BASE == 0)
- use the ADDR32 prefix. For now, do nothing. */
-
- if (offset != GUEST_BASE) {
- tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE);
- tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base);
- base = TCG_REG_L0;
- offset = 0;
- }
+ int seg = 0;
+
+ /* ??? We assume all operations have left us with register contents
+ that are zero extended. So far this appears to be true. If we
+ want to enforce this, we can either do an explicit zero-extension
+ here, or (if GUEST_BASE == 0, or a segment register is in use)
+ use the ADDR32 prefix. For now, do nothing. */
+ if (GUEST_BASE && guest_base_flags) {
+ seg = guest_base_flags;
+ offset = 0;
+ } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE);
+ tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base);
+ base = TCG_REG_L0;
+ offset = 0;
}
- tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, opc);
+ tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, seg, opc);
}
#endif
}
static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
- int base, tcg_target_long ofs, int sizeop)
+ int base, tcg_target_long ofs, int seg,
+ int sizeop)
{
#ifdef TARGET_WORDS_BIGENDIAN
const int bswap = 1;
@@ -1322,7 +1350,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
switch (sizeop) {
case 0:
- tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
+ datalo, base, ofs);
break;
case 1:
if (bswap) {
@@ -1330,7 +1359,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
tcg_out_rolw_8(s, scratch);
datalo = scratch;
}
- tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16 + seg,
+ datalo, base, ofs);
break;
case 2:
if (bswap) {
@@ -1338,7 +1368,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
tcg_out_bswap32(s, scratch);
datalo = scratch;
}
- tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
break;
case 3:
if (TCG_TARGET_REG_BITS == 64) {
@@ -1347,17 +1377,18 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
tcg_out_bswap64(s, scratch);
datalo = scratch;
}
- tcg_out_st(s, TCG_TYPE_I64, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_REXW + seg,
+ datalo, base, ofs);
} else if (bswap) {
tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
tcg_out_bswap32(s, scratch);
- tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
tcg_out_bswap32(s, scratch);
- tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs + 4);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
} else {
- tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
- tcg_out_st(s, TCG_TYPE_I32, datahi, base, ofs + 4);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datahi, base, ofs+4);
}
break;
default:
@@ -1391,7 +1422,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
label_ptr, offsetof(CPUTLBEntry, addr_write));
/* TLB Hit. */
- tcg_out_qemu_st_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, opc);
+ tcg_out_qemu_st_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, 0, opc);
/* jmp label2 */
tcg_out8(s, OPC_JMP_short);
@@ -1451,23 +1482,24 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
{
int32_t offset = GUEST_BASE;
int base = args[addrlo_idx];
-
- if (TCG_TARGET_REG_BITS == 64) {
- /* ??? We assume all operations have left us with register
- contents that are zero extended. So far this appears to
- be true. If we want to enforce this, we can either do
- an explicit zero-extension here, or (if GUEST_BASE == 0)
- use the ADDR32 prefix. For now, do nothing. */
-
- if (offset != GUEST_BASE) {
- tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE);
- tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base);
- base = TCG_REG_L0;
- offset = 0;
- }
+ int seg = 0;
+
+ /* ??? We assume all operations have left us with register contents
+ that are zero extended. So far this appears to be true. If we
+ want to enforce this, we can either do an explicit zero-extension
+ here, or (if GUEST_BASE == 0, or a segment register is in use)
+ use the ADDR32 prefix. For now, do nothing. */
+ if (GUEST_BASE && guest_base_flags) {
+ seg = guest_base_flags;
+ offset = 0;
+ } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE);
+ tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base);
+ base = TCG_REG_L0;
+ offset = 0;
}
- tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, opc);
+ tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, seg, opc);
}
#endif
}
@@ -2061,6 +2093,13 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tcg_out_pop(s, tcg_target_callee_save_regs[i]);
}
tcg_out_opc(s, OPC_RET, 0, 0, 0);
+
+#if !defined(CONFIG_SOFTMMU)
+ /* Try to set up a segment register to point to GUEST_BASE. */
+ if (GUEST_BASE) {
+ setup_guest_base_seg();
+ }
+#endif
}
static void tcg_target_init(TCGContext *s)
--
1.7.11.7
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [Qemu-devel] [PATCH v2] tcg-i386: Use %gs prefixes for x86_64 GUEST_BASE
2012-10-22 2:11 [Qemu-devel] [PATCH v2] tcg-i386: Use %gs prefixes for x86_64 GUEST_BASE Richard Henderson
@ 2012-10-22 5:59 ` Aurelien Jarno
2012-10-22 21:19 ` Richard Henderson
0 siblings, 1 reply; 8+ messages in thread
From: Aurelien Jarno @ 2012-10-22 5:59 UTC (permalink / raw)
To: Richard Henderson; +Cc: qemu-devel
On Mon, Oct 22, 2012 at 12:11:07PM +1000, Richard Henderson wrote:
> When we allocate a reserved_va for the guest, the kernel will likely
> choose an address well above 4G. At which point we must use a pair
> of movabsq+addq to form the host address. If we have OS support,
> set up a segment register to point to guest_base instead.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
> tcg/i386/tcg-target.c | 151 +++++++++++++++++++++++++++++++-------------------
> 1 file changed, 95 insertions(+), 56 deletions(-)
>
> I replaced the ADDR32 comment. I did not use the arch_prctl entry
> point present in libc, because no header supplies the prototype.
> This is true even of mainline glibc.
Yes, this is a known fact that this libc function doesn't have a
prototype (for instance in the manpage), that said it exists and at
least on x32 it's actually a wrapper doing things.
That's why I think it's better to use it than doing the syscall
directly.
Otherwise the patch now looks fine to me, though I still haven't done
any benchmark.
> r~
>
>
>
> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
> index 4952c05..4f7a235 100644
> --- a/tcg/i386/tcg-target.c
> +++ b/tcg/i386/tcg-target.c
> @@ -236,11 +236,13 @@ static inline int tcg_target_const_match(tcg_target_long val,
> # define P_REXW 0x800 /* Set REX.W = 1 */
> # define P_REXB_R 0x1000 /* REG field as byte register */
> # define P_REXB_RM 0x2000 /* R/M field as byte register */
> +# define P_GS 0x4000 /* gs segment override */
> #else
> # define P_ADDR32 0
> # define P_REXW 0
> # define P_REXB_R 0
> # define P_REXB_RM 0
> +# define P_GS 0
> #endif
>
> #define OPC_ARITH_EvIz (0x81)
> @@ -356,6 +358,9 @@ static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
> {
> int rex;
>
> + if (opc & P_GS) {
> + tcg_out8(s, 0x65);
> + }
> if (opc & P_DATA16) {
> /* We should never be asking for both 16 and 64-bit operation. */
> assert((opc & P_REXW) == 0);
> @@ -1080,10 +1085,25 @@ static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
> tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r0, r1,
> offsetof(CPUTLBEntry, addend) - which);
> }
> -#endif
> +#elif defined(__x86_64__) && defined(__linux__)
> +# include <sys/syscall.h>
> +# include <asm/prctl.h>
> +
> +static int guest_base_flags;
> +static inline void setup_guest_base_seg(void)
> +{
> + if (syscall(__NR_arch_prctl, ARCH_SET_GS, GUEST_BASE) == 0) {
> + guest_base_flags = P_GS;
> + }
> +}
> +#else
> +# define guest_base_flags 0
> +static inline void setup_guest_base_seg(void) { }
> +#endif /* SOFTMMU */
>
> static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
> - int base, tcg_target_long ofs, int sizeop)
> + int base, tcg_target_long ofs, int seg,
> + int sizeop)
> {
> #ifdef TARGET_WORDS_BIGENDIAN
> const int bswap = 1;
> @@ -1092,28 +1112,29 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
> #endif
> switch (sizeop) {
> case 0:
> - tcg_out_modrm_offset(s, OPC_MOVZBL, datalo, base, ofs);
> + tcg_out_modrm_offset(s, OPC_MOVZBL + seg, datalo, base, ofs);
> break;
> case 0 | 4:
> - tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW, datalo, base, ofs);
> + tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW + seg, datalo, base, ofs);
> break;
> case 1:
> - tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
> + tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
> if (bswap) {
> tcg_out_rolw_8(s, datalo);
> }
> break;
> case 1 | 4:
> if (bswap) {
> - tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
> + tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
> tcg_out_rolw_8(s, datalo);
> tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
> } else {
> - tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW, datalo, base, ofs);
> + tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW + seg,
> + datalo, base, ofs);
> }
> break;
> case 2:
> - tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
> + tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
> if (bswap) {
> tcg_out_bswap32(s, datalo);
> }
> @@ -1121,17 +1142,18 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
> #if TCG_TARGET_REG_BITS == 64
> case 2 | 4:
> if (bswap) {
> - tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
> + tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
> tcg_out_bswap32(s, datalo);
> tcg_out_ext32s(s, datalo, datalo);
> } else {
> - tcg_out_modrm_offset(s, OPC_MOVSLQ, datalo, base, ofs);
> + tcg_out_modrm_offset(s, OPC_MOVSLQ + seg, datalo, base, ofs);
> }
> break;
> #endif
> case 3:
> if (TCG_TARGET_REG_BITS == 64) {
> - tcg_out_ld(s, TCG_TYPE_I64, datalo, base, ofs);
> + tcg_out_modrm_offset(s, OPC_MOVL_GvEv + P_REXW + seg,
> + datalo, base, ofs);
> if (bswap) {
> tcg_out_bswap64(s, datalo);
> }
> @@ -1142,11 +1164,15 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
> datahi = t;
> }
> if (base != datalo) {
> - tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
> - tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
> + tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
> + datalo, base, ofs);
> + tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
> + datahi, base, ofs + 4);
> } else {
> - tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
> - tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
> + tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
> + datahi, base, ofs + 4);
> + tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
> + datalo, base, ofs);
> }
> if (bswap) {
> tcg_out_bswap32(s, datalo);
> @@ -1192,7 +1218,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
> label_ptr, offsetof(CPUTLBEntry, addr_read));
>
> /* TLB Hit. */
> - tcg_out_qemu_ld_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, opc);
> + tcg_out_qemu_ld_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, 0, opc);
>
> /* jmp label2 */
> tcg_out8(s, OPC_JMP_short);
> @@ -1285,29 +1311,31 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
> {
> int32_t offset = GUEST_BASE;
> int base = args[addrlo_idx];
> -
> - if (TCG_TARGET_REG_BITS == 64) {
> - /* ??? We assume all operations have left us with register
> - contents that are zero extended. So far this appears to
> - be true. If we want to enforce this, we can either do
> - an explicit zero-extension here, or (if GUEST_BASE == 0)
> - use the ADDR32 prefix. For now, do nothing. */
> -
> - if (offset != GUEST_BASE) {
> - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE);
> - tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base);
> - base = TCG_REG_L0;
> - offset = 0;
> - }
> + int seg = 0;
> +
> + /* ??? We assume all operations have left us with register contents
> + that are zero extended. So far this appears to be true. If we
> + want to enforce this, we can either do an explicit zero-extension
> + here, or (if GUEST_BASE == 0, or a segment register is in use)
> + use the ADDR32 prefix. For now, do nothing. */
> + if (GUEST_BASE && guest_base_flags) {
> + seg = guest_base_flags;
> + offset = 0;
> + } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
> + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE);
> + tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base);
> + base = TCG_REG_L0;
> + offset = 0;
> }
>
> - tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, opc);
> + tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, seg, opc);
> }
> #endif
> }
>
> static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
> - int base, tcg_target_long ofs, int sizeop)
> + int base, tcg_target_long ofs, int seg,
> + int sizeop)
> {
> #ifdef TARGET_WORDS_BIGENDIAN
> const int bswap = 1;
> @@ -1322,7 +1350,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
>
> switch (sizeop) {
> case 0:
> - tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R, datalo, base, ofs);
> + tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
> + datalo, base, ofs);
> break;
> case 1:
> if (bswap) {
> @@ -1330,7 +1359,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
> tcg_out_rolw_8(s, scratch);
> datalo = scratch;
> }
> - tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16, datalo, base, ofs);
> + tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16 + seg,
> + datalo, base, ofs);
> break;
> case 2:
> if (bswap) {
> @@ -1338,7 +1368,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
> tcg_out_bswap32(s, scratch);
> datalo = scratch;
> }
> - tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
> + tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
> break;
> case 3:
> if (TCG_TARGET_REG_BITS == 64) {
> @@ -1347,17 +1377,18 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
> tcg_out_bswap64(s, scratch);
> datalo = scratch;
> }
> - tcg_out_st(s, TCG_TYPE_I64, datalo, base, ofs);
> + tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_REXW + seg,
> + datalo, base, ofs);
> } else if (bswap) {
> tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
> tcg_out_bswap32(s, scratch);
> - tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs);
> + tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
> tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
> tcg_out_bswap32(s, scratch);
> - tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs + 4);
> + tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
> } else {
> - tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
> - tcg_out_st(s, TCG_TYPE_I32, datahi, base, ofs + 4);
> + tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
> + tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datahi, base, ofs+4);
> }
> break;
> default:
> @@ -1391,7 +1422,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
> label_ptr, offsetof(CPUTLBEntry, addr_write));
>
> /* TLB Hit. */
> - tcg_out_qemu_st_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, opc);
> + tcg_out_qemu_st_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, 0, opc);
>
> /* jmp label2 */
> tcg_out8(s, OPC_JMP_short);
> @@ -1451,23 +1482,24 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
> {
> int32_t offset = GUEST_BASE;
> int base = args[addrlo_idx];
> -
> - if (TCG_TARGET_REG_BITS == 64) {
> - /* ??? We assume all operations have left us with register
> - contents that are zero extended. So far this appears to
> - be true. If we want to enforce this, we can either do
> - an explicit zero-extension here, or (if GUEST_BASE == 0)
> - use the ADDR32 prefix. For now, do nothing. */
> -
> - if (offset != GUEST_BASE) {
> - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE);
> - tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base);
> - base = TCG_REG_L0;
> - offset = 0;
> - }
> + int seg = 0;
> +
> + /* ??? We assume all operations have left us with register contents
> + that are zero extended. So far this appears to be true. If we
> + want to enforce this, we can either do an explicit zero-extension
> + here, or (if GUEST_BASE == 0, or a segment register is in use)
> + use the ADDR32 prefix. For now, do nothing. */
> + if (GUEST_BASE && guest_base_flags) {
> + seg = guest_base_flags;
> + offset = 0;
> + } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
> + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE);
> + tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base);
> + base = TCG_REG_L0;
> + offset = 0;
> }
>
> - tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, opc);
> + tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, seg, opc);
> }
> #endif
> }
> @@ -2061,6 +2093,13 @@ static void tcg_target_qemu_prologue(TCGContext *s)
> tcg_out_pop(s, tcg_target_callee_save_regs[i]);
> }
> tcg_out_opc(s, OPC_RET, 0, 0, 0);
> +
> +#if !defined(CONFIG_SOFTMMU)
> + /* Try to set up a segment register to point to GUEST_BASE. */
> + if (GUEST_BASE) {
> + setup_guest_base_seg();
> + }
> +#endif
> }
>
> static void tcg_target_init(TCGContext *s)
> --
> 1.7.11.7
>
>
--
Aurelien Jarno GPG: 1024D/F1BCDB73
aurelien@aurel32.net http://www.aurel32.net
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [Qemu-devel] [PATCH v2] tcg-i386: Use %gs prefixes for x86_64 GUEST_BASE
2012-10-22 5:59 ` Aurelien Jarno
@ 2012-10-22 21:19 ` Richard Henderson
2012-10-29 14:34 ` Aurelien Jarno
0 siblings, 1 reply; 8+ messages in thread
From: Richard Henderson @ 2012-10-22 21:19 UTC (permalink / raw)
To: Aurelien Jarno; +Cc: qemu-devel
On 2012-10-22 15:59, Aurelien Jarno wrote:
> Yes, this is a known fact that this libc function doesn't have a
> prototype (for instance in the manpage), that said it exists and at
> least on x32 it's actually a wrapper doing things.
>
> That's why I think it's better to use it than doing the syscall
> directly.
Well, we know that arch_prctl is not a wrapper for x86_64. And x32
will never be a concern for qemu. But I guess I can change this if
you really really insist.
r~
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [Qemu-devel] [PATCH v2] tcg-i386: Use %gs prefixes for x86_64 GUEST_BASE
2012-10-22 21:19 ` Richard Henderson
@ 2012-10-29 14:34 ` Aurelien Jarno
0 siblings, 0 replies; 8+ messages in thread
From: Aurelien Jarno @ 2012-10-29 14:34 UTC (permalink / raw)
To: Richard Henderson; +Cc: qemu-devel
On Tue, Oct 23, 2012 at 07:19:20AM +1000, Richard Henderson wrote:
> On 2012-10-22 15:59, Aurelien Jarno wrote:
> > Yes, this is a known fact that this libc function doesn't have a
> > prototype (for instance in the manpage), that said it exists and at
> > least on x32 it's actually a wrapper doing things.
> >
> > That's why I think it's better to use it than doing the syscall
> > directly.
>
> Well, we know that arch_prctl is not a wrapper for x86_64. And x32
> will never be a concern for qemu. But I guess I can change this if
> you really really insist.
>
My point here is that arch_prctl might be a wrapper for x86_64 at some
point, so it's better to do it properly now, and not to have to change
it later.
In order to avoid one more round trip for a new version of the patch, I
have applied it with this small change.
--
Aurelien Jarno GPG: 1024D/F1BCDB73
aurelien@aurel32.net http://www.aurel32.net
^ permalink raw reply [flat|nested] 8+ messages in thread
* [Qemu-devel] [PATCH] tcg-i386: Use %gs prefixes for x86_64 GUEST_BASE
@ 2012-10-18 3:28 Richard Henderson
2012-10-18 3:36 ` [Qemu-devel] [PATCH v2] " Richard Henderson
0 siblings, 1 reply; 8+ messages in thread
From: Richard Henderson @ 2012-10-18 3:28 UTC (permalink / raw)
To: qemu-devel; +Cc: Aurelien Jarno
When we allocate a reserved_va for the guest, the kernel will likely
choose an address well above 4G. At which point we must use a pair
of movabsq+addq to form the host address. If we have OS support,
set up a segment register to point to guest_base instead.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/i386/tcg-target.c | 146 +++++++++++++++++++++++++++++++-------------------
1 file changed, 90 insertions(+), 56 deletions(-)
I revived this old patch based on a reference to it on the list this
week. I dropped the i386 portion of the old patch, because I cannot
imagine there would be any improvement there. But for 64-bit it's
more important than for 32-bit, and some performance numbers show it. I
tested arm-linux-user because by default our 32-bit guests allocate
their entire address space, and by default that allocation is placed
by the kernel and so tends to wind up in high memory somewhere. And
since I have some access to live arm hosts, it was easy to grab the
binaries for gcc with which to do some testing.
./cc1 -O compiling a large input file:
old new
avg 38.50 35.06
stddev 0.52 1.88
improvement 8.9%
r~
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 4952c05..0524361 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -236,11 +236,13 @@ static inline int tcg_target_const_match(tcg_target_long val,
# define P_REXW 0x800 /* Set REX.W = 1 */
# define P_REXB_R 0x1000 /* REG field as byte register */
# define P_REXB_RM 0x2000 /* R/M field as byte register */
+# define P_GS 0x4000 /* gs segment override */
#else
# define P_ADDR32 0
# define P_REXW 0
# define P_REXB_R 0
# define P_REXB_RM 0
+# define P_GS 0
#endif
#define OPC_ARITH_EvIz (0x81)
@@ -356,6 +358,9 @@ static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
{
int rex;
+ if (opc & P_GS) {
+ tcg_out8(s, 0x65);
+ }
if (opc & P_DATA16) {
/* We should never be asking for both 16 and 64-bit operation. */
assert((opc & P_REXW) == 0);
@@ -1080,10 +1085,25 @@ static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r0, r1,
offsetof(CPUTLBEntry, addend) - which);
}
-#endif
+#elif defined(__x86_64__) && defined(__linux__)
+# include <sys/syscall.h>
+# include <asm/prctl.h>
+
+static int guest_base_flags;
+static inline void setup_guest_base_seg(void)
+{
+ if (syscall(__NR_arch_prctl, ARCH_SET_GS, GUEST_BASE) == 0) {
+ guest_base_flags = P_GS;
+ }
+}
+#else
+# define guest_base_flags 0
+static inline void setup_guest_base_seg(void) { }
+#endif /* SOFTMMU */
static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
- int base, tcg_target_long ofs, int sizeop)
+ int base, tcg_target_long ofs, int seg,
+ int sizeop)
{
#ifdef TARGET_WORDS_BIGENDIAN
const int bswap = 1;
@@ -1092,28 +1112,29 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
#endif
switch (sizeop) {
case 0:
- tcg_out_modrm_offset(s, OPC_MOVZBL, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVZBL + seg, datalo, base, ofs);
break;
case 0 | 4:
- tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW + seg, datalo, base, ofs);
break;
case 1:
- tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
if (bswap) {
tcg_out_rolw_8(s, datalo);
}
break;
case 1 | 4:
if (bswap) {
- tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
tcg_out_rolw_8(s, datalo);
tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
} else {
- tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW + seg,
+ datalo, base, ofs);
}
break;
case 2:
- tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
if (bswap) {
tcg_out_bswap32(s, datalo);
}
@@ -1121,17 +1142,18 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
#if TCG_TARGET_REG_BITS == 64
case 2 | 4:
if (bswap) {
- tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
tcg_out_bswap32(s, datalo);
tcg_out_ext32s(s, datalo, datalo);
} else {
- tcg_out_modrm_offset(s, OPC_MOVSLQ, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVSLQ + seg, datalo, base, ofs);
}
break;
#endif
case 3:
if (TCG_TARGET_REG_BITS == 64) {
- tcg_out_ld(s, TCG_TYPE_I64, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_GvEv + P_REXW + seg,
+ datalo, base, ofs);
if (bswap) {
tcg_out_bswap64(s, datalo);
}
@@ -1142,11 +1164,15 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
datahi = t;
}
if (base != datalo) {
- tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
- tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
+ tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
+ datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
+ datahi, base, ofs + 4);
} else {
- tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
- tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
+ datahi, base, ofs + 4);
+ tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
+ datalo, base, ofs);
}
if (bswap) {
tcg_out_bswap32(s, datalo);
@@ -1192,7 +1218,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
label_ptr, offsetof(CPUTLBEntry, addr_read));
/* TLB Hit. */
- tcg_out_qemu_ld_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, opc);
+ tcg_out_qemu_ld_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, 0, opc);
/* jmp label2 */
tcg_out8(s, OPC_JMP_short);
@@ -1285,29 +1311,26 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
{
int32_t offset = GUEST_BASE;
int base = args[addrlo_idx];
-
- if (TCG_TARGET_REG_BITS == 64) {
- /* ??? We assume all operations have left us with register
- contents that are zero extended. So far this appears to
- be true. If we want to enforce this, we can either do
- an explicit zero-extension here, or (if GUEST_BASE == 0)
- use the ADDR32 prefix. For now, do nothing. */
-
- if (offset != GUEST_BASE) {
- tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE);
- tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base);
- base = TCG_REG_L0;
- offset = 0;
- }
+ int seg = 0;
+
+ if (GUEST_BASE && guest_base_flags) {
+ seg = guest_base_flags;
+ offset = 0;
+ } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE);
+ tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base);
+ base = TCG_REG_L0;
+ offset = 0;
}
- tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, opc);
+ tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, seg, opc);
}
#endif
}
static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
- int base, tcg_target_long ofs, int sizeop)
+ int base, tcg_target_long ofs, int seg,
+ int sizeop)
{
#ifdef TARGET_WORDS_BIGENDIAN
const int bswap = 1;
@@ -1322,7 +1345,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
switch (sizeop) {
case 0:
- tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
+ datalo, base, ofs);
break;
case 1:
if (bswap) {
@@ -1330,7 +1354,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
tcg_out_rolw_8(s, scratch);
datalo = scratch;
}
- tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16 + seg,
+ datalo, base, ofs);
break;
case 2:
if (bswap) {
@@ -1338,7 +1363,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
tcg_out_bswap32(s, scratch);
datalo = scratch;
}
- tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
break;
case 3:
if (TCG_TARGET_REG_BITS == 64) {
@@ -1347,17 +1372,18 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
tcg_out_bswap64(s, scratch);
datalo = scratch;
}
- tcg_out_st(s, TCG_TYPE_I64, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_REXW + seg,
+ datalo, base, ofs);
} else if (bswap) {
tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
tcg_out_bswap32(s, scratch);
- tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
tcg_out_bswap32(s, scratch);
- tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs + 4);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
} else {
- tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
- tcg_out_st(s, TCG_TYPE_I32, datahi, base, ofs + 4);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datahi, base, ofs+4);
}
break;
default:
@@ -1391,7 +1417,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
label_ptr, offsetof(CPUTLBEntry, addr_write));
/* TLB Hit. */
- tcg_out_qemu_st_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, opc);
+ tcg_out_qemu_st_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, 0, opc);
/* jmp label2 */
tcg_out8(s, OPC_JMP_short);
@@ -1451,23 +1477,19 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
{
int32_t offset = GUEST_BASE;
int base = args[addrlo_idx];
-
- if (TCG_TARGET_REG_BITS == 64) {
- /* ??? We assume all operations have left us with register
- contents that are zero extended. So far this appears to
- be true. If we want to enforce this, we can either do
- an explicit zero-extension here, or (if GUEST_BASE == 0)
- use the ADDR32 prefix. For now, do nothing. */
-
- if (offset != GUEST_BASE) {
- tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE);
- tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base);
- base = TCG_REG_L0;
- offset = 0;
- }
+ int seg = 0;
+
+ if (GUEST_BASE && guest_base_flags) {
+ seg = guest_base_flags;
+ offset = 0;
+ } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE);
+ tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base);
+ base = TCG_REG_L0;
+ offset = 0;
}
- tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, opc);
+ tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, seg, opc);
}
#endif
}
@@ -2006,6 +2028,11 @@ static int tcg_target_callee_save_regs[] = {
#endif
};
+#if defined(__x86_64__) && defined(__linux__)
+# include <sys/syscall.h>
+# include <asm/prctl.h>
+#endif
+
/* Compute frame size via macros, to share between tcg_target_qemu_prologue
and tcg_register_jit. */
@@ -2061,6 +2088,13 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tcg_out_pop(s, tcg_target_callee_save_regs[i]);
}
tcg_out_opc(s, OPC_RET, 0, 0, 0);
+
+#if !defined(CONFIG_SOFTMMU)
+ /* Try to set up a segment register to point to GUEST_BASE. */
+ if (GUEST_BASE) {
+ setup_guest_base_seg();
+ }
+#endif
}
static void tcg_target_init(TCGContext *s)
--
1.7.11.7
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [Qemu-devel] [PATCH v2] tcg-i386: Use %gs prefixes for x86_64 GUEST_BASE
2012-10-18 3:28 [Qemu-devel] [PATCH] " Richard Henderson
@ 2012-10-18 3:36 ` Richard Henderson
2012-10-21 4:26 ` Aurelien Jarno
0 siblings, 1 reply; 8+ messages in thread
From: Richard Henderson @ 2012-10-18 3:36 UTC (permalink / raw)
To: qemu-devel; +Cc: Aurelien Jarno
When we allocate a reserved_va for the guest, the kernel will likely
choose an address well above 4G. At which point we must use a pair
of movabsq+addq to form the host address. If we have OS support,
set up a segment register to point to guest_base instead.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/i386/tcg-target.c | 141 ++++++++++++++++++++++++++++++--------------------
1 file changed, 85 insertions(+), 56 deletions(-)
Dang it, left some duplicate includes in there...
r~
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 4952c05..0d8855a 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -236,11 +236,13 @@ static inline int tcg_target_const_match(tcg_target_long val,
# define P_REXW 0x800 /* Set REX.W = 1 */
# define P_REXB_R 0x1000 /* REG field as byte register */
# define P_REXB_RM 0x2000 /* R/M field as byte register */
+# define P_GS 0x4000 /* gs segment override */
#else
# define P_ADDR32 0
# define P_REXW 0
# define P_REXB_R 0
# define P_REXB_RM 0
+# define P_GS 0
#endif
#define OPC_ARITH_EvIz (0x81)
@@ -356,6 +358,9 @@ static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
{
int rex;
+ if (opc & P_GS) {
+ tcg_out8(s, 0x65);
+ }
if (opc & P_DATA16) {
/* We should never be asking for both 16 and 64-bit operation. */
assert((opc & P_REXW) == 0);
@@ -1080,10 +1085,25 @@ static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r0, r1,
offsetof(CPUTLBEntry, addend) - which);
}
-#endif
+#elif defined(__x86_64__) && defined(__linux__)
+# include <sys/syscall.h>
+# include <asm/prctl.h>
+
+static int guest_base_flags;
+static inline void setup_guest_base_seg(void)
+{
+ if (syscall(__NR_arch_prctl, ARCH_SET_GS, GUEST_BASE) == 0) {
+ guest_base_flags = P_GS;
+ }
+}
+#else
+# define guest_base_flags 0
+static inline void setup_guest_base_seg(void) { }
+#endif /* SOFTMMU */
static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
- int base, tcg_target_long ofs, int sizeop)
+ int base, tcg_target_long ofs, int seg,
+ int sizeop)
{
#ifdef TARGET_WORDS_BIGENDIAN
const int bswap = 1;
@@ -1092,28 +1112,29 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
#endif
switch (sizeop) {
case 0:
- tcg_out_modrm_offset(s, OPC_MOVZBL, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVZBL + seg, datalo, base, ofs);
break;
case 0 | 4:
- tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW + seg, datalo, base, ofs);
break;
case 1:
- tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
if (bswap) {
tcg_out_rolw_8(s, datalo);
}
break;
case 1 | 4:
if (bswap) {
- tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
tcg_out_rolw_8(s, datalo);
tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
} else {
- tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW + seg,
+ datalo, base, ofs);
}
break;
case 2:
- tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
if (bswap) {
tcg_out_bswap32(s, datalo);
}
@@ -1121,17 +1142,18 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
#if TCG_TARGET_REG_BITS == 64
case 2 | 4:
if (bswap) {
- tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
tcg_out_bswap32(s, datalo);
tcg_out_ext32s(s, datalo, datalo);
} else {
- tcg_out_modrm_offset(s, OPC_MOVSLQ, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVSLQ + seg, datalo, base, ofs);
}
break;
#endif
case 3:
if (TCG_TARGET_REG_BITS == 64) {
- tcg_out_ld(s, TCG_TYPE_I64, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_GvEv + P_REXW + seg,
+ datalo, base, ofs);
if (bswap) {
tcg_out_bswap64(s, datalo);
}
@@ -1142,11 +1164,15 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
datahi = t;
}
if (base != datalo) {
- tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
- tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
+ tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
+ datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
+ datahi, base, ofs + 4);
} else {
- tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
- tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
+ datahi, base, ofs + 4);
+ tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
+ datalo, base, ofs);
}
if (bswap) {
tcg_out_bswap32(s, datalo);
@@ -1192,7 +1218,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
label_ptr, offsetof(CPUTLBEntry, addr_read));
/* TLB Hit. */
- tcg_out_qemu_ld_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, opc);
+ tcg_out_qemu_ld_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, 0, opc);
/* jmp label2 */
tcg_out8(s, OPC_JMP_short);
@@ -1285,29 +1311,26 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
{
int32_t offset = GUEST_BASE;
int base = args[addrlo_idx];
-
- if (TCG_TARGET_REG_BITS == 64) {
- /* ??? We assume all operations have left us with register
- contents that are zero extended. So far this appears to
- be true. If we want to enforce this, we can either do
- an explicit zero-extension here, or (if GUEST_BASE == 0)
- use the ADDR32 prefix. For now, do nothing. */
-
- if (offset != GUEST_BASE) {
- tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE);
- tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base);
- base = TCG_REG_L0;
- offset = 0;
- }
+ int seg = 0;
+
+ if (GUEST_BASE && guest_base_flags) {
+ seg = guest_base_flags;
+ offset = 0;
+ } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE);
+ tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base);
+ base = TCG_REG_L0;
+ offset = 0;
}
- tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, opc);
+ tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, seg, opc);
}
#endif
}
static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
- int base, tcg_target_long ofs, int sizeop)
+ int base, tcg_target_long ofs, int seg,
+ int sizeop)
{
#ifdef TARGET_WORDS_BIGENDIAN
const int bswap = 1;
@@ -1322,7 +1345,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
switch (sizeop) {
case 0:
- tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
+ datalo, base, ofs);
break;
case 1:
if (bswap) {
@@ -1330,7 +1354,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
tcg_out_rolw_8(s, scratch);
datalo = scratch;
}
- tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16 + seg,
+ datalo, base, ofs);
break;
case 2:
if (bswap) {
@@ -1338,7 +1363,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
tcg_out_bswap32(s, scratch);
datalo = scratch;
}
- tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
break;
case 3:
if (TCG_TARGET_REG_BITS == 64) {
@@ -1347,17 +1372,18 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
tcg_out_bswap64(s, scratch);
datalo = scratch;
}
- tcg_out_st(s, TCG_TYPE_I64, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_REXW + seg,
+ datalo, base, ofs);
} else if (bswap) {
tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
tcg_out_bswap32(s, scratch);
- tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
tcg_out_bswap32(s, scratch);
- tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs + 4);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
} else {
- tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
- tcg_out_st(s, TCG_TYPE_I32, datahi, base, ofs + 4);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datahi, base, ofs+4);
}
break;
default:
@@ -1391,7 +1417,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
label_ptr, offsetof(CPUTLBEntry, addr_write));
/* TLB Hit. */
- tcg_out_qemu_st_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, opc);
+ tcg_out_qemu_st_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, 0, opc);
/* jmp label2 */
tcg_out8(s, OPC_JMP_short);
@@ -1451,23 +1477,19 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
{
int32_t offset = GUEST_BASE;
int base = args[addrlo_idx];
-
- if (TCG_TARGET_REG_BITS == 64) {
- /* ??? We assume all operations have left us with register
- contents that are zero extended. So far this appears to
- be true. If we want to enforce this, we can either do
- an explicit zero-extension here, or (if GUEST_BASE == 0)
- use the ADDR32 prefix. For now, do nothing. */
-
- if (offset != GUEST_BASE) {
- tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE);
- tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base);
- base = TCG_REG_L0;
- offset = 0;
- }
+ int seg = 0;
+
+ if (GUEST_BASE && guest_base_flags) {
+ seg = guest_base_flags;
+ offset = 0;
+ } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE);
+ tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base);
+ base = TCG_REG_L0;
+ offset = 0;
}
- tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, opc);
+ tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, seg, opc);
}
#endif
}
@@ -2061,6 +2083,13 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tcg_out_pop(s, tcg_target_callee_save_regs[i]);
}
tcg_out_opc(s, OPC_RET, 0, 0, 0);
+
+#if !defined(CONFIG_SOFTMMU)
+ /* Try to set up a segment register to point to GUEST_BASE. */
+ if (GUEST_BASE) {
+ setup_guest_base_seg();
+ }
+#endif
}
static void tcg_target_init(TCGContext *s)
--
1.7.11.7
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [Qemu-devel] [PATCH v2] tcg-i386: Use %gs prefixes for x86_64 GUEST_BASE
2012-10-18 3:36 ` [Qemu-devel] [PATCH v2] " Richard Henderson
@ 2012-10-21 4:26 ` Aurelien Jarno
2012-10-21 6:24 ` Richard Henderson
2012-10-21 20:43 ` Richard Henderson
0 siblings, 2 replies; 8+ messages in thread
From: Aurelien Jarno @ 2012-10-21 4:26 UTC (permalink / raw)
To: Richard Henderson; +Cc: qemu-devel
On Thu, Oct 18, 2012 at 01:36:05PM +1000, Richard Henderson wrote:
> When we allocate a reserved_va for the guest, the kernel will likely
> choose an address well above 4G. At which point we must use a pair
> of movabsq+addq to form the host address. If we have OS support,
> set up a segment register to point to guest_base instead.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
> tcg/i386/tcg-target.c | 141 ++++++++++++++++++++++++++++++--------------------
> 1 file changed, 85 insertions(+), 56 deletions(-)
>
> Dang it, left some duplicate includes in there...
>
>
> r~
>
>
> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
> index 4952c05..0d8855a 100644
> --- a/tcg/i386/tcg-target.c
> +++ b/tcg/i386/tcg-target.c
> @@ -236,11 +236,13 @@ static inline int tcg_target_const_match(tcg_target_long val,
> # define P_REXW 0x800 /* Set REX.W = 1 */
> # define P_REXB_R 0x1000 /* REG field as byte register */
> # define P_REXB_RM 0x2000 /* R/M field as byte register */
> +# define P_GS 0x4000 /* gs segment override */
> #else
> # define P_ADDR32 0
> # define P_REXW 0
> # define P_REXB_R 0
> # define P_REXB_RM 0
> +# define P_GS 0
> #endif
>
> #define OPC_ARITH_EvIz (0x81)
> @@ -356,6 +358,9 @@ static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
> {
> int rex;
>
> + if (opc & P_GS) {
> + tcg_out8(s, 0x65);
> + }
> if (opc & P_DATA16) {
> /* We should never be asking for both 16 and 64-bit operation. */
> assert((opc & P_REXW) == 0);
> @@ -1080,10 +1085,25 @@ static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
> tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r0, r1,
> offsetof(CPUTLBEntry, addend) - which);
> }
> -#endif
> +#elif defined(__x86_64__) && defined(__linux__)
> +# include <sys/syscall.h>
> +# include <asm/prctl.h>
> +
> +static int guest_base_flags;
> +static inline void setup_guest_base_seg(void)
> +{
> + if (syscall(__NR_arch_prctl, ARCH_SET_GS, GUEST_BASE) == 0) {
> + guest_base_flags = P_GS;
> + }
Why call the syscall directly instead of using arch_prctl(2)?
> +}
> +#else
> +# define guest_base_flags 0
> +static inline void setup_guest_base_seg(void) { }
> +#endif /* SOFTMMU */
>
> static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
> - int base, tcg_target_long ofs, int sizeop)
> + int base, tcg_target_long ofs, int seg,
> + int sizeop)
> {
> #ifdef TARGET_WORDS_BIGENDIAN
> const int bswap = 1;
> @@ -1092,28 +1112,29 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
> #endif
> switch (sizeop) {
> case 0:
> - tcg_out_modrm_offset(s, OPC_MOVZBL, datalo, base, ofs);
> + tcg_out_modrm_offset(s, OPC_MOVZBL + seg, datalo, base, ofs);
> break;
> case 0 | 4:
> - tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW, datalo, base, ofs);
> + tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW + seg, datalo, base, ofs);
> break;
> case 1:
> - tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
> + tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
> if (bswap) {
> tcg_out_rolw_8(s, datalo);
> }
> break;
> case 1 | 4:
> if (bswap) {
> - tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
> + tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
> tcg_out_rolw_8(s, datalo);
> tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
> } else {
> - tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW, datalo, base, ofs);
> + tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW + seg,
> + datalo, base, ofs);
> }
> break;
> case 2:
> - tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
> + tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
> if (bswap) {
> tcg_out_bswap32(s, datalo);
> }
> @@ -1121,17 +1142,18 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
> #if TCG_TARGET_REG_BITS == 64
> case 2 | 4:
> if (bswap) {
> - tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
> + tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
> tcg_out_bswap32(s, datalo);
> tcg_out_ext32s(s, datalo, datalo);
> } else {
> - tcg_out_modrm_offset(s, OPC_MOVSLQ, datalo, base, ofs);
> + tcg_out_modrm_offset(s, OPC_MOVSLQ + seg, datalo, base, ofs);
> }
> break;
> #endif
> case 3:
> if (TCG_TARGET_REG_BITS == 64) {
> - tcg_out_ld(s, TCG_TYPE_I64, datalo, base, ofs);
> + tcg_out_modrm_offset(s, OPC_MOVL_GvEv + P_REXW + seg,
> + datalo, base, ofs);
> if (bswap) {
> tcg_out_bswap64(s, datalo);
> }
> @@ -1142,11 +1164,15 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
> datahi = t;
> }
> if (base != datalo) {
> - tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
> - tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
> + tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
> + datalo, base, ofs);
> + tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
> + datahi, base, ofs + 4);
> } else {
> - tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
> - tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
> + tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
> + datahi, base, ofs + 4);
> + tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
> + datalo, base, ofs);
> }
> if (bswap) {
> tcg_out_bswap32(s, datalo);
> @@ -1192,7 +1218,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
> label_ptr, offsetof(CPUTLBEntry, addr_read));
>
> /* TLB Hit. */
> - tcg_out_qemu_ld_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, opc);
> + tcg_out_qemu_ld_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, 0, opc);
>
> /* jmp label2 */
> tcg_out8(s, OPC_JMP_short);
> @@ -1285,29 +1311,26 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
> {
> int32_t offset = GUEST_BASE;
> int base = args[addrlo_idx];
> -
> - if (TCG_TARGET_REG_BITS == 64) {
> - /* ??? We assume all operations have left us with register
> - contents that are zero extended. So far this appears to
> - be true. If we want to enforce this, we can either do
> - an explicit zero-extension here, or (if GUEST_BASE == 0)
> - use the ADDR32 prefix. For now, do nothing. */
> -
AFAIU this comment is still valid when %gs is not, or cannot be, used. Why
remove it?
> - if (offset != GUEST_BASE) {
> - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE);
> - tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base);
> - base = TCG_REG_L0;
> - offset = 0;
> - }
> + int seg = 0;
> +
> + if (GUEST_BASE && guest_base_flags) {
> + seg = guest_base_flags;
> + offset = 0;
> + } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
> + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE);
> + tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base);
> + base = TCG_REG_L0;
> + offset = 0;
> }
>
> - tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, opc);
> + tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, seg, opc);
> }
> #endif
> }
>
> static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
> - int base, tcg_target_long ofs, int sizeop)
> + int base, tcg_target_long ofs, int seg,
> + int sizeop)
> {
> #ifdef TARGET_WORDS_BIGENDIAN
> const int bswap = 1;
> @@ -1322,7 +1345,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
>
> switch (sizeop) {
> case 0:
> - tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R, datalo, base, ofs);
> + tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
> + datalo, base, ofs);
> break;
> case 1:
> if (bswap) {
> @@ -1330,7 +1354,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
> tcg_out_rolw_8(s, scratch);
> datalo = scratch;
> }
> - tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16, datalo, base, ofs);
> + tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16 + seg,
> + datalo, base, ofs);
> break;
> case 2:
> if (bswap) {
> @@ -1338,7 +1363,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
> tcg_out_bswap32(s, scratch);
> datalo = scratch;
> }
> - tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
> + tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
> break;
> case 3:
> if (TCG_TARGET_REG_BITS == 64) {
> @@ -1347,17 +1372,18 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
> tcg_out_bswap64(s, scratch);
> datalo = scratch;
> }
> - tcg_out_st(s, TCG_TYPE_I64, datalo, base, ofs);
> + tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_REXW + seg,
> + datalo, base, ofs);
> } else if (bswap) {
> tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
> tcg_out_bswap32(s, scratch);
> - tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs);
> + tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
> tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
> tcg_out_bswap32(s, scratch);
> - tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs + 4);
> + tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
> } else {
> - tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
> - tcg_out_st(s, TCG_TYPE_I32, datahi, base, ofs + 4);
> + tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
> + tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datahi, base, ofs+4);
> }
> break;
> default:
> @@ -1391,7 +1417,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
> label_ptr, offsetof(CPUTLBEntry, addr_write));
>
> /* TLB Hit. */
> - tcg_out_qemu_st_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, opc);
> + tcg_out_qemu_st_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, 0, opc);
>
> /* jmp label2 */
> tcg_out8(s, OPC_JMP_short);
> @@ -1451,23 +1477,19 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
> {
> int32_t offset = GUEST_BASE;
> int base = args[addrlo_idx];
> -
> - if (TCG_TARGET_REG_BITS == 64) {
> - /* ??? We assume all operations have left us with register
> - contents that are zero extended. So far this appears to
> - be true. If we want to enforce this, we can either do
> - an explicit zero-extension here, or (if GUEST_BASE == 0)
> - use the ADDR32 prefix. For now, do nothing. */
> -
Same here.
> - if (offset != GUEST_BASE) {
> - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE);
> - tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base);
> - base = TCG_REG_L0;
> - offset = 0;
> - }
> + int seg = 0;
> +
> + if (GUEST_BASE && guest_base_flags) {
> + seg = guest_base_flags;
> + offset = 0;
> + } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
> + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE);
> + tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base);
> + base = TCG_REG_L0;
> + offset = 0;
> }
>
> - tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, opc);
> + tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, seg, opc);
> }
> #endif
> }
> @@ -2061,6 +2083,13 @@ static void tcg_target_qemu_prologue(TCGContext *s)
> tcg_out_pop(s, tcg_target_callee_save_regs[i]);
> }
> tcg_out_opc(s, OPC_RET, 0, 0, 0);
> +
> +#if !defined(CONFIG_SOFTMMU)
> + /* Try to set up a segment register to point to GUEST_BASE. */
> + if (GUEST_BASE) {
> + setup_guest_base_seg();
> + }
> +#endif
> }
>
> static void tcg_target_init(TCGContext *s)
Otherwise looks fine, though I haven't done any benchmark.
--
Aurelien Jarno GPG: 1024D/F1BCDB73
aurelien@aurel32.net http://www.aurel32.net
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [Qemu-devel] [PATCH v2] tcg-i386: Use %gs prefixes for x86_64 GUEST_BASE
2012-10-21 4:26 ` Aurelien Jarno
@ 2012-10-21 6:24 ` Richard Henderson
2012-10-21 20:43 ` Richard Henderson
1 sibling, 0 replies; 8+ messages in thread
From: Richard Henderson @ 2012-10-21 6:24 UTC (permalink / raw)
To: Aurelien Jarno; +Cc: qemu-devel
On 2012-10-21 14:26, Aurelien Jarno wrote:
>> > - if (TCG_TARGET_REG_BITS == 64) {
>> > - /* ??? We assume all operations have left us with register
>> > - contents that are zero extended. So far this appears to
>> > - be true. If we want to enforce this, we can either do
>> > - an explicit zero-extension here, or (if GUEST_BASE == 0)
>> > - use the ADDR32 prefix. For now, do nothing. */
>> > -
> AFAIU this comment is still valid when %gs is/can not be used. Why
> removing it?
>
Merge error. I guess I should retain it.
r~
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [Qemu-devel] [PATCH v2] tcg-i386: Use %gs prefixes for x86_64 GUEST_BASE
2012-10-21 4:26 ` Aurelien Jarno
2012-10-21 6:24 ` Richard Henderson
@ 2012-10-21 20:43 ` Richard Henderson
1 sibling, 0 replies; 8+ messages in thread
From: Richard Henderson @ 2012-10-21 20:43 UTC (permalink / raw)
To: Aurelien Jarno; +Cc: qemu-devel
On 2012-10-21 14:26, Aurelien Jarno wrote:
>> > +static inline void setup_guest_base_seg(void)
>> > +{
>> > + if (syscall(__NR_arch_prctl, ARCH_SET_GS, GUEST_BASE) == 0) {
>> > + guest_base_flags = P_GS;
>> > + }
> Why calling the syscall directly instead of using arch_prctl(2)?
>
Err... there is no such function in libc. Ah, I see there is a prctl
declaration and a <sys/prctl.h> now. I can definitely update that.
r~
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2012-10-29 14:34 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-10-22 2:11 [Qemu-devel] [PATCH v2] tcg-i386: Use %gs prefixes for x86_64 GUEST_BASE Richard Henderson
2012-10-22 5:59 ` Aurelien Jarno
2012-10-22 21:19 ` Richard Henderson
2012-10-29 14:34 ` Aurelien Jarno
-- strict thread matches above, loose matches on Subject: below --
2012-10-18 3:28 [Qemu-devel] [PATCH] " Richard Henderson
2012-10-18 3:36 ` [Qemu-devel] [PATCH v2] " Richard Henderson
2012-10-21 4:26 ` Aurelien Jarno
2012-10-21 6:24 ` Richard Henderson
2012-10-21 20:43 ` Richard Henderson
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).