qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH for-2.4 0/2] tcg/i386 address zero-extension
@ 2015-07-16 21:25 Richard Henderson
  2015-07-16 21:25 ` [Qemu-devel] [PATCH 1/2] tcg/i386: Extend addresses for 32-bit guests Richard Henderson
  2015-07-16 21:25 ` [Qemu-devel] [PATCH 2/2] tcg/i386: Reserve register for guest_base if a segment isn't available Richard Henderson
  0 siblings, 2 replies; 9+ messages in thread
From: Richard Henderson @ 2015-07-16 21:25 UTC (permalink / raw)
  To: qemu-devel; +Cc: leon.alrae, aurelien

This is an alternative to the patch that Aurelien posted yesterday.


r~


Richard Henderson (2):
  tcg/i386: Extend addresses for 32-bit guests
  tcg/i386: Reserve register for guest_base if a segment isn't available

 tcg/i386/tcg-target.c | 163 ++++++++++++++++++++++++++++----------------------
 1 file changed, 90 insertions(+), 73 deletions(-)

-- 
2.4.3

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Qemu-devel] [PATCH 1/2] tcg/i386: Extend addresses for 32-bit guests
  2015-07-16 21:25 [Qemu-devel] [PATCH for-2.4 0/2] tcg/i386 address zero-extension Richard Henderson
@ 2015-07-16 21:25 ` Richard Henderson
  2015-07-17 10:18   ` Aurelien Jarno
  2015-07-16 21:25 ` [Qemu-devel] [PATCH 2/2] tcg/i386: Reserve register for guest_base if a segment isn't available Richard Henderson
  1 sibling, 1 reply; 9+ messages in thread
From: Richard Henderson @ 2015-07-16 21:25 UTC (permalink / raw)
  To: qemu-devel; +Cc: leon.alrae, aurelien

Removing the ??? comment explaining why it (mostly) worked.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/i386/tcg-target.c | 105 +++++++++++++++++++++++++++++++-------------------
 1 file changed, 65 insertions(+), 40 deletions(-)

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index ff4d9cf..bbe2963 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -1434,8 +1434,8 @@ static inline void setup_guest_base_seg(void) { }
 #endif /* SOFTMMU */
 
 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
-                                   TCGReg base, intptr_t ofs, int seg,
-                                   TCGMemOp memop)
+                                   TCGReg base, int index, intptr_t ofs,
+                                   int seg, TCGMemOp memop)
 {
     const TCGMemOp real_bswap = memop & MO_BSWAP;
     TCGMemOp bswap = real_bswap;
@@ -1448,13 +1448,16 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
 
     switch (memop & MO_SSIZE) {
     case MO_UB:
-        tcg_out_modrm_offset(s, OPC_MOVZBL + seg, datalo, base, ofs);
+        tcg_out_modrm_sib_offset(s, OPC_MOVZBL + seg, datalo,
+                                 base, index, 0, ofs);
         break;
     case MO_SB:
-        tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW + seg, datalo, base, ofs);
+        tcg_out_modrm_sib_offset(s, OPC_MOVSBL + P_REXW + seg, datalo,
+                                 base, index, 0, ofs);
         break;
     case MO_UW:
-        tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
+        tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
+                                 base, index, 0, ofs);
         if (real_bswap) {
             tcg_out_rolw_8(s, datalo);
         }
@@ -1462,20 +1465,21 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
     case MO_SW:
         if (real_bswap) {
             if (have_movbe) {
-                tcg_out_modrm_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
-                                     datalo, base, ofs);
+                tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
+                                         datalo, base, index, 0, ofs);
             } else {
-                tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
+                tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
+                                         base, index, 0, ofs);
                 tcg_out_rolw_8(s, datalo);
             }
             tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
         } else {
-            tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW + seg,
-                                 datalo, base, ofs);
+            tcg_out_modrm_sib_offset(s, OPC_MOVSWL + P_REXW + seg,
+                                     datalo, base, index, 0, ofs);
         }
         break;
     case MO_UL:
-        tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
+        tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs);
         if (bswap) {
             tcg_out_bswap32(s, datalo);
         }
@@ -1483,19 +1487,22 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
 #if TCG_TARGET_REG_BITS == 64
     case MO_SL:
         if (real_bswap) {
-            tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
+            tcg_out_modrm_sib_offset(s, movop + seg, datalo,
+                                     base, index, 0, ofs);
             if (bswap) {
                 tcg_out_bswap32(s, datalo);
             }
             tcg_out_ext32s(s, datalo, datalo);
         } else {
-            tcg_out_modrm_offset(s, OPC_MOVSLQ + seg, datalo, base, ofs);
+            tcg_out_modrm_sib_offset(s, OPC_MOVSLQ + seg, datalo,
+                                     base, index, 0, ofs);
         }
         break;
 #endif
     case MO_Q:
         if (TCG_TARGET_REG_BITS == 64) {
-            tcg_out_modrm_offset(s, movop + P_REXW + seg, datalo, base, ofs);
+            tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo,
+                                     base, index, 0, ofs);
             if (bswap) {
                 tcg_out_bswap64(s, datalo);
             }
@@ -1506,11 +1513,15 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                 datahi = t;
             }
             if (base != datalo) {
-                tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
-                tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs + 4);
+                tcg_out_modrm_sib_offset(s, movop + seg, datalo,
+                                         base, index, 0, ofs);
+                tcg_out_modrm_sib_offset(s, movop + seg, datahi,
+                                         base, index, 0, ofs + 4);
             } else {
-                tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs + 4);
-                tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
+                tcg_out_modrm_sib_offset(s, movop + seg, datahi,
+                                         base, index, 0, ofs + 4);
+                tcg_out_modrm_sib_offset(s, movop + seg, datalo,
+                                         base, index, 0, ofs);
             }
             if (bswap) {
                 tcg_out_bswap32(s, datalo);
@@ -1553,7 +1564,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
                      label_ptr, offsetof(CPUTLBEntry, addr_read));
 
     /* TLB Hit.  */
-    tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
+    tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, opc);
 
     /* Record the current context of a load into ldst label */
     add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
@@ -1562,24 +1573,29 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
     {
         int32_t offset = GUEST_BASE;
         TCGReg base = addrlo;
+        int index = -1;
         int seg = 0;
 
-        /* ??? We assume all operations have left us with register contents
-           that are zero extended.  So far this appears to be true.  If we
-           want to enforce this, we can either do an explicit zero-extension
-           here, or (if GUEST_BASE == 0, or a segment register is in use)
-           use the ADDR32 prefix.  For now, do nothing.  */
         if (GUEST_BASE && guest_base_flags) {
             seg = guest_base_flags;
             offset = 0;
-        } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
-            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
-            tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
-            base = TCG_REG_L1;
-            offset = 0;
+            if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) {
+                seg |= P_ADDR32;
+            }
+        } else if (TCG_TARGET_REG_BITS == 64) {
+            if (TARGET_LONG_BITS == 32) {
+                tcg_out_ext32u(s, TCG_REG_L0, base);
+                base = TCG_REG_L0;
+            }
+            if (offset != GUEST_BASE) {
+                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
+                index = TCG_REG_L1;
+                offset = 0;
+            }
         }
 
-        tcg_out_qemu_ld_direct(s, datalo, datahi, base, offset, seg, opc);
+        tcg_out_qemu_ld_direct(s, datalo, datahi,
+                               base, index, offset, seg, opc);
     }
 #endif
 }
@@ -1697,19 +1713,28 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
         TCGReg base = addrlo;
         int seg = 0;
 
-        /* ??? We assume all operations have left us with register contents
-           that are zero extended.  So far this appears to be true.  If we
-           want to enforce this, we can either do an explicit zero-extension
-           here, or (if GUEST_BASE == 0, or a segment register is in use)
-           use the ADDR32 prefix.  For now, do nothing.  */
         if (GUEST_BASE && guest_base_flags) {
             seg = guest_base_flags;
             offset = 0;
-        } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
-            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
-            tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
-            base = TCG_REG_L1;
-            offset = 0;
+            if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) {
+                seg |= P_ADDR32;
+            }
+        } else if (TCG_TARGET_REG_BITS == 64) {
+            /* ??? Note that we can't use the same SIB addressing scheme
+               as for loads, since we require L0 free for bswap.  */
+            if (offset != GUEST_BASE) {
+                if (TARGET_LONG_BITS == 32) {
+                    tcg_out_ext32u(s, TCG_REG_L0, base);
+                    base = TCG_REG_L0;
+                }
+                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
+                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
+                base = TCG_REG_L1;
+                offset = 0;
+            } else if (TARGET_LONG_BITS == 32) {
+                tcg_out_ext32u(s, TCG_REG_L1, base);
+                base = TCG_REG_L1;
+            }
         }
 
         tcg_out_qemu_st_direct(s, datalo, datahi, base, offset, seg, opc);
-- 
2.4.3

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [Qemu-devel] [PATCH 2/2] tcg/i386: Reserve register for guest_base if a segment isn't available
  2015-07-16 21:25 [Qemu-devel] [PATCH for-2.4 0/2] tcg/i386 address zero-extension Richard Henderson
  2015-07-16 21:25 ` [Qemu-devel] [PATCH 1/2] tcg/i386: Extend addresses for 32-bit guests Richard Henderson
@ 2015-07-16 21:25 ` Richard Henderson
  2015-07-17  1:33   ` Peter Maydell
  2015-07-17 10:18   ` Aurelien Jarno
  1 sibling, 2 replies; 9+ messages in thread
From: Richard Henderson @ 2015-07-16 21:25 UTC (permalink / raw)
  To: qemu-devel; +Cc: leon.alrae, aurelien

This saves 2 insns and 10 bytes from the implementation of
each memory operation.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/i386/tcg-target.c | 120 +++++++++++++++++++++++---------------------------
 1 file changed, 56 insertions(+), 64 deletions(-)

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index bbe2963..beffbbe 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -1421,16 +1421,25 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
 
 int arch_prctl(int code, unsigned long addr);
 
+static int32_t guest_base_ofs;
 static int guest_base_flags;
-static inline void setup_guest_base_seg(void)
+static int guest_base_reg = -1;
+static inline void setup_guest_base(TCGContext *s)
 {
     if (arch_prctl(ARCH_SET_GS, GUEST_BASE) == 0) {
         guest_base_flags = P_GS;
+    } else if (GUEST_BASE == (int32_t)GUEST_BASE) {
+        guest_base_ofs = GUEST_BASE;
+    } else {
+        guest_base_reg = TCG_REG_EBP;
+        tcg_regset_set_reg(s->reserved_regs, guest_base_reg);
+        tcg_out_movi(s, TCG_TYPE_PTR, guest_base_reg, GUEST_BASE);
     }
 }
 #else
-# define guest_base_flags 0
-static inline void setup_guest_base_seg(void) { }
+# define guest_base_flags  0
+# define guest_base_reg    -1
+# define guest_base_ofs    GUEST_BASE
 #endif /* SOFTMMU */
 
 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
@@ -1571,38 +1580,28 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
                         s->code_ptr, label_ptr);
 #else
     {
-        int32_t offset = GUEST_BASE;
         TCGReg base = addrlo;
-        int index = -1;
-        int seg = 0;
+        int flags = 0;
 
-        if (GUEST_BASE && guest_base_flags) {
-            seg = guest_base_flags;
-            offset = 0;
+        if (GUEST_BASE == 0 || guest_base_flags) {
+            flags = guest_base_flags;
             if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) {
-                seg |= P_ADDR32;
-            }
-        } else if (TCG_TARGET_REG_BITS == 64) {
-            if (TARGET_LONG_BITS == 32) {
-                tcg_out_ext32u(s, TCG_REG_L0, base);
-                base = TCG_REG_L0;
-            }
-            if (offset != GUEST_BASE) {
-                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
-                index = TCG_REG_L1;
-                offset = 0;
+                flags |= P_ADDR32;
             }
+        } else if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) {
+            tcg_out_ext32u(s, TCG_REG_L1, base);
+            base = TCG_REG_L1;
         }
 
-        tcg_out_qemu_ld_direct(s, datalo, datahi,
-                               base, index, offset, seg, opc);
+        tcg_out_qemu_ld_direct(s, datalo, datahi, base, guest_base_reg,
+                               guest_base_ofs, flags, opc);
     }
 #endif
 }
 
 static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
-                                   TCGReg base, intptr_t ofs, int seg,
-                                   TCGMemOp memop)
+                                   TCGReg base, int index, intptr_t ofs,
+                                   int seg, TCGMemOp memop)
 {
     /* ??? Ideally we wouldn't need a scratch register.  For user-only,
        we could perform the bswap twice to restore the original value
@@ -1626,8 +1625,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
             tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
             datalo = scratch;
         }
-        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
-                             datalo, base, ofs);
+        tcg_out_modrm_sib_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg, datalo,
+                                 base, index, 0, ofs);
         break;
     case MO_16:
         if (bswap) {
@@ -1635,7 +1634,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
             tcg_out_rolw_8(s, scratch);
             datalo = scratch;
         }
-        tcg_out_modrm_offset(s, movop + P_DATA16 + seg, datalo, base, ofs);
+        tcg_out_modrm_sib_offset(s, movop + P_DATA16 + seg, datalo,
+                                 base, index, 0, ofs);
         break;
     case MO_32:
         if (bswap) {
@@ -1643,7 +1643,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
             tcg_out_bswap32(s, scratch);
             datalo = scratch;
         }
-        tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
+        tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs);
         break;
     case MO_64:
         if (TCG_TARGET_REG_BITS == 64) {
@@ -1652,22 +1652,27 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                 tcg_out_bswap64(s, scratch);
                 datalo = scratch;
             }
-            tcg_out_modrm_offset(s, movop + P_REXW + seg, datalo, base, ofs);
+            tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo,
+                                     base, index, 0, ofs);
         } else if (bswap) {
             tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
             tcg_out_bswap32(s, scratch);
-            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
+            tcg_out_modrm_sib_offset(s, OPC_MOVL_EvGv + seg, scratch,
+                                     base, index, 0, ofs);
             tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
             tcg_out_bswap32(s, scratch);
-            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
+            tcg_out_modrm_sib_offset(s, OPC_MOVL_EvGv + seg, scratch,
+                                     base, index, 0, ofs+4);
         } else {
             if (real_bswap) {
                 int t = datalo;
                 datalo = datahi;
                 datahi = t;
             }
-            tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
-            tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs+4);
+            tcg_out_modrm_sib_offset(s, movop + seg, datalo,
+                                     base, index, 0, ofs);
+            tcg_out_modrm_sib_offset(s, movop + seg, datahi,
+                                     base, index, 0, ofs+4);
         }
         break;
     default:
@@ -1702,42 +1707,28 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
                      label_ptr, offsetof(CPUTLBEntry, addr_write));
 
     /* TLB Hit.  */
-    tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
+    tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, opc);
 
     /* Record the current context of a store into ldst label */
     add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
                         s->code_ptr, label_ptr);
 #else
     {
-        int32_t offset = GUEST_BASE;
         TCGReg base = addrlo;
-        int seg = 0;
+        int flags = 0;
 
-        if (GUEST_BASE && guest_base_flags) {
-            seg = guest_base_flags;
-            offset = 0;
+        if (GUEST_BASE == 0 || guest_base_flags) {
+            flags = guest_base_flags;
             if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) {
-                seg |= P_ADDR32;
-            }
-        } else if (TCG_TARGET_REG_BITS == 64) {
-            /* ??? Note that we can't use the same SIB addressing scheme
-               as for loads, since we require L0 free for bswap.  */
-            if (offset != GUEST_BASE) {
-                if (TARGET_LONG_BITS == 32) {
-                    tcg_out_ext32u(s, TCG_REG_L0, base);
-                    base = TCG_REG_L0;
-                }
-                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
-                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
-                base = TCG_REG_L1;
-                offset = 0;
-            } else if (TARGET_LONG_BITS == 32) {
-                tcg_out_ext32u(s, TCG_REG_L1, base);
-                base = TCG_REG_L1;
+                flags |= P_ADDR32;
             }
+        } else if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) {
+            tcg_out_ext32u(s, TCG_REG_L1, base);
+            base = TCG_REG_L1;
         }
 
-        tcg_out_qemu_st_direct(s, datalo, datahi, base, offset, seg, opc);
+        tcg_out_qemu_st_direct(s, datalo, datahi, base, guest_base_reg,
+                               guest_base_ofs, flags, opc);
     }
 #endif
 }
@@ -2286,6 +2277,14 @@ static void tcg_target_qemu_prologue(TCGContext *s)
 #else
     tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
     tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
+
+# if !defined(CONFIG_SOFTMMU)
+    /* Try to set up a segment register to point to GUEST_BASE.  */
+    if (GUEST_BASE) {
+        setup_guest_base(s);
+    }
+# endif
+
     /* jmp *tb.  */
     tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
 #endif
@@ -2299,13 +2298,6 @@ static void tcg_target_qemu_prologue(TCGContext *s)
         tcg_out_pop(s, tcg_target_callee_save_regs[i]);
     }
     tcg_out_opc(s, OPC_RET, 0, 0, 0);
-
-#if !defined(CONFIG_SOFTMMU)
-    /* Try to set up a segment register to point to GUEST_BASE.  */
-    if (GUEST_BASE) {
-        setup_guest_base_seg();
-    }
-#endif
 }
 
 static void tcg_target_init(TCGContext *s)
-- 
2.4.3

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [Qemu-devel] [PATCH 2/2] tcg/i386: Reserve register for guest_base if a segment isn't available
  2015-07-16 21:25 ` [Qemu-devel] [PATCH 2/2] tcg/i386: Reserve register for guest_base if a segment isn't available Richard Henderson
@ 2015-07-17  1:33   ` Peter Maydell
  2015-07-17  6:02     ` Richard Henderson
  2015-07-17 15:30     ` Paolo Bonzini
  2015-07-17 10:18   ` Aurelien Jarno
  1 sibling, 2 replies; 9+ messages in thread
From: Peter Maydell @ 2015-07-17  1:33 UTC (permalink / raw)
  To: Richard Henderson; +Cc: Leon Alrae, QEMU Developers, Aurelien Jarno

On 16 July 2015 at 22:25, Richard Henderson <rth@twiddle.net> wrote:
> This saves 2 insns and 10 bytes from the implementation of
> each memory operation.

Do we have an idea of which platforms/configs don't let
us have a segment register for guest_base?

thanks
-- PMM

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Qemu-devel] [PATCH 2/2] tcg/i386: Reserve register for guest_base if a segment isn't available
  2015-07-17  1:33   ` Peter Maydell
@ 2015-07-17  6:02     ` Richard Henderson
  2015-07-17 15:30     ` Paolo Bonzini
  1 sibling, 0 replies; 9+ messages in thread
From: Richard Henderson @ 2015-07-17  6:02 UTC (permalink / raw)
  To: Peter Maydell; +Cc: Leon Alrae, QEMU Developers, Aurelien Jarno

On 07/17/2015 02:33 AM, Peter Maydell wrote:
> On 16 July 2015 at 22:25, Richard Henderson <rth@twiddle.net> wrote:
>> This saves 2 insns and 10 bytes from the implementation of
>> each memory operation.
>
> Do we have an idea of which platforms/configs don't let
> us have a segment register for guest_base?

We've only bothered to fill in code for Linux so far.

These days we should probably probe for fsgsbase insns and use those.  Though I 
don't know how many of the various operating systems enable that...


r~

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Qemu-devel] [PATCH 1/2] tcg/i386: Extend addresses for 32-bit guests
  2015-07-16 21:25 ` [Qemu-devel] [PATCH 1/2] tcg/i386: Extend addresses for 32-bit guests Richard Henderson
@ 2015-07-17 10:18   ` Aurelien Jarno
  0 siblings, 0 replies; 9+ messages in thread
From: Aurelien Jarno @ 2015-07-17 10:18 UTC (permalink / raw)
  To: Richard Henderson; +Cc: leon.alrae, qemu-devel

On 2015-07-16 22:25, Richard Henderson wrote:
> Removing the ??? comment explaining why it (mostly) worked.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/i386/tcg-target.c | 105 +++++++++++++++++++++++++++++++-------------------
>  1 file changed, 65 insertions(+), 40 deletions(-)
> 
> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
> index ff4d9cf..bbe2963 100644
> --- a/tcg/i386/tcg-target.c
> +++ b/tcg/i386/tcg-target.c
> @@ -1434,8 +1434,8 @@ static inline void setup_guest_base_seg(void) { }
>  #endif /* SOFTMMU */
>  
>  static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
> -                                   TCGReg base, intptr_t ofs, int seg,
> -                                   TCGMemOp memop)
> +                                   TCGReg base, int index, intptr_t ofs,
> +                                   int seg, TCGMemOp memop)
>  {
>      const TCGMemOp real_bswap = memop & MO_BSWAP;
>      TCGMemOp bswap = real_bswap;
> @@ -1448,13 +1448,16 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
>  
>      switch (memop & MO_SSIZE) {
>      case MO_UB:
> -        tcg_out_modrm_offset(s, OPC_MOVZBL + seg, datalo, base, ofs);
> +        tcg_out_modrm_sib_offset(s, OPC_MOVZBL + seg, datalo,
> +                                 base, index, 0, ofs);
>          break;
>      case MO_SB:
> -        tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW + seg, datalo, base, ofs);
> +        tcg_out_modrm_sib_offset(s, OPC_MOVSBL + P_REXW + seg, datalo,
> +                                 base, index, 0, ofs);
>          break;
>      case MO_UW:
> -        tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
> +        tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
> +                                 base, index, 0, ofs);
>          if (real_bswap) {
>              tcg_out_rolw_8(s, datalo);
>          }
> @@ -1462,20 +1465,21 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
>      case MO_SW:
>          if (real_bswap) {
>              if (have_movbe) {
> -                tcg_out_modrm_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
> -                                     datalo, base, ofs);
> +                tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
> +                                         datalo, base, index, 0, ofs);
>              } else {
> -                tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
> +                tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
> +                                         base, index, 0, ofs);
>                  tcg_out_rolw_8(s, datalo);
>              }
>              tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
>          } else {
> -            tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW + seg,
> -                                 datalo, base, ofs);
> +            tcg_out_modrm_sib_offset(s, OPC_MOVSWL + P_REXW + seg,
> +                                     datalo, base, index, 0, ofs);
>          }
>          break;
>      case MO_UL:
> -        tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
> +        tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs);
>          if (bswap) {
>              tcg_out_bswap32(s, datalo);
>          }
> @@ -1483,19 +1487,22 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
>  #if TCG_TARGET_REG_BITS == 64
>      case MO_SL:
>          if (real_bswap) {
> -            tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
> +            tcg_out_modrm_sib_offset(s, movop + seg, datalo,
> +                                     base, index, 0, ofs);
>              if (bswap) {
>                  tcg_out_bswap32(s, datalo);
>              }
>              tcg_out_ext32s(s, datalo, datalo);
>          } else {
> -            tcg_out_modrm_offset(s, OPC_MOVSLQ + seg, datalo, base, ofs);
> +            tcg_out_modrm_sib_offset(s, OPC_MOVSLQ + seg, datalo,
> +                                     base, index, 0, ofs);
>          }
>          break;
>  #endif
>      case MO_Q:
>          if (TCG_TARGET_REG_BITS == 64) {
> -            tcg_out_modrm_offset(s, movop + P_REXW + seg, datalo, base, ofs);
> +            tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo,
> +                                     base, index, 0, ofs);
>              if (bswap) {
>                  tcg_out_bswap64(s, datalo);
>              }
> @@ -1506,11 +1513,15 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
>                  datahi = t;
>              }
>              if (base != datalo) {
> -                tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
> -                tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs + 4);
> +                tcg_out_modrm_sib_offset(s, movop + seg, datalo,
> +                                         base, index, 0, ofs);
> +                tcg_out_modrm_sib_offset(s, movop + seg, datahi,
> +                                         base, index, 0, ofs + 4);
>              } else {
> -                tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs + 4);
> -                tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
> +                tcg_out_modrm_sib_offset(s, movop + seg, datahi,
> +                                         base, index, 0, ofs + 4);
> +                tcg_out_modrm_sib_offset(s, movop + seg, datalo,
> +                                         base, index, 0, ofs);
>              }
>              if (bswap) {
>                  tcg_out_bswap32(s, datalo);
> @@ -1553,7 +1564,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
>                       label_ptr, offsetof(CPUTLBEntry, addr_read));
>  
>      /* TLB Hit.  */
> -    tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
> +    tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, opc);
>  
>      /* Record the current context of a load into ldst label */
>      add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
> @@ -1562,24 +1573,29 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
>      {
>          int32_t offset = GUEST_BASE;
>          TCGReg base = addrlo;
> +        int index = -1;
>          int seg = 0;
>  
> -        /* ??? We assume all operations have left us with register contents
> -           that are zero extended.  So far this appears to be true.  If we
> -           want to enforce this, we can either do an explicit zero-extension
> -           here, or (if GUEST_BASE == 0, or a segment register is in use)
> -           use the ADDR32 prefix.  For now, do nothing.  */

I think we should keep a comment about why we need to use addr32 or
zero-extend the value.

>          if (GUEST_BASE && guest_base_flags) {
>              seg = guest_base_flags;
>              offset = 0;
> -        } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
> -            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
> -            tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
> -            base = TCG_REG_L1;
> -            offset = 0;
> +            if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) {

You can write that as (TCG_TARGET_REG_BITS > TARGET_LONG_BITS)

> +                seg |= P_ADDR32;
> +            }
> +        } else if (TCG_TARGET_REG_BITS == 64) {
> +            if (TARGET_LONG_BITS == 32) {
> +                tcg_out_ext32u(s, TCG_REG_L0, base);
> +                base = TCG_REG_L0;
> +            }
> +            if (offset != GUEST_BASE) {
> +                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
> +                index = TCG_REG_L1;
> +                offset = 0;
> +            }
>          }
>  
> -        tcg_out_qemu_ld_direct(s, datalo, datahi, base, offset, seg, opc);
> +        tcg_out_qemu_ld_direct(s, datalo, datahi,
> +                               base, index, offset, seg, opc);
>      }
>  #endif
>  }
> @@ -1697,19 +1713,28 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
>          TCGReg base = addrlo;
>          int seg = 0;
>  
> -        /* ??? We assume all operations have left us with register contents
> -           that are zero extended.  So far this appears to be true.  If we
> -           want to enforce this, we can either do an explicit zero-extension
> -           here, or (if GUEST_BASE == 0, or a segment register is in use)
> -           use the ADDR32 prefix.  For now, do nothing.  */
>          if (GUEST_BASE && guest_base_flags) {
>              seg = guest_base_flags;
>              offset = 0;
> -        } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
> -            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
> -            tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
> -            base = TCG_REG_L1;
> -            offset = 0;
> +            if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) {
> +                seg |= P_ADDR32;
> +            }
> +        } else if (TCG_TARGET_REG_BITS == 64) {
> +            /* ??? Note that we can't use the same SIB addressing scheme
> +               as for loads, since we require L0 free for bswap.  */
> +            if (offset != GUEST_BASE) {
> +                if (TARGET_LONG_BITS == 32) {
> +                    tcg_out_ext32u(s, TCG_REG_L0, base);
> +                    base = TCG_REG_L0;
> +                }
> +                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
> +                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
> +                base = TCG_REG_L1;
> +                offset = 0;
> +            } else if (TARGET_LONG_BITS == 32) {
> +                tcg_out_ext32u(s, TCG_REG_L1, base);
> +                base = TCG_REG_L1;
> +            }
>          }
>  
>          tcg_out_qemu_st_direct(s, datalo, datahi, base, offset, seg, opc);

Besides the small nitpicks above:

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>

-- 
Aurelien Jarno                          GPG: 4096R/1DDD8C9B
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Qemu-devel] [PATCH 2/2] tcg/i386: Reserve register for guest_base if a segment isn't available
  2015-07-16 21:25 ` [Qemu-devel] [PATCH 2/2] tcg/i386: Reserve register for guest_base if a segment isn't available Richard Henderson
  2015-07-17  1:33   ` Peter Maydell
@ 2015-07-17 10:18   ` Aurelien Jarno
  1 sibling, 0 replies; 9+ messages in thread
From: Aurelien Jarno @ 2015-07-17 10:18 UTC (permalink / raw)
  To: Richard Henderson; +Cc: leon.alrae, qemu-devel

On 2015-07-16 22:25, Richard Henderson wrote:
> This saves 2 insns and 10 bytes from the implementation of
> each memory operation.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/i386/tcg-target.c | 120 +++++++++++++++++++++++---------------------------
>  1 file changed, 56 insertions(+), 64 deletions(-)
> 

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>

That said I do wonder if it is 2.4 material or if it should be deferred
to 2.5.

-- 
Aurelien Jarno                          GPG: 4096R/1DDD8C9B
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Qemu-devel] [PATCH 2/2] tcg/i386: Reserve register for guest_base if a segment isn't available
  2015-07-17  1:33   ` Peter Maydell
  2015-07-17  6:02     ` Richard Henderson
@ 2015-07-17 15:30     ` Paolo Bonzini
  2015-07-17 15:56       ` Aurelien Jarno
  1 sibling, 1 reply; 9+ messages in thread
From: Paolo Bonzini @ 2015-07-17 15:30 UTC (permalink / raw)
  To: Peter Maydell, Richard Henderson
  Cc: Leon Alrae, QEMU Developers, Aurelien Jarno



On 17/07/2015 03:33, Peter Maydell wrote:
> On 16 July 2015 at 22:25, Richard Henderson <rth@twiddle.net> wrote:
> > This saves 2 insns and 10 bytes from the implementation of
> > each memory operation.
> 
> Do we have an idea of which platforms/configs don't let
> us have a segment register for guest_base?

Everything non-Linux I think, which for user-mode emulation means bsd-user.

Paolo

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Qemu-devel] [PATCH 2/2] tcg/i386: Reserve register for guest_base if a segment isn't available
  2015-07-17 15:30     ` Paolo Bonzini
@ 2015-07-17 15:56       ` Aurelien Jarno
  0 siblings, 0 replies; 9+ messages in thread
From: Aurelien Jarno @ 2015-07-17 15:56 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Peter Maydell, Leon Alrae, QEMU Developers, Richard Henderson

On 2015-07-17 17:30, Paolo Bonzini wrote:
> 
> 
> On 17/07/2015 03:33, Peter Maydell wrote:
> > On 16 July 2015 at 22:25, Richard Henderson <rth@twiddle.net> wrote:
> > > This saves 2 insns and 10 bytes from the implementation of
> > > each memory operation.
> > 
> > Do we have an idea of which platforms/configs don't let
> > us have a segment register for guest_base?
> 
> Everything non-Linux I think, which for user-mode emulation means bsd-user.

Linux might also not allow to use a segment register if the prctl call
with ARCH_SET_GS fails. OTOH I have no idea why it can fail, but we
actually check that it doesn't fail before using %gs.

-- 
Aurelien Jarno                          GPG: 4096R/1DDD8C9B
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2015-07-17 15:56 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-07-16 21:25 [Qemu-devel] [PATCH for-2.4 0/2] tcg/i386 address zero-extension Richard Henderson
2015-07-16 21:25 ` [Qemu-devel] [PATCH 1/2] tcg/i386: Extend addresses for 32-bit guests Richard Henderson
2015-07-17 10:18   ` Aurelien Jarno
2015-07-16 21:25 ` [Qemu-devel] [PATCH 2/2] tcg/i386: Reserve register for guest_base if a segment isn't available Richard Henderson
2015-07-17  1:33   ` Peter Maydell
2015-07-17  6:02     ` Richard Henderson
2015-07-17 15:30     ` Paolo Bonzini
2015-07-17 15:56       ` Aurelien Jarno
2015-07-17 10:18   ` Aurelien Jarno

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).