qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: peter.maydell@linaro.org, "Philippe Mathieu-Daudé" <f4bug@amsat.org>
Subject: [PULL 02/47] tcg/i386: Adjust TCG_TARGET_HAS_MEMORY_BSWAP
Date: Thu,  7 Jan 2021 10:14:03 -1000	[thread overview]
Message-ID: <20210107201448.1152301-3-richard.henderson@linaro.org> (raw)
In-Reply-To: <20210107201448.1152301-1-richard.henderson@linaro.org>

Always true when movbe is available, otherwise leave
this to generic code.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/i386/tcg-target.h     |   3 +-
 tcg/i386/tcg-target.c.inc | 119 ++++++++++++++------------------------
 2 files changed, 47 insertions(+), 75 deletions(-)

diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index cd067e0b30..b1ada9777a 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -101,6 +101,7 @@ extern bool have_bmi1;
 extern bool have_popcnt;
 extern bool have_avx1;
 extern bool have_avx2;
+extern bool have_movbe;
 
 /* optional instructions */
 #define TCG_TARGET_HAS_div2_i32         1
@@ -225,7 +226,7 @@ static inline void tb_target_set_jmp_target(uintptr_t tc_ptr,
 
 #define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
 
-#define TCG_TARGET_HAS_MEMORY_BSWAP  1
+#define TCG_TARGET_HAS_MEMORY_BSWAP  have_movbe
 
 #ifdef CONFIG_SOFTMMU
 #define TCG_TARGET_NEED_LDST_LABELS
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index d8797ed398..01588cdcb4 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -154,13 +154,12 @@ bool have_bmi1;
 bool have_popcnt;
 bool have_avx1;
 bool have_avx2;
+bool have_movbe;
 
 #ifdef CONFIG_CPUID_H
-static bool have_movbe;
 static bool have_bmi2;
 static bool have_lzcnt;
 #else
-# define have_movbe 0
 # define have_bmi2 0
 # define have_lzcnt 0
 #endif
@@ -1986,13 +1985,14 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                    TCGReg base, int index, intptr_t ofs,
                                    int seg, bool is64, MemOp memop)
 {
-    const MemOp real_bswap = memop & MO_BSWAP;
-    MemOp bswap = real_bswap;
+    bool use_movbe = false;
     int rexw = is64 * P_REXW;
     int movop = OPC_MOVL_GvEv;
 
-    if (have_movbe && real_bswap) {
-        bswap = 0;
+    /* Do big-endian loads with movbe.  */
+    if (memop & MO_BSWAP) {
+        tcg_debug_assert(have_movbe);
+        use_movbe = true;
         movop = OPC_MOVBE_GyMy;
     }
 
@@ -2006,23 +2006,28 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                  base, index, 0, ofs);
         break;
     case MO_UW:
-        tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
-                                 base, index, 0, ofs);
-        if (real_bswap) {
-            tcg_out_rolw_8(s, datalo);
-        }
-        break;
-    case MO_SW:
-        if (real_bswap) {
-            if (have_movbe) {
+        if (use_movbe) {
+            /* There is no extending movbe; only low 16-bits are modified.  */
+            if (datalo != base && datalo != index) {
+                /* XOR breaks dependency chains.  */
+                tgen_arithr(s, ARITH_XOR, datalo, datalo);
                 tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
                                          datalo, base, index, 0, ofs);
             } else {
-                tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
-                                         base, index, 0, ofs);
-                tcg_out_rolw_8(s, datalo);
+                tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
+                                         datalo, base, index, 0, ofs);
+                tcg_out_ext16u(s, datalo, datalo);
             }
-            tcg_out_modrm(s, OPC_MOVSWL + rexw, datalo, datalo);
+        } else {
+            tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
+                                     base, index, 0, ofs);
+        }
+        break;
+    case MO_SW:
+        if (use_movbe) {
+            tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
+                                     datalo, base, index, 0, ofs);
+            tcg_out_ext16s(s, datalo, datalo, rexw);
         } else {
             tcg_out_modrm_sib_offset(s, OPC_MOVSWL + rexw + seg,
                                      datalo, base, index, 0, ofs);
@@ -2030,18 +2035,12 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
         break;
     case MO_UL:
         tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs);
-        if (bswap) {
-            tcg_out_bswap32(s, datalo);
-        }
         break;
 #if TCG_TARGET_REG_BITS == 64
     case MO_SL:
-        if (real_bswap) {
-            tcg_out_modrm_sib_offset(s, movop + seg, datalo,
+        if (use_movbe) {
+            tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + seg, datalo,
                                      base, index, 0, ofs);
-            if (bswap) {
-                tcg_out_bswap32(s, datalo);
-            }
             tcg_out_ext32s(s, datalo, datalo);
         } else {
             tcg_out_modrm_sib_offset(s, OPC_MOVSLQ + seg, datalo,
@@ -2053,12 +2052,9 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
         if (TCG_TARGET_REG_BITS == 64) {
             tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo,
                                      base, index, 0, ofs);
-            if (bswap) {
-                tcg_out_bswap64(s, datalo);
-            }
         } else {
-            if (real_bswap) {
-                int t = datalo;
+            if (use_movbe) {
+                TCGReg t = datalo;
                 datalo = datahi;
                 datahi = t;
             }
@@ -2073,14 +2069,10 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                 tcg_out_modrm_sib_offset(s, movop + seg, datalo,
                                          base, index, 0, ofs);
             }
-            if (bswap) {
-                tcg_out_bswap32(s, datalo);
-                tcg_out_bswap32(s, datahi);
-            }
         }
         break;
     default:
-        tcg_abort();
+        g_assert_not_reached();
     }
 }
 
@@ -2128,24 +2120,27 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                    TCGReg base, int index, intptr_t ofs,
                                    int seg, MemOp memop)
 {
-    /* ??? Ideally we wouldn't need a scratch register.  For user-only,
-       we could perform the bswap twice to restore the original value
-       instead of moving to the scratch.  But as it is, the L constraint
-       means that TCG_REG_L0 is definitely free here.  */
     const TCGReg scratch = TCG_REG_L0;
-    const MemOp real_bswap = memop & MO_BSWAP;
-    MemOp bswap = real_bswap;
+    bool use_movbe = false;
     int movop = OPC_MOVL_EvGv;
 
-    if (have_movbe && real_bswap) {
-        bswap = 0;
+    /*
+     * Do big-endian stores with movbe or softmmu.
+     * User-only without movbe will have its swapping done generically.
+     */
+    if (memop & MO_BSWAP) {
+        tcg_debug_assert(have_movbe);
+        use_movbe = true;
         movop = OPC_MOVBE_MyGy;
     }
 
     switch (memop & MO_SIZE) {
     case MO_8:
-        /* In 32-bit mode, 8-bit stores can only happen from [abcd]x.
-           Use the scratch register if necessary.  */
+        /*
+         * In 32-bit mode, 8-bit stores can only happen from [abcd]x.
+         * TODO: Adjust constraints such that this is is forced,
+         * then we won't need a scratch at all for user-only.
+         */
         if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) {
             tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
             datalo = scratch;
@@ -2154,43 +2149,19 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                  datalo, base, index, 0, ofs);
         break;
     case MO_16:
-        if (bswap) {
-            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
-            tcg_out_rolw_8(s, scratch);
-            datalo = scratch;
-        }
         tcg_out_modrm_sib_offset(s, movop + P_DATA16 + seg, datalo,
                                  base, index, 0, ofs);
         break;
     case MO_32:
-        if (bswap) {
-            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
-            tcg_out_bswap32(s, scratch);
-            datalo = scratch;
-        }
         tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs);
         break;
     case MO_64:
         if (TCG_TARGET_REG_BITS == 64) {
-            if (bswap) {
-                tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
-                tcg_out_bswap64(s, scratch);
-                datalo = scratch;
-            }
             tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo,
                                      base, index, 0, ofs);
-        } else if (bswap) {
-            tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
-            tcg_out_bswap32(s, scratch);
-            tcg_out_modrm_sib_offset(s, OPC_MOVL_EvGv + seg, scratch,
-                                     base, index, 0, ofs);
-            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
-            tcg_out_bswap32(s, scratch);
-            tcg_out_modrm_sib_offset(s, OPC_MOVL_EvGv + seg, scratch,
-                                     base, index, 0, ofs + 4);
         } else {
-            if (real_bswap) {
-                int t = datalo;
+            if (use_movbe) {
+                TCGReg t = datalo;
                 datalo = datahi;
                 datahi = t;
             }
@@ -2201,7 +2172,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
         }
         break;
     default:
-        tcg_abort();
+        g_assert_not_reached();
     }
 }
 
-- 
2.25.1



  parent reply	other threads:[~2021-01-07 20:17 UTC|newest]

Thread overview: 52+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-01-07 20:14 [PULL 00/47] tcg patch queue Richard Henderson
2021-01-07 20:14 ` [PULL 01/47] linux-user: Conditionalize TUNSETVNETLE Richard Henderson
2021-01-07 20:14 ` Richard Henderson [this message]
2021-01-07 20:14 ` [PULL 03/47] tcg: Introduce INDEX_op_qemu_st8_i32 Richard Henderson
2021-01-07 20:14 ` [PULL 04/47] util/oslib-win32: Use _aligned_malloc for qemu_try_memalign Richard Henderson
2021-01-10 23:18   ` Volker Rümelin
2021-01-11  3:10     ` 罗勇刚(Yonggang Luo)
2021-01-07 20:14 ` [PULL 05/47] util/oslib: Assert qemu_try_memalign() alignment is a power of 2 Richard Henderson
2021-01-07 20:14 ` [PULL 06/47] tcg: Do not flush icache for interpreter Richard Henderson
2021-01-07 20:14 ` [PULL 07/47] util: Enhance flush_icache_range with separate data pointer Richard Henderson
2021-01-07 20:14 ` [PULL 08/47] util: Specialize flush_idcache_range for aarch64 Richard Henderson
2021-01-07 20:14 ` [PULL 09/47] tcg: Move tcg prologue pointer out of TCGContext Richard Henderson
2021-01-07 20:14 ` [PULL 10/47] tcg: Move tcg epilogue " Richard Henderson
2021-01-07 20:14 ` [PULL 11/47] tcg: Add in_code_gen_buffer Richard Henderson
2021-01-07 20:14 ` [PULL 12/47] tcg: Introduce tcg_splitwx_to_{rx,rw} Richard Henderson
2021-01-07 20:14 ` [PULL 13/47] tcg: Adjust TCGLabel for const Richard Henderson
2021-01-07 20:14 ` [PULL 14/47] tcg: Adjust tcg_out_call " Richard Henderson
2021-01-07 20:14 ` [PULL 15/47] tcg: Adjust tcg_out_label " Richard Henderson
2021-01-07 20:14 ` [PULL 16/47] tcg: Adjust tcg_register_jit " Richard Henderson
2021-01-07 20:14 ` [PULL 17/47] tcg: Adjust tb_target_set_jmp_target for split-wx Richard Henderson
2021-01-07 20:14 ` [PULL 18/47] tcg: Make DisasContextBase.tb const Richard Henderson
2021-01-07 20:14 ` [PULL 19/47] tcg: Make tb arg to synchronize_from_tb const Richard Henderson
2021-01-07 20:14 ` [PULL 20/47] tcg: Use Error with alloc_code_gen_buffer Richard Henderson
2021-01-07 20:14 ` [PULL 21/47] tcg: Add --accel tcg,split-wx property Richard Henderson
2021-01-07 20:14 ` [PULL 22/47] accel/tcg: Support split-wx for linux with memfd Richard Henderson
2021-01-07 20:14 ` [PULL 23/47] accel/tcg: Support split-wx for darwin/iOS with vm_remap Richard Henderson
2021-01-07 20:14 ` [PULL 24/47] tcg: Return the TB pointer from the rx region from exit_tb Richard Henderson
2021-01-07 20:14 ` [PULL 25/47] tcg/i386: Support split-wx code generation Richard Henderson
2021-01-07 20:14 ` [PULL 26/47] tcg/aarch64: Use B not BL for tcg_out_goto_long Richard Henderson
2021-01-07 20:14 ` [PULL 27/47] tcg/aarch64: Support split-wx code generation Richard Henderson
2021-01-07 20:14 ` [PULL 28/47] disas: Push const down through host disassembly Richard Henderson
2021-01-07 20:14 ` [PULL 29/47] tcg/tci: Push const down through bytecode reading Richard Henderson
2021-01-07 20:14 ` [PULL 30/47] tcg: Introduce tcg_tbrel_diff Richard Henderson
2021-01-07 20:14 ` [PULL 31/47] tcg/ppc: Use tcg_tbrel_diff Richard Henderson
2021-01-07 20:14 ` [PULL 32/47] tcg/ppc: Use tcg_out_mem_long to reset TCG_REG_TB Richard Henderson
2021-01-07 20:14 ` [PULL 33/47] tcg/ppc: Support split-wx code generation Richard Henderson
2021-01-07 20:14 ` [PULL 34/47] tcg/sparc: Use tcg_tbrel_diff Richard Henderson
2021-01-07 20:14 ` [PULL 35/47] tcg/sparc: Support split-wx code generation Richard Henderson
2021-01-07 20:14 ` [PULL 36/47] tcg/s390: Use tcg_tbrel_diff Richard Henderson
2021-01-07 20:14 ` [PULL 37/47] tcg/s390: Support split-wx code generation Richard Henderson
2021-01-07 20:14 ` [PULL 38/47] tcg/riscv: Fix branch range checks Richard Henderson
2021-01-07 20:14 ` [PULL 39/47] tcg/riscv: Remove branch-over-branch fallback Richard Henderson
2021-01-07 20:14 ` [PULL 40/47] tcg/riscv: Support split-wx code generation Richard Henderson
2021-01-07 20:14 ` [PULL 41/47] accel/tcg: Add mips support to alloc_code_gen_buffer_splitwx_memfd Richard Henderson
2021-01-07 20:14 ` [PULL 42/47] tcg/mips: Do not assert on relocation overflow Richard Henderson
2021-01-07 20:14 ` [PULL 43/47] tcg/mips: Support split-wx code generation Richard Henderson
2021-01-07 20:14 ` [PULL 44/47] tcg/arm: " Richard Henderson
2021-01-07 20:14 ` [PULL 45/47] tcg: Remove TCG_TARGET_SUPPORT_MIRROR Richard Henderson
2021-01-07 20:14 ` [PULL 46/47] tcg: Constify tcg_code_gen_epilogue Richard Henderson
2021-01-07 20:14 ` [PULL 47/47] tcg: Constify TCGLabelQemuLdst.raddr Richard Henderson
2021-01-07 21:03 ` [PULL 00/47] tcg patch queue no-reply
2021-01-08 10:28 ` Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210107201448.1152301-3-richard.henderson@linaro.org \
    --to=richard.henderson@linaro.org \
    --cc=f4bug@amsat.org \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).