qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Aurelien Jarno <aurelien@aurel32.net>
To: qemu-devel@nongnu.org
Cc: Aurelien Jarno <aurelien@aurel32.net>
Subject: [Qemu-devel] [PATCH 4/5] tcg/i386: use movbe instruction in qemu_ldst routines
Date: Sat, 21 Dec 2013 17:43:43 +0100	[thread overview]
Message-ID: <1387644224-2404-5-git-send-email-aurelien@aurel32.net> (raw)
In-Reply-To: <1387644224-2404-1-git-send-email-aurelien@aurel32.net>

The movbe instruction has been added on some Intel Atom CPUs and on
recent Intel Haswell CPUs. It loads or stores a value and byte-swaps it
in the same instruction.

This patch detects the availability of this instruction and, when it is
available, uses it in the qemu load/store routines in place of load/store +
bswap. Note that for 16-bit unsigned loads, movbe + movzw is basically the
same as movzw + bswap, so the patch doesn't touch this case.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/i386/tcg-target.c |  152 ++++++++++++++++++++++++++++++++++---------------
 1 file changed, 107 insertions(+), 45 deletions(-)

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index e247829..8fbb0be 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -99,18 +99,31 @@ static const int tcg_target_call_oarg_regs[] = {
 # define TCG_REG_L1 TCG_REG_EDX
 #endif
 
+/* The host compiler should supply <cpuid.h> to enable runtime feature
+   detection, as we're not going to go so far as our own inline assembly.
+   If not available, default values will be assumed.  */
+#if defined(CONFIG_CPUID_H)
+#include <cpuid.h>
+#endif
+
 /* For 32-bit, we are going to attempt to determine at runtime whether cmov
-   is available.  However, the host compiler must supply <cpuid.h>, as we're
-   not going to go so far as our own inline assembly.  */
+   is available.  */
 #if TCG_TARGET_REG_BITS == 64
 # define have_cmov 1
 #elif defined(CONFIG_CPUID_H)
-#include <cpuid.h>
 static bool have_cmov;
 #else
 # define have_cmov 0
 #endif
 
+/* If bit_MOVBE is defined in cpuid.h (added in GCC version 4.6), we are
+   going to attempt to determine at runtime whether movbe is available.  */
+#if defined(CONFIG_CPUID_H) && defined(bit_MOVBE)
+static bool have_movbe;
+#else
+# define have_movbe 0
+#endif
+
 static uint8_t *tb_ret_addr;
 
 static void patch_reloc(uint8_t *code_ptr, int type,
@@ -280,6 +293,8 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #define OPC_MOVB_EvIz   (0xc6)
 #define OPC_MOVL_EvIz	(0xc7)
 #define OPC_MOVL_Iv     (0xb8)
+#define OPC_MOVBE_GyMy  (0xf0 | P_EXT2)
+#define OPC_MOVBE_MyGy  (0xf1 | P_EXT2)
 #define OPC_MOVSBL	(0xbe | P_EXT)
 #define OPC_MOVSWL	(0xbf | P_EXT)
 #define OPC_MOVSLQ	(0x63 | P_REXW)
@@ -1363,8 +1378,13 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
         break;
     case MO_SW:
         if (bswap) {
-            tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
-            tcg_out_rolw_8(s, datalo);
+            if (have_movbe) {
+                tcg_out_modrm_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
+                                     datalo, base, ofs);
+            } else {
+                tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
+                tcg_out_rolw_8(s, datalo);
+            }
             tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
         } else {
             tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW + seg,
@@ -1372,16 +1392,25 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
         }
         break;
     case MO_UL:
-        tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
-        if (bswap) {
-            tcg_out_bswap32(s, datalo);
+        if (bswap && have_movbe) {
+            tcg_out_modrm_offset(s, OPC_MOVBE_GyMy + seg, datalo, base, ofs);
+        } else {
+            tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
+            if (bswap) {
+                tcg_out_bswap32(s, datalo);
+            }
         }
         break;
 #if TCG_TARGET_REG_BITS == 64
     case MO_SL:
         if (bswap) {
-            tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
-            tcg_out_bswap32(s, datalo);
+            if (have_movbe) {
+                tcg_out_modrm_offset(s, OPC_MOVBE_GyMy + seg,
+                                     datalo, base, ofs);
+            } else {
+                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
+                tcg_out_bswap32(s, datalo);
+            }
             tcg_out_ext32s(s, datalo, datalo);
         } else {
             tcg_out_modrm_offset(s, OPC_MOVSLQ + seg, datalo, base, ofs);
@@ -1390,29 +1419,34 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
 #endif
     case MO_Q:
         if (TCG_TARGET_REG_BITS == 64) {
-            tcg_out_modrm_offset(s, OPC_MOVL_GvEv + P_REXW + seg,
-                                 datalo, base, ofs);
-            if (bswap) {
-                tcg_out_bswap64(s, datalo);
+            if (bswap && have_movbe) {
+                tcg_out_modrm_offset(s, OPC_MOVBE_GyMy + P_REXW + seg,
+                                     datalo, base, ofs);
+            } else {
+                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + P_REXW + seg,
+                                     datalo, base, ofs);
+                if (bswap) {
+                    tcg_out_bswap64(s, datalo);
+                }
             }
         } else {
+            int opc = OPC_MOVL_GvEv;
             if (bswap) {
                 int t = datalo;
                 datalo = datahi;
                 datahi = t;
+                if (have_movbe) {
+                    opc = OPC_MOVBE_GyMy;
+                }
             }
             if (base != datalo) {
-                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
-                                     datalo, base, ofs);
-                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
-                                     datahi, base, ofs + 4);
+                tcg_out_modrm_offset(s, opc + seg, datalo, base, ofs);
+                tcg_out_modrm_offset(s, opc + seg, datahi, base, ofs + 4);
             } else {
-                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
-                                     datahi, base, ofs + 4);
-                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
-                                     datalo, base, ofs);
+                tcg_out_modrm_offset(s, opc + seg, datahi, base, ofs + 4);
+                tcg_out_modrm_offset(s, opc + seg, datalo, base, ofs);
             }
-            if (bswap) {
+            if (bswap && opc != OPC_MOVBE_GyMy) {
                 tcg_out_bswap32(s, datalo);
                 tcg_out_bswap32(s, datahi);
             }
@@ -1506,31 +1540,48 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                              datalo, base, ofs);
         break;
     case MO_16:
-        if (bswap) {
-            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
-            tcg_out_rolw_8(s, scratch);
-            datalo = scratch;
+        if (bswap & have_movbe) {
+            tcg_out_modrm_offset(s, OPC_MOVBE_MyGy + P_DATA16 + seg,
+                                 datalo, base, ofs);
+        } else {
+            if (bswap) {
+                tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
+                tcg_out_rolw_8(s, scratch);
+                datalo = scratch;
+            }
+            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16 + seg,
+                                 datalo, base, ofs);
         }
-        tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16 + seg,
-                             datalo, base, ofs);
         break;
     case MO_32:
-        if (bswap) {
-            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
-            tcg_out_bswap32(s, scratch);
-            datalo = scratch;
+        if (bswap & have_movbe) {
+            tcg_out_modrm_offset(s, OPC_MOVBE_MyGy + seg, datalo, base, ofs);
+        } else {
+            if (bswap) {
+                tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
+                tcg_out_bswap32(s, scratch);
+                datalo = scratch;
+            }
+            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
         }
-        tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
         break;
     case MO_64:
         if (TCG_TARGET_REG_BITS == 64) {
-            if (bswap) {
-                tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
-                tcg_out_bswap64(s, scratch);
-                datalo = scratch;
+            if (bswap && have_movbe) {
+                tcg_out_modrm_offset(s, OPC_MOVBE_MyGy + P_REXW + seg,
+                                     datalo, base, ofs);
+            } else {
+                if (bswap) {
+                    tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
+                    tcg_out_bswap64(s, scratch);
+                    datalo = scratch;
+                }
+                tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_REXW + seg,
+                                     datalo, base, ofs);
             }
-            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_REXW + seg,
-                                 datalo, base, ofs);
+        } else if (bswap && have_movbe) {
+            tcg_out_modrm_offset(s, OPC_MOVBE_MyGy + seg, datahi, base, ofs);
+            tcg_out_modrm_offset(s, OPC_MOVBE_MyGy + seg, datalo, base, ofs+4);
         } else if (bswap) {
             tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
             tcg_out_bswap32(s, scratch);
@@ -2167,13 +2218,24 @@ static void tcg_target_qemu_prologue(TCGContext *s)
 
 static void tcg_target_init(TCGContext *s)
 {
-    /* For 32-bit, 99% certainty that we're running on hardware that supports
-       cmov, but we still need to check.  In case cmov is not available, we'll
-       use a small forward branch.  */
-#ifndef have_cmov
+#if !(defined(have_cmov) && defined(have_movbe))
     {
         unsigned a, b, c, d;
-        have_cmov = (__get_cpuid(1, &a, &b, &c, &d) && (d & bit_CMOV));
+        int ret;
+        ret = __get_cpuid(1, &a, &b, &c, &d);
+
+# ifndef have_cmov
+        /* For 32-bit, 99% certainty that we're running on hardware that
+           supports cmov, but we still need to check.  In case cmov is not
+           available, we'll use a small forward branch.  */
+        have_cmov = ret && (d & bit_CMOV);
+# endif
+
+# ifndef have_movbe
+        /* MOVBE is only available on Intel Atom and Haswell CPUs, so we
+           need to probe for it.  */
+        have_movbe = ret && (c & bit_MOVBE);
+# endif
     }
 #endif
 
-- 
1.7.10.4

  parent reply	other threads:[~2013-12-21 16:44 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-12-21 16:43 [Qemu-devel] [PATCH 0/5] tcg/i386: use movbe instruction in qemu_ldst routines Aurelien Jarno
2013-12-21 16:43 ` [Qemu-devel] [PATCH 1/5] disas/i386.c: disassemble movbe instruction Aurelien Jarno
2013-12-22 16:43   ` Richard Henderson
2013-12-21 16:43 ` [Qemu-devel] [PATCH 2/5] tcg/i386: remove hardcoded P_REXW value Aurelien Jarno
2013-12-22 16:43   ` Richard Henderson
2013-12-21 16:43 ` [Qemu-devel] [PATCH 3/5] tcg/i386: add support for three-byte opcodes Aurelien Jarno
2013-12-22 16:46   ` Richard Henderson
2013-12-21 16:43 ` Aurelien Jarno [this message]
2013-12-22 16:52   ` [Qemu-devel] [PATCH 4/5] tcg/i386: use movbe instruction in qemu_ldst routines Richard Henderson
2013-12-21 16:43 ` [Qemu-devel] [PATCH 5/5] tcg/i386: cleanup useless #ifdef Aurelien Jarno
2013-12-22 16:44   ` Richard Henderson
2013-12-22 11:24 ` [Qemu-devel] [PATCH 0/5] tcg/i386: use movbe instruction in qemu_ldst routines Aurelien Jarno
2013-12-22 11:47   ` Aurelien Jarno

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1387644224-2404-5-git-send-email-aurelien@aurel32.net \
    --to=aurelien@aurel32.net \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).