qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH 00/15] tcg-i386 cleanup and improvement, v3
@ 2010-05-21 15:30 Richard Henderson
  2010-05-21 15:30 ` [Qemu-devel] [PATCH 01/15] tcg-i386: Tidy ext8u and ext16u operations Richard Henderson
                   ` (15 more replies)
  0 siblings, 16 replies; 17+ messages in thread
From: Richard Henderson @ 2010-05-21 15:30 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien

Changes v2->v3:
  * ext8u and ext8s operate on 'q' register inputs only.
  * pushi is inline and loses the ifdef.
  * trailing whitespace cleanup


r~


Richard Henderson (15):
  tcg-i386: Tidy ext8u and ext16u operations.
  tcg-i386: Tidy ext8s and ext16s operations.
  tcg-i386: Tidy immediate arithmetic operations.
  tcg-i386: Tidy non-immediate arithmetic operations.
  tcg-i386: Tidy movi.
  tcg-i386: Tidy push/pop.
  tcg-i386: Tidy calls.
  tcg-i386: Tidy ret.
  tcg-i386: Tidy setcc.
  tcg-i386: Tidy unary arithmetic.
  tcg-i386: Tidy multiply.
  tcg-i386: Tidy xchg.
  tcg-i386: Tidy lea.
  tcg-i386: Use lea for three-operand add.
  tcg-i386: Nuke trailing whitespace.

 tcg/i386/tcg-target.c |  504 ++++++++++++++++++++++++++++++-------------------
 1 files changed, 308 insertions(+), 196 deletions(-)

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Qemu-devel] [PATCH 01/15] tcg-i386: Tidy ext8u and ext16u operations.
  2010-05-21 15:30 [Qemu-devel] [PATCH 00/15] tcg-i386 cleanup and improvement, v3 Richard Henderson
@ 2010-05-21 15:30 ` Richard Henderson
  2010-05-21 15:30 ` [Qemu-devel] [PATCH 02/15] tcg-i386: Tidy ext8s and ext16s operations Richard Henderson
                   ` (14 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: Richard Henderson @ 2010-05-21 15:30 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien

Define OPC_MOVZBL and OPC_MOVZWL.  Factor opcode emission to
separate functions.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/i386/tcg-target.c |   57 ++++++++++++++++++++++++++----------------------
 1 files changed, 31 insertions(+), 26 deletions(-)

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 904d2e5..4497010 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -166,6 +166,8 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #define OPC_MOVB_EvGv	(0x88)		/* stores, more or less */
 #define OPC_MOVL_EvGv	(0x89)		/* stores, more or less */
 #define OPC_MOVL_GvEv	(0x8b)		/* loads, more or less */
+#define OPC_MOVZBL	(0xb6 | P_EXT)
+#define OPC_MOVZWL	(0xb7 | P_EXT)
 #define OPC_SHIFT_1	(0xd1)
 #define OPC_SHIFT_Ib	(0xc1)
 #define OPC_SHIFT_cl	(0xd3)
@@ -209,8 +211,6 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #define JCC_JLE 0xe
 #define JCC_JG  0xf
 
-#define P_EXT   0x100 /* 0x0f opcode prefix */
-
 static const uint8_t tcg_cond_to_jcc[10] = {
     [TCG_COND_EQ] = JCC_JE,
     [TCG_COND_NE] = JCC_JNE,
@@ -323,6 +323,19 @@ static inline void tcg_out_rolw_8(TCGContext *s, int reg)
     tcg_out_shifti(s, SHIFT_ROL, reg, 8);
 }
 
+static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
+{
+    /* movzbl */
+    assert(src < 4);
+    tcg_out_modrm(s, OPC_MOVZBL, dest, src);
+}
+
+static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
+{
+    /* movzwl */
+    tcg_out_modrm(s, OPC_MOVZWL, dest, src);
+}
+
 static inline void tgen_arithi(TCGContext *s, int c, int r0, int32_t val, int cf)
 {
     if (!cf && ((c == ARITH_ADD && val == 1) || (c == ARITH_SUB && val == -1))) {
@@ -335,11 +348,9 @@ static inline void tgen_arithi(TCGContext *s, int c, int r0, int32_t val, int cf
         tcg_out_modrm(s, 0x83, c, r0);
         tcg_out8(s, val);
     } else if (c == ARITH_AND && val == 0xffu && r0 < 4) {
-        /* movzbl */
-        tcg_out_modrm(s, 0xb6 | P_EXT, r0, r0);
+        tcg_out_ext8u(s, r0, r0);
     } else if (c == ARITH_AND && val == 0xffffu) {
-        /* movzwl */
-        tcg_out_modrm(s, 0xb7 | P_EXT, r0, r0);
+        tcg_out_ext16u(s, r0, r0);
     } else {
         tcg_out_modrm(s, 0x81, c, r0);
         tcg_out32(s, val);
@@ -677,12 +688,10 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
         tcg_out_modrm(s, 0xbf | P_EXT, data_reg, TCG_REG_EAX);
         break;
     case 0:
-        /* movzbl */
-        tcg_out_modrm(s, 0xb6 | P_EXT, data_reg, TCG_REG_EAX);
+        tcg_out_ext8u(s, data_reg, TCG_REG_EAX);
         break;
     case 1:
-        /* movzwl */
-        tcg_out_modrm(s, 0xb7 | P_EXT, data_reg, TCG_REG_EAX);
+        tcg_out_ext16u(s, data_reg, TCG_REG_EAX);
         break;
     case 2:
     default:
@@ -722,7 +731,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     switch(opc) {
     case 0:
         /* movzbl */
-        tcg_out_modrm_offset(s, 0xb6 | P_EXT, data_reg, r0, GUEST_BASE);
+        tcg_out_modrm_offset(s, OPC_MOVZBL, data_reg, r0, GUEST_BASE);
         break;
     case 0 | 4:
         /* movsbl */
@@ -730,7 +739,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
         break;
     case 1:
         /* movzwl */
-        tcg_out_modrm_offset(s, 0xb7 | P_EXT, data_reg, r0, GUEST_BASE);
+        tcg_out_modrm_offset(s, OPC_MOVZWL, data_reg, r0, GUEST_BASE);
         if (bswap) {
             tcg_out_rolw_8(s, data_reg);
         }
@@ -870,12 +879,10 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     } else {
         switch(opc) {
         case 0:
-            /* movzbl */
-            tcg_out_modrm(s, 0xb6 | P_EXT, TCG_REG_EDX, data_reg);
+            tcg_out_ext8u(s, TCG_REG_EDX, data_reg);
             break;
         case 1:
-            /* movzwl */
-            tcg_out_modrm(s, 0xb7 | P_EXT, TCG_REG_EDX, data_reg);
+            tcg_out_ext16u(s, TCG_REG_EDX, data_reg);
             break;
         case 2:
             tcg_out_mov(s, TCG_REG_EDX, data_reg);
@@ -901,12 +908,10 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
         tcg_out_mov(s, TCG_REG_EDX, addr_reg2);
         switch(opc) {
         case 0:
-            /* movzbl */
-            tcg_out_modrm(s, 0xb6 | P_EXT, TCG_REG_ECX, data_reg);
+            tcg_out_ext8u(s, TCG_REG_ECX, data_reg);
             break;
         case 1:
-            /* movzwl */
-            tcg_out_modrm(s, 0xb7 | P_EXT, TCG_REG_ECX, data_reg);
+            tcg_out_ext16u(s, TCG_REG_ECX, data_reg);
             break;
         case 2:
             tcg_out_mov(s, TCG_REG_ECX, data_reg);
@@ -1035,7 +1040,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
     case INDEX_op_ld8u_i32:
         /* movzbl */
-        tcg_out_modrm_offset(s, 0xb6 | P_EXT, args[0], args[1], args[2]);
+        tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
         break;
     case INDEX_op_ld8s_i32:
         /* movsbl */
@@ -1043,7 +1048,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
     case INDEX_op_ld16u_i32:
         /* movzwl */
-        tcg_out_modrm_offset(s, 0xb7 | P_EXT, args[0], args[1], args[2]);
+        tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
         break;
     case INDEX_op_ld16s_i32:
         /* movswl */
@@ -1181,10 +1186,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_modrm(s, 0xbf | P_EXT, args[0], args[1]);
         break;
     case INDEX_op_ext8u_i32:
-        tcg_out_modrm(s, 0xb6 | P_EXT, args[0], args[1]);
+        tcg_out_ext8u(s, args[0], args[1]);
         break;
     case INDEX_op_ext16u_i32:
-        tcg_out_modrm(s, 0xb7 | P_EXT, args[0], args[1]);
+        tcg_out_ext16u(s, args[0], args[1]);
         break;
 
     case INDEX_op_setcond_i32:
@@ -1279,8 +1284,8 @@ static const TCGTargetOpDef x86_op_defs[] = {
 
     { INDEX_op_ext8s_i32, { "r", "q" } },
     { INDEX_op_ext16s_i32, { "r", "r" } },
-    { INDEX_op_ext8u_i32, { "r", "q"} },
-    { INDEX_op_ext16u_i32, { "r", "r"} },
+    { INDEX_op_ext8u_i32, { "r", "q" } },
+    { INDEX_op_ext16u_i32, { "r", "r" } },
 
     { INDEX_op_setcond_i32, { "q", "r", "ri" } },
     { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
-- 
1.7.0.1

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [Qemu-devel] [PATCH 02/15] tcg-i386: Tidy ext8s and ext16s operations.
  2010-05-21 15:30 [Qemu-devel] [PATCH 00/15] tcg-i386 cleanup and improvement, v3 Richard Henderson
  2010-05-21 15:30 ` [Qemu-devel] [PATCH 01/15] tcg-i386: Tidy ext8u and ext16u operations Richard Henderson
@ 2010-05-21 15:30 ` Richard Henderson
  2010-05-21 15:30 ` [Qemu-devel] [PATCH 03/15] tcg-i386: Tidy immediate arithmetic operations Richard Henderson
                   ` (13 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: Richard Henderson @ 2010-05-21 15:30 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien

Define OPC_MOVSBL and OPC_MOVSWL.  Factor opcode emission to
separate functions.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/i386/tcg-target.c |   35 ++++++++++++++++++++++++-----------
 1 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 4497010..949e974 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -166,6 +166,8 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #define OPC_MOVB_EvGv	(0x88)		/* stores, more or less */
 #define OPC_MOVL_EvGv	(0x89)		/* stores, more or less */
 #define OPC_MOVL_GvEv	(0x8b)		/* loads, more or less */
+#define OPC_MOVSBL	(0xbe | P_EXT)
+#define OPC_MOVSWL	(0xbf | P_EXT)
 #define OPC_MOVZBL	(0xb6 | P_EXT)
 #define OPC_MOVZWL	(0xb7 | P_EXT)
 #define OPC_SHIFT_1	(0xd1)
@@ -330,12 +332,25 @@ static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
     tcg_out_modrm(s, OPC_MOVZBL, dest, src);
 }
 
+static void tcg_out_ext8s(TCGContext *s, int dest, int src)
+{
+    /* movsbl */
+    assert(src < 4);
+    tcg_out_modrm(s, OPC_MOVSBL, dest, src);
+}
+
 static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
 {
     /* movzwl */
     tcg_out_modrm(s, OPC_MOVZWL, dest, src);
 }
 
+static inline void tcg_out_ext16s(TCGContext *s, int dest, int src)
+{
+    /* movswl */
+    tcg_out_modrm(s, OPC_MOVSWL, dest, src);
+}
+
 static inline void tgen_arithi(TCGContext *s, int c, int r0, int32_t val, int cf)
 {
     if (!cf && ((c == ARITH_ADD && val == 1) || (c == ARITH_SUB && val == -1))) {
@@ -680,12 +695,10 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
 
     switch(opc) {
     case 0 | 4:
-        /* movsbl */
-        tcg_out_modrm(s, 0xbe | P_EXT, data_reg, TCG_REG_EAX);
+        tcg_out_ext8s(s, data_reg, TCG_REG_EAX);
         break;
     case 1 | 4:
-        /* movswl */
-        tcg_out_modrm(s, 0xbf | P_EXT, data_reg, TCG_REG_EAX);
+        tcg_out_ext16s(s, data_reg, TCG_REG_EAX);
         break;
     case 0:
         tcg_out_ext8u(s, data_reg, TCG_REG_EAX);
@@ -735,7 +748,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
         break;
     case 0 | 4:
         /* movsbl */
-        tcg_out_modrm_offset(s, 0xbe | P_EXT, data_reg, r0, GUEST_BASE);
+        tcg_out_modrm_offset(s, OPC_MOVSBL, data_reg, r0, GUEST_BASE);
         break;
     case 1:
         /* movzwl */
@@ -746,12 +759,12 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
         break;
     case 1 | 4:
         /* movswl */
-        tcg_out_modrm_offset(s, 0xbf | P_EXT, data_reg, r0, GUEST_BASE);
+        tcg_out_modrm_offset(s, OPC_MOVSWL, data_reg, r0, GUEST_BASE);
         if (bswap) {
             tcg_out_rolw_8(s, data_reg);
 
             /* movswl data_reg, data_reg */
-            tcg_out_modrm(s, 0xbf | P_EXT, data_reg, data_reg);
+            tcg_out_modrm(s, OPC_MOVSWL, data_reg, data_reg);
         }
         break;
     case 2:
@@ -1044,7 +1057,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
     case INDEX_op_ld8s_i32:
         /* movsbl */
-        tcg_out_modrm_offset(s, 0xbe | P_EXT, args[0], args[1], args[2]);
+        tcg_out_modrm_offset(s, OPC_MOVSBL, args[0], args[1], args[2]);
         break;
     case INDEX_op_ld16u_i32:
         /* movzwl */
@@ -1052,7 +1065,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
     case INDEX_op_ld16s_i32:
         /* movswl */
-        tcg_out_modrm_offset(s, 0xbf | P_EXT, args[0], args[1], args[2]);
+        tcg_out_modrm_offset(s, OPC_MOVSWL, args[0], args[1], args[2]);
         break;
     case INDEX_op_ld_i32:
         tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
@@ -1180,10 +1193,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
 
     case INDEX_op_ext8s_i32:
-        tcg_out_modrm(s, 0xbe | P_EXT, args[0], args[1]);
+        tcg_out_ext8s(s, args[0], args[1]);
         break;
     case INDEX_op_ext16s_i32:
-        tcg_out_modrm(s, 0xbf | P_EXT, args[0], args[1]);
+        tcg_out_ext16s(s, args[0], args[1]);
         break;
     case INDEX_op_ext8u_i32:
         tcg_out_ext8u(s, args[0], args[1]);
-- 
1.7.0.1

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [Qemu-devel] [PATCH 03/15] tcg-i386: Tidy immediate arithmetic operations.
  2010-05-21 15:30 [Qemu-devel] [PATCH 00/15] tcg-i386 cleanup and improvement, v3 Richard Henderson
  2010-05-21 15:30 ` [Qemu-devel] [PATCH 01/15] tcg-i386: Tidy ext8u and ext16u operations Richard Henderson
  2010-05-21 15:30 ` [Qemu-devel] [PATCH 02/15] tcg-i386: Tidy ext8s and ext16s operations Richard Henderson
@ 2010-05-21 15:30 ` Richard Henderson
  2010-05-21 15:30 ` [Qemu-devel] [PATCH 04/15] tcg-i386: Tidy non-immediate " Richard Henderson
                   ` (12 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: Richard Henderson @ 2010-05-21 15:30 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien

Define OPC_ARITH_EvI[bz]; use throughout.  Use tcg_out_ext8u
directly in setcond.  Use tgen_arithi in qemu_ld/st.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/i386/tcg-target.c |   26 ++++++++++----------------
 1 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 949e974..5e8c58b 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -158,6 +158,8 @@ static inline int tcg_target_const_match(tcg_target_long val,
 
 #define P_EXT   0x100 /* 0x0f opcode prefix */
 
+#define OPC_ARITH_EvIz	(0x81)
+#define OPC_ARITH_EvIb	(0x83)
 #define OPC_BSWAP	(0xc8 | P_EXT)
 #define OPC_JCC_long	(0x80 | P_EXT)	/* ... plus condition code */
 #define OPC_JCC_short	(0x70)		/* ... plus condition code */
@@ -360,14 +362,14 @@ static inline void tgen_arithi(TCGContext *s, int c, int r0, int32_t val, int cf
         /* dec */
         tcg_out_opc(s, 0x48 + r0);
     } else if (val == (int8_t)val) {
-        tcg_out_modrm(s, 0x83, c, r0);
+        tcg_out_modrm(s, OPC_ARITH_EvIb, c, r0);
         tcg_out8(s, val);
     } else if (c == ARITH_AND && val == 0xffu && r0 < 4) {
         tcg_out_ext8u(s, r0, r0);
     } else if (c == ARITH_AND && val == 0xffffu) {
         tcg_out_ext16u(s, r0, r0);
     } else {
-        tcg_out_modrm(s, 0x81, c, r0);
+        tcg_out_modrm(s, OPC_ARITH_EvIz, c, r0);
         tcg_out32(s, val);
     }
 }
@@ -536,7 +538,7 @@ static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGArg dest,
     tcg_out_cmp(s, arg1, arg2, const_arg2);
     /* setcc */
     tcg_out_modrm(s, 0x90 | tcg_cond_to_jcc[cond] | P_EXT, 0, dest);
-    tgen_arithi(s, ARITH_AND, dest, 0xff, 0);
+    tcg_out_ext8u(s, dest, dest);
 }
 
 static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
@@ -638,16 +640,12 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
 
 #if defined(CONFIG_SOFTMMU)
     tcg_out_mov(s, r1, addr_reg); 
-
     tcg_out_mov(s, r0, addr_reg); 
- 
+
     tcg_out_shifti(s, SHIFT_SHR, r1, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
 
-    tcg_out_modrm(s, 0x81, 4, r0); /* andl $x, r0 */
-    tcg_out32(s, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
-    
-    tcg_out_modrm(s, 0x81, 4, r1); /* andl $x, r1 */
-    tcg_out32(s, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
+    tgen_arithi(s, ARITH_AND, r0, TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
+    tgen_arithi(s, ARITH_AND, r1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
 
     tcg_out_opc(s, 0x8d); /* lea offset(r1, %ebp), r1 */
     tcg_out8(s, 0x80 | (r1 << 3) | 0x04);
@@ -834,16 +832,12 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
 
 #if defined(CONFIG_SOFTMMU)
     tcg_out_mov(s, r1, addr_reg); 
-
     tcg_out_mov(s, r0, addr_reg); 
  
     tcg_out_shifti(s, SHIFT_SHR, r1, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
 
-    tcg_out_modrm(s, 0x81, 4, r0); /* andl $x, r0 */
-    tcg_out32(s, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
-    
-    tcg_out_modrm(s, 0x81, 4, r1); /* andl $x, r1 */
-    tcg_out32(s, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
+    tgen_arithi(s, ARITH_AND, r0, TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
+    tgen_arithi(s, ARITH_AND, r1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
 
     tcg_out_opc(s, 0x8d); /* lea offset(r1, %ebp), r1 */
     tcg_out8(s, 0x80 | (r1 << 3) | 0x04);
-- 
1.7.0.1

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [Qemu-devel] [PATCH 04/15] tcg-i386: Tidy non-immediate arithmetic operations.
  2010-05-21 15:30 [Qemu-devel] [PATCH 00/15] tcg-i386 cleanup and improvement, v3 Richard Henderson
                   ` (2 preceding siblings ...)
  2010-05-21 15:30 ` [Qemu-devel] [PATCH 03/15] tcg-i386: Tidy immediate arithmetic operations Richard Henderson
@ 2010-05-21 15:30 ` Richard Henderson
  2010-05-21 15:30 ` [Qemu-devel] [PATCH 05/15] tcg-i386: Tidy movi Richard Henderson
                   ` (11 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: Richard Henderson @ 2010-05-21 15:30 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien

Add more OPC values, and tgen_arithr.  Use the later throughout.

Note that normal reg/reg arithmetic now uses the Gv,Ev opcode form
instead of the Ev,Gv opcode form used previously.  Both forms
disassemble properly, and so there's no visible change when diffing
log files before and after the change.  This change makes the operand
ordering within the output routines more natural, and avoids the need
to define an OPC_ARITH_EvGv since a read-modify-write with memory is
not needed within TCG.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/i386/tcg-target.c |   78 ++++++++++++++++++++++++++++++-------------------
 1 files changed, 48 insertions(+), 30 deletions(-)

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 5e8c58b..bc5985f 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -160,7 +160,12 @@ static inline int tcg_target_const_match(tcg_target_long val,
 
 #define OPC_ARITH_EvIz	(0x81)
 #define OPC_ARITH_EvIb	(0x83)
+#define OPC_ARITH_GvEv	(0x03)		/* ... plus (ARITH_FOO << 3) */
+#define OPC_ADD_GvEv	(OPC_ARITH_GvEv | (ARITH_ADD << 3))
 #define OPC_BSWAP	(0xc8 | P_EXT)
+#define OPC_CMP_GvEv	(OPC_ARITH_GvEv | (ARITH_CMP << 3))
+#define OPC_DEC_r32	(0x48)
+#define OPC_INC_r32	(0x40)
 #define OPC_JCC_long	(0x80 | P_EXT)	/* ... plus condition code */
 #define OPC_JCC_short	(0x70)		/* ... plus condition code */
 #define OPC_JMP_long	(0xe9)
@@ -175,6 +180,7 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #define OPC_SHIFT_1	(0xd1)
 #define OPC_SHIFT_Ib	(0xc1)
 #define OPC_SHIFT_cl	(0xd3)
+#define OPC_TESTL	(0x85)
 
 /* Group 1 opcode extensions for 0x80-0x83.  */
 #define ARITH_ADD 0
@@ -275,6 +281,12 @@ static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r, int rm,
     }
 }
 
+/* Generate dest op= src.  Uses the same ARITH_* codes as tgen_arithi.  */
+static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
+{
+    tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3), dest, src);
+}
+
 static inline void tcg_out_mov(TCGContext *s, int ret, int arg)
 {
     if (arg != ret) {
@@ -286,8 +298,7 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type,
                                 int ret, int32_t arg)
 {
     if (arg == 0) {
-        /* xor r0,r0 */
-        tcg_out_modrm(s, 0x01 | (ARITH_XOR << 3), ret, ret);
+        tgen_arithr(s, ARITH_XOR, ret, ret);
     } else {
         tcg_out8(s, 0xb8 + ret);
         tcg_out32(s, arg);
@@ -353,14 +364,15 @@ static inline void tcg_out_ext16s(TCGContext *s, int dest, int src)
     tcg_out_modrm(s, OPC_MOVSWL, dest, src);
 }
 
-static inline void tgen_arithi(TCGContext *s, int c, int r0, int32_t val, int cf)
+static inline void tgen_arithi(TCGContext *s, int c, int r0,
+                               int32_t val, int cf)
 {
-    if (!cf && ((c == ARITH_ADD && val == 1) || (c == ARITH_SUB && val == -1))) {
-        /* inc */
-        tcg_out_opc(s, 0x40 + r0);
-    } else if (!cf && ((c == ARITH_ADD && val == -1) || (c == ARITH_SUB && val == 1))) {
-        /* dec */
-        tcg_out_opc(s, 0x48 + r0);
+    /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
+       partial flags update stalls on Pentium4 and are not recommended
+       by current Intel optimization manuals.  */
+    if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
+        int opc = ((c == ARITH_ADD) ^ (val < 0) ? OPC_INC_r32 : OPC_DEC_r32);
+        tcg_out_opc(s, opc + r0);
     } else if (val == (int8_t)val) {
         tcg_out_modrm(s, OPC_ARITH_EvIb, c, r0);
         tcg_out8(s, val);
@@ -433,12 +445,12 @@ static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
     if (const_arg2) {
         if (arg2 == 0) {
             /* test r, r */
-            tcg_out_modrm(s, 0x85, arg1, arg1);
+            tcg_out_modrm(s, OPC_TESTL, arg1, arg1);
         } else {
             tgen_arithi(s, ARITH_CMP, arg1, arg2, 0);
         }
     } else {
-        tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
+        tgen_arithr(s, ARITH_CMP, arg1, arg2);
     }
 }
 
@@ -653,7 +665,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     tcg_out32(s, offsetof(CPUState, tlb_table[mem_index][0].addr_read));
 
     /* cmp 0(r1), r0 */
-    tcg_out_modrm_offset(s, 0x3b, r0, r1, 0);
+    tcg_out_modrm_offset(s, OPC_CMP_GvEv, r0, r1, 0);
     
     tcg_out_mov(s, r0, addr_reg);
     
@@ -669,7 +681,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     s->code_ptr++;
     
     /* cmp 4(r1), addr_reg2 */
-    tcg_out_modrm_offset(s, 0x3b, addr_reg2, r1, 4);
+    tcg_out_modrm_offset(s, OPC_CMP_GvEv, addr_reg2, r1, 4);
 
     /* je label1 */
     tcg_out8(s, OPC_JCC_short + JCC_JE);
@@ -728,7 +740,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     *label1_ptr = s->code_ptr - label1_ptr - 1;
 
     /* add x(r1), r0 */
-    tcg_out_modrm_offset(s, 0x03, r0, r1, offsetof(CPUTLBEntry, addend) - 
+    tcg_out_modrm_offset(s, OPC_ADD_GvEv, r0, r1,
+                         offsetof(CPUTLBEntry, addend) - 
                          offsetof(CPUTLBEntry, addr_read));
 #else
     r0 = addr_reg;
@@ -845,7 +858,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     tcg_out32(s, offsetof(CPUState, tlb_table[mem_index][0].addr_write));
 
     /* cmp 0(r1), r0 */
-    tcg_out_modrm_offset(s, 0x3b, r0, r1, 0);
+    tcg_out_modrm_offset(s, OPC_CMP_GvEv, r0, r1, 0);
     
     tcg_out_mov(s, r0, addr_reg);
     
@@ -861,7 +874,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     s->code_ptr++;
     
     /* cmp 4(r1), addr_reg2 */
-    tcg_out_modrm_offset(s, 0x3b, addr_reg2, r1, 4);
+    tcg_out_modrm_offset(s, OPC_CMP_GvEv, addr_reg2, r1, 4);
 
     /* je label1 */
     tcg_out8(s, OPC_JCC_short + JCC_JE);
@@ -942,7 +955,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     *label1_ptr = s->code_ptr - label1_ptr - 1;
 
     /* add x(r1), r0 */
-    tcg_out_modrm_offset(s, 0x03, r0, r1, offsetof(CPUTLBEntry, addend) - 
+    tcg_out_modrm_offset(s, OPC_ADD_GvEv, r0, r1,
+                         offsetof(CPUTLBEntry, addend) - 
                          offsetof(CPUTLBEntry, addr_write));
 #else
     r0 = addr_reg;
@@ -1094,7 +1108,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         if (const_args[2]) {
             tgen_arithi(s, c, args[0], args[2], 0);
         } else {
-            tcg_out_modrm(s, 0x01 | (c << 3), args[2], args[0]);
+            tgen_arithr(s, c, args[0], args[2]);
         }
         break;
     case INDEX_op_mul_i32:
@@ -1144,24 +1158,28 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         goto gen_shift32;
 
     case INDEX_op_add2_i32:
-        if (const_args[4]) 
+        if (const_args[4]) {
             tgen_arithi(s, ARITH_ADD, args[0], args[4], 1);
-        else
-            tcg_out_modrm(s, 0x01 | (ARITH_ADD << 3), args[4], args[0]);
-        if (const_args[5]) 
+        } else {
+            tgen_arithr(s, ARITH_ADD, args[0], args[4]);
+        }
+        if (const_args[5]) {
             tgen_arithi(s, ARITH_ADC, args[1], args[5], 1);
-        else
-            tcg_out_modrm(s, 0x01 | (ARITH_ADC << 3), args[5], args[1]);
+        } else {
+            tgen_arithr(s, ARITH_ADC, args[1], args[5]);
+        }
         break;
     case INDEX_op_sub2_i32:
-        if (const_args[4]) 
+        if (const_args[4]) {
             tgen_arithi(s, ARITH_SUB, args[0], args[4], 1);
-        else
-            tcg_out_modrm(s, 0x01 | (ARITH_SUB << 3), args[4], args[0]);
-        if (const_args[5]) 
+        } else {
+            tgen_arithr(s, ARITH_SUB, args[0], args[4]);
+        }
+        if (const_args[5]) {
             tgen_arithi(s, ARITH_SBB, args[1], args[5], 1);
-        else
-            tcg_out_modrm(s, 0x01 | (ARITH_SBB << 3), args[5], args[1]);
+        } else {
+            tgen_arithr(s, ARITH_SBB, args[1], args[5]);
+        }
         break;
     case INDEX_op_brcond_i32:
         tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
-- 
1.7.0.1

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [Qemu-devel] [PATCH 05/15] tcg-i386: Tidy movi.
  2010-05-21 15:30 [Qemu-devel] [PATCH 00/15] tcg-i386 cleanup and improvement, v3 Richard Henderson
                   ` (3 preceding siblings ...)
  2010-05-21 15:30 ` [Qemu-devel] [PATCH 04/15] tcg-i386: Tidy non-immediate " Richard Henderson
@ 2010-05-21 15:30 ` Richard Henderson
  2010-05-21 15:30 ` [Qemu-devel] [PATCH 06/15] tcg-i386: Tidy push/pop Richard Henderson
                   ` (10 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: Richard Henderson @ 2010-05-21 15:30 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien

Define and use OPC_MOVL_Iv.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/i386/tcg-target.c |    3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index bc5985f..1d227db 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -173,6 +173,7 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #define OPC_MOVB_EvGv	(0x88)		/* stores, more or less */
 #define OPC_MOVL_EvGv	(0x89)		/* stores, more or less */
 #define OPC_MOVL_GvEv	(0x8b)		/* loads, more or less */
+#define OPC_MOVL_Iv     (0xb8)
 #define OPC_MOVSBL	(0xbe | P_EXT)
 #define OPC_MOVSWL	(0xbf | P_EXT)
 #define OPC_MOVZBL	(0xb6 | P_EXT)
@@ -300,7 +301,7 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type,
     if (arg == 0) {
         tgen_arithr(s, ARITH_XOR, ret, ret);
     } else {
-        tcg_out8(s, 0xb8 + ret);
+        tcg_out8(s, OPC_MOVL_Iv + ret);
         tcg_out32(s, arg);
     }
 }
-- 
1.7.0.1

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [Qemu-devel] [PATCH 06/15] tcg-i386: Tidy push/pop.
  2010-05-21 15:30 [Qemu-devel] [PATCH 00/15] tcg-i386 cleanup and improvement, v3 Richard Henderson
                   ` (4 preceding siblings ...)
  2010-05-21 15:30 ` [Qemu-devel] [PATCH 05/15] tcg-i386: Tidy movi Richard Henderson
@ 2010-05-21 15:30 ` Richard Henderson
  2010-05-21 15:30 ` [Qemu-devel] [PATCH 07/15] tcg-i386: Tidy calls Richard Henderson
                   ` (9 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: Richard Henderson @ 2010-05-21 15:30 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien

Move tcg_out_push/pop up in the file so that they can be used
by qemu_ld/st.  Define a tcg_out_pushi to be used as well.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/i386/tcg-target.c |   48 ++++++++++++++++++++++++++++++------------------
 1 files changed, 30 insertions(+), 18 deletions(-)

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 1d227db..a487375 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -178,6 +178,10 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #define OPC_MOVSWL	(0xbf | P_EXT)
 #define OPC_MOVZBL	(0xb6 | P_EXT)
 #define OPC_MOVZWL	(0xb7 | P_EXT)
+#define OPC_POP_r32	(0x58)
+#define OPC_PUSH_r32	(0x50)
+#define OPC_PUSH_Iv	(0x68)
+#define OPC_PUSH_Ib	(0x6a)
 #define OPC_SHIFT_1	(0xd1)
 #define OPC_SHIFT_Ib	(0xc1)
 #define OPC_SHIFT_cl	(0xd3)
@@ -306,6 +310,27 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type,
     }
 }
 
+static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
+{
+    if (val == (int8_t)val) {
+        tcg_out_opc(s, OPC_PUSH_Ib);
+        tcg_out8(s, val);
+    } else {
+        tcg_out_opc(s, OPC_PUSH_Iv);
+        tcg_out32(s, val);
+    }
+}
+
+static inline void tcg_out_push(TCGContext *s, int reg)
+{
+    tcg_out_opc(s, OPC_PUSH_r32 + reg);
+}
+
+static inline void tcg_out_pop(TCGContext *s, int reg)
+{
+    tcg_out_opc(s, OPC_POP_r32 + reg);
+}
+
 static inline void tcg_out_ld(TCGContext *s, TCGType type, int ret,
                               int arg1, tcg_target_long arg2)
 {
@@ -891,8 +916,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     if (opc == 3) {
         tcg_out_mov(s, TCG_REG_EDX, data_reg);
         tcg_out_mov(s, TCG_REG_ECX, data_reg2);
-        tcg_out8(s, 0x6a); /* push Ib */
-        tcg_out8(s, mem_index);
+        tcg_out_pushi(s, mem_index);
         tcg_out8(s, 0xe8);
         tcg_out32(s, (tcg_target_long)qemu_st_helpers[s_bits] - 
                   (tcg_target_long)s->code_ptr - 4);
@@ -917,10 +941,9 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
 #else
     if (opc == 3) {
         tcg_out_mov(s, TCG_REG_EDX, addr_reg2);
-        tcg_out8(s, 0x6a); /* push Ib */
-        tcg_out8(s, mem_index);
-        tcg_out_opc(s, 0x50 + data_reg2); /* push */
-        tcg_out_opc(s, 0x50 + data_reg); /* push */
+        tcg_out_pushi(s, mem_index);
+        tcg_out_push(s, data_reg2);
+        tcg_out_push(s, data_reg);
         tcg_out8(s, 0xe8);
         tcg_out32(s, (tcg_target_long)qemu_st_helpers[s_bits] - 
                   (tcg_target_long)s->code_ptr - 4);
@@ -938,8 +961,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
             tcg_out_mov(s, TCG_REG_ECX, data_reg);
             break;
         }
-        tcg_out8(s, 0x6a); /* push Ib */
-        tcg_out8(s, mem_index);
+        tcg_out_pushi(s, mem_index);
         tcg_out8(s, 0xe8);
         tcg_out32(s, (tcg_target_long)qemu_st_helpers[s_bits] - 
                   (tcg_target_long)s->code_ptr - 4);
@@ -1352,16 +1374,6 @@ static int tcg_target_callee_save_regs[] = {
     TCG_REG_EDI,
 };
 
-static inline void tcg_out_push(TCGContext *s, int reg)
-{
-    tcg_out_opc(s, 0x50 + reg);
-}
-
-static inline void tcg_out_pop(TCGContext *s, int reg)
-{
-    tcg_out_opc(s, 0x58 + reg);
-}
-
 /* Generate global QEMU prologue and epilogue code */
 void tcg_target_qemu_prologue(TCGContext *s)
 {
-- 
1.7.0.1

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [Qemu-devel] [PATCH 07/15] tcg-i386: Tidy calls.
  2010-05-21 15:30 [Qemu-devel] [PATCH 00/15] tcg-i386 cleanup and improvement, v3 Richard Henderson
                   ` (5 preceding siblings ...)
  2010-05-21 15:30 ` [Qemu-devel] [PATCH 06/15] tcg-i386: Tidy push/pop Richard Henderson
@ 2010-05-21 15:30 ` Richard Henderson
  2010-05-21 15:30 ` [Qemu-devel] [PATCH 08/15] tcg-i386: Tidy ret Richard Henderson
                   ` (8 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: Richard Henderson @ 2010-05-21 15:30 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien

Define OPC_CALL_Jz, generated by tcg_out_calli; use the later
throughout.  Unify the calls within qemu_st; adjust the stack
with a single pop if applicable.

Define and use EXT_CALLN_Ev for indirect calls.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/i386/tcg-target.c |   49 +++++++++++++++++++++++++++----------------------
 1 files changed, 27 insertions(+), 22 deletions(-)

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index a487375..28a01f3 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -163,6 +163,7 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #define OPC_ARITH_GvEv	(0x03)		/* ... plus (ARITH_FOO << 3) */
 #define OPC_ADD_GvEv	(OPC_ARITH_GvEv | (ARITH_ADD << 3))
 #define OPC_BSWAP	(0xc8 | P_EXT)
+#define OPC_CALL_Jz	(0xe8)
 #define OPC_CMP_GvEv	(OPC_ARITH_GvEv | (ARITH_CMP << 3))
 #define OPC_DEC_r32	(0x48)
 #define OPC_INC_r32	(0x40)
@@ -205,6 +206,7 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #define SHIFT_SAR 7
 
 /* Group 5 opcode extensions for 0xff.  */
+#define EXT_CALLN_Ev	2
 #define EXT_JMPN_Ev	4
 
 /* Condition codes to be added to OPC_JCC_{long,short}.  */
@@ -621,6 +623,12 @@ static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
     }
 }
 
+static void tcg_out_calli(TCGContext *s, tcg_target_long dest)
+{
+    tcg_out_opc(s, OPC_CALL_Jz);
+    tcg_out32(s, dest - (tcg_target_long)s->code_ptr - 4);
+}
+
 #if defined(CONFIG_SOFTMMU)
 
 #include "../../softmmu_defs.h"
@@ -725,9 +733,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     tcg_out_mov(s, TCG_REG_EDX, addr_reg2);
     tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_ECX, mem_index);
 #endif
-    tcg_out8(s, 0xe8);
-    tcg_out32(s, (tcg_target_long)qemu_ld_helpers[s_bits] - 
-              (tcg_target_long)s->code_ptr - 4);
+    tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);
 
     switch(opc) {
     case 0 | 4:
@@ -844,6 +850,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
 {
     int addr_reg, data_reg, data_reg2, r0, r1, mem_index, s_bits, bswap;
 #if defined(CONFIG_SOFTMMU)
+    int stack_adjust;
     uint8_t *label1_ptr, *label2_ptr;
 #endif
 #if TARGET_LONG_BITS == 64
@@ -917,10 +924,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
         tcg_out_mov(s, TCG_REG_EDX, data_reg);
         tcg_out_mov(s, TCG_REG_ECX, data_reg2);
         tcg_out_pushi(s, mem_index);
-        tcg_out8(s, 0xe8);
-        tcg_out32(s, (tcg_target_long)qemu_st_helpers[s_bits] - 
-                  (tcg_target_long)s->code_ptr - 4);
-        tcg_out_addi(s, TCG_REG_ESP, 4);
+        stack_adjust = 4;
     } else {
         switch(opc) {
         case 0:
@@ -934,9 +938,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
             break;
         }
         tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_ECX, mem_index);
-        tcg_out8(s, 0xe8);
-        tcg_out32(s, (tcg_target_long)qemu_st_helpers[s_bits] - 
-                  (tcg_target_long)s->code_ptr - 4);
+        stack_adjust = 0;
     }
 #else
     if (opc == 3) {
@@ -944,10 +946,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
         tcg_out_pushi(s, mem_index);
         tcg_out_push(s, data_reg2);
         tcg_out_push(s, data_reg);
-        tcg_out8(s, 0xe8);
-        tcg_out32(s, (tcg_target_long)qemu_st_helpers[s_bits] - 
-                  (tcg_target_long)s->code_ptr - 4);
-        tcg_out_addi(s, TCG_REG_ESP, 12);
+        stack_adjust = 12;
     } else {
         tcg_out_mov(s, TCG_REG_EDX, addr_reg2);
         switch(opc) {
@@ -962,13 +961,19 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
             break;
         }
         tcg_out_pushi(s, mem_index);
-        tcg_out8(s, 0xe8);
-        tcg_out32(s, (tcg_target_long)qemu_st_helpers[s_bits] - 
-                  (tcg_target_long)s->code_ptr - 4);
-        tcg_out_addi(s, TCG_REG_ESP, 4);
+        stack_adjust = 4;
     }
 #endif
-    
+
+    tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);
+
+    if (stack_adjust == 4) {
+        /* Pop and discard.  This is 2 bytes smaller than the add.  */
+        tcg_out_pop(s, TCG_REG_ECX);
+    } else if (stack_adjust != 0) {
+        tcg_out_addi(s, TCG_REG_ESP, stack_adjust);
+    }
+
     /* jmp label2 */
     tcg_out8(s, OPC_JMP_short);
     label2_ptr = s->code_ptr;
@@ -1061,10 +1066,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
     case INDEX_op_call:
         if (const_args[0]) {
-            tcg_out8(s, 0xe8);
-            tcg_out32(s, args[0] - (tcg_target_long)s->code_ptr - 4);
+            tcg_out_calli(s, args[0]);
         } else {
-            tcg_out_modrm(s, 0xff, 2, args[0]);
+            /* call *reg */
+            tcg_out_modrm(s, 0xff, EXT_CALLN_Ev, args[0]);
         }
         break;
     case INDEX_op_jmp:
-- 
1.7.0.1

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [Qemu-devel] [PATCH 08/15] tcg-i386: Tidy ret.
  2010-05-21 15:30 [Qemu-devel] [PATCH 00/15] tcg-i386 cleanup and improvement, v3 Richard Henderson
                   ` (6 preceding siblings ...)
  2010-05-21 15:30 ` [Qemu-devel] [PATCH 07/15] tcg-i386: Tidy calls Richard Henderson
@ 2010-05-21 15:30 ` Richard Henderson
  2010-05-21 15:30 ` [Qemu-devel] [PATCH 09/15] tcg-i386: Tidy setcc Richard Henderson
                   ` (7 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: Richard Henderson @ 2010-05-21 15:30 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien

Define and use OPC_RET.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/i386/tcg-target.c |    3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 28a01f3..c258775 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -183,6 +183,7 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #define OPC_PUSH_r32	(0x50)
 #define OPC_PUSH_Iv	(0x68)
 #define OPC_PUSH_Ib	(0x6a)
+#define OPC_RET		(0xc3)
 #define OPC_SHIFT_1	(0xd1)
 #define OPC_SHIFT_Ib	(0xc1)
 #define OPC_SHIFT_cl	(0xd3)
@@ -1405,7 +1406,7 @@ void tcg_target_qemu_prologue(TCGContext *s)
     for(i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
         tcg_out_pop(s, tcg_target_callee_save_regs[i]);
     }
-    tcg_out8(s, 0xc3); /* ret */
+    tcg_out_opc(s, OPC_RET);
 }
 
 void tcg_target_init(TCGContext *s)
-- 
1.7.0.1

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [Qemu-devel] [PATCH 09/15] tcg-i386: Tidy setcc.
  2010-05-21 15:30 [Qemu-devel] [PATCH 00/15] tcg-i386 cleanup and improvement, v3 Richard Henderson
                   ` (7 preceding siblings ...)
  2010-05-21 15:30 ` [Qemu-devel] [PATCH 08/15] tcg-i386: Tidy ret Richard Henderson
@ 2010-05-21 15:30 ` Richard Henderson
  2010-05-21 15:30 ` [Qemu-devel] [PATCH 10/15] tcg-i386: Tidy unary arithmetic Richard Henderson
                   ` (6 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: Richard Henderson @ 2010-05-21 15:30 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien

Define and use OPC_SETCC.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/i386/tcg-target.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index c258775..a43dbfb 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -184,6 +184,7 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #define OPC_PUSH_Iv	(0x68)
 #define OPC_PUSH_Ib	(0x6a)
 #define OPC_RET		(0xc3)
+#define OPC_SETCC	(0x90 | P_EXT)	/* ... plus condition code */
 #define OPC_SHIFT_1	(0xd1)
 #define OPC_SHIFT_Ib	(0xc1)
 #define OPC_SHIFT_cl	(0xd3)
@@ -577,8 +578,7 @@ static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGArg dest,
                             TCGArg arg1, TCGArg arg2, int const_arg2)
 {
     tcg_out_cmp(s, arg1, arg2, const_arg2);
-    /* setcc */
-    tcg_out_modrm(s, 0x90 | tcg_cond_to_jcc[cond] | P_EXT, 0, dest);
+    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
     tcg_out_ext8u(s, dest, dest);
 }
 
-- 
1.7.0.1

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [Qemu-devel] [PATCH 10/15] tcg-i386: Tidy unary arithmetic.
  2010-05-21 15:30 [Qemu-devel] [PATCH 00/15] tcg-i386 cleanup and improvement, v3 Richard Henderson
                   ` (8 preceding siblings ...)
  2010-05-21 15:30 ` [Qemu-devel] [PATCH 09/15] tcg-i386: Tidy setcc Richard Henderson
@ 2010-05-21 15:30 ` Richard Henderson
  2010-05-21 15:30 ` [Qemu-devel] [PATCH 11/15] tcg-i386: Tidy multiply Richard Henderson
                   ` (5 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: Richard Henderson @ 2010-05-21 15:30 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien

Define OPC_GRP3 and EXT3_FOO to match.  Use them instead of
bare constants.

Define OPC_GRP5 and rename the existing EXT_BAR to EXT5_BAR to
make it clear which extension should be used with which opcode.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/i386/tcg-target.c |   38 +++++++++++++++++++++++++-------------
 1 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index a43dbfb..07da8c7 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -190,7 +190,11 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #define OPC_SHIFT_cl	(0xd3)
 #define OPC_TESTL	(0x85)
 
-/* Group 1 opcode extensions for 0x80-0x83.  */
+#define OPC_GRP3_Ev	(0xf7)
+#define OPC_GRP5	(0xff)
+
+/* Group 1 opcode extensions for 0x80-0x83.
+   These are also used as modifiers for OPC_ARITH.  */
 #define ARITH_ADD 0
 #define ARITH_OR  1
 #define ARITH_ADC 2
@@ -207,9 +211,17 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #define SHIFT_SHR 5
 #define SHIFT_SAR 7
 
-/* Group 5 opcode extensions for 0xff.  */
-#define EXT_CALLN_Ev	2
-#define EXT_JMPN_Ev	4
+/* Group 3 opcode extensions for 0xf6, 0xf7.  To be used with OPC_GRP3.  */
+#define EXT3_NOT   2
+#define EXT3_NEG   3
+#define EXT3_MUL   4
+#define EXT3_IMUL  5
+#define EXT3_DIV   6
+#define EXT3_IDIV  7
+
+/* Group 5 opcode extensions for 0xff.  To be used with OPC_GRP5.  */
+#define EXT5_CALLN_Ev	2
+#define EXT5_JMPN_Ev	4
 
 /* Condition codes to be added to OPC_JCC_{long,short}.  */
 #define JCC_JMP (-1)
@@ -1060,7 +1072,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
             tcg_out32(s, 0);
         } else {
             /* indirect jump method */
-            tcg_out_modrm_offset(s, 0xff, EXT_JMPN_Ev, -1,
+            tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
                                  (tcg_target_long)(s->tb_next + args[0]));
         }
         s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
@@ -1070,7 +1082,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
             tcg_out_calli(s, args[0]);
         } else {
             /* call *reg */
-            tcg_out_modrm(s, 0xff, EXT_CALLN_Ev, args[0]);
+            tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]);
         }
         break;
     case INDEX_op_jmp:
@@ -1079,7 +1091,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
             tcg_out32(s, args[0] - (tcg_target_long)s->code_ptr - 4);
         } else {
             /* jmp *reg */
-            tcg_out_modrm(s, 0xff, EXT_JMPN_Ev, args[0]);
+            tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, args[0]);
         }
         break;
     case INDEX_op_br:
@@ -1156,13 +1168,13 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         }
         break;
     case INDEX_op_mulu2_i32:
-        tcg_out_modrm(s, 0xf7, 4, args[3]);
+        tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_MUL, args[3]);
         break;
     case INDEX_op_div2_i32:
-        tcg_out_modrm(s, 0xf7, 7, args[4]);
+        tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_IDIV, args[4]);
         break;
     case INDEX_op_divu2_i32:
-        tcg_out_modrm(s, 0xf7, 6, args[4]);
+        tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_DIV, args[4]);
         break;
     case INDEX_op_shl_i32:
         c = SHIFT_SHL;
@@ -1226,11 +1238,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
 
     case INDEX_op_neg_i32:
-        tcg_out_modrm(s, 0xf7, 3, args[0]);
+        tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_NEG, args[0]);
         break;
 
     case INDEX_op_not_i32:
-        tcg_out_modrm(s, 0xf7, 2, args[0]);
+        tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_NOT, args[0]);
         break;
 
     case INDEX_op_ext8s_i32:
@@ -1398,7 +1410,7 @@ void tcg_target_qemu_prologue(TCGContext *s)
     stack_addend = frame_size - push_size;
     tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
 
-    tcg_out_modrm(s, 0xff, EXT_JMPN_Ev, TCG_REG_EAX); /* jmp *%eax */
+    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_EAX); /* jmp *%eax */
     
     /* TB epilogue */
     tb_ret_addr = s->code_ptr;
-- 
1.7.0.1

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [Qemu-devel] [PATCH 11/15] tcg-i386: Tidy multiply.
  2010-05-21 15:30 [Qemu-devel] [PATCH 00/15] tcg-i386 cleanup and improvement, v3 Richard Henderson
                   ` (9 preceding siblings ...)
  2010-05-21 15:30 ` [Qemu-devel] [PATCH 10/15] tcg-i386: Tidy unary arithmetic Richard Henderson
@ 2010-05-21 15:30 ` Richard Henderson
  2010-05-21 15:30 ` [Qemu-devel] [PATCH 12/15] tcg-i386: Tidy xchg Richard Henderson
                   ` (4 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: Richard Henderson @ 2010-05-21 15:30 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien

Define and use OPC_IMUL_GvEv{,Ib,Iz}.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/i386/tcg-target.c |    9 ++++++---
 1 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 07da8c7..8eb88da 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -166,6 +166,9 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #define OPC_CALL_Jz	(0xe8)
 #define OPC_CMP_GvEv	(OPC_ARITH_GvEv | (ARITH_CMP << 3))
 #define OPC_DEC_r32	(0x48)
+#define OPC_IMUL_GvEv	(0xaf | P_EXT)
+#define OPC_IMUL_GvEvIb	(0x6b)
+#define OPC_IMUL_GvEvIz	(0x69)
 #define OPC_INC_r32	(0x40)
 #define OPC_JCC_long	(0x80 | P_EXT)	/* ... plus condition code */
 #define OPC_JCC_short	(0x70)		/* ... plus condition code */
@@ -1157,14 +1160,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
             int32_t val;
             val = args[2];
             if (val == (int8_t)val) {
-                tcg_out_modrm(s, 0x6b, args[0], args[0]);
+                tcg_out_modrm(s, OPC_IMUL_GvEvIb, args[0], args[0]);
                 tcg_out8(s, val);
             } else {
-                tcg_out_modrm(s, 0x69, args[0], args[0]);
+                tcg_out_modrm(s, OPC_IMUL_GvEvIz, args[0], args[0]);
                 tcg_out32(s, val);
             }
         } else {
-            tcg_out_modrm(s, 0xaf | P_EXT, args[0], args[2]);
+            tcg_out_modrm(s, OPC_IMUL_GvEv, args[0], args[2]);
         }
         break;
     case INDEX_op_mulu2_i32:
-- 
1.7.0.1

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [Qemu-devel] [PATCH 12/15] tcg-i386: Tidy xchg.
  2010-05-21 15:30 [Qemu-devel] [PATCH 00/15] tcg-i386 cleanup and improvement, v3 Richard Henderson
                   ` (10 preceding siblings ...)
  2010-05-21 15:30 ` [Qemu-devel] [PATCH 11/15] tcg-i386: Tidy multiply Richard Henderson
@ 2010-05-21 15:30 ` Richard Henderson
  2010-05-21 15:30 ` [Qemu-devel] [PATCH 13/15] tcg-i386: Tidy lea Richard Henderson
                   ` (3 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: Richard Henderson @ 2010-05-21 15:30 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien

Define and use OPC_XCHG_ax_r32.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/i386/tcg-target.c |    4 +++-
 1 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 8eb88da..09a56e8 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -192,6 +192,7 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #define OPC_SHIFT_Ib	(0xc1)
 #define OPC_SHIFT_cl	(0xd3)
 #define OPC_TESTL	(0x85)
+#define OPC_XCHG_ax_r32	(0x90)
 
 #define OPC_GRP3_Ev	(0xf7)
 #define OPC_GRP5	(0xff)
@@ -770,7 +771,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
         break;
     case 3:
         if (data_reg == TCG_REG_EDX) {
-            tcg_out_opc(s, 0x90 + TCG_REG_EDX); /* xchg %edx, %eax */
+            /* xchg %edx, %eax */
+            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX);
             tcg_out_mov(s, data_reg2, TCG_REG_EAX);
         } else {
             tcg_out_mov(s, data_reg, TCG_REG_EAX);
-- 
1.7.0.1

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [Qemu-devel] [PATCH 13/15] tcg-i386: Tidy lea.
  2010-05-21 15:30 [Qemu-devel] [PATCH 00/15] tcg-i386 cleanup and improvement, v3 Richard Henderson
                   ` (11 preceding siblings ...)
  2010-05-21 15:30 ` [Qemu-devel] [PATCH 12/15] tcg-i386: Tidy xchg Richard Henderson
@ 2010-05-21 15:30 ` Richard Henderson
  2010-05-21 15:30 ` [Qemu-devel] [PATCH 14/15] tcg-i386: Use lea for three-operand add Richard Henderson
                   ` (2 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: Richard Henderson @ 2010-05-21 15:30 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien

Implement full modrm+sib addressing mode processing.
Use that in qemu_ld/st to output the LEA.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/i386/tcg-target.c |   91 ++++++++++++++++++++++++++++++++-----------------
 1 files changed, 60 insertions(+), 31 deletions(-)

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 09a56e8..6562a5d 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -174,6 +174,7 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #define OPC_JCC_short	(0x70)		/* ... plus condition code */
 #define OPC_JMP_long	(0xe9)
 #define OPC_JMP_short	(0xeb)
+#define OPC_LEA         (0x8d)
 #define OPC_MOVB_EvGv	(0x88)		/* stores, more or less */
 #define OPC_MOVL_EvGv	(0x89)		/* stores, more or less */
 #define OPC_MOVL_GvEv	(0x8b)		/* loads, more or less */
@@ -272,40 +273,70 @@ static inline void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
     tcg_out8(s, 0xc0 | (r << 3) | rm);
 }
 
-/* rm == -1 means no register index */
-static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r, int rm, 
-                                        int32_t offset)
+/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.  
+   We handle either RM and INDEX missing with a -1 value.  */
+
+static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
+                                     int index, int shift, int32_t offset)
 {
+    int mod, len;
+
+    if (index == -1 && rm == -1) {
+        /* Absolute address.  */
+        tcg_out_opc(s, opc);
+        tcg_out8(s, (r << 3) | 5);
+        tcg_out32(s, offset);
+        return;
+    }
+
     tcg_out_opc(s, opc);
+
+    /* Find the length of the immediate addend.  Note that the encoding
+       that would be used for (%ebp) indicates absolute addressing.  */
     if (rm == -1) {
-        tcg_out8(s, 0x05 | (r << 3));
-        tcg_out32(s, offset);
+        mod = 0, len = 4, rm = 5;
     } else if (offset == 0 && rm != TCG_REG_EBP) {
-        if (rm == TCG_REG_ESP) {
-            tcg_out8(s, 0x04 | (r << 3));
-            tcg_out8(s, 0x24);
-        } else {
-            tcg_out8(s, 0x00 | (r << 3) | rm);
-        }
-    } else if ((int8_t)offset == offset) {
-        if (rm == TCG_REG_ESP) {
-            tcg_out8(s, 0x44 | (r << 3));
-            tcg_out8(s, 0x24);
-        } else {
-            tcg_out8(s, 0x40 | (r << 3) | rm);
-        }
-        tcg_out8(s, offset);
+        mod = 0, len = 0;
+    } else if (offset == (int8_t)offset) {
+        mod = 0x40, len = 1;
     } else {
-        if (rm == TCG_REG_ESP) {
-            tcg_out8(s, 0x84 | (r << 3));
-            tcg_out8(s, 0x24);
+        mod = 0x80, len = 4;
+    }
+
+    /* Use a single byte MODRM format if possible.  Note that the encoding
+       that would be used for %esp is the escape to the two byte form.  */
+    if (index == -1 && rm != TCG_REG_ESP) {
+        /* Single byte MODRM format.  */
+        tcg_out8(s, mod | (r << 3) | rm);
+    } else {
+        /* Two byte MODRM+SIB format.  */
+
+        /* Note that the encoding that would place %esp into the index
+           field indicates no index register.  */
+        if (index == -1) {
+            index = 4;
         } else {
-            tcg_out8(s, 0x80 | (r << 3) | rm);
+            assert(index != TCG_REG_ESP);
         }
+
+        tcg_out8(s, mod | (r << 3) | 4);
+        tcg_out8(s, (shift << 6) | (index << 3) | rm);
+    }
+
+    if (len == 1) {
+        tcg_out8(s, offset);
+    } else if (len == 4) {
         tcg_out32(s, offset);
     }
 }
 
+/* rm == -1 means no register index */
+static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r, int rm, 
+                                        int32_t offset)
+{
+    tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
+}
+
 /* Generate dest op= src.  Uses the same ARITH_* codes as tgen_arithi.  */
 static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
 {
@@ -710,10 +741,9 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     tgen_arithi(s, ARITH_AND, r0, TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
     tgen_arithi(s, ARITH_AND, r1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
 
-    tcg_out_opc(s, 0x8d); /* lea offset(r1, %ebp), r1 */
-    tcg_out8(s, 0x80 | (r1 << 3) | 0x04);
-    tcg_out8(s, (5 << 3) | r1);
-    tcg_out32(s, offsetof(CPUState, tlb_table[mem_index][0].addr_read));
+    tcg_out_modrm_sib_offset(s, OPC_LEA, r1, TCG_AREG0, r1, 0,
+                             offsetof(CPUState,
+                                      tlb_table[mem_index][0].addr_read));
 
     /* cmp 0(r1), r0 */
     tcg_out_modrm_offset(s, OPC_CMP_GvEv, r0, r1, 0);
@@ -903,10 +933,9 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     tgen_arithi(s, ARITH_AND, r0, TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
     tgen_arithi(s, ARITH_AND, r1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
 
-    tcg_out_opc(s, 0x8d); /* lea offset(r1, %ebp), r1 */
-    tcg_out8(s, 0x80 | (r1 << 3) | 0x04);
-    tcg_out8(s, (5 << 3) | r1);
-    tcg_out32(s, offsetof(CPUState, tlb_table[mem_index][0].addr_write));
+    tcg_out_modrm_sib_offset(s, OPC_LEA, r1, TCG_AREG0, r1, 0,
+                             offsetof(CPUState,
+                                      tlb_table[mem_index][0].addr_write));
 
     /* cmp 0(r1), r0 */
     tcg_out_modrm_offset(s, OPC_CMP_GvEv, r0, r1, 0);
-- 
1.7.0.1

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [Qemu-devel] [PATCH 14/15] tcg-i386: Use lea for three-operand add.
  2010-05-21 15:30 [Qemu-devel] [PATCH 00/15] tcg-i386 cleanup and improvement, v3 Richard Henderson
                   ` (12 preceding siblings ...)
  2010-05-21 15:30 ` [Qemu-devel] [PATCH 13/15] tcg-i386: Tidy lea Richard Henderson
@ 2010-05-21 15:30 ` Richard Henderson
  2010-05-21 15:30 ` [Qemu-devel] [PATCH 15/15] tcg-i386: Nuke trailing whitespace Richard Henderson
  2010-05-21 16:27 ` [Qemu-devel] [PATCH 00/15] tcg-i386 cleanup and improvement, v3 Aurelien Jarno
  15 siblings, 0 replies; 17+ messages in thread
From: Richard Henderson @ 2010-05-21 15:30 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien

The result is shorter than the mov+add that TCG would
otherwise generate for us.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/i386/tcg-target.c |   23 ++++++++++++++++++++---
 1 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 6562a5d..fc61e80 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -1165,6 +1165,25 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_st_i32:
         tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
         break;
+    case INDEX_op_add_i32:
+        /* For 3-operand addition, use LEA.  */
+        if (args[0] != args[1]) {
+            TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;
+
+            if (const_args[2]) {
+                c3 = a2, a2 = -1;
+            } else if (a0 == a2) {
+                /* Watch out for dest = src + dest, since we've removed
+                   the matching constraint on the add.  */
+                tgen_arithr(s, ARITH_ADD, a0, a1);
+                break;
+            }
+
+            tcg_out_modrm_sib_offset(s, OPC_LEA, a0, a1, a2, 0, c3);
+            break;
+        }
+        c = ARITH_ADD;
+        goto gen_arith;
     case INDEX_op_sub_i32:
         c = ARITH_SUB;
         goto gen_arith;
@@ -1177,8 +1196,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_xor_i32:
         c = ARITH_XOR;
         goto gen_arith;
-    case INDEX_op_add_i32:
-        c = ARITH_ADD;
     gen_arith:
         if (const_args[2]) {
             tgen_arithi(s, c, args[0], args[2], 0);
@@ -1353,7 +1370,7 @@ static const TCGTargetOpDef x86_op_defs[] = {
     { INDEX_op_st16_i32, { "r", "r" } },
     { INDEX_op_st_i32, { "r", "r" } },
 
-    { INDEX_op_add_i32, { "r", "0", "ri" } },
+    { INDEX_op_add_i32, { "r", "r", "ri" } },
     { INDEX_op_sub_i32, { "r", "0", "ri" } },
     { INDEX_op_mul_i32, { "r", "0", "ri" } },
     { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
-- 
1.7.0.1

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [Qemu-devel] [PATCH 15/15] tcg-i386: Nuke trailing whitespace.
  2010-05-21 15:30 [Qemu-devel] [PATCH 00/15] tcg-i386 cleanup and improvement, v3 Richard Henderson
                   ` (13 preceding siblings ...)
  2010-05-21 15:30 ` [Qemu-devel] [PATCH 14/15] tcg-i386: Use lea for three-operand add Richard Henderson
@ 2010-05-21 15:30 ` Richard Henderson
  2010-05-21 16:27 ` [Qemu-devel] [PATCH 00/15] tcg-i386 cleanup and improvement, v3 Aurelien Jarno
  15 siblings, 0 replies; 17+ messages in thread
From: Richard Henderson @ 2010-05-21 15:30 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/i386/tcg-target.c |   52 ++++++++++++++++++++++++------------------------
 1 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index fc61e80..396a2f1 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -50,7 +50,7 @@ static const int tcg_target_call_oarg_regs[2] = { TCG_REG_EAX, TCG_REG_EDX };
 
 static uint8_t *tb_ret_addr;
 
-static void patch_reloc(uint8_t *code_ptr, int type, 
+static void patch_reloc(uint8_t *code_ptr, int type,
                         tcg_target_long value, tcg_target_long addend)
 {
     value += addend;
@@ -273,7 +273,7 @@ static inline void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
     tcg_out8(s, 0xc0 | (r << 3) | rm);
 }
 
-/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.  
+/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
    We handle either RM and INDEX missing with a -1 value.  */
 
 static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
@@ -331,7 +331,7 @@ static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
 }
 
 /* rm == -1 means no register index */
-static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r, int rm, 
+static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r, int rm,
                                         int32_t offset)
 {
     tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
@@ -474,7 +474,7 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
 {
     int32_t val, val1;
     TCGLabel *l = &s->labels[label_index];
-    
+
     if (l->has_value) {
         val = l->u.value - (tcg_target_long)s->code_ptr;
         val1 = val - 2;
@@ -733,8 +733,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     r1 = TCG_REG_EDX;
 
 #if defined(CONFIG_SOFTMMU)
-    tcg_out_mov(s, r1, addr_reg); 
-    tcg_out_mov(s, r0, addr_reg); 
+    tcg_out_mov(s, r1, addr_reg);
+    tcg_out_mov(s, r0, addr_reg);
 
     tcg_out_shifti(s, SHIFT_SHR, r1, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
 
@@ -747,9 +747,9 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
 
     /* cmp 0(r1), r0 */
     tcg_out_modrm_offset(s, OPC_CMP_GvEv, r0, r1, 0);
-    
+
     tcg_out_mov(s, r0, addr_reg);
-    
+
 #if TARGET_LONG_BITS == 32
     /* je label1 */
     tcg_out8(s, OPC_JCC_short + JCC_JE);
@@ -760,7 +760,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     tcg_out8(s, OPC_JCC_short + JCC_JNE);
     label3_ptr = s->code_ptr;
     s->code_ptr++;
-    
+
     /* cmp 4(r1), addr_reg2 */
     tcg_out_modrm_offset(s, OPC_CMP_GvEv, addr_reg2, r1, 4);
 
@@ -768,7 +768,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     tcg_out8(s, OPC_JCC_short + JCC_JE);
     label1_ptr = s->code_ptr;
     s->code_ptr++;
-    
+
     /* label3: */
     *label3_ptr = s->code_ptr - label3_ptr - 1;
 #endif
@@ -815,13 +815,13 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     tcg_out8(s, OPC_JMP_short);
     label2_ptr = s->code_ptr;
     s->code_ptr++;
-    
+
     /* label1: */
     *label1_ptr = s->code_ptr - label1_ptr - 1;
 
     /* add x(r1), r0 */
     tcg_out_modrm_offset(s, OPC_ADD_GvEv, r0, r1,
-                         offsetof(CPUTLBEntry, addend) - 
+                         offsetof(CPUTLBEntry, addend) -
                          offsetof(CPUTLBEntry, addr_read));
 #else
     r0 = addr_reg;
@@ -925,9 +925,9 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     r1 = TCG_REG_EDX;
 
 #if defined(CONFIG_SOFTMMU)
-    tcg_out_mov(s, r1, addr_reg); 
-    tcg_out_mov(s, r0, addr_reg); 
- 
+    tcg_out_mov(s, r1, addr_reg);
+    tcg_out_mov(s, r0, addr_reg);
+
     tcg_out_shifti(s, SHIFT_SHR, r1, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
 
     tgen_arithi(s, ARITH_AND, r0, TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
@@ -939,9 +939,9 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
 
     /* cmp 0(r1), r0 */
     tcg_out_modrm_offset(s, OPC_CMP_GvEv, r0, r1, 0);
-    
+
     tcg_out_mov(s, r0, addr_reg);
-    
+
 #if TARGET_LONG_BITS == 32
     /* je label1 */
     tcg_out8(s, OPC_JCC_short + JCC_JE);
@@ -952,7 +952,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     tcg_out8(s, OPC_JCC_short + JCC_JNE);
     label3_ptr = s->code_ptr;
     s->code_ptr++;
-    
+
     /* cmp 4(r1), addr_reg2 */
     tcg_out_modrm_offset(s, OPC_CMP_GvEv, addr_reg2, r1, 4);
 
@@ -960,7 +960,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     tcg_out8(s, OPC_JCC_short + JCC_JE);
     label1_ptr = s->code_ptr;
     s->code_ptr++;
-    
+
     /* label3: */
     *label3_ptr = s->code_ptr - label3_ptr - 1;
 #endif
@@ -1025,13 +1025,13 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     tcg_out8(s, OPC_JMP_short);
     label2_ptr = s->code_ptr;
     s->code_ptr++;
-    
+
     /* label1: */
     *label1_ptr = s->code_ptr - label1_ptr - 1;
 
     /* add x(r1), r0 */
     tcg_out_modrm_offset(s, OPC_ADD_GvEv, r0, r1,
-                         offsetof(CPUTLBEntry, addend) - 
+                         offsetof(CPUTLBEntry, addend) -
                          offsetof(CPUTLBEntry, addr_write));
 #else
     r0 = addr_reg;
@@ -1091,7 +1091,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                               const TCGArg *args, const int *const_args)
 {
     int c;
-    
+
     switch(opc) {
     case INDEX_op_exit_tb:
         tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_EAX, args[0]);
@@ -1334,7 +1334,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_qemu_ld64:
         tcg_out_qemu_ld(s, args, 3);
         break;
-        
+
     case INDEX_op_qemu_st8:
         tcg_out_qemu_st(s, args, 0);
         break;
@@ -1447,7 +1447,7 @@ static int tcg_target_callee_save_regs[] = {
 void tcg_target_qemu_prologue(TCGContext *s)
 {
     int i, frame_size, push_size, stack_addend;
-    
+
     /* TB prologue */
     /* save all callee saved registers */
     for(i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
@@ -1456,13 +1456,13 @@ void tcg_target_qemu_prologue(TCGContext *s)
     /* reserve some stack space */
     push_size = 4 + ARRAY_SIZE(tcg_target_callee_save_regs) * 4;
     frame_size = push_size + TCG_STATIC_CALL_ARGS_SIZE;
-    frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) & 
+    frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
         ~(TCG_TARGET_STACK_ALIGN - 1);
     stack_addend = frame_size - push_size;
     tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
 
     tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_EAX); /* jmp *%eax */
-    
+
     /* TB epilogue */
     tb_ret_addr = s->code_ptr;
     tcg_out_addi(s, TCG_REG_ESP, stack_addend);
-- 
1.7.0.1

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [Qemu-devel] [PATCH 00/15] tcg-i386 cleanup and improvement, v3
  2010-05-21 15:30 [Qemu-devel] [PATCH 00/15] tcg-i386 cleanup and improvement, v3 Richard Henderson
                   ` (14 preceding siblings ...)
  2010-05-21 15:30 ` [Qemu-devel] [PATCH 15/15] tcg-i386: Nuke trailing whitespace Richard Henderson
@ 2010-05-21 16:27 ` Aurelien Jarno
  15 siblings, 0 replies; 17+ messages in thread
From: Aurelien Jarno @ 2010-05-21 16:27 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel

On Fri, May 21, 2010 at 08:30:20AM -0700, Richard Henderson wrote:
> Changes v2->v3:
>   * ext8u and ext8s operate on 'q' register inputs only.
>   * pushi is inline and loses the ifdef.
>   * trailing whitespace cleanup
> 
> 
> r~
> 
> 
> Richard Henderson (15):
>   tcg-i386: Tidy ext8u and ext16u operations.
>   tcg-i386: Tidy ext8s and ext16s operations.
>   tcg-i386: Tidy immediate arithmetic operations.
>   tcg-i386: Tidy non-immediate arithmetic operations.
>   tcg-i386: Tidy movi.
>   tcg-i386: Tidy push/pop.
>   tcg-i386: Tidy calls.
>   tcg-i386: Tidy ret.
>   tcg-i386: Tidy setcc.
>   tcg-i386: Tidy unary arithmetic.
>   tcg-i386: Tidy multiply.
>   tcg-i386: Tidy xchg.
>   tcg-i386: Tidy lea.
>   tcg-i386: Use lea for three-operand add.
>   tcg-i386: Nuke trailing whitespace.
> 
>  tcg/i386/tcg-target.c |  504 ++++++++++++++++++++++++++++++-------------------
>  1 files changed, 308 insertions(+), 196 deletions(-)
> 

Thanks, all applied.

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2010-05-21 16:27 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-05-21 15:30 [Qemu-devel] [PATCH 00/15] tcg-i386 cleanup and improvement, v3 Richard Henderson
2010-05-21 15:30 ` [Qemu-devel] [PATCH 01/15] tcg-i386: Tidy ext8u and ext16u operations Richard Henderson
2010-05-21 15:30 ` [Qemu-devel] [PATCH 02/15] tcg-i386: Tidy ext8s and ext16s operations Richard Henderson
2010-05-21 15:30 ` [Qemu-devel] [PATCH 03/15] tcg-i386: Tidy immediate arithmetic operations Richard Henderson
2010-05-21 15:30 ` [Qemu-devel] [PATCH 04/15] tcg-i386: Tidy non-immediate " Richard Henderson
2010-05-21 15:30 ` [Qemu-devel] [PATCH 05/15] tcg-i386: Tidy movi Richard Henderson
2010-05-21 15:30 ` [Qemu-devel] [PATCH 06/15] tcg-i386: Tidy push/pop Richard Henderson
2010-05-21 15:30 ` [Qemu-devel] [PATCH 07/15] tcg-i386: Tidy calls Richard Henderson
2010-05-21 15:30 ` [Qemu-devel] [PATCH 08/15] tcg-i386: Tidy ret Richard Henderson
2010-05-21 15:30 ` [Qemu-devel] [PATCH 09/15] tcg-i386: Tidy setcc Richard Henderson
2010-05-21 15:30 ` [Qemu-devel] [PATCH 10/15] tcg-i386: Tidy unary arithmetic Richard Henderson
2010-05-21 15:30 ` [Qemu-devel] [PATCH 11/15] tcg-i386: Tidy multiply Richard Henderson
2010-05-21 15:30 ` [Qemu-devel] [PATCH 12/15] tcg-i386: Tidy xchg Richard Henderson
2010-05-21 15:30 ` [Qemu-devel] [PATCH 13/15] tcg-i386: Tidy lea Richard Henderson
2010-05-21 15:30 ` [Qemu-devel] [PATCH 14/15] tcg-i386: Use lea for three-operand add Richard Henderson
2010-05-21 15:30 ` [Qemu-devel] [PATCH 15/15] tcg-i386: Nuke trailing whitespace Richard Henderson
2010-05-21 16:27 ` [Qemu-devel] [PATCH 00/15] tcg-i386 cleanup and improvement, v3 Aurelien Jarno

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).