All of lore.kernel.org
 help / color / mirror / Atom feed
From: Aurelien Jarno <aurelien@aurel32.net>
To: Richard Henderson <rth@twiddle.net>
Cc: qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH 11/22] tcg-i386: Tidy non-immediate arithmetic operations.
Date: Fri, 21 May 2010 11:38:49 +0200	[thread overview]
Message-ID: <20100521093849.GD1950@volta.aurel32.net> (raw)
In-Reply-To: <f9a54fb8dd383110104b9287fc246873c33c74f5.1272479073.git.rth@twiddle.net>

On Wed, Apr 14, 2010 at 10:16:33AM -0700, Richard Henderson wrote:
> Add more OPC values, and tgen_arithr.  Use the latter throughout.
> 
> Note that normal reg/reg arithmetic now uses the Gv,Ev opcode form
> instead of the Ev,Gv opcode form used previously.  Both forms
> disassemble properly, and so there's no visible change when diffing
> log files before and after the change.  This change makes the operand
> ordering within the output routines more natural, and avoids the need
> to define an OPC_ARITH_EvGv since a read-modify-write with memory is
> not needed within TCG.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>

Acked-by: Aurelien Jarno <aurelien@aurel32.net>

> ---
>  tcg/i386/tcg-target.c |   78 ++++++++++++++++++++++++++++++-------------------
>  1 files changed, 48 insertions(+), 30 deletions(-)
> 
> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
> index df1bdfc..b4e8e74 100644
> --- a/tcg/i386/tcg-target.c
> +++ b/tcg/i386/tcg-target.c
> @@ -165,7 +165,12 @@ static inline int tcg_target_const_match(tcg_target_long val,
>  
>  #define OPC_ARITH_EvIz	(0x81)
>  #define OPC_ARITH_EvIb	(0x83)
> +#define OPC_ARITH_GvEv	(0x03)		/* ... plus (ARITH_FOO << 3) */
> +#define OPC_ADD_GvEv	(OPC_ARITH_GvEv | (ARITH_ADD << 3))
> +#define OPC_CMP_GvEv	(OPC_ARITH_GvEv | (ARITH_CMP << 3))
> +#define OPC_DEC_r32	(0x48)
>  #define OPC_BSWAP	(0xc8 | P_EXT)
> +#define OPC_INC_r32	(0x40)
>  #define OPC_JCC_long	(0x80 | P_EXT)	/* ... plus condition code */
>  #define OPC_JCC_short	(0x70)		/* ... plus condition code */
>  #define OPC_JMP_long	(0xe9)
> @@ -180,6 +185,7 @@ static inline int tcg_target_const_match(tcg_target_long val,
>  #define OPC_SHIFT_1	(0xd1)
>  #define OPC_SHIFT_Ib	(0xc1)
>  #define OPC_SHIFT_cl	(0xd3)
> +#define OPC_TESTL	(0x85)
>  
>  /* Group 1 opcode extensions for 0x80-0x83.  */
>  #define ARITH_ADD 0
> @@ -280,6 +286,12 @@ static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r, int rm,
>      }
>  }
>  
> +/* Generate dest op= src.  Uses the same ARITH_* codes as tgen_arithi.  */
> +static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
> +{
> +    tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3), dest, src);
> +}
> +
>  static inline void tcg_out_mov(TCGContext *s, int ret, int arg)
>  {
>      if (arg != ret) {
> @@ -291,8 +303,7 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type,
>                                  int ret, int32_t arg)
>  {
>      if (arg == 0) {
> -        /* xor r0,r0 */
> -        tcg_out_modrm(s, 0x01 | (ARITH_XOR << 3), ret, ret);
> +        tgen_arithr(s, ARITH_XOR, ret, ret);
>      } else {
>          tcg_out8(s, 0xb8 + ret);
>          tcg_out32(s, arg);
> @@ -374,14 +385,15 @@ static inline void tcg_out_rolw_8(TCGContext *s, int reg)
>      tcg_out_shifti(s, SHIFT_ROL, reg, 8);
>  }
>  
> -static inline void tgen_arithi(TCGContext *s, int c, int r0, int32_t val, int cf)
> +static inline void tgen_arithi(TCGContext *s, int c, int r0,
> +                               int32_t val, int cf)
>  {
> -    if (!cf && ((c == ARITH_ADD && val == 1) || (c == ARITH_SUB && val == -1))) {
> -        /* inc */
> -        tcg_out_opc(s, 0x40 + r0);
> -    } else if (!cf && ((c == ARITH_ADD && val == -1) || (c == ARITH_SUB && val == 1))) {
> -        /* dec */
> -        tcg_out_opc(s, 0x48 + r0);
> +    /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
> +       partial flags update stalls on Pentium4 and are not recommended
> +       by current Intel optimization manuals.  */
> +    if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
> +        int opc = ((c == ARITH_ADD) ^ (val < 0) ? OPC_INC_r32 : OPC_DEC_r32);
> +        tcg_out_opc(s, opc + r0);
>      } else if (val == (int8_t)val) {
>          tcg_out_modrm(s, OPC_ARITH_EvIb, c, r0);
>          tcg_out8(s, val);
> @@ -454,12 +466,12 @@ static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
>      if (const_arg2) {
>          if (arg2 == 0) {
>              /* test r, r */
> -            tcg_out_modrm(s, 0x85, arg1, arg1);
> +            tcg_out_modrm(s, OPC_TESTL, arg1, arg1);
>          } else {
>              tgen_arithi(s, ARITH_CMP, arg1, arg2, 0);
>          }
>      } else {
> -        tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
> +        tgen_arithr(s, ARITH_CMP, arg1, arg2);
>      }
>  }
>  
> @@ -674,7 +686,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
>      tcg_out32(s, offsetof(CPUState, tlb_table[mem_index][0].addr_read));
>  
>      /* cmp 0(r1), r0 */
> -    tcg_out_modrm_offset(s, 0x3b, r0, r1, 0);
> +    tcg_out_modrm_offset(s, OPC_CMP_GvEv, r0, r1, 0);
>      
>      tcg_out_mov(s, r0, addr_reg);
>      
> @@ -690,7 +702,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
>      s->code_ptr++;
>      
>      /* cmp 4(r1), addr_reg2 */
> -    tcg_out_modrm_offset(s, 0x3b, addr_reg2, r1, 4);
> +    tcg_out_modrm_offset(s, OPC_CMP_GvEv, addr_reg2, r1, 4);
>  
>      /* je label1 */
>      tcg_out8(s, OPC_JCC_short + JCC_JE);
> @@ -749,7 +761,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
>      *label1_ptr = s->code_ptr - label1_ptr - 1;
>  
>      /* add x(r1), r0 */
> -    tcg_out_modrm_offset(s, 0x03, r0, r1, offsetof(CPUTLBEntry, addend) - 
> +    tcg_out_modrm_offset(s, OPC_ADD_GvEv, r0, r1,
> +                         offsetof(CPUTLBEntry, addend) - 
>                           offsetof(CPUTLBEntry, addr_read));
>  #else
>      r0 = addr_reg;
> @@ -864,7 +877,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
>      tcg_out32(s, offsetof(CPUState, tlb_table[mem_index][0].addr_write));
>  
>      /* cmp 0(r1), r0 */
> -    tcg_out_modrm_offset(s, 0x3b, r0, r1, 0);
> +    tcg_out_modrm_offset(s, OPC_CMP_GvEv, r0, r1, 0);
>      
>      tcg_out_mov(s, r0, addr_reg);
>      
> @@ -880,7 +893,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
>      s->code_ptr++;
>      
>      /* cmp 4(r1), addr_reg2 */
> -    tcg_out_modrm_offset(s, 0x3b, addr_reg2, r1, 4);
> +    tcg_out_modrm_offset(s, OPC_CMP_GvEv, addr_reg2, r1, 4);
>  
>      /* je label1 */
>      tcg_out8(s, OPC_JCC_short + JCC_JE);
> @@ -961,7 +974,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
>      *label1_ptr = s->code_ptr - label1_ptr - 1;
>  
>      /* add x(r1), r0 */
> -    tcg_out_modrm_offset(s, 0x03, r0, r1, offsetof(CPUTLBEntry, addend) - 
> +    tcg_out_modrm_offset(s, OPC_ADD_GvEv, r0, r1,
> +                         offsetof(CPUTLBEntry, addend) - 
>                           offsetof(CPUTLBEntry, addr_write));
>  #else
>      r0 = addr_reg;
> @@ -1113,7 +1127,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>          if (const_args[2]) {
>              tgen_arithi(s, c, args[0], args[2], 0);
>          } else {
> -            tcg_out_modrm(s, 0x01 | (c << 3), args[2], args[0]);
> +            tgen_arithr(s, c, args[0], args[2]);
>          }
>          break;
>      case INDEX_op_mul_i32:
> @@ -1163,24 +1177,28 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>          goto gen_shift32;
>  
>      case INDEX_op_add2_i32:
> -        if (const_args[4]) 
> +        if (const_args[4]) {
>              tgen_arithi(s, ARITH_ADD, args[0], args[4], 1);
> -        else
> -            tcg_out_modrm(s, 0x01 | (ARITH_ADD << 3), args[4], args[0]);
> -        if (const_args[5]) 
> +        } else {
> +            tgen_arithr(s, ARITH_ADD, args[0], args[4]);
> +        }
> +        if (const_args[5]) {
>              tgen_arithi(s, ARITH_ADC, args[1], args[5], 1);
> -        else
> -            tcg_out_modrm(s, 0x01 | (ARITH_ADC << 3), args[5], args[1]);
> +        } else {
> +            tgen_arithr(s, ARITH_ADC, args[0], args[5]);
> +        }
>          break;
>      case INDEX_op_sub2_i32:
> -        if (const_args[4]) 
> +        if (const_args[4]) {
>              tgen_arithi(s, ARITH_SUB, args[0], args[4], 1);
> -        else
> -            tcg_out_modrm(s, 0x01 | (ARITH_SUB << 3), args[4], args[0]);
> -        if (const_args[5]) 
> +        } else {
> +            tgen_arithr(s, ARITH_SUB, args[0], args[4]);
> +        }
> +        if (const_args[5]) {
>              tgen_arithi(s, ARITH_SBB, args[1], args[5], 1);
> -        else
> -            tcg_out_modrm(s, 0x01 | (ARITH_SBB << 3), args[5], args[1]);
> +        } else {
> +            tgen_arithr(s, ARITH_SBB, args[1], args[5]);
> +        }
>          break;
>      case INDEX_op_brcond_i32:
>          tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
> -- 
> 1.6.6.1
> 
> 
> 
> 

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

  reply	other threads:[~2010-05-21  9:39 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-04-28 18:24 [Qemu-devel] [PATCH 00/22] tcg-i386 cleanup and improvement, v2 Richard Henderson
2010-04-13 22:23 ` [Qemu-devel] [PATCH 01/22] tcg-i386: Allocate call-saved registers first Richard Henderson
2010-05-19  6:46   ` Aurelien Jarno
2010-04-13 22:26 ` [Qemu-devel] [PATCH 02/22] tcg-i386: Tidy initialization of tcg_target_call_clobber_regs Richard Henderson
2010-05-19  6:46   ` Aurelien Jarno
2010-04-13 22:59 ` [Qemu-devel] [PATCH 03/22] tcg-i386: Tidy ext8u and ext16u operations Richard Henderson
2010-05-19  6:47   ` Aurelien Jarno
2010-05-19 18:31     ` Richard Henderson
2010-05-20 13:39       ` Aurelien Jarno
2010-05-20 14:04         ` Aurelien Jarno
2010-05-20 14:40           ` Richard Henderson
2010-05-20 18:50             ` Aurelien Jarno
2010-04-13 23:13 ` [Qemu-devel] [PATCH 04/22] tcg-i386: Tidy ext8s and ext16s operations Richard Henderson
2010-05-20 18:52   ` Aurelien Jarno
2010-04-14 14:58 ` [Qemu-devel] [PATCH 07/22] tcg-i386: Tidy move operations Richard Henderson
2010-04-14 15:06 ` [Qemu-devel] [PATCH 08/22] tcg-i386: Eliminate extra move from qemu_ld64 Richard Henderson
2010-04-14 15:26 ` [Qemu-devel] [PATCH 09/22] tcg-i386: Tidy jumps Richard Henderson
2010-04-14 15:38 ` [Qemu-devel] [PATCH 10/22] tcg-i386: Tidy immediate arithmetic operations Richard Henderson
2010-05-21  9:38   ` Aurelien Jarno
2010-04-14 17:16 ` [Qemu-devel] [PATCH 11/22] tcg-i386: Tidy non-immediate " Richard Henderson
2010-05-21  9:38   ` Aurelien Jarno [this message]
2010-04-14 17:20 ` [Qemu-devel] [PATCH 12/22] tcg-i386: Tidy movi Richard Henderson
2010-05-21  9:38   ` Aurelien Jarno
2010-04-14 17:59 ` [Qemu-devel] [PATCH 13/22] tcg-i386: Tidy push/pop Richard Henderson
2010-05-21  9:38   ` Aurelien Jarno
2010-04-14 18:02 ` [Qemu-devel] [PATCH 14/22] tcg-i386: Tidy calls Richard Henderson
2010-05-21  9:40   ` Aurelien Jarno
2010-04-14 18:04 ` [Qemu-devel] [PATCH 15/22] tcg-i386: Tidy ret Richard Henderson
2010-05-21  9:40   ` Aurelien Jarno
2010-04-14 18:07 ` [Qemu-devel] [PATCH 16/22] tcg-i386: Tidy setcc Richard Henderson
2010-05-21  9:40   ` Aurelien Jarno
2010-04-14 18:22 ` [Qemu-devel] [PATCH 17/22] tcg-i386: Tidy unary arithmetic Richard Henderson
2010-05-21  9:41   ` Aurelien Jarno
2010-04-14 18:29 ` [Qemu-devel] [PATCH 18/22] tcg-i386: Tidy multiply Richard Henderson
2010-05-21  9:41   ` Aurelien Jarno
2010-04-14 18:32 ` [Qemu-devel] [PATCH 19/22] tcg-i386: Tidy xchg Richard Henderson
2010-05-21  9:42   ` Aurelien Jarno
2010-04-14 19:08 ` [Qemu-devel] [PATCH 20/22] tcg-i386: Tidy lea Richard Henderson
2010-05-21  9:43   ` Aurelien Jarno
2010-04-14 20:29 ` [Qemu-devel] [PATCH 21/22] tcg-i386: Use lea for three-operand add Richard Henderson
2010-05-21  9:44   ` Aurelien Jarno
2010-04-28 17:31 ` [Qemu-devel] [PATCH 05/22] tcg-i386: Tidy bswap operations Richard Henderson
2010-04-28 17:38 ` [Qemu-devel] [PATCH 06/22] tcg-i386: Tidy shift operations Richard Henderson
2010-04-28 18:23 ` [Qemu-devel] [PATCH 22/22] tcg-i386: Tidy data16 prefixes Richard Henderson
2010-05-21  9:45   ` Aurelien Jarno
2010-05-17 18:26 ` [Qemu-devel] [PATCH 00/22] tcg-i386 cleanup and improvement, v2 Richard Henderson
2010-05-17 19:54   ` Aurelien Jarno
2010-05-21  9:46 ` Aurelien Jarno

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100521093849.GD1950@volta.aurel32.net \
    --to=aurelien@aurel32.net \
    --cc=qemu-devel@nongnu.org \
    --cc=rth@twiddle.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.