Re: [Qemu-devel] [PATCH 11/22] tcg-i386: Tidy non-immediate arithmetic operations.

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

From: Aurelien Jarno <aurelien@aurel32.net>
To: Richard Henderson <rth@twiddle.net>
Cc: qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH 11/22] tcg-i386: Tidy non-immediate arithmetic operations.
Date: Fri, 21 May 2010 11:38:49 +0200	[thread overview]
Message-ID: <20100521093849.GD1950@volta.aurel32.net> (raw)
In-Reply-To: <f9a54fb8dd383110104b9287fc246873c33c74f5.1272479073.git.rth@twiddle.net>

On Wed, Apr 14, 2010 at 10:16:33AM -0700, Richard Henderson wrote:
> Add more OPC values, and tgen_arithr.  Use the latter throughout.
> 
> Note that normal reg/reg arithmetic now uses the Gv,Ev opcode form
> instead of the Ev,Gv opcode form used previously.  Both forms
> disassemble properly, and so there's no visible change when diffing
> log files before and after the change.  This change makes the operand
> ordering within the output routines more natural, and avoids the need
> to define an OPC_ARITH_EvGv since a read-modify-write with memory is
> not needed within TCG.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>

Acked-by: Aurelien Jarno <aurelien@aurel32.net>

> ---
>  tcg/i386/tcg-target.c |   78 ++++++++++++++++++++++++++++++-------------------
>  1 files changed, 48 insertions(+), 30 deletions(-)
> 
> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
> index df1bdfc..b4e8e74 100644
> --- a/tcg/i386/tcg-target.c
> +++ b/tcg/i386/tcg-target.c
> @@ -165,7 +165,12 @@ static inline int tcg_target_const_match(tcg_target_long val,
>  
>  #define OPC_ARITH_EvIz	(0x81)
>  #define OPC_ARITH_EvIb	(0x83)
> +#define OPC_ARITH_GvEv	(0x03)		/* ... plus (ARITH_FOO << 3) */
> +#define OPC_ADD_GvEv	(OPC_ARITH_GvEv | (ARITH_ADD << 3))
> +#define OPC_CMP_GvEv	(OPC_ARITH_GvEv | (ARITH_CMP << 3))
> +#define OPC_DEC_r32	(0x48)
>  #define OPC_BSWAP	(0xc8 | P_EXT)
> +#define OPC_INC_r32	(0x40)
>  #define OPC_JCC_long	(0x80 | P_EXT)	/* ... plus condition code */
>  #define OPC_JCC_short	(0x70)		/* ... plus condition code */
>  #define OPC_JMP_long	(0xe9)
> @@ -180,6 +185,7 @@ static inline int tcg_target_const_match(tcg_target_long val,
>  #define OPC_SHIFT_1	(0xd1)
>  #define OPC_SHIFT_Ib	(0xc1)
>  #define OPC_SHIFT_cl	(0xd3)
> +#define OPC_TESTL	(0x85)
>  
>  /* Group 1 opcode extensions for 0x80-0x83.  */
>  #define ARITH_ADD 0
> @@ -280,6 +286,12 @@ static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r, int rm,
>      }
>  }
>  
> +/* Generate dest op= src.  Uses the same ARITH_* codes as tgen_arithi.  */
> +static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
> +{
> +    tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3), dest, src);
> +}
> +
>  static inline void tcg_out_mov(TCGContext *s, int ret, int arg)
>  {
>      if (arg != ret) {
> @@ -291,8 +303,7 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type,
>                                  int ret, int32_t arg)
>  {
>      if (arg == 0) {
> -        /* xor r0,r0 */
> -        tcg_out_modrm(s, 0x01 | (ARITH_XOR << 3), ret, ret);
> +        tgen_arithr(s, ARITH_XOR, ret, ret);
>      } else {
>          tcg_out8(s, 0xb8 + ret);
>          tcg_out32(s, arg);
> @@ -374,14 +385,15 @@ static inline void tcg_out_rolw_8(TCGContext *s, int reg)
>      tcg_out_shifti(s, SHIFT_ROL, reg, 8);
>  }
>  
> -static inline void tgen_arithi(TCGContext *s, int c, int r0, int32_t val, int cf)
> +static inline void tgen_arithi(TCGContext *s, int c, int r0,
> +                               int32_t val, int cf)
>  {
> -    if (!cf && ((c == ARITH_ADD && val == 1) || (c == ARITH_SUB && val == -1))) {
> -        /* inc */
> -        tcg_out_opc(s, 0x40 + r0);
> -    } else if (!cf && ((c == ARITH_ADD && val == -1) || (c == ARITH_SUB && val == 1))) {
> -        /* dec */
> -        tcg_out_opc(s, 0x48 + r0);
> +    /* ??? While INC and DEC are 2 bytes shorter than ADDL/SUBL $1, they
> +       also induce partial flags update stalls on Pentium4 and are not
> +       recommended by current Intel optimization manuals.  */
> +    if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
> +        int opc = ((c == ARITH_ADD) ^ (val < 0) ? OPC_INC_r32 : OPC_DEC_r32);
> +        tcg_out_opc(s, opc + r0);
>      } else if (val == (int8_t)val) {
>          tcg_out_modrm(s, OPC_ARITH_EvIb, c, r0);
>          tcg_out8(s, val);
> @@ -454,12 +466,12 @@ static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
>      if (const_arg2) {
>          if (arg2 == 0) {
>              /* test r, r */
> -            tcg_out_modrm(s, 0x85, arg1, arg1);
> +            tcg_out_modrm(s, OPC_TESTL, arg1, arg1);
>          } else {
>              tgen_arithi(s, ARITH_CMP, arg1, arg2, 0);
>          }
>      } else {
> -        tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
> +        tgen_arithr(s, ARITH_CMP, arg1, arg2);
>      }
>  }
>  
> @@ -674,7 +686,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
>      tcg_out32(s, offsetof(CPUState, tlb_table[mem_index][0].addr_read));
>  
>      /* cmp 0(r1), r0 */
> -    tcg_out_modrm_offset(s, 0x3b, r0, r1, 0);
> +    tcg_out_modrm_offset(s, OPC_CMP_GvEv, r0, r1, 0);
>      
>      tcg_out_mov(s, r0, addr_reg);
>      
> @@ -690,7 +702,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
>      s->code_ptr++;
>      
>      /* cmp 4(r1), addr_reg2 */
> -    tcg_out_modrm_offset(s, 0x3b, addr_reg2, r1, 4);
> +    tcg_out_modrm_offset(s, OPC_CMP_GvEv, addr_reg2, r1, 4);
>  
>      /* je label1 */
>      tcg_out8(s, OPC_JCC_short + JCC_JE);
> @@ -749,7 +761,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
>      *label1_ptr = s->code_ptr - label1_ptr - 1;
>  
>      /* add x(r1), r0 */
> -    tcg_out_modrm_offset(s, 0x03, r0, r1, offsetof(CPUTLBEntry, addend) - 
> +    tcg_out_modrm_offset(s, OPC_ADD_GvEv, r0, r1,
> +                         offsetof(CPUTLBEntry, addend) - 
>                           offsetof(CPUTLBEntry, addr_read));
>  #else
>      r0 = addr_reg;
> @@ -864,7 +877,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
>      tcg_out32(s, offsetof(CPUState, tlb_table[mem_index][0].addr_write));
>  
>      /* cmp 0(r1), r0 */
> -    tcg_out_modrm_offset(s, 0x3b, r0, r1, 0);
> +    tcg_out_modrm_offset(s, OPC_CMP_GvEv, r0, r1, 0);
>      
>      tcg_out_mov(s, r0, addr_reg);
>      
> @@ -880,7 +893,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
>      s->code_ptr++;
>      
>      /* cmp 4(r1), addr_reg2 */
> -    tcg_out_modrm_offset(s, 0x3b, addr_reg2, r1, 4);
> +    tcg_out_modrm_offset(s, OPC_CMP_GvEv, addr_reg2, r1, 4);
>  
>      /* je label1 */
>      tcg_out8(s, OPC_JCC_short + JCC_JE);
> @@ -961,7 +974,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
>      *label1_ptr = s->code_ptr - label1_ptr - 1;
>  
>      /* add x(r1), r0 */
> -    tcg_out_modrm_offset(s, 0x03, r0, r1, offsetof(CPUTLBEntry, addend) - 
> +    tcg_out_modrm_offset(s, OPC_ADD_GvEv, r0, r1,
> +                         offsetof(CPUTLBEntry, addend) - 
>                           offsetof(CPUTLBEntry, addr_write));
>  #else
>      r0 = addr_reg;
> @@ -1113,7 +1127,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>          if (const_args[2]) {
>              tgen_arithi(s, c, args[0], args[2], 0);
>          } else {
> -            tcg_out_modrm(s, 0x01 | (c << 3), args[2], args[0]);
> +            tgen_arithr(s, c, args[0], args[2]);
>          }
>          break;
>      case INDEX_op_mul_i32:
> @@ -1163,24 +1177,28 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>          goto gen_shift32;
>  
>      case INDEX_op_add2_i32:
> -        if (const_args[4]) 
> +        if (const_args[4]) {
>              tgen_arithi(s, ARITH_ADD, args[0], args[4], 1);
> -        else
> -            tcg_out_modrm(s, 0x01 | (ARITH_ADD << 3), args[4], args[0]);
> -        if (const_args[5]) 
> +        } else {
> +            tgen_arithr(s, ARITH_ADD, args[0], args[4]);
> +        }
> +        if (const_args[5]) {
>              tgen_arithi(s, ARITH_ADC, args[1], args[5], 1);
> -        else
> -            tcg_out_modrm(s, 0x01 | (ARITH_ADC << 3), args[5], args[1]);
> +        } else {
> +            tgen_arithr(s, ARITH_ADC, args[1], args[5]);
> +        }
>          break;
>      case INDEX_op_sub2_i32:
> -        if (const_args[4]) 
> +        if (const_args[4]) {
>              tgen_arithi(s, ARITH_SUB, args[0], args[4], 1);
> -        else
> -            tcg_out_modrm(s, 0x01 | (ARITH_SUB << 3), args[4], args[0]);
> -        if (const_args[5]) 
> +        } else {
> +            tgen_arithr(s, ARITH_SUB, args[0], args[4]);
> +        }
> +        if (const_args[5]) {
>              tgen_arithi(s, ARITH_SBB, args[1], args[5], 1);
> -        else
> -            tcg_out_modrm(s, 0x01 | (ARITH_SBB << 3), args[5], args[1]);
> +        } else {
> +            tgen_arithr(s, ARITH_SBB, args[1], args[5]);
> +        }
>          break;
>      case INDEX_op_brcond_i32:
>          tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
> -- 
> 1.6.6.1
> 
> 
> 
> 

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

next prev parent reply	other threads:[~2010-05-21  9:39 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-04-28 18:24 [Qemu-devel] [PATCH 00/22] tcg-i386 cleanup and improvement, v2 Richard Henderson
2010-04-13 22:23 ` [Qemu-devel] [PATCH 01/22] tcg-i386: Allocate call-saved registers first Richard Henderson
2010-05-19  6:46   ` Aurelien Jarno
2010-04-13 22:26 ` [Qemu-devel] [PATCH 02/22] tcg-i386: Tidy initialization of tcg_target_call_clobber_regs Richard Henderson
2010-05-19  6:46   ` Aurelien Jarno
2010-04-13 22:59 ` [Qemu-devel] [PATCH 03/22] tcg-i386: Tidy ext8u and ext16u operations Richard Henderson
2010-05-19  6:47   ` Aurelien Jarno
2010-05-19 18:31     ` Richard Henderson
2010-05-20 13:39       ` Aurelien Jarno
2010-05-20 14:04         ` Aurelien Jarno
2010-05-20 14:40           ` Richard Henderson
2010-05-20 18:50             ` Aurelien Jarno
2010-04-13 23:13 ` [Qemu-devel] [PATCH 04/22] tcg-i386: Tidy ext8s and ext16s operations Richard Henderson
2010-05-20 18:52   ` Aurelien Jarno
2010-04-14 14:58 ` [Qemu-devel] [PATCH 07/22] tcg-i386: Tidy move operations Richard Henderson
2010-04-14 15:06 ` [Qemu-devel] [PATCH 08/22] tcg-i386: Eliminate extra move from qemu_ld64 Richard Henderson
2010-04-14 15:26 ` [Qemu-devel] [PATCH 09/22] tcg-i386: Tidy jumps Richard Henderson
2010-04-14 15:38 ` [Qemu-devel] [PATCH 10/22] tcg-i386: Tidy immediate arithmetic operations Richard Henderson
2010-05-21  9:38   ` Aurelien Jarno
2010-04-14 17:16 ` [Qemu-devel] [PATCH 11/22] tcg-i386: Tidy non-immediate " Richard Henderson
2010-05-21  9:38   ` Aurelien Jarno [this message]
2010-04-14 17:20 ` [Qemu-devel] [PATCH 12/22] tcg-i386: Tidy movi Richard Henderson
2010-05-21  9:38   ` Aurelien Jarno
2010-04-14 17:59 ` [Qemu-devel] [PATCH 13/22] tcg-i386: Tidy push/pop Richard Henderson
2010-05-21  9:38   ` Aurelien Jarno
2010-04-14 18:02 ` [Qemu-devel] [PATCH 14/22] tcg-i386: Tidy calls Richard Henderson
2010-05-21  9:40   ` Aurelien Jarno
2010-04-14 18:04 ` [Qemu-devel] [PATCH 15/22] tcg-i386: Tidy ret Richard Henderson
2010-05-21  9:40   ` Aurelien Jarno
2010-04-14 18:07 ` [Qemu-devel] [PATCH 16/22] tcg-i386: Tidy setcc Richard Henderson
2010-05-21  9:40   ` Aurelien Jarno
2010-04-14 18:22 ` [Qemu-devel] [PATCH 17/22] tcg-i386: Tidy unary arithmetic Richard Henderson
2010-05-21  9:41   ` Aurelien Jarno
2010-04-14 18:29 ` [Qemu-devel] [PATCH 18/22] tcg-i386: Tidy multiply Richard Henderson
2010-05-21  9:41   ` Aurelien Jarno
2010-04-14 18:32 ` [Qemu-devel] [PATCH 19/22] tcg-i386: Tidy xchg Richard Henderson
2010-05-21  9:42   ` Aurelien Jarno
2010-04-14 19:08 ` [Qemu-devel] [PATCH 20/22] tcg-i386: Tidy lea Richard Henderson
2010-05-21  9:43   ` Aurelien Jarno
2010-04-14 20:29 ` [Qemu-devel] [PATCH 21/22] tcg-i386: Use lea for three-operand add Richard Henderson
2010-05-21  9:44   ` Aurelien Jarno
2010-04-28 17:31 ` [Qemu-devel] [PATCH 05/22] tcg-i386: Tidy bswap operations Richard Henderson
2010-04-28 17:38 ` [Qemu-devel] [PATCH 06/22] tcg-i386: Tidy shift operations Richard Henderson
2010-04-28 18:23 ` [Qemu-devel] [PATCH 22/22] tcg-i386: Tidy data16 prefixes Richard Henderson
2010-05-21  9:45   ` Aurelien Jarno
2010-05-17 18:26 ` [Qemu-devel] [PATCH 00/22] tcg-i386 cleanup and improvement, v2 Richard Henderson
2010-05-17 19:54   ` Aurelien Jarno
2010-05-21  9:46 ` Aurelien Jarno

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100521093849.GD1950@volta.aurel32.net \
    --to=aurelien@aurel32.net \
    --cc=qemu-devel@nongnu.org \
    --cc=rth@twiddle.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).