qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Aurelien Jarno <aurelien@aurel32.net>
To: Richard Henderson <rth@twiddle.net>
Cc: qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH 5/5] tcg/i386: Use SHLX/SHRX/SARX instructions
Date: Sun, 16 Feb 2014 19:12:11 +0100	[thread overview]
Message-ID: <20140216181211.GA18676@hall.aurel32.net> (raw)
In-Reply-To: <1391179418-13422-6-git-send-email-rth@twiddle.net>

On Fri, Jan 31, 2014 at 08:43:38AM -0600, Richard Henderson wrote:
> These three-operand shift instructions do not require the shift count
> to be placed into ECX.  This reduces the number of mov insns required,
> with the mere addition of a new register constraint.
> 
> Don't attempt to get rid of the matching constraint, as that's impossible
> to manipulate with just a new constraint.  In addition, constant shifts
> still need the matching constraint.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/i386/tcg-target.c | 61 +++++++++++++++++++++++++++++++++++++++++----------
>  1 file changed, 50 insertions(+), 11 deletions(-)
> 
> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
> index 4f6b9c1..fef1717 100644
> --- a/tcg/i386/tcg-target.c
> +++ b/tcg/i386/tcg-target.c
> @@ -133,6 +133,12 @@ static bool have_movbe;
>     it there.  Therefore we always define the variable.  */
>  bool have_bmi1;
>  
> +#if defined(CONFIG_CPUID_H) && defined(bit_BMI2)
> +static bool have_bmi2;
> +#else
> +# define have_bmi2 0
> +#endif
> +
>  static uint8_t *tb_ret_addr;
>  
>  static void patch_reloc(uint8_t *code_ptr, int type,
> @@ -175,6 +181,7 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
>          tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
>          break;
>      case 'c':
> +    case_c:
>          ct->ct |= TCG_CT_REG;
>          tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
>          break;
> @@ -203,6 +210,7 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
>          tcg_regset_set32(ct->u.regs, 0, 0xf);
>          break;
>      case 'r':
> +    case_r:
>          ct->ct |= TCG_CT_REG;
>          if (TCG_TARGET_REG_BITS == 64) {
>              tcg_regset_set32(ct->u.regs, 0, 0xffff);
> @@ -210,6 +218,13 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
>              tcg_regset_set32(ct->u.regs, 0, 0xff);
>          }
>          break;
> +    case 'C':
> +        /* With SHRX et al, we need not use ECX as shift count register.  */
> +        if (have_bmi2) {
> +            goto case_r;
> +        } else {
> +            goto case_c;
> +        }
>  
>          /* qemu_ld/st address constraint */
>      case 'L':
> @@ -283,6 +298,8 @@ static inline int tcg_target_const_match(tcg_target_long val,
>  # define P_REXB_RM	0
>  # define P_GS           0
>  #endif
> +#define P_SIMDF3        0x10000         /* 0xf3 opcode prefix */
> +#define P_SIMDF2        0x20000         /* 0xf2 opcode prefix */
>  
>  #define OPC_ARITH_EvIz	(0x81)
>  #define OPC_ARITH_EvIb	(0x83)
> @@ -325,6 +342,9 @@ static inline int tcg_target_const_match(tcg_target_long val,
>  #define OPC_SHIFT_1	(0xd1)
>  #define OPC_SHIFT_Ib	(0xc1)
>  #define OPC_SHIFT_cl	(0xd3)
> +#define OPC_SARX        (0xf7 | P_EXT38 | P_SIMDF3)
> +#define OPC_SHLX        (0xf7 | P_EXT38 | P_DATA16)
> +#define OPC_SHRX        (0xf7 | P_EXT38 | P_SIMDF2)
>  #define OPC_TESTL	(0x85)
>  #define OPC_XCHG_ax_r32	(0x90)
>  
> @@ -493,7 +513,14 @@ static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm)
>  
>          tmp = (r & 8 ? 0 : 0x80);          /* VEX.R */
>      }
> -    tmp |= (opc & P_DATA16 ? 1 : 0);       /* VEX.pp */
> +    /* VEX.pp */
> +    if (opc & P_DATA16) {
> +        tmp |= 1;                          /* 0x66 */
> +    } else if (opc & P_SIMDF3) {
> +        tmp |= 2;                          /* 0xf3 */
> +    } else if (opc & P_SIMDF2) {
> +        tmp |= 3;                          /* 0xf2 */
> +    }
>      tmp |= (~v & 15) << 3;                 /* VEX.vvvv */
>      tcg_out8(s, tmp);
>      tcg_out8(s, opc);
> @@ -1689,7 +1716,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
>  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>                                const TCGArg *args, const int *const_args)
>  {
> -    int c, rexw = 0;
> +    int c, vexop, rexw = 0;
>  
>  #if TCG_TARGET_REG_BITS == 64
>  # define OP_32_64(x) \
> @@ -1860,19 +1887,28 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>  
>      OP_32_64(shl):
>          c = SHIFT_SHL;
> -        goto gen_shift;
> +        vexop = OPC_SHLX;
> +        goto gen_shift_maybe_vex;
>      OP_32_64(shr):
>          c = SHIFT_SHR;
> -        goto gen_shift;
> +        vexop = OPC_SHRX;
> +        goto gen_shift_maybe_vex;
>      OP_32_64(sar):
>          c = SHIFT_SAR;
> -        goto gen_shift;
> +        vexop = OPC_SARX;
> +        goto gen_shift_maybe_vex;
>      OP_32_64(rotl):
>          c = SHIFT_ROL;
>          goto gen_shift;
>      OP_32_64(rotr):
>          c = SHIFT_ROR;
>          goto gen_shift;
> +    gen_shift_maybe_vex:
> +        if (have_bmi2 && !const_args[2]) {
> +            tcg_out_vex_modrm(s, vexop + rexw, args[0], args[2], args[1]);
> +            break;
> +        }
> +        /* FALLTHRU */
>      gen_shift:
>          if (const_args[2]) {
>              tcg_out_shifti(s, c + rexw, args[0], args[2]);
> @@ -2065,9 +2101,9 @@ static const TCGTargetOpDef x86_op_defs[] = {
>      { INDEX_op_xor_i32, { "r", "0", "ri" } },
>      { INDEX_op_andc_i32, { "r", "r", "ri" } },
>  
> -    { INDEX_op_shl_i32, { "r", "0", "ci" } },
> -    { INDEX_op_shr_i32, { "r", "0", "ci" } },
> -    { INDEX_op_sar_i32, { "r", "0", "ci" } },
> +    { INDEX_op_shl_i32, { "r", "0", "Ci" } },
> +    { INDEX_op_shr_i32, { "r", "0", "Ci" } },
> +    { INDEX_op_sar_i32, { "r", "0", "Ci" } },
>      { INDEX_op_rotl_i32, { "r", "0", "ci" } },
>      { INDEX_op_rotr_i32, { "r", "0", "ci" } },
>  
> @@ -2123,9 +2159,9 @@ static const TCGTargetOpDef x86_op_defs[] = {
>      { INDEX_op_xor_i64, { "r", "0", "re" } },
>      { INDEX_op_andc_i64, { "r", "r", "rI" } },
>  
> -    { INDEX_op_shl_i64, { "r", "0", "ci" } },
> -    { INDEX_op_shr_i64, { "r", "0", "ci" } },
> -    { INDEX_op_sar_i64, { "r", "0", "ci" } },
> +    { INDEX_op_shl_i64, { "r", "0", "Ci" } },
> +    { INDEX_op_shr_i64, { "r", "0", "Ci" } },
> +    { INDEX_op_sar_i64, { "r", "0", "Ci" } },
>      { INDEX_op_rotl_i64, { "r", "0", "ci" } },
>      { INDEX_op_rotr_i64, { "r", "0", "ci" } },
>  
> @@ -2283,6 +2319,9 @@ static void tcg_target_init(TCGContext *s)
>  #ifdef bit_BMI
>          have_bmi1 = (b & bit_BMI) != 0;
>  #endif
> +#ifndef have_bmi2
> +        have_bmi2 = (b & bit_BMI2) != 0;
> +#endif
>      }
>  
>      if (TCG_TARGET_REG_BITS == 64) {

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>


-- 
Aurelien Jarno	                        GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

  parent reply	other threads:[~2014-02-16 18:12 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-01-31 14:43 [Qemu-devel] [PATCH 0/5] tcg/i386 support for bmi Richard Henderson
2014-01-31 14:43 ` [Qemu-devel] [PATCH 1/5] disas/i386: Disassemble ANDN/SHLX/SHRX/SHAX Richard Henderson
2014-02-16 18:12   ` Aurelien Jarno
2014-01-31 14:43 ` [Qemu-devel] [PATCH 2/5] tcg/i386: Move TCG_CT_CONST_* to tcg-target.c Richard Henderson
2014-02-16 18:12   ` Aurelien Jarno
2014-01-31 14:43 ` [Qemu-devel] [PATCH 3/5] tcg/i386: Add tcg_out_vex_modrm Richard Henderson
2014-02-16 18:12   ` Aurelien Jarno
2014-01-31 14:43 ` [Qemu-devel] [PATCH 4/5] tcg/i386: Use ANDN instruction Richard Henderson
2014-02-16 18:12   ` Aurelien Jarno
2014-02-17 16:18     ` Richard Henderson
2014-02-20 16:25   ` Peter Maydell
2014-02-20 16:42     ` Peter Maydell
2014-02-20 16:43     ` Richard Henderson
2014-02-20 17:38       ` Peter Maydell
2014-01-31 14:43 ` [Qemu-devel] [PATCH 5/5] tcg/i386: Use SHLX/SHRX/SARX instructions Richard Henderson
2014-02-16 14:21   ` Paolo Bonzini
2014-02-16 17:57     ` Richard Henderson
2014-02-17 16:01     ` Richard Henderson
2014-02-16 18:12   ` Aurelien Jarno [this message]
2014-02-14 21:44 ` [Qemu-devel] [PATCH 0/5] tcg/i386 support for bmi Richard Henderson
2014-02-16 14:22   ` Paolo Bonzini

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20140216181211.GA18676@hall.aurel32.net \
    --to=aurelien@aurel32.net \
    --cc=qemu-devel@nongnu.org \
    --cc=rth@twiddle.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as the URLs for its NNTP newsgroup(s).