All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Alex Bennée" <alex.bennee@linaro.org>
To: Richard Henderson <rth@twiddle.net>
Cc: qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH v4 24/64] tcg: Add clz and ctz opcodes
Date: Thu, 08 Dec 2016 17:44:26 +0000	[thread overview]
Message-ID: <874m2ez56t.fsf@linaro.org> (raw)
In-Reply-To: <1479906121-12211-25-git-send-email-rth@twiddle.net>


Richard Henderson <rth@twiddle.net> writes:

> Signed-off-by: Richard Henderson <rth@twiddle.net>

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

> ---
>  tcg-runtime.c            |  20 +++++++
>  tcg/README               |   8 +++
>  tcg/aarch64/tcg-target.h |   4 ++
>  tcg/arm/tcg-target.h     |   2 +
>  tcg/i386/tcg-target.h    |   4 ++
>  tcg/ia64/tcg-target.h    |   4 ++
>  tcg/mips/tcg-target.h    |   2 +
>  tcg/optimize.c           |  36 ++++++++++++
>  tcg/ppc/tcg-target.h     |   4 ++
>  tcg/s390/tcg-target.h    |   4 ++
>  tcg/sparc/tcg-target.h   |   4 ++
>  tcg/tcg-op.c             | 143 +++++++++++++++++++++++++++++++++++++++++++++++
>  tcg/tcg-op.h             |  16 ++++++
>  tcg/tcg-opc.h            |   4 ++
>  tcg/tcg-runtime.h        |   5 ++
>  tcg/tcg.h                |   2 +
>  tcg/tci/tcg-target.h     |   4 ++
>  17 files changed, 266 insertions(+)
>
> diff --git a/tcg-runtime.c b/tcg-runtime.c
> index 9327b6f..eb3bade 100644
> --- a/tcg-runtime.c
> +++ b/tcg-runtime.c
> @@ -101,6 +101,26 @@ int64_t HELPER(mulsh_i64)(int64_t arg1, int64_t arg2)
>      return h;
>  }
>
> +uint32_t HELPER(clz_i32)(uint32_t arg, uint32_t zero_val)
> +{
> +    return arg ? clz32(arg) : zero_val;
> +}
> +
> +uint32_t HELPER(ctz_i32)(uint32_t arg, uint32_t zero_val)
> +{
> +    return arg ? ctz32(arg) : zero_val;
> +}
> +
> +uint64_t HELPER(clz_i64)(uint64_t arg, uint64_t zero_val)
> +{
> +    return arg ? clz64(arg) : zero_val;
> +}
> +
> +uint64_t HELPER(ctz_i64)(uint64_t arg, uint64_t zero_val)
> +{
> +    return arg ? ctz64(arg) : zero_val;
> +}
> +
>  void HELPER(exit_atomic)(CPUArchState *env)
>  {
>      cpu_loop_exit_atomic(ENV_GET_CPU(env), GETPC());
> diff --git a/tcg/README b/tcg/README
> index 065d9c2..f5ccf04 100644
> --- a/tcg/README
> +++ b/tcg/README
> @@ -246,6 +246,14 @@ t0=~(t1|t2)
>
>  t0=t1|~t2
>
> +* clz_i32/i64 t0, t1, t2
> +
> +t0 = t1 ? clz(t1) : t2
> +
> +* ctz_i32/i64 t0, t1, t2
> +
> +t0 = t1 ? ctz(t1) : t2
> +
>  ********* Shifts/Rotates
>
>  * shl_i32/i64 t0, t1, t2
> diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
> index 4a74bd8..976f493 100644
> --- a/tcg/aarch64/tcg-target.h
> +++ b/tcg/aarch64/tcg-target.h
> @@ -62,6 +62,8 @@ typedef enum {
>  #define TCG_TARGET_HAS_eqv_i32          1
>  #define TCG_TARGET_HAS_nand_i32         0
>  #define TCG_TARGET_HAS_nor_i32          0
> +#define TCG_TARGET_HAS_clz_i32          0
> +#define TCG_TARGET_HAS_ctz_i32          0
>  #define TCG_TARGET_HAS_deposit_i32      1
>  #define TCG_TARGET_HAS_extract_i32      1
>  #define TCG_TARGET_HAS_sextract_i32     1
> @@ -94,6 +96,8 @@ typedef enum {
>  #define TCG_TARGET_HAS_eqv_i64          1
>  #define TCG_TARGET_HAS_nand_i64         0
>  #define TCG_TARGET_HAS_nor_i64          0
> +#define TCG_TARGET_HAS_clz_i64          0
> +#define TCG_TARGET_HAS_ctz_i64          0
>  #define TCG_TARGET_HAS_deposit_i64      1
>  #define TCG_TARGET_HAS_extract_i64      1
>  #define TCG_TARGET_HAS_sextract_i64     1
> diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
> index 4e30728..02cc242 100644
> --- a/tcg/arm/tcg-target.h
> +++ b/tcg/arm/tcg-target.h
> @@ -110,6 +110,8 @@ extern bool use_idiv_instructions;
>  #define TCG_TARGET_HAS_eqv_i32          0
>  #define TCG_TARGET_HAS_nand_i32         0
>  #define TCG_TARGET_HAS_nor_i32          0
> +#define TCG_TARGET_HAS_clz_i32          0
> +#define TCG_TARGET_HAS_ctz_i32          0
>  #define TCG_TARGET_HAS_deposit_i32      use_armv7_instructions
>  #define TCG_TARGET_HAS_extract_i32      use_armv7_instructions
>  #define TCG_TARGET_HAS_sextract_i32     use_armv7_instructions
> diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
> index dc19c47..f2d9955 100644
> --- a/tcg/i386/tcg-target.h
> +++ b/tcg/i386/tcg-target.h
> @@ -93,6 +93,8 @@ extern bool have_bmi1;
>  #define TCG_TARGET_HAS_eqv_i32          0
>  #define TCG_TARGET_HAS_nand_i32         0
>  #define TCG_TARGET_HAS_nor_i32          0
> +#define TCG_TARGET_HAS_clz_i32          0
> +#define TCG_TARGET_HAS_ctz_i32          0
>  #define TCG_TARGET_HAS_deposit_i32      1
>  #define TCG_TARGET_HAS_extract_i32      1
>  #define TCG_TARGET_HAS_sextract_i32     1
> @@ -125,6 +127,8 @@ extern bool have_bmi1;
>  #define TCG_TARGET_HAS_eqv_i64          0
>  #define TCG_TARGET_HAS_nand_i64         0
>  #define TCG_TARGET_HAS_nor_i64          0
> +#define TCG_TARGET_HAS_clz_i64          0
> +#define TCG_TARGET_HAS_ctz_i64          0
>  #define TCG_TARGET_HAS_deposit_i64      1
>  #define TCG_TARGET_HAS_extract_i64      1
>  #define TCG_TARGET_HAS_sextract_i64     0
> diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
> index 8856dc8..9a829ae 100644
> --- a/tcg/ia64/tcg-target.h
> +++ b/tcg/ia64/tcg-target.h
> @@ -140,6 +140,10 @@ typedef enum {
>  #define TCG_TARGET_HAS_nand_i32         1
>  #define TCG_TARGET_HAS_nand_i64         1
>  #define TCG_TARGET_HAS_nor_i32          1
> +#define TCG_TARGET_HAS_clz_i32          0
> +#define TCG_TARGET_HAS_clz_i64          0
> +#define TCG_TARGET_HAS_ctz_i32          0
> +#define TCG_TARGET_HAS_ctz_i64          0
>  #define TCG_TARGET_HAS_nor_i64          1
>  #define TCG_TARGET_HAS_orc_i32          1
>  #define TCG_TARGET_HAS_orc_i64          1
> diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
> index f1c3137..f133684 100644
> --- a/tcg/mips/tcg-target.h
> +++ b/tcg/mips/tcg-target.h
> @@ -109,6 +109,8 @@ extern bool use_mips32r2_instructions;
>  #define TCG_TARGET_HAS_rem_i32          1
>  #define TCG_TARGET_HAS_not_i32          1
>  #define TCG_TARGET_HAS_nor_i32          1
> +#define TCG_TARGET_HAS_clz_i32          0
> +#define TCG_TARGET_HAS_ctz_i32          0
>  #define TCG_TARGET_HAS_andc_i32         0
>  #define TCG_TARGET_HAS_orc_i32          0
>  #define TCG_TARGET_HAS_eqv_i32          0
> diff --git a/tcg/optimize.c b/tcg/optimize.c
> index 9e26bb7..e7ecce4 100644
> --- a/tcg/optimize.c
> +++ b/tcg/optimize.c
> @@ -296,6 +296,18 @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
>      CASE_OP_32_64(nor):
>          return ~(x | y);
>
> +    case INDEX_op_clz_i32:
> +        return (uint32_t)x ? clz32(x) : y;
> +
> +    case INDEX_op_clz_i64:
> +        return x ? clz64(x) : y;
> +
> +    case INDEX_op_ctz_i32:
> +        return (uint32_t)x ? ctz32(x) : y;
> +
> +    case INDEX_op_ctz_i64:
> +        return x ? ctz64(x) : y;
> +
>      CASE_OP_32_64(ext8s):
>          return (int8_t)x;
>
> @@ -896,6 +908,16 @@ void tcg_optimize(TCGContext *s)
>              mask = temps[args[1]].mask | temps[args[2]].mask;
>              break;
>
> +        case INDEX_op_clz_i32:
> +        case INDEX_op_ctz_i32:
> +            mask = temps[args[2]].mask | 31;
> +            break;
> +
> +        case INDEX_op_clz_i64:
> +        case INDEX_op_ctz_i64:
> +            mask = temps[args[2]].mask | 63;
> +            break;
> +
>          CASE_OP_32_64(setcond):
>          case INDEX_op_setcond2_i32:
>              mask = 1;
> @@ -1052,6 +1074,20 @@ void tcg_optimize(TCGContext *s)
>              }
>              goto do_default;
>
> +        CASE_OP_32_64(clz):
> +        CASE_OP_32_64(ctz):
> +            if (temp_is_const(args[1])) {
> +                TCGArg v = temps[args[1]].val;
> +                if (v != 0) {
> +                    tmp = do_constant_folding(opc, v, 0);
> +                    tcg_opt_gen_movi(s, op, args, args[0], tmp);
> +                } else {
> +                    tcg_opt_gen_mov(s, op, args, args[0], args[2]);
> +                }
> +                break;
> +            }
> +            goto do_default;
> +
>          CASE_OP_32_64(deposit):
>              if (temp_is_const(args[1]) && temp_is_const(args[2])) {
>                  tmp = deposit64(temps[args[1]].val, args[3], args[4],
> diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
> index b42c57a..698a599 100644
> --- a/tcg/ppc/tcg-target.h
> +++ b/tcg/ppc/tcg-target.h
> @@ -68,6 +68,8 @@ typedef enum {
>  #define TCG_TARGET_HAS_eqv_i32          1
>  #define TCG_TARGET_HAS_nand_i32         1
>  #define TCG_TARGET_HAS_nor_i32          1
> +#define TCG_TARGET_HAS_clz_i32          0
> +#define TCG_TARGET_HAS_ctz_i32          0
>  #define TCG_TARGET_HAS_deposit_i32      1
>  #define TCG_TARGET_HAS_extract_i32      1
>  #define TCG_TARGET_HAS_sextract_i32     0
> @@ -101,6 +103,8 @@ typedef enum {
>  #define TCG_TARGET_HAS_eqv_i64          1
>  #define TCG_TARGET_HAS_nand_i64         1
>  #define TCG_TARGET_HAS_nor_i64          1
> +#define TCG_TARGET_HAS_clz_i64          0
> +#define TCG_TARGET_HAS_ctz_i64          0
>  #define TCG_TARGET_HAS_deposit_i64      1
>  #define TCG_TARGET_HAS_extract_i64      1
>  #define TCG_TARGET_HAS_sextract_i64     0
> diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
> index e9ac12e..3ac2dc9 100644
> --- a/tcg/s390/tcg-target.h
> +++ b/tcg/s390/tcg-target.h
> @@ -77,6 +77,8 @@ extern uint64_t s390_facilities;
>  #define TCG_TARGET_HAS_eqv_i32        0
>  #define TCG_TARGET_HAS_nand_i32       0
>  #define TCG_TARGET_HAS_nor_i32        0
> +#define TCG_TARGET_HAS_clz_i32        0
> +#define TCG_TARGET_HAS_ctz_i32        0
>  #define TCG_TARGET_HAS_deposit_i32    (s390_facilities & FACILITY_GEN_INST_EXT)
>  #define TCG_TARGET_HAS_extract_i32    (s390_facilities & FACILITY_GEN_INST_EXT)
>  #define TCG_TARGET_HAS_sextract_i32   0
> @@ -108,6 +110,8 @@ extern uint64_t s390_facilities;
>  #define TCG_TARGET_HAS_eqv_i64        0
>  #define TCG_TARGET_HAS_nand_i64       0
>  #define TCG_TARGET_HAS_nor_i64        0
> +#define TCG_TARGET_HAS_clz_i64        0
> +#define TCG_TARGET_HAS_ctz_i64        0
>  #define TCG_TARGET_HAS_deposit_i64    (s390_facilities & FACILITY_GEN_INST_EXT)
>  #define TCG_TARGET_HAS_extract_i64    (s390_facilities & FACILITY_GEN_INST_EXT)
>  #define TCG_TARGET_HAS_sextract_i64   0
> diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
> index a212167..340837a 100644
> --- a/tcg/sparc/tcg-target.h
> +++ b/tcg/sparc/tcg-target.h
> @@ -110,6 +110,8 @@ extern bool use_vis3_instructions;
>  #define TCG_TARGET_HAS_eqv_i32          0
>  #define TCG_TARGET_HAS_nand_i32         0
>  #define TCG_TARGET_HAS_nor_i32          0
> +#define TCG_TARGET_HAS_clz_i32          0
> +#define TCG_TARGET_HAS_ctz_i32          0
>  #define TCG_TARGET_HAS_deposit_i32      0
>  #define TCG_TARGET_HAS_extract_i32      0
>  #define TCG_TARGET_HAS_sextract_i32     0
> @@ -142,6 +144,8 @@ extern bool use_vis3_instructions;
>  #define TCG_TARGET_HAS_eqv_i64          0
>  #define TCG_TARGET_HAS_nand_i64         0
>  #define TCG_TARGET_HAS_nor_i64          0
> +#define TCG_TARGET_HAS_clz_i64          0
> +#define TCG_TARGET_HAS_ctz_i64          0
>  #define TCG_TARGET_HAS_deposit_i64      0
>  #define TCG_TARGET_HAS_extract_i64      0
>  #define TCG_TARGET_HAS_sextract_i64     0
> diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
> index 1927e53..2b520c1 100644
> --- a/tcg/tcg-op.c
> +++ b/tcg/tcg-op.c
> @@ -457,6 +457,85 @@ void tcg_gen_orc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
>      }
>  }
>
> +void tcg_gen_clz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
> +{
> +    if (TCG_TARGET_HAS_clz_i32) {
> +        tcg_gen_op3_i32(INDEX_op_clz_i32, ret, arg1, arg2);
> +    } else if (TCG_TARGET_HAS_clz_i64) {
> +        TCGv_i64 t1 = tcg_temp_new_i64();
> +        TCGv_i64 t2 = tcg_temp_new_i64();
> +        tcg_gen_extu_i32_i64(t1, arg1);
> +        tcg_gen_extu_i32_i64(t2, arg2);
> +        tcg_gen_addi_i64(t2, t2, 32);
> +        tcg_gen_clz_i64(t1, t1, t2);
> +        tcg_gen_extrl_i64_i32(ret, t1);
> +        tcg_temp_free_i64(t1);
> +        tcg_temp_free_i64(t2);
> +        tcg_gen_subi_i32(ret, ret, 32);
> +    } else {
> +        gen_helper_clz_i32(ret, arg1, arg2);
> +    }
> +}
> +
> +void tcg_gen_clzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2)
> +{
> +    TCGv_i32 t = tcg_const_i32(arg2);
> +    tcg_gen_clz_i32(ret, arg1, t);
> +    tcg_temp_free_i32(t);
> +}
> +
> +void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
> +{
> +    if (TCG_TARGET_HAS_ctz_i32) {
> +        tcg_gen_op3_i32(INDEX_op_ctz_i32, ret, arg1, arg2);
> +    } else if (TCG_TARGET_HAS_ctz_i64) {
> +        TCGv_i64 t1 = tcg_temp_new_i64();
> +        TCGv_i64 t2 = tcg_temp_new_i64();
> +        tcg_gen_extu_i32_i64(t1, arg1);
> +        tcg_gen_extu_i32_i64(t2, arg2);
> +        tcg_gen_ctz_i64(t1, t1, t2);
> +        tcg_gen_extrl_i64_i32(ret, t1);
> +        tcg_temp_free_i64(t1);
> +        tcg_temp_free_i64(t2);
> +    } else if (TCG_TARGET_HAS_clz_i32) {
> +        TCGv_i32 t1 = tcg_temp_new_i32();
> +        TCGv_i32 t2 = tcg_temp_new_i32();
> +        tcg_gen_neg_i32(t1, arg1);
> +        tcg_gen_xori_i32(t2, arg2, 31);
> +        tcg_gen_and_i32(t1, t1, arg1);
> +        tcg_gen_clz_i32(ret, t1, t2);
> +        tcg_temp_free_i32(t1);
> +        tcg_temp_free_i32(t2);
> +        tcg_gen_xori_i32(ret, ret, 31);
> +    } else if (TCG_TARGET_HAS_clz_i64) {
> +        TCGv_i32 t1 = tcg_temp_new_i32();
> +        TCGv_i32 t2 = tcg_temp_new_i32();
> +        TCGv_i64 x1 = tcg_temp_new_i64();
> +        TCGv_i64 x2 = tcg_temp_new_i64();
> +        tcg_gen_neg_i32(t1, arg1);
> +        tcg_gen_xori_i32(t2, arg2, 63);
> +        tcg_gen_and_i32(t1, t1, arg1);
> +        tcg_gen_extu_i32_i64(x1, t1);
> +        tcg_gen_extu_i32_i64(x2, t2);
> +        tcg_temp_free_i32(t1);
> +        tcg_temp_free_i32(t2);
> +        tcg_gen_clz_i64(x1, x1, x2);
> +        tcg_gen_extrl_i64_i32(ret, x1);
> +        tcg_temp_free_i64(x1);
> +        tcg_temp_free_i64(x2);
> +        tcg_gen_xori_i32(ret, ret, 63);
> +    } else {
> +        gen_helper_ctz_i32(ret, arg1, arg2);
> +    }
> +}
> +
> +void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2)
> +{
> +    TCGv_i32 t = tcg_const_i32(arg2);
> +    tcg_gen_ctz_i32(ret, arg1, t);
> +    tcg_temp_free_i32(t);
> +}
> +
>  void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
>  {
>      if (TCG_TARGET_HAS_rot_i32) {
> @@ -1703,6 +1782,70 @@ void tcg_gen_orc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
>      }
>  }
>
> +void tcg_gen_clz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
> +{
> +    if (TCG_TARGET_HAS_clz_i64) {
> +        tcg_gen_op3_i64(INDEX_op_clz_i64, ret, arg1, arg2);
> +    } else {
> +        gen_helper_clz_i64(ret, arg1, arg2);
> +    }
> +}
> +
> +void tcg_gen_clzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
> +{
> +    if (TCG_TARGET_REG_BITS == 32
> +        && TCG_TARGET_HAS_clz_i32
> +        && arg2 <= 0xffffffffu) {
> +        TCGv_i32 t = tcg_const_i32((uint32_t)arg2 - 32);
> +        tcg_gen_clz_i32(t, TCGV_LOW(arg1), t);
> +        tcg_gen_addi_i32(t, t, 32);
> +        tcg_gen_clz_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), t);
> +        tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
> +        tcg_temp_free_i32(t);
> +    } else {
> +        TCGv_i64 t = tcg_const_i64(arg2);
> +        tcg_gen_clz_i64(ret, arg1, t);
> +        tcg_temp_free_i64(t);
> +    }
> +}
> +
> +void tcg_gen_ctz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
> +{
> +    if (TCG_TARGET_HAS_ctz_i64) {
> +        tcg_gen_op3_i64(INDEX_op_ctz_i64, ret, arg1, arg2);
> +    } else if (TCG_TARGET_HAS_clz_i64) {
> +        TCGv_i64 t1 = tcg_temp_new_i64();
> +        TCGv_i64 t2 = tcg_temp_new_i64();
> +        tcg_gen_neg_i64(t1, arg1);
> +        tcg_gen_xori_i64(t2, arg2, 63);
> +        tcg_gen_and_i64(t1, t1, arg1);
> +        tcg_gen_clz_i64(ret, t1, t2);
> +        tcg_temp_free_i64(t1);
> +        tcg_temp_free_i64(t2);
> +        tcg_gen_xori_i64(ret, ret, 63);
> +    } else {
> +        gen_helper_ctz_i64(ret, arg1, arg2);
> +    }
> +}
> +
> +void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
> +{
> +    if (TCG_TARGET_REG_BITS == 32
> +        && TCG_TARGET_HAS_ctz_i32
> +        && arg2 <= 0xffffffffu) {
> +        TCGv_i32 t32 = tcg_const_i32((uint32_t)arg2 - 32);
> +        tcg_gen_ctz_i32(t32, TCGV_HIGH(arg1), t32);
> +        tcg_gen_addi_i32(t32, t32, 32);
> +        tcg_gen_ctz_i32(TCGV_LOW(ret), TCGV_LOW(arg1), t32);
> +        tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
> +        tcg_temp_free_i32(t32);
> +    } else {
> +        TCGv_i64 t64 = tcg_const_i64(arg2);
> +        tcg_gen_ctz_i64(ret, arg1, t64);
> +        tcg_temp_free_i64(t64);
> +    }
> +}
> +
>  void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
>  {
>      if (TCG_TARGET_HAS_rot_i64) {
> diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
> index d42fd0d..7a24e84 100644
> --- a/tcg/tcg-op.h
> +++ b/tcg/tcg-op.h
> @@ -286,6 +286,10 @@ void tcg_gen_eqv_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
>  void tcg_gen_nand_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
>  void tcg_gen_nor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
>  void tcg_gen_orc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
> +void tcg_gen_clz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
> +void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
> +void tcg_gen_clzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2);
> +void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2);
>  void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
>  void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2);
>  void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
> @@ -469,6 +473,10 @@ void tcg_gen_eqv_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
>  void tcg_gen_nand_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
>  void tcg_gen_nor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
>  void tcg_gen_orc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
> +void tcg_gen_clz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
> +void tcg_gen_ctz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
> +void tcg_gen_clzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2);
> +void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2);
>  void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
>  void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2);
>  void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
> @@ -958,6 +966,10 @@ void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
>  #define tcg_gen_nand_tl tcg_gen_nand_i64
>  #define tcg_gen_nor_tl tcg_gen_nor_i64
>  #define tcg_gen_orc_tl tcg_gen_orc_i64
> +#define tcg_gen_clz_tl tcg_gen_clz_i64
> +#define tcg_gen_ctz_tl tcg_gen_ctz_i64
> +#define tcg_gen_clzi_tl tcg_gen_clzi_i64
> +#define tcg_gen_ctzi_tl tcg_gen_ctzi_i64
>  #define tcg_gen_rotl_tl tcg_gen_rotl_i64
>  #define tcg_gen_rotli_tl tcg_gen_rotli_i64
>  #define tcg_gen_rotr_tl tcg_gen_rotr_i64
> @@ -1049,6 +1061,10 @@ void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
>  #define tcg_gen_nand_tl tcg_gen_nand_i32
>  #define tcg_gen_nor_tl tcg_gen_nor_i32
>  #define tcg_gen_orc_tl tcg_gen_orc_i32
> +#define tcg_gen_clz_tl tcg_gen_clz_i32
> +#define tcg_gen_ctz_tl tcg_gen_ctz_i32
> +#define tcg_gen_clzi_tl tcg_gen_clzi_i32
> +#define tcg_gen_ctzi_tl tcg_gen_ctzi_i32
>  #define tcg_gen_rotl_tl tcg_gen_rotl_i32
>  #define tcg_gen_rotli_tl tcg_gen_rotli_i32
>  #define tcg_gen_rotr_tl tcg_gen_rotr_i32
> diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
> index 11563ac..d00db4f 100644
> --- a/tcg/tcg-opc.h
> +++ b/tcg/tcg-opc.h
> @@ -104,6 +104,8 @@ DEF(orc_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_orc_i32))
>  DEF(eqv_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_eqv_i32))
>  DEF(nand_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_nand_i32))
>  DEF(nor_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_nor_i32))
> +DEF(clz_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_clz_i32))
> +DEF(ctz_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_ctz_i32))
>
>  DEF(mov_i64, 1, 1, 0, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT)
>  DEF(movi_i64, 1, 0, 1, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT)
> @@ -171,6 +173,8 @@ DEF(orc_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_orc_i64))
>  DEF(eqv_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_eqv_i64))
>  DEF(nand_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_nand_i64))
>  DEF(nor_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_nor_i64))
> +DEF(clz_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_clz_i64))
> +DEF(ctz_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ctz_i64))
>
>  DEF(add2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_add2_i64))
>  DEF(sub2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_sub2_i64))
> diff --git a/tcg/tcg-runtime.h b/tcg/tcg-runtime.h
> index 1deb86a..eb1cd76 100644
> --- a/tcg/tcg-runtime.h
> +++ b/tcg/tcg-runtime.h
> @@ -15,6 +15,11 @@ DEF_HELPER_FLAGS_2(sar_i64, TCG_CALL_NO_RWG_SE, s64, s64, s64)
>  DEF_HELPER_FLAGS_2(mulsh_i64, TCG_CALL_NO_RWG_SE, s64, s64, s64)
>  DEF_HELPER_FLAGS_2(muluh_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64)
>
> +DEF_HELPER_FLAGS_2(clz_i32, TCG_CALL_NO_RWG_SE, i32, i32, i32)
> +DEF_HELPER_FLAGS_2(ctz_i32, TCG_CALL_NO_RWG_SE, i32, i32, i32)
> +DEF_HELPER_FLAGS_2(clz_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64)
> +DEF_HELPER_FLAGS_2(ctz_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64)
> +
>  DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env)
>
>  #ifdef CONFIG_SOFTMMU
> diff --git a/tcg/tcg.h b/tcg/tcg.h
> index 144bdab..e026282 100644
> --- a/tcg/tcg.h
> +++ b/tcg/tcg.h
> @@ -111,6 +111,8 @@ typedef uint64_t TCGRegSet;
>  #define TCG_TARGET_HAS_eqv_i64          0
>  #define TCG_TARGET_HAS_nand_i64         0
>  #define TCG_TARGET_HAS_nor_i64          0
> +#define TCG_TARGET_HAS_clz_i64          0
> +#define TCG_TARGET_HAS_ctz_i64          0
>  #define TCG_TARGET_HAS_deposit_i64      0
>  #define TCG_TARGET_HAS_extract_i64      0
>  #define TCG_TARGET_HAS_sextract_i64     0
> diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
> index 2065042..0646444 100644
> --- a/tcg/tci/tcg-target.h
> +++ b/tcg/tci/tcg-target.h
> @@ -74,6 +74,8 @@
>  #define TCG_TARGET_HAS_eqv_i32          0
>  #define TCG_TARGET_HAS_nand_i32         0
>  #define TCG_TARGET_HAS_nor_i32          0
> +#define TCG_TARGET_HAS_clz_i32          0
> +#define TCG_TARGET_HAS_ctz_i32          0
>  #define TCG_TARGET_HAS_neg_i32          1
>  #define TCG_TARGET_HAS_not_i32          1
>  #define TCG_TARGET_HAS_orc_i32          0
> @@ -104,6 +106,8 @@
>  #define TCG_TARGET_HAS_eqv_i64          0
>  #define TCG_TARGET_HAS_nand_i64         0
>  #define TCG_TARGET_HAS_nor_i64          0
> +#define TCG_TARGET_HAS_clz_i64          0
> +#define TCG_TARGET_HAS_ctz_i64          0
>  #define TCG_TARGET_HAS_neg_i64          1
>  #define TCG_TARGET_HAS_not_i64          1
>  #define TCG_TARGET_HAS_orc_i64          0


--
Alex Bennée

  reply	other threads:[~2016-12-08 17:45 UTC|newest]

Thread overview: 102+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-11-23 13:00 [Qemu-devel] [PATCH v4 00/64] tcg 2.9 patch queue Richard Henderson
2016-11-23 13:00 ` [Qemu-devel] [PATCH v4 01/64] tcg: Add field extraction primitives Richard Henderson
2016-12-05 13:17   ` Alex Bennée
2016-12-05 15:14     ` Richard Henderson
2016-11-23 13:00 ` [Qemu-devel] [PATCH v4 02/64] tcg: Minor adjustments to deposit expanders Richard Henderson
2016-12-05 13:18   ` Alex Bennée
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 03/64] tcg: Add deposit_z expander Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 04/64] tcg/aarch64: Implement field extraction opcodes Richard Henderson
2016-12-06 12:24   ` Alex Bennée
2016-12-06 16:36     ` Richard Henderson
2016-12-09 15:41       ` Alex Bennée
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 05/64] tcg/arm: Move isa detection to tcg-target.h Richard Henderson
2016-12-06 12:34   ` Alex Bennée
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 06/64] tcg/arm: Implement field extraction opcodes Richard Henderson
2016-12-06 16:16   ` Alex Bennée
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 07/64] tcg/i386: " Richard Henderson
2016-11-25 11:16   ` Paolo Bonzini
2016-11-25 11:21     ` Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 08/64] tcg/mips: " Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 09/64] tcg/ppc: " Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 10/64] tcg/s390: Expose host facilities to tcg-target.h Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 11/64] tcg/s390: Implement field extraction opcodes Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 12/64] tcg/s390: Support deposit into zero Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 13/64] target-alpha: Use deposit and extract ops Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 14/64] target-arm: Use new " Richard Henderson
2016-12-01 17:19   ` Alex Bennée
2016-12-03 21:01     ` Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 15/64] target-i386: " Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 16/64] target-mips: Use the new extract op Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 17/64] target-ppc: Use the new deposit and extract ops Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 18/64] target-s390x: " Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 19/64] tcg/optimize: Fold movcond 0/1 into setcond Richard Henderson
2016-12-06 16:22   ` Alex Bennée
2016-12-06 16:33     ` Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 20/64] tcg: Add markup for output requires new register Richard Henderson
2016-12-06 16:34   ` Alex Bennée
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 21/64] tcg: Transition flat op_defs array to a target callback Richard Henderson
2016-12-06 16:38   ` Alex Bennée
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 22/64] tcg: Pass the opcode width to target_parse_constraint Richard Henderson
2016-12-06 16:43   ` Alex Bennée
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 23/64] tcg: Allow an operand to be matching or a constant Richard Henderson
2016-12-08 17:19   ` Alex Bennée
2016-12-08 17:49     ` Richard Henderson
2016-12-08 20:38       ` Alex Bennée
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 24/64] tcg: Add clz and ctz opcodes Richard Henderson
2016-12-08 17:44   ` Alex Bennée [this message]
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 25/64] disas/i386.c: Handle tzcnt Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 26/64] disas/ppc: Handle popcnt and cnttz Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 27/64] target-alpha: Use the ctz and clz opcodes Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 28/64] target-cris: Use clz opcode Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 29/64] target-microblaze: " Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 30/64] target-mips: " Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 31/64] target-openrisc: Use clz and ctz opcodes Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 32/64] target-ppc: " Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 33/64] target-s390x: Use clz opcode Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 34/64] target-tilegx: Use clz and ctz opcodes Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 35/64] target-tricore: Use clz opcode Richard Henderson
2016-11-23 14:58   ` Bastian Koppelmann
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 36/64] target-unicore32: " Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 37/64] target-xtensa: " Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 38/64] target-arm: " Richard Henderson
2016-12-08 17:47   ` Alex Bennée
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 39/64] target-i386: Use clz and ctz opcodes Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 40/64] tcg/ppc: Handle ctz and clz opcodes Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 41/64] tcg/aarch64: " Richard Henderson
2016-12-01 18:36   ` Alex Bennée
2016-12-01 18:44     ` Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 42/64] tcg/arm: " Richard Henderson
2016-12-08 17:56   ` Alex Bennée
2016-12-08 18:13     ` Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 43/64] tcg/mips: Handle clz opcode Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 44/64] tcg/s390: " Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 45/64] tcg/i386: Fully convert tcg_target_op_def Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 46/64] tcg/i386: Hoist common arguments in tcg_out_op Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 47/64] tcg/i386: Allow bmi2 shiftx to have non-matching operands Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 48/64] tcg/i386: Handle ctz and clz opcodes Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 49/64] tcg/i386: Rely on undefined/undocumented behaviour of BSF/BSR Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 50/64] tcg: Add helpers for clrsb Richard Henderson
2016-12-09  9:51   ` Alex Bennée
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 51/64] target-arm: Use clrsb helper Richard Henderson
2016-12-09  9:52   ` Alex Bennée
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 52/64] target-tricore: " Richard Henderson
2016-11-23 14:58   ` Bastian Koppelmann
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 53/64] target-xtensa: " Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 54/64] tcg: Add opcode for ctpop Richard Henderson
2016-12-09  9:57   ` Alex Bennée
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 55/64] target-alpha: Use ctpop helper Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 56/64] target-ppc: " Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 57/64] target-s390x: Avoid a loop for popcnt Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 58/64] target-sparc: Use ctpop helper Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 59/64] target-tilegx: " Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 60/64] target-i386: " Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 61/64] qemu/host-utils.h: Reduce the operation count in the fallback ctpop Richard Henderson
2016-12-09 14:41   ` Alex Bennée
2016-12-09 17:18     ` Richard Henderson
2016-11-23 13:01 ` [Qemu-devel] [PATCH v4 62/64] tcg: Use ctpop to generate ctz if needed Richard Henderson
2016-12-09 16:07   ` Alex Bennée
2016-12-09 16:48     ` Richard Henderson
2016-11-23 13:02 ` [Qemu-devel] [PATCH v4 63/64] tcg/ppc: Handle ctpop opcode Richard Henderson
2016-11-23 13:02 ` [Qemu-devel] [PATCH v4 64/64] tcg/i386: " Richard Henderson
2016-11-29 13:33 ` [Qemu-devel] [PATCH v4 00/64] tcg 2.9 patch queue no-reply
2016-12-09 16:08 ` Alex Bennée

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=874m2ez56t.fsf@linaro.org \
    --to=alex.bennee@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=rth@twiddle.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.