Re: [Qemu-devel] [PATCH v11 02/20] tcg: Add types and basic operations for host vectors

All of lore.kernel.org
 help / color / mirror / Atom feed

From: "Alex Bennée" <alex.bennee@linaro.org>
To: Richard Henderson <richard.henderson@linaro.org>
Cc: qemu-devel@nongnu.org, peter.maydell@linaro.org
Subject: Re: [Qemu-devel] [PATCH v11 02/20] tcg: Add types and basic operations for host vectors
Date: Tue, 06 Feb 2018 08:53:37 +0000	[thread overview]
Message-ID: <87zi4mwlbi.fsf@linaro.org> (raw)
In-Reply-To: <20180126045742.5487-3-richard.henderson@linaro.org>


Richard Henderson <richard.henderson@linaro.org> writes:

> Nothing uses or enables them yet.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

> ---
>  Makefile.target  |   4 +-
>  tcg/tcg-op.h     |  27 +++++
>  tcg/tcg-opc.h    |  25 +++++
>  tcg/tcg.h        |  56 +++++++++++
>  tcg/tcg-op-vec.c | 292 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  tcg/tcg.c        |  96 +++++++++++++++++-
>  tcg/README       |  49 ++++++++++
>  7 files changed, 543 insertions(+), 6 deletions(-)
>  create mode 100644 tcg/tcg-op-vec.c
>
> diff --git a/Makefile.target b/Makefile.target
> index f9a9da7e7c..7f30a1e725 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -93,8 +93,8 @@ all: $(PROGS) stap
>  # cpu emulator library
>  obj-y += exec.o
>  obj-y += accel/
> -obj-$(CONFIG_TCG) += tcg/tcg.o tcg/tcg-op.o tcg/optimize.o
> -obj-$(CONFIG_TCG) += tcg/tcg-common.o
> +obj-$(CONFIG_TCG) += tcg/tcg.o tcg/tcg-op.o tcg/tcg-op-vec.o
> +obj-$(CONFIG_TCG) += tcg/tcg-common.o tcg/optimize.o
>  obj-$(CONFIG_TCG_INTERPRETER) += tcg/tci.o
>  obj-$(CONFIG_TCG_INTERPRETER) += disas/tci.o
>  obj-y += fpu/softfloat.o
> diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
> index ca07b32b65..0c02d86b8b 100644
> --- a/tcg/tcg-op.h
> +++ b/tcg/tcg-op.h
> @@ -35,6 +35,10 @@ void tcg_gen_op4(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg);
>  void tcg_gen_op5(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg);
>  void tcg_gen_op6(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg);
>
> +void vec_gen_2(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg);
> +void vec_gen_3(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg, TCGArg);
> +void vec_gen_4(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg, TCGArg, TCGArg);
> +
>  static inline void tcg_gen_op1_i32(TCGOpcode opc, TCGv_i32 a1)
>  {
>      tcg_gen_op1(opc, tcgv_i32_arg(a1));
> @@ -903,6 +907,27 @@ void tcg_gen_atomic_or_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
>  void tcg_gen_atomic_xor_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
>  void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
>
> +void tcg_gen_mov_vec(TCGv_vec, TCGv_vec);
> +void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec, TCGv_i32);
> +void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec, TCGv_i64);
> +void tcg_gen_dup8i_vec(TCGv_vec, uint32_t);
> +void tcg_gen_dup16i_vec(TCGv_vec, uint32_t);
> +void tcg_gen_dup32i_vec(TCGv_vec, uint32_t);
> +void tcg_gen_dup64i_vec(TCGv_vec, uint64_t);
> +void tcg_gen_add_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
> +void tcg_gen_sub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
> +void tcg_gen_and_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
> +void tcg_gen_or_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
> +void tcg_gen_xor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
> +void tcg_gen_andc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
> +void tcg_gen_orc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
> +void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a);
> +void tcg_gen_neg_vec(unsigned vece, TCGv_vec r, TCGv_vec a);
> +
> +void tcg_gen_ld_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset);
> +void tcg_gen_st_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset);
> +void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t);
> +
>  #if TARGET_LONG_BITS == 64
>  #define tcg_gen_movi_tl tcg_gen_movi_i64
>  #define tcg_gen_mov_tl tcg_gen_mov_i64
> @@ -1001,6 +1026,7 @@ void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
>  #define tcg_gen_atomic_and_fetch_tl tcg_gen_atomic_and_fetch_i64
>  #define tcg_gen_atomic_or_fetch_tl tcg_gen_atomic_or_fetch_i64
>  #define tcg_gen_atomic_xor_fetch_tl tcg_gen_atomic_xor_fetch_i64
> +#define tcg_gen_dup_tl_vec  tcg_gen_dup_i64_vec
>  #else
>  #define tcg_gen_movi_tl tcg_gen_movi_i32
>  #define tcg_gen_mov_tl tcg_gen_mov_i32
> @@ -1098,6 +1124,7 @@ void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
>  #define tcg_gen_atomic_and_fetch_tl tcg_gen_atomic_and_fetch_i32
>  #define tcg_gen_atomic_or_fetch_tl tcg_gen_atomic_or_fetch_i32
>  #define tcg_gen_atomic_xor_fetch_tl tcg_gen_atomic_xor_fetch_i32
> +#define tcg_gen_dup_tl_vec  tcg_gen_dup_i32_vec
>  #endif
>
>  #if UINTPTR_MAX == UINT32_MAX
> diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
> index 956fb1e9f3..b851ad4bca 100644
> --- a/tcg/tcg-opc.h
> +++ b/tcg/tcg-opc.h
> @@ -204,8 +204,33 @@ DEF(qemu_ld_i64, DATA64_ARGS, TLADDR_ARGS, 1,
>  DEF(qemu_st_i64, 0, TLADDR_ARGS + DATA64_ARGS, 1,
>      TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
>
> +/* Host vector support.  */
> +
> +#define IMPLVEC  TCG_OPF_VECTOR | IMPL(TCG_TARGET_MAYBE_vec)
> +
> +DEF(mov_vec, 1, 1, 0, TCG_OPF_VECTOR | TCG_OPF_NOT_PRESENT)
> +DEF(dupi_vec, 1, 0, 1, TCG_OPF_VECTOR | TCG_OPF_NOT_PRESENT)
> +
> +DEF(dup_vec, 1, 1, 0, IMPLVEC)
> +DEF(dup2_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_REG_BITS == 32))
> +
> +DEF(ld_vec, 1, 1, 1, IMPLVEC)
> +DEF(st_vec, 0, 2, 1, IMPLVEC)
> +
> +DEF(add_vec, 1, 2, 0, IMPLVEC)
> +DEF(sub_vec, 1, 2, 0, IMPLVEC)
> +DEF(neg_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_neg_vec))
> +
> +DEF(and_vec, 1, 2, 0, IMPLVEC)
> +DEF(or_vec, 1, 2, 0, IMPLVEC)
> +DEF(xor_vec, 1, 2, 0, IMPLVEC)
> +DEF(andc_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_andc_vec))
> +DEF(orc_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_orc_vec))
> +DEF(not_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_not_vec))
> +
>  #undef TLADDR_ARGS
>  #undef DATA64_ARGS
>  #undef IMPL
>  #undef IMPL64
> +#undef IMPLVEC
>  #undef DEF
> diff --git a/tcg/tcg.h b/tcg/tcg.h
> index 2ce497cebf..dce483b0ee 100644
> --- a/tcg/tcg.h
> +++ b/tcg/tcg.h
> @@ -170,6 +170,27 @@ typedef uint64_t TCGRegSet;
>  # error "Missing unsigned widening multiply"
>  #endif
>
> +#if !defined(TCG_TARGET_HAS_v64) \
> +    && !defined(TCG_TARGET_HAS_v128) \
> +    && !defined(TCG_TARGET_HAS_v256)
> +#define TCG_TARGET_MAYBE_vec            0
> +#define TCG_TARGET_HAS_neg_vec          0
> +#define TCG_TARGET_HAS_not_vec          0
> +#define TCG_TARGET_HAS_andc_vec         0
> +#define TCG_TARGET_HAS_orc_vec          0
> +#else
> +#define TCG_TARGET_MAYBE_vec            1
> +#endif
> +#ifndef TCG_TARGET_HAS_v64
> +#define TCG_TARGET_HAS_v64              0
> +#endif
> +#ifndef TCG_TARGET_HAS_v128
> +#define TCG_TARGET_HAS_v128             0
> +#endif
> +#ifndef TCG_TARGET_HAS_v256
> +#define TCG_TARGET_HAS_v256             0
> +#endif
> +
>  #ifndef TARGET_INSN_START_EXTRA_WORDS
>  # define TARGET_INSN_START_WORDS 1
>  #else
> @@ -246,6 +267,11 @@ typedef struct TCGPool {
>  typedef enum TCGType {
>      TCG_TYPE_I32,
>      TCG_TYPE_I64,
> +
> +    TCG_TYPE_V64,
> +    TCG_TYPE_V128,
> +    TCG_TYPE_V256,
> +
>      TCG_TYPE_COUNT, /* number of different types */
>
>      /* An alias for the size of the host register.  */
> @@ -396,6 +422,8 @@ typedef tcg_target_ulong TCGArg;
>      * TCGv_i32 : 32 bit integer type
>      * TCGv_i64 : 64 bit integer type
>      * TCGv_ptr : a host pointer type
> +    * TCGv_vec : a host vector type; the exact size is not exposed
> +                 to the CPU front-end code.
>      * TCGv : an integer type the same size as target_ulong
>               (an alias for either TCGv_i32 or TCGv_i64)
>     The compiler's type checking will complain if you mix them
> @@ -418,6 +446,7 @@ typedef tcg_target_ulong TCGArg;
>  typedef struct TCGv_i32_d *TCGv_i32;
>  typedef struct TCGv_i64_d *TCGv_i64;
>  typedef struct TCGv_ptr_d *TCGv_ptr;
> +typedef struct TCGv_vec_d *TCGv_vec;
>  typedef TCGv_ptr TCGv_env;
>  #if TARGET_LONG_BITS == 32
>  #define TCGv TCGv_i32
> @@ -589,6 +618,9 @@ typedef struct TCGOp {
>  #define TCGOP_CALLI(X)    (X)->param1
>  #define TCGOP_CALLO(X)    (X)->param2
>
> +#define TCGOP_VECL(X)     (X)->param1
> +#define TCGOP_VECE(X)     (X)->param2
> +
>  /* Make sure operands fit in the bitfields above.  */
>  QEMU_BUILD_BUG_ON(NB_OPS > (1 << 8));
>
> @@ -726,6 +758,11 @@ static inline TCGTemp *tcgv_ptr_temp(TCGv_ptr v)
>      return tcgv_i32_temp((TCGv_i32)v);
>  }
>
> +static inline TCGTemp *tcgv_vec_temp(TCGv_vec v)
> +{
> +    return tcgv_i32_temp((TCGv_i32)v);
> +}
> +
>  static inline TCGArg tcgv_i32_arg(TCGv_i32 v)
>  {
>      return temp_arg(tcgv_i32_temp(v));
> @@ -741,6 +778,11 @@ static inline TCGArg tcgv_ptr_arg(TCGv_ptr v)
>      return temp_arg(tcgv_ptr_temp(v));
>  }
>
> +static inline TCGArg tcgv_vec_arg(TCGv_vec v)
> +{
> +    return temp_arg(tcgv_vec_temp(v));
> +}
> +
>  static inline TCGv_i32 temp_tcgv_i32(TCGTemp *t)
>  {
>      (void)temp_idx(t); /* trigger embedded assert */
> @@ -757,6 +799,11 @@ static inline TCGv_ptr temp_tcgv_ptr(TCGTemp *t)
>      return (TCGv_ptr)temp_tcgv_i32(t);
>  }
>
> +static inline TCGv_vec temp_tcgv_vec(TCGTemp *t)
> +{
> +    return (TCGv_vec)temp_tcgv_i32(t);
> +}
> +
>  #if TCG_TARGET_REG_BITS == 32
>  static inline TCGv_i32 TCGV_LOW(TCGv_i64 t)
>  {
> @@ -832,9 +879,12 @@ TCGTemp *tcg_global_mem_new_internal(TCGType, TCGv_ptr,
>
>  TCGv_i32 tcg_temp_new_internal_i32(int temp_local);
>  TCGv_i64 tcg_temp_new_internal_i64(int temp_local);
> +TCGv_vec tcg_temp_new_vec(TCGType type);
> +TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match);
>
>  void tcg_temp_free_i32(TCGv_i32 arg);
>  void tcg_temp_free_i64(TCGv_i64 arg);
> +void tcg_temp_free_vec(TCGv_vec arg);
>
>  static inline TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t offset,
>                                                const char *name)
> @@ -916,6 +966,8 @@ enum {
>      /* Instruction is optional and not implemented by the host, or insn
>         is generic and should not be implemened by the host.  */
>      TCG_OPF_NOT_PRESENT  = 0x10,
> +    /* Instruction operands are vectors.  */
> +    TCG_OPF_VECTOR       = 0x20,
>  };
>
>  typedef struct TCGOpDef {
> @@ -981,6 +1033,10 @@ TCGv_i32 tcg_const_i32(int32_t val);
>  TCGv_i64 tcg_const_i64(int64_t val);
>  TCGv_i32 tcg_const_local_i32(int32_t val);
>  TCGv_i64 tcg_const_local_i64(int64_t val);
> +TCGv_vec tcg_const_zeros_vec(TCGType);
> +TCGv_vec tcg_const_ones_vec(TCGType);
> +TCGv_vec tcg_const_zeros_vec_matching(TCGv_vec);
> +TCGv_vec tcg_const_ones_vec_matching(TCGv_vec);
>
>  TCGLabel *gen_new_label(void);
>
> diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c
> new file mode 100644
> index 0000000000..9e4678878b
> --- /dev/null
> +++ b/tcg/tcg-op-vec.c
> @@ -0,0 +1,292 @@
> +/*
> + * Tiny Code Generator for QEMU
> + *
> + * Copyright (c) 2018 Linaro, Inc.
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qemu-common.h"
> +#include "cpu.h"
> +#include "exec/exec-all.h"
> +#include "tcg.h"
> +#include "tcg-op.h"
> +#include "tcg-mo.h"
> +
> +/* Reduce the number of ifdefs below.  This assumes that all uses of
> +   TCGV_HIGH and TCGV_LOW are properly protected by a conditional that
> +   the compiler can eliminate.  */
> +#if TCG_TARGET_REG_BITS == 64
> +extern TCGv_i32 TCGV_LOW_link_error(TCGv_i64);
> +extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64);
> +#define TCGV_LOW  TCGV_LOW_link_error
> +#define TCGV_HIGH TCGV_HIGH_link_error
> +#endif
> +
> +void vec_gen_2(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a)
> +{
> +    TCGOp *op = tcg_emit_op(opc);
> +    TCGOP_VECL(op) = type - TCG_TYPE_V64;
> +    TCGOP_VECE(op) = vece;
> +    op->args[0] = r;
> +    op->args[1] = a;
> +}
> +
> +void vec_gen_3(TCGOpcode opc, TCGType type, unsigned vece,
> +               TCGArg r, TCGArg a, TCGArg b)
> +{
> +    TCGOp *op = tcg_emit_op(opc);
> +    TCGOP_VECL(op) = type - TCG_TYPE_V64;
> +    TCGOP_VECE(op) = vece;
> +    op->args[0] = r;
> +    op->args[1] = a;
> +    op->args[2] = b;
> +}
> +
> +void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece,
> +               TCGArg r, TCGArg a, TCGArg b, TCGArg c)
> +{
> +    TCGOp *op = tcg_emit_op(opc);
> +    TCGOP_VECL(op) = type - TCG_TYPE_V64;
> +    TCGOP_VECE(op) = vece;
> +    op->args[0] = r;
> +    op->args[1] = a;
> +    op->args[2] = b;
> +    op->args[3] = c;
> +}
> +
> +static void vec_gen_op2(TCGOpcode opc, unsigned vece, TCGv_vec r, TCGv_vec a)
> +{
> +    TCGTemp *rt = tcgv_vec_temp(r);
> +    TCGTemp *at = tcgv_vec_temp(a);
> +    TCGType type = rt->base_type;
> +
> +    tcg_debug_assert(at->base_type == type);
> +    vec_gen_2(opc, type, vece, temp_arg(rt), temp_arg(at));
> +}
> +
> +static void vec_gen_op3(TCGOpcode opc, unsigned vece,
> +                        TCGv_vec r, TCGv_vec a, TCGv_vec b)
> +{
> +    TCGTemp *rt = tcgv_vec_temp(r);
> +    TCGTemp *at = tcgv_vec_temp(a);
> +    TCGTemp *bt = tcgv_vec_temp(b);
> +    TCGType type = rt->base_type;
> +
> +    tcg_debug_assert(at->base_type == type);
> +    tcg_debug_assert(bt->base_type == type);
> +    vec_gen_3(opc, type, vece, temp_arg(rt), temp_arg(at), temp_arg(bt));
> +}
> +
> +void tcg_gen_mov_vec(TCGv_vec r, TCGv_vec a)
> +{
> +    if (r != a) {
> +        vec_gen_op2(INDEX_op_mov_vec, 0, r, a);
> +    }
> +}
> +
> +#define MO_REG  (TCG_TARGET_REG_BITS == 64 ? MO_64 : MO_32)
> +
> +static void tcg_gen_dupi_vec(TCGv_vec r, unsigned vece, TCGArg a)
> +{
> +    TCGTemp *rt = tcgv_vec_temp(r);
> +    vec_gen_2(INDEX_op_dupi_vec, rt->base_type, vece, temp_arg(rt), a);
> +}
> +
> +TCGv_vec tcg_const_zeros_vec(TCGType type)
> +{
> +    TCGv_vec ret = tcg_temp_new_vec(type);
> +    tcg_gen_dupi_vec(ret, MO_REG, 0);
> +    return ret;
> +}
> +
> +TCGv_vec tcg_const_ones_vec(TCGType type)
> +{
> +    TCGv_vec ret = tcg_temp_new_vec(type);
> +    tcg_gen_dupi_vec(ret, MO_REG, -1);
> +    return ret;
> +}
> +
> +TCGv_vec tcg_const_zeros_vec_matching(TCGv_vec m)
> +{
> +    TCGTemp *t = tcgv_vec_temp(m);
> +    return tcg_const_zeros_vec(t->base_type);
> +}
> +
> +TCGv_vec tcg_const_ones_vec_matching(TCGv_vec m)
> +{
> +    TCGTemp *t = tcgv_vec_temp(m);
> +    return tcg_const_ones_vec(t->base_type);
> +}
> +
> +void tcg_gen_dup64i_vec(TCGv_vec r, uint64_t a)
> +{
> +    if (TCG_TARGET_REG_BITS == 32 && a == deposit64(a, 32, 32, a)) {
> +        tcg_gen_dupi_vec(r, MO_32, a);
> +    } else if (TCG_TARGET_REG_BITS == 64 || a == (uint64_t)(int32_t)a) {
> +        tcg_gen_dupi_vec(r, MO_64, a);
> +    } else {
> +        TCGv_i64 c = tcg_const_i64(a);
> +        tcg_gen_dup_i64_vec(MO_64, r, c);
> +        tcg_temp_free_i64(c);
> +    }
> +}
> +
> +void tcg_gen_dup32i_vec(TCGv_vec r, uint32_t a)
> +{
> +    tcg_gen_dupi_vec(r, MO_REG, ((TCGArg)-1 / 0xffffffffu) * a);
> +}
> +
> +void tcg_gen_dup16i_vec(TCGv_vec r, uint32_t a)
> +{
> +    tcg_gen_dupi_vec(r, MO_REG, ((TCGArg)-1 / 0xffff) * (a & 0xffff));
> +}
> +
> +void tcg_gen_dup8i_vec(TCGv_vec r, uint32_t a)
> +{
> +    tcg_gen_dupi_vec(r, MO_REG, ((TCGArg)-1 / 0xff) * (a & 0xff));
> +}
> +
> +void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec r, TCGv_i64 a)
> +{
> +    TCGArg ri = tcgv_vec_arg(r);
> +    TCGTemp *rt = arg_temp(ri);
> +    TCGType type = rt->base_type;
> +
> +    if (TCG_TARGET_REG_BITS == 64) {
> +        TCGArg ai = tcgv_i64_arg(a);
> +        vec_gen_2(INDEX_op_dup_vec, type, MO_64, ri, ai);
> +    } else if (vece == MO_64) {
> +        TCGArg al = tcgv_i32_arg(TCGV_LOW(a));
> +        TCGArg ah = tcgv_i32_arg(TCGV_HIGH(a));
> +        vec_gen_3(INDEX_op_dup2_vec, type, MO_64, ri, al, ah);
> +    } else {
> +        TCGArg ai = tcgv_i32_arg(TCGV_LOW(a));
> +        vec_gen_2(INDEX_op_dup_vec, type, MO_64, ri, ai);
> +    }
> +}
> +
> +void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec r, TCGv_i32 a)
> +{
> +    TCGArg ri = tcgv_vec_arg(r);
> +    TCGArg ai = tcgv_i32_arg(a);
> +    TCGTemp *rt = arg_temp(ri);
> +    TCGType type = rt->base_type;
> +
> +    vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
> +}
> +
> +static void vec_gen_ldst(TCGOpcode opc, TCGv_vec r, TCGv_ptr b, TCGArg o)
> +{
> +    TCGArg ri = tcgv_vec_arg(r);
> +    TCGArg bi = tcgv_ptr_arg(b);
> +    TCGTemp *rt = arg_temp(ri);
> +    TCGType type = rt->base_type;
> +
> +    vec_gen_3(opc, type, 0, ri, bi, o);
> +}
> +
> +void tcg_gen_ld_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
> +{
> +    vec_gen_ldst(INDEX_op_ld_vec, r, b, o);
> +}
> +
> +void tcg_gen_st_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
> +{
> +    vec_gen_ldst(INDEX_op_st_vec, r, b, o);
> +}
> +
> +void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr b, TCGArg o, TCGType low_type)
> +{
> +    TCGArg ri = tcgv_vec_arg(r);
> +    TCGArg bi = tcgv_ptr_arg(b);
> +    TCGTemp *rt = arg_temp(ri);
> +    TCGType type = rt->base_type;
> +
> +    tcg_debug_assert(low_type >= TCG_TYPE_V64);
> +    tcg_debug_assert(low_type <= type);
> +    vec_gen_3(INDEX_op_st_vec, low_type, 0, ri, bi, o);
> +}
> +
> +void tcg_gen_add_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
> +{
> +    vec_gen_op3(INDEX_op_add_vec, vece, r, a, b);
> +}
> +
> +void tcg_gen_sub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
> +{
> +    vec_gen_op3(INDEX_op_sub_vec, vece, r, a, b);
> +}
> +
> +void tcg_gen_and_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
> +{
> +    vec_gen_op3(INDEX_op_and_vec, 0, r, a, b);
> +}
> +
> +void tcg_gen_or_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
> +{
> +    vec_gen_op3(INDEX_op_or_vec, 0, r, a, b);
> +}
> +
> +void tcg_gen_xor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
> +{
> +    vec_gen_op3(INDEX_op_xor_vec, 0, r, a, b);
> +}
> +
> +void tcg_gen_andc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
> +{
> +    if (TCG_TARGET_HAS_andc_vec) {
> +        vec_gen_op3(INDEX_op_andc_vec, 0, r, a, b);
> +    } else {
> +        TCGv_vec t = tcg_temp_new_vec_matching(r);
> +        tcg_gen_not_vec(0, t, b);
> +        tcg_gen_and_vec(0, r, a, t);
> +        tcg_temp_free_vec(t);
> +    }
> +}
> +
> +void tcg_gen_orc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
> +{
> +    if (TCG_TARGET_HAS_orc_vec) {
> +        vec_gen_op3(INDEX_op_orc_vec, 0, r, a, b);
> +    } else {
> +        TCGv_vec t = tcg_temp_new_vec_matching(r);
> +        tcg_gen_not_vec(0, t, b);
> +        tcg_gen_or_vec(0, r, a, t);
> +        tcg_temp_free_vec(t);
> +    }
> +}
> +
> +void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
> +{
> +    if (TCG_TARGET_HAS_not_vec) {
> +        vec_gen_op2(INDEX_op_not_vec, 0, r, a);
> +    } else {
> +        TCGv_vec t = tcg_const_ones_vec_matching(r);
> +        tcg_gen_xor_vec(0, r, a, t);
> +        tcg_temp_free_vec(t);
> +    }
> +}
> +
> +void tcg_gen_neg_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
> +{
> +    if (TCG_TARGET_HAS_neg_vec) {
> +        vec_gen_op2(INDEX_op_neg_vec, vece, r, a);
> +    } else {
> +        TCGv_vec t = tcg_const_zeros_vec_matching(r);
> +        tcg_gen_sub_vec(vece, r, t, a);
> +        tcg_temp_free_vec(t);
> +    }
> +}
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index 93caa0be93..42f0acdf8e 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -106,6 +106,18 @@ static void tcg_out_movi(TCGContext *s, TCGType type,
>                           TCGReg ret, tcg_target_long arg);
>  static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
>                         const int *const_args);
> +#if TCG_TARGET_MAYBE_vec
> +static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
> +                           unsigned vece, const TCGArg *args,
> +                           const int *const_args);
> +#else
> +static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
> +                                  unsigned vece, const TCGArg *args,
> +                                  const int *const_args)
> +{
> +    g_assert_not_reached();
> +}
> +#endif
>  static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
>                         intptr_t arg2);
>  static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
> @@ -146,8 +158,7 @@ struct tcg_region_state {
>  };
>
>  static struct tcg_region_state region;
> -
> -static TCGRegSet tcg_target_available_regs[2];
> +static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
>  static TCGRegSet tcg_target_call_clobber_regs;
>
>  #if TCG_TARGET_INSN_UNIT_SIZE == 1
> @@ -1026,6 +1037,41 @@ TCGv_i64 tcg_temp_new_internal_i64(int temp_local)
>      return temp_tcgv_i64(t);
>  }
>
> +TCGv_vec tcg_temp_new_vec(TCGType type)
> +{
> +    TCGTemp *t;
> +
> +#ifdef CONFIG_DEBUG_TCG
> +    switch (type) {
> +    case TCG_TYPE_V64:
> +        assert(TCG_TARGET_HAS_v64);
> +        break;
> +    case TCG_TYPE_V128:
> +        assert(TCG_TARGET_HAS_v128);
> +        break;
> +    case TCG_TYPE_V256:
> +        assert(TCG_TARGET_HAS_v256);
> +        break;
> +    default:
> +        g_assert_not_reached();
> +    }
> +#endif
> +
> +    t = tcg_temp_new_internal(type, 0);
> +    return temp_tcgv_vec(t);
> +}
> +
> +/* Create a new temp of the same type as an existing temp.  */
> +TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
> +{
> +    TCGTemp *t = tcgv_vec_temp(match);
> +
> +    tcg_debug_assert(t->temp_allocated != 0);
> +
> +    t = tcg_temp_new_internal(t->base_type, 0);
> +    return temp_tcgv_vec(t);
> +}
> +
>  static void tcg_temp_free_internal(TCGTemp *ts)
>  {
>      TCGContext *s = tcg_ctx;
> @@ -1057,6 +1103,11 @@ void tcg_temp_free_i64(TCGv_i64 arg)
>      tcg_temp_free_internal(tcgv_i64_temp(arg));
>  }
>
> +void tcg_temp_free_vec(TCGv_vec arg)
> +{
> +    tcg_temp_free_internal(tcgv_vec_temp(arg));
> +}
> +
>  TCGv_i32 tcg_const_i32(int32_t val)
>  {
>      TCGv_i32 t0;
> @@ -1114,6 +1165,9 @@ int tcg_check_temp_count(void)
>     Test the runtime variable that controls each opcode.  */
>  bool tcg_op_supported(TCGOpcode op)
>  {
> +    const bool have_vec
> +        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
> +
>      switch (op) {
>      case INDEX_op_discard:
>      case INDEX_op_set_label:
> @@ -1327,6 +1381,28 @@ bool tcg_op_supported(TCGOpcode op)
>      case INDEX_op_mulsh_i64:
>          return TCG_TARGET_HAS_mulsh_i64;
>
> +    case INDEX_op_mov_vec:
> +    case INDEX_op_dup_vec:
> +    case INDEX_op_dupi_vec:
> +    case INDEX_op_ld_vec:
> +    case INDEX_op_st_vec:
> +    case INDEX_op_add_vec:
> +    case INDEX_op_sub_vec:
> +    case INDEX_op_and_vec:
> +    case INDEX_op_or_vec:
> +    case INDEX_op_xor_vec:
> +        return have_vec;
> +    case INDEX_op_dup2_vec:
> +        return have_vec && TCG_TARGET_REG_BITS == 32;
> +    case INDEX_op_not_vec:
> +        return have_vec && TCG_TARGET_HAS_not_vec;
> +    case INDEX_op_neg_vec:
> +        return have_vec && TCG_TARGET_HAS_neg_vec;
> +    case INDEX_op_andc_vec:
> +        return have_vec && TCG_TARGET_HAS_andc_vec;
> +    case INDEX_op_orc_vec:
> +        return have_vec && TCG_TARGET_HAS_orc_vec;
> +
>      case NB_OPS:
>          break;
>      }
> @@ -1661,6 +1737,11 @@ void tcg_dump_ops(TCGContext *s)
>              nb_iargs = def->nb_iargs;
>              nb_cargs = def->nb_cargs;
>
> +            if (def->flags & TCG_OPF_VECTOR) {
> +                col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
> +                                8 << TCGOP_VECE(op));
> +            }
> +
>              k = 0;
>              for (i = 0; i < nb_oargs; i++) {
>                  if (k != 0) {
> @@ -2890,8 +2971,13 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
>      }
>
>      /* emit instruction */
> -    tcg_out_op(s, op->opc, new_args, const_args);
> -
> +    if (def->flags & TCG_OPF_VECTOR) {
> +        tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
> +                       new_args, const_args);
> +    } else {
> +        tcg_out_op(s, op->opc, new_args, const_args);
> +    }
> +
>      /* move the outputs in the correct register if needed */
>      for(i = 0; i < nb_oargs; i++) {
>          ts = arg_temp(op->args[i]);
> @@ -3239,10 +3325,12 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
>          switch (opc) {
>          case INDEX_op_mov_i32:
>          case INDEX_op_mov_i64:
> +        case INDEX_op_mov_vec:
>              tcg_reg_alloc_mov(s, op);
>              break;
>          case INDEX_op_movi_i32:
>          case INDEX_op_movi_i64:
> +        case INDEX_op_dupi_vec:
>              tcg_reg_alloc_movi(s, op);
>              break;
>          case INDEX_op_insn_start:
> diff --git a/tcg/README b/tcg/README
> index 03bfb6acd4..f4695307bd 100644
> --- a/tcg/README
> +++ b/tcg/README
> @@ -503,6 +503,55 @@ of the memory access.
>  For a 32-bit host, qemu_ld/st_i64 is guaranteed to only be used with a
>  64-bit memory access specified in flags.
>
> +********* Host vector operations
> +
> +All of the vector ops have two parameters, TCGOP_VECL & TCGOP_VECE.
> +The former specifies the length of the vector in log2 64-bit units; the
> +latter specifies the length of the element (if applicable) in log2 8-bit units.
> +E.g. VECL=1 -> 64 << 1 -> v128, and VECE=2 -> 1 << 2 -> i32.
> +
> +* mov_vec   v0, v1
> +* ld_vec    v0, t1
> +* st_vec    v0, t1
> +
> +  Move, load and store.
> +
> +* dup_vec  v0, r1
> +
> +  Duplicate the low N bits of R1 into VECL/VECE copies across V0.
> +
> +* dupi_vec v0, c
> +
> +  Similarly, for a constant.
> +  Smaller values will be replicated to host register size by the expanders.
> +
> +* dup2_vec v0, r1, r2
> +
> +  Duplicate r2:r1 into VECL/64 copies across V0.  This opcode is
> +  only present for 32-bit hosts.
> +
> +* add_vec   v0, v1, v2
> +
> +  v0 = v1 + v2, in elements across the vector.
> +
> +* sub_vec   v0, v1, v2
> +
> +  Similarly, v0 = v1 - v2.
> +
> +* neg_vec   v0, v1
> +
> +  Similarly, v0 = -v1.
> +
> +* and_vec   v0, v1, v2
> +* or_vec    v0, v1, v2
> +* xor_vec   v0, v1, v2
> +* andc_vec  v0, v1, v2
> +* orc_vec   v0, v1, v2
> +* not_vec   v0, v1
> +
> +  Similarly, logical operations with and without complement.
> +  Note that VECE is unused.
> +
>  *********
>
>  Note 1: Some shortcuts are defined when the last operand is known to be


--
Alex Bennée

next prev parent reply	other threads:[~2018-02-06  8:53 UTC|newest]

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-01-26  4:57 [Qemu-devel] [PATCH v11 00/20] tcg: generic vector operations Richard Henderson
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 01/20] tcg: Allow multiple word entries into the constant pool Richard Henderson
2018-02-06  8:51   ` Alex Bennée
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 02/20] tcg: Add types and basic operations for host vectors Richard Henderson
2018-02-06  8:53   ` Alex Bennée [this message]
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 03/20] tcg: Standardize integral arguments to expanders Richard Henderson
2018-02-06  8:57   ` Alex Bennée
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 04/20] tcg: Add generic vector expanders Richard Henderson
2018-02-06 10:59   ` Alex Bennée
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 05/20] tcg: Add generic vector ops for constant shifts Richard Henderson
2018-02-06 11:00   ` Alex Bennée
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 06/20] tcg: Add generic vector ops for comparisons Richard Henderson
2018-02-06 11:01   ` Alex Bennée
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 07/20] tcg: Add generic vector ops for multiplication Richard Henderson
2018-02-06 11:02   ` Alex Bennée
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 08/20] tcg: Add generic helpers for saturating arithmetic Richard Henderson
2018-02-06 11:03   ` Alex Bennée
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 09/20] tcg: Add generic vector helpers with a scalar operand Richard Henderson
2018-02-06 11:04   ` Alex Bennée
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 10/20] tcg/optimize: Handle vector opcodes during optimize Richard Henderson
2018-02-06 11:07   ` Alex Bennée
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 11/20] target/arm: Align vector registers Richard Henderson
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 12/20] target/arm: Use vector infrastructure for aa64 add/sub/logic Richard Henderson
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 13/20] target/arm: Use vector infrastructure for aa64 mov/not/neg Richard Henderson
2018-02-06 11:08   ` Alex Bennée
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 14/20] target/arm: Use vector infrastructure for aa64 dup/movi Richard Henderson
2018-02-06 11:09   ` Alex Bennée
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 15/20] target/arm: Use vector infrastructure for aa64 constant shifts Richard Henderson
2018-02-05 11:14   ` Peter Maydell
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 16/20] target/arm: Use vector infrastructure for aa64 compares Richard Henderson
2018-02-06 11:10   ` Alex Bennée
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 17/20] target/arm: Use vector infrastructure for aa64 multiplies Richard Henderson
2018-02-06 11:11   ` Alex Bennée
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 18/20] target/arm: Use vector infrastructure for aa64 orr/bic immediate Richard Henderson
2018-02-06 11:13   ` Alex Bennée
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 19/20] tcg/i386: Add vector operations Richard Henderson
2018-01-26  4:57 ` [Qemu-devel] [PATCH v11 20/20] tcg/aarch64: " Richard Henderson
2018-02-06 11:15   ` Alex Bennée
2018-01-26 17:25 ` [Qemu-devel] [PATCH v11 00/20] tcg: generic " no-reply
2018-02-06 11:24 ` Alex Bennée
2018-02-06 12:07   ` Philippe Mathieu-Daudé
2018-02-06 12:36     ` Alex Bennée
2018-02-06 16:24 ` Alex Bennée
2018-02-06 20:57   ` Alex Bennée

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87zi4mwlbi.fsf@linaro.org \
    --to=alex.bennee@linaro.org \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.