From: Kirill Batuzov <batuzovk@ispras.ru>
To: qemu-devel@nongnu.org
Cc: "Richard Henderson" <rth@twiddle.net>,
	"Paolo Bonzini" <pbonzini@redhat.com>,
	"Peter Crosthwaite" <crosthwaite.peter@gmail.com>,
	"Peter Maydell" <peter.maydell@linaro.org>,
	"Andrzej Zaborowski" <balrogg@gmail.com>,
	"Alex Bennée" <alex.bennee@linaro.org>,
	"Kirill Batuzov" <batuzovk@ispras.ru>
Subject: [Qemu-devel] [PATCH v2.1 08/21] tcg: add vector addition operations
Date: Thu,  2 Feb 2017 17:34:46 +0300
Message-ID: <1486046099-17726-9-git-send-email-batuzovk@ispras.ru>
In-Reply-To: <1486046099-17726-1-git-send-email-batuzovk@ispras.ru>

Signed-off-by: Kirill Batuzov <batuzovk@ispras.ru>
---

Support for representing a v128 addition as two v64 additions has been added.
As a result, the GEN_VECT_WRAPPER_HALVES macro was introduced. It is larger and
more complicated than the original GEN_VECT_WRAPPER (which is still used for
v64 additions, because they have no half operations: there are no v32
additions).

GEN_VECT_WRAPPER_HALVES seems to grow fast (in size and complexity) with each
supported representation. Calling tcg_gen_add_<smaller_size> may not be
desirable, because the last-resort fallback code is better generated for the
whole vector: it requires fewer additional operations that way. For example, a
whole-vector fallback sets up the result and argument pointers once, while
falling back separately for each half would pay that setup cost twice.

Some additional performance optimization can be achieved by writing
tcg_gen_internal_<operation> by hand for some cases (for example, add_i8x16).
Such a function would still operate on memory locations, but would use 64-bit
scalar additions with some bit masking, as Richard suggested in the v1
discussion. This series is focused on infrastructure rather than on optimizing
particular instructions, so that optimization is not included yet.
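
For reference, a minimal sketch of that masking trick (plain C on host values,
not the generated TCG opcode sequence; the helper name is made up for
illustration):

    /* Add 8 packed uint8_t lanes inside a uint64_t; carries must not
       cross lane boundaries. */
    static inline uint64_t add_i8x8_swar(uint64_t a, uint64_t b)
    {
        const uint64_t msb = 0x8080808080808080ULL;
        /* Clear the top bit of each lane so the 7-bit partial sums cannot
           carry into the neighbouring lane, then recompute each top bit as
           a_top ^ b_top ^ carry_from_the_low_bits. */
        return ((a & ~msb) + (b & ~msb)) ^ ((a ^ b) & msb);
    }

Expressed in TCG ops this is a handful of and/add/xor operations per 64-bit
chunk instead of 16 byte-sized loads, additions and stores.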

---
 tcg/tcg-op.c  |  64 ++++++++++++++++++++++
 tcg/tcg-op.h  | 167 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 tcg/tcg-opc.h |  12 +++++
 tcg/tcg.c     |  12 +++++
 tcg/tcg.h     |  43 +++++++++++++++
 5 files changed, 298 insertions(+)

diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 95a39b7..8a19eee 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -3038,3 +3038,67 @@ static void tcg_gen_mov2_i64(TCGv_i64 r, TCGv_i64 a, TCGv_i64 b)
 GEN_ATOMIC_HELPER(xchg, mov2, 0)
 
 #undef GEN_ATOMIC_HELPER
+
+/* Find a memory location for a 128-bit TCG variable. */
+void tcg_v128_to_ptr(TCGv_v128 tmp, TCGv_ptr base, int slot,
+                     TCGv_ptr *real_base, intptr_t *real_offset, int is_read)
+{
+    int idx = GET_TCGV_V128(tmp);
+    assert(idx >= 0 && idx < tcg_ctx.nb_temps);
+    if (idx < tcg_ctx.nb_globals) {
+        /* Globals use their locations within CPUArchState. */
+        int env = GET_TCGV_PTR(tcg_ctx.tcg_env);
+        TCGTemp *ts_env = &tcg_ctx.temps[env];
+        TCGTemp *ts_arg = &tcg_ctx.temps[idx];
+
+        /* Sanity checks: a global's memory location must be addressed
+           relative to ENV. */
+        assert(ts_env->val_type == TEMP_VAL_REG &&
+               ts_env == ts_arg->mem_base &&
+               ts_arg->mem_allocated);
+
+        *real_base = tcg_ctx.tcg_env;
+        *real_offset = ts_arg->mem_offset;
+    } else {
+        /* Temporaries use swap space in TCGContext. Since we already have
+           a 128-bit temporary we'll assume that the target supports 128-bit
+           loads and stores. */
+        *real_base = base;
+        *real_offset = slot * 16;
+        if (is_read) {
+            tcg_gen_st_v128(tmp, base, slot * 16);
+        }
+    }
+}
+
+/* Find a memory location for a 64-bit vector TCG variable. */
+void tcg_v64_to_ptr(TCGv_v64 tmp, TCGv_ptr base, int slot,
+                    TCGv_ptr *real_base, intptr_t *real_offset, int is_read)
+{
+    int idx = GET_TCGV_V64(tmp);
+    assert(idx >= 0 && idx < tcg_ctx.nb_temps);
+    if (idx < tcg_ctx.nb_globals) {
+        /* Globals use their locations within CPUArchState. */
+        int env = GET_TCGV_PTR(tcg_ctx.tcg_env);
+        TCGTemp *ts_env = &tcg_ctx.temps[env];
+        TCGTemp *ts_arg = &tcg_ctx.temps[idx];
+
+        /* Sanity checks: a global's memory location must be addressed
+           relative to ENV. */
+        assert(ts_env->val_type == TEMP_VAL_REG &&
+               ts_env == ts_arg->mem_base &&
+               ts_arg->mem_allocated);
+
+        *real_base = tcg_ctx.tcg_env;
+        *real_offset = ts_arg->mem_offset;
+    } else {
+        /* Temporaries use swap space in TCGContext. Since this is a 64-bit
+           vector temporary we'll assume that the target supports 64-bit
+           loads and stores. */
+        *real_base = base;
+        *real_offset = slot * 16;
+        if (is_read) {
+            tcg_gen_st_v64(tmp, base, slot * 16);
+        }
+    }
+}
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 250493b..3727be7 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -1195,6 +1195,10 @@ void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
     tcg_gen_add_i32(TCGV_PTR_TO_NAT(R), TCGV_PTR_TO_NAT(A), TCGV_PTR_TO_NAT(B))
 # define tcg_gen_addi_ptr(R, A, B) \
     tcg_gen_addi_i32(TCGV_PTR_TO_NAT(R), TCGV_PTR_TO_NAT(A), (B))
+# define tcg_gen_mov_ptr(R, B) \
+    tcg_gen_mov_i32(TCGV_PTR_TO_NAT(R), TCGV_PTR_TO_NAT(B))
+# define tcg_gen_movi_ptr(R, B) \
+    tcg_gen_movi_i32(TCGV_PTR_TO_NAT(R), (B))
 # define tcg_gen_ext_i32_ptr(R, A) \
     tcg_gen_mov_i32(TCGV_PTR_TO_NAT(R), (A))
 #else
@@ -1206,6 +1210,169 @@ void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
     tcg_gen_add_i64(TCGV_PTR_TO_NAT(R), TCGV_PTR_TO_NAT(A), TCGV_PTR_TO_NAT(B))
 # define tcg_gen_addi_ptr(R, A, B) \
     tcg_gen_addi_i64(TCGV_PTR_TO_NAT(R), TCGV_PTR_TO_NAT(A), (B))
+# define tcg_gen_mov_ptr(R, B) \
+    tcg_gen_mov_i64(TCGV_PTR_TO_NAT(R), TCGV_PTR_TO_NAT(B))
+# define tcg_gen_movi_ptr(R, B) \
+    tcg_gen_movi_i64(TCGV_PTR_TO_NAT(R), (B))
 # define tcg_gen_ext_i32_ptr(R, A) \
     tcg_gen_ext_i32_i64(TCGV_PTR_TO_NAT(R), (A))
 #endif /* UINTPTR_MAX == UINT32_MAX */
+
+/*****************************************/
+/* 64-bit and 128-bit vector arithmetic. */
+
+/* Find a memory location for 128-bit TCG variable. */
+void tcg_v128_to_ptr(TCGv_v128 tmp, TCGv_ptr base, int slot,
+                     TCGv_ptr *real_base, intptr_t *real_offset, int is_read);
+/* Find a memory location for 64-bit vector TCG variable. */
+void tcg_v64_to_ptr(TCGv_v64 tmp, TCGv_ptr base, int slot,
+                    TCGv_ptr *real_base, intptr_t *real_offset, int is_read);
+
+#define VTYPE(width) glue(TCG_TYPE_V, width)
+#define TEMP_TYPE(arg, temp_type) \
+            tcg_ctx.temps[glue(GET_TCGV_, temp_type)(arg)].type
+
+#define GEN_VECT_WRAPPER_HALVES(op, width, half_op, half_width, func)        \
+    static inline void glue(tcg_gen_, op)(glue(TCGv_v, width) res,           \
+                                            glue(TCGv_v, width) arg1,        \
+                                            glue(TCGv_v, width) arg2)        \
+    {                                                                        \
+        if (glue(TCG_TARGET_HAS_, op)) {                                     \
+            glue(tcg_gen_op3_v, width)(glue(INDEX_op_, op), res, arg1,       \
+                                       arg2);                                \
+        } else if (TEMP_TYPE(res, glue(V, width)) == VTYPE(half_width) &&    \
+                   glue(TCG_TARGET_HAS_, half_op)) {                         \
+            glue(TCGv_v, half_width) res_lo, res_hi, arg1_lo, arg1_hi,       \
+                                     arg2_lo, arg2_hi;                       \
+            res_lo = glue(tcg_temp_low_half_v, width)(res);                  \
+            res_hi = glue(tcg_temp_high_half_v, width)(res);                 \
+            arg1_lo = glue(tcg_temp_low_half_v, width)(arg1);                \
+            arg1_hi = glue(tcg_temp_high_half_v, width)(arg1);               \
+            arg2_lo = glue(tcg_temp_low_half_v, width)(arg2);                \
+            arg2_hi = glue(tcg_temp_high_half_v, width)(arg2);               \
+            glue(tcg_gen_op3_v, half_width)(glue(INDEX_op_, half_op),        \
+                                            res_lo, arg1_lo, arg2_lo);       \
+            glue(tcg_gen_op3_v, half_width)(glue(INDEX_op_, half_op),        \
+                                            res_hi, arg1_hi, arg2_hi);       \
+        } else {                                                             \
+            TCGv_ptr base =                                                  \
+                        MAKE_TCGV_PTR(tcg_ctx.frame_temp - tcg_ctx.temps);   \
+            TCGv_ptr t1 = tcg_temp_new_ptr();                                \
+            TCGv_ptr t2 = tcg_temp_new_ptr();                                \
+            TCGv_ptr t3 = tcg_temp_new_ptr();                                \
+            TCGv_ptr arg1p, arg2p, resp;                                     \
+            intptr_t arg1of, arg2of, resof;                                  \
+                                                                             \
+            glue(glue(tcg_v, width), _to_ptr)(arg1, base, 1,                 \
+                                            &arg1p, &arg1of, 1);             \
+            glue(glue(tcg_v, width), _to_ptr)(arg2, base, 2,                 \
+                                            &arg2p, &arg2of, 1);             \
+            glue(glue(tcg_v, width), _to_ptr)(res, base, 0, &resp, &resof,   \
+                                              0);                            \
+                                                                             \
+            tcg_gen_addi_ptr(t1, resp, resof);                               \
+            tcg_gen_addi_ptr(t2, arg1p, arg1of);                             \
+            tcg_gen_addi_ptr(t3, arg2p, arg2of);                             \
+            func(t1, t2, t3);                                                \
+                                                                             \
+            if ((intptr_t)res >= tcg_ctx.nb_globals) {                       \
+                glue(tcg_gen_ld_v, width)(res, base, 0);                     \
+            }                                                                \
+                                                                             \
+            tcg_temp_free_ptr(t1);                                           \
+            tcg_temp_free_ptr(t2);                                           \
+            tcg_temp_free_ptr(t3);                                           \
+        }                                                                    \
+    }
+
+#define GEN_VECT_WRAPPER(op, width, func)                                    \
+    static inline void glue(tcg_gen_, op)(glue(TCGv_v, width) res,           \
+                                            glue(TCGv_v, width) arg1,        \
+                                            glue(TCGv_v, width) arg2)        \
+    {                                                                        \
+        if (glue(TCG_TARGET_HAS_, op)) {                                     \
+            glue(tcg_gen_op3_v, width)(glue(INDEX_op_, op), res, arg1,       \
+                                       arg2);                                \
+        } else {                                                             \
+            TCGv_ptr base =                                                  \
+                        MAKE_TCGV_PTR(tcg_ctx.frame_temp - tcg_ctx.temps);   \
+            TCGv_ptr t1 = tcg_temp_new_ptr();                                \
+            TCGv_ptr t2 = tcg_temp_new_ptr();                                \
+            TCGv_ptr t3 = tcg_temp_new_ptr();                                \
+            TCGv_ptr arg1p, arg2p, resp;                                     \
+            intptr_t arg1of, arg2of, resof;                                  \
+                                                                             \
+            glue(glue(tcg_v, width), _to_ptr)(arg1, base, 1,                 \
+                                            &arg1p, &arg1of, 1);             \
+            glue(glue(tcg_v, width), _to_ptr)(arg2, base, 2,                 \
+                                            &arg2p, &arg2of, 1);             \
+            glue(glue(tcg_v, width), _to_ptr)(res, base, 0, &resp, &resof,   \
+                                              0);                            \
+                                                                             \
+            tcg_gen_addi_ptr(t1, resp, resof);                               \
+            tcg_gen_addi_ptr(t2, arg1p, arg1of);                             \
+            tcg_gen_addi_ptr(t3, arg2p, arg2of);                             \
+            func(t1, t2, t3);                                                \
+                                                                             \
+            if ((intptr_t)res >= tcg_ctx.nb_globals) {                       \
+                glue(tcg_gen_ld_v, width)(res, base, 0);                     \
+            }                                                                \
+                                                                             \
+            tcg_temp_free_ptr(t1);                                           \
+            tcg_temp_free_ptr(t2);                                           \
+            tcg_temp_free_ptr(t3);                                           \
+        }                                                                    \
+    }
+#define TCG_INTERNAL_OP(name, N, size, ld, st, op, type)                     \
+    static inline void glue(tcg_internal_, name)(TCGv_ptr resp,              \
+                                                 TCGv_ptr arg1p,             \
+                                                 TCGv_ptr arg2p)             \
+    {                                                                        \
+        int i;                                                               \
+        glue(TCGv_, type) tmp1, tmp2;                                        \
+                                                                             \
+        tmp1 = glue(tcg_temp_new_, type)();                                  \
+        tmp2 = glue(tcg_temp_new_, type)();                                  \
+                                                                             \
+        for (i = 0; i < N; i++) {                                            \
+            glue(tcg_gen_, ld)(tmp1, arg1p, i * size);                       \
+            glue(tcg_gen_, ld)(tmp2, arg2p, i * size);                       \
+            glue(tcg_gen_, op)(tmp1, tmp1, tmp2);                            \
+            glue(tcg_gen_, st)(tmp1, resp, i * size);                        \
+        }                                                                    \
+                                                                             \
+        glue(tcg_temp_free_, type)(tmp1);                                    \
+        glue(tcg_temp_free_, type)(tmp2);                                    \
+    }
+
+#define TCG_INTERNAL_OP_8(name, N, op) \
+    TCG_INTERNAL_OP(name, N, 1, ld8u_i32, st8_i32, op, i32)
+#define TCG_INTERNAL_OP_16(name, N, op) \
+    TCG_INTERNAL_OP(name, N, 2, ld16u_i32, st16_i32, op, i32)
+#define TCG_INTERNAL_OP_32(name, N, op) \
+    TCG_INTERNAL_OP(name, N, 4, ld_i32, st_i32, op, i32)
+#define TCG_INTERNAL_OP_64(name, N, op) \
+    TCG_INTERNAL_OP(name, N, 8, ld_i64, st_i64, op, i64)
+
+TCG_INTERNAL_OP_8(add_i8x16, 16, add_i32)
+TCG_INTERNAL_OP_16(add_i16x8, 8, add_i32)
+TCG_INTERNAL_OP_32(add_i32x4, 4, add_i32)
+TCG_INTERNAL_OP_64(add_i64x2, 2, add_i64)
+
+TCG_INTERNAL_OP_8(add_i8x8, 8, add_i32)
+TCG_INTERNAL_OP_16(add_i16x4, 4, add_i32)
+TCG_INTERNAL_OP_32(add_i32x2, 2, add_i32)
+TCG_INTERNAL_OP_64(add_i64x1, 1, add_i64)
+
+GEN_VECT_WRAPPER_HALVES(add_i8x16, 128, add_i8x8, 64, tcg_internal_add_i8x16)
+GEN_VECT_WRAPPER_HALVES(add_i16x8, 128, add_i16x4, 64, tcg_internal_add_i16x8)
+GEN_VECT_WRAPPER_HALVES(add_i32x4, 128, add_i32x2, 64, tcg_internal_add_i32x4)
+GEN_VECT_WRAPPER_HALVES(add_i64x2, 128, add_i64x1, 64, tcg_internal_add_i64x2)
+
+GEN_VECT_WRAPPER(add_i8x8, 64, tcg_internal_add_i8x8)
+GEN_VECT_WRAPPER(add_i16x4, 64, tcg_internal_add_i16x4)
+GEN_VECT_WRAPPER(add_i32x2, 64, tcg_internal_add_i32x2)
+GEN_VECT_WRAPPER(add_i64x1, 64, tcg_internal_add_i64x1)
+
+#undef VTYPE
+#undef TEMP_TYPE
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index 2365c97..4c8f195 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -206,6 +206,18 @@ DEF(ld_v128, 1, 1, 1, IMPL128)
 DEF(st_v64, 0, 2, 1, IMPLV64)
 DEF(ld_v64, 1, 1, 1, IMPLV64)
 
+/* 128-bit vector arith */
+DEF(add_i8x16, 1, 2, 0, IMPL128 | IMPL(TCG_TARGET_HAS_add_i8x16))
+DEF(add_i16x8, 1, 2, 0, IMPL128 | IMPL(TCG_TARGET_HAS_add_i16x8))
+DEF(add_i32x4, 1, 2, 0, IMPL128 | IMPL(TCG_TARGET_HAS_add_i32x4))
+DEF(add_i64x2, 1, 2, 0, IMPL128 | IMPL(TCG_TARGET_HAS_add_i64x2))
+
+/* 64-bit vector arith */
+DEF(add_i8x8, 1, 2, 0, IMPLV64 | IMPL(TCG_TARGET_HAS_add_i8x8))
+DEF(add_i16x4, 1, 2, 0, IMPLV64 | IMPL(TCG_TARGET_HAS_add_i16x4))
+DEF(add_i32x2, 1, 2, 0, IMPLV64 | IMPL(TCG_TARGET_HAS_add_i32x2))
+DEF(add_i64x1, 1, 2, 0, IMPLV64 | IMPL(TCG_TARGET_HAS_add_i64x1))
+
 /* QEMU specific */
 DEF(insn_start, 0, 0, TLADDR_ARGS * TARGET_INSN_START_WORDS,
     TCG_OPF_NOT_PRESENT)
diff --git a/tcg/tcg.c b/tcg/tcg.c
index a8df040..a23f739 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -712,6 +712,18 @@ TCGv_v128 tcg_temp_new_internal_v128(int temp_local)
     return MAKE_TCGV_V128(idx);
 }
 
+int tcg_temp_half_internal(int arg, TCGType type, int is_high)
+{
+    const TCGTemp *ts = &tcg_ctx.temps[arg];
+    tcg_debug_assert(ts->type != ts->base_type);
+    tcg_debug_assert(tcg_type_size(type) > tcg_type_size(ts->type));
+    tcg_debug_assert(tcg_type_size(type) <= tcg_type_size(ts->base_type));
+    if (is_high) {
+        arg += tcg_type_size(type) / tcg_type_size(ts->type) / 2;
+    }
+    return arg;
+}
+
 static void tcg_temp_free_internal(int idx)
 {
     TCGContext *s = &tcg_ctx;
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 01299cc..fd43f15 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -156,6 +156,34 @@ typedef uint64_t TCGRegSet;
 #define TCG_TARGET_HAS_rem_i64          0
 #endif
 
+/* 64-bit vector */
+#ifndef TCG_TARGET_HAS_add_i8x8
+#define TCG_TARGET_HAS_add_i8x8         0
+#endif
+#ifndef TCG_TARGET_HAS_add_i16x4
+#define TCG_TARGET_HAS_add_i16x4        0
+#endif
+#ifndef TCG_TARGET_HAS_add_i32x2
+#define TCG_TARGET_HAS_add_i32x2        0
+#endif
+#ifndef TCG_TARGET_HAS_add_i64x1
+#define TCG_TARGET_HAS_add_i64x1        0
+#endif
+
+/* 128-bit vector */
+#ifndef TCG_TARGET_HAS_add_i8x16
+#define TCG_TARGET_HAS_add_i8x16        0
+#endif
+#ifndef TCG_TARGET_HAS_add_i16x8
+#define TCG_TARGET_HAS_add_i16x8        0
+#endif
+#ifndef TCG_TARGET_HAS_add_i32x4
+#define TCG_TARGET_HAS_add_i32x4        0
+#endif
+#ifndef TCG_TARGET_HAS_add_i64x2
+#define TCG_TARGET_HAS_add_i64x2        0
+#endif
+
 /* For 32-bit targets, some sort of unsigned widening multiply is required.  */
 #if TCG_TARGET_REG_BITS == 32 \
     && !(defined(TCG_TARGET_HAS_mulu2_i32) \
@@ -761,6 +789,7 @@ struct TCGContext {
     void *code_gen_buffer;
     size_t code_gen_buffer_size;
     void *code_gen_ptr;
+    uint8_t v128_swap[16 * 3];
 
     /* Threshold to flush the translated code buffer.  */
     void *code_gen_highwater;
@@ -938,6 +967,20 @@ static inline TCGv_v128 tcg_temp_local_new_v128(void)
     return tcg_temp_new_internal_v128(1);
 }
 
+int tcg_temp_half_internal(int arg, TCGType type, int is_high);
+
+static inline TCGv_v64 tcg_temp_low_half_v128(TCGv_v128 arg)
+{
+    int idx = tcg_temp_half_internal(GET_TCGV_V128(arg), TCG_TYPE_V128, 0);
+    return MAKE_TCGV_V64(idx);
+}
+
+static inline TCGv_v64 tcg_temp_high_half_v128(TCGv_v128 arg)
+{
+    int idx = tcg_temp_half_internal(GET_TCGV_V128(arg), TCG_TYPE_V128, 1);
+    return MAKE_TCGV_V64(idx);
+}
+
 #if defined(CONFIG_DEBUG_TCG)
 /* If you call tcg_clear_temp_count() at the start of a section of
  * code which is not supposed to leak any TCG temporaries, then
-- 
2.1.4

Thread overview: 28+ messages
2017-02-02 14:34 [Qemu-devel] [PATCH v2.1 00/20] Emulate guest vector operations with host vector operations Kirill Batuzov
2017-02-02 14:34 ` [Qemu-devel] [PATCH v2.1 01/21] tcg: add support for 128bit vector type Kirill Batuzov
2017-02-02 14:34 ` [Qemu-devel] [PATCH v2.1 02/21] tcg: add support for 64bit " Kirill Batuzov
2017-02-02 14:34 ` [Qemu-devel] [PATCH v2.1 03/21] tcg: support representing vector type with smaller vector or scalar types Kirill Batuzov
2017-02-02 14:34 ` [Qemu-devel] [PATCH v2.1 04/21] tcg: add ld_v128, ld_v64, st_v128 and st_v64 opcodes Kirill Batuzov
2017-02-02 14:34 ` [Qemu-devel] [PATCH v2.1 05/21] tcg: add simple alias analysis Kirill Batuzov
2017-02-02 14:34 ` [Qemu-devel] [PATCH v2.1 06/21] tcg: use results of alias analysis in liveness analysis Kirill Batuzov
2017-02-02 14:34 ` [Qemu-devel] [PATCH v2.1 07/21] tcg: allow globals to overlap Kirill Batuzov
2017-02-02 14:34 ` Kirill Batuzov [this message]
2017-02-02 14:34 ` [Qemu-devel] [PATCH v2.1 09/21] target/arm: support access to vector guest registers as globals Kirill Batuzov
2017-02-02 14:34 ` [Qemu-devel] [PATCH v2.1 10/21] target/arm: use vector opcode to handle vadd.<size> instruction Kirill Batuzov
2017-02-09 13:19   ` Philippe Mathieu-Daudé
2017-02-02 14:34 ` [Qemu-devel] [PATCH v2.1 11/21] tcg/i386: add support for vector opcodes Kirill Batuzov
2017-02-02 14:34 ` [Qemu-devel] [PATCH v2.1 12/21] tcg/i386: support 64-bit vector operations Kirill Batuzov
2017-02-02 14:34 ` [Qemu-devel] [PATCH v2.1 13/21] tcg/i386: support remaining vector addition operations Kirill Batuzov
     [not found]   ` <2089cbe3-0e9b-fae2-0e35-224f2765dc28@amsat.org>
     [not found]     ` <32a902a1-e8c7-c2f7-ac66-148e02ee0b2d@amsat.org>
2017-02-21 13:29       ` Kirill Batuzov
2017-02-21 16:21         ` Alex Bennée
2017-02-02 14:34 ` [Qemu-devel] [PATCH v2.1 14/21] tcg: do not rely on exact values of MO_BSWAP or MO_SIGN in backend Kirill Batuzov
2017-05-05 13:59   ` Alex Bennée
2017-02-02 14:34 ` [Qemu-devel] [PATCH v2.1 15/21] target/aarch64: do not check for non-existent TCGMemOp Kirill Batuzov
2017-02-02 14:34 ` [Qemu-devel] [PATCH v2.1 16/21] tcg: introduce new TCGMemOp - MO_128 Kirill Batuzov
2017-02-02 14:34 ` [Qemu-devel] [PATCH v2.1 17/21] tcg: introduce qemu_ld_v128 and qemu_st_v128 opcodes Kirill Batuzov
2017-02-02 14:34 ` [Qemu-devel] [PATCH v2.1 18/21] softmmu: create helpers for vector loads Kirill Batuzov
2017-02-02 14:34 ` [Qemu-devel] [PATCH v2.1 19/21] tcg/i386: add support for qemu_ld_v128/qemu_st_v128 ops Kirill Batuzov
2017-02-02 14:34 ` [Qemu-devel] [PATCH v2.1 20/21] target/arm: load two consecutive 64-bits vector regs as a 128-bit vector reg Kirill Batuzov
2017-02-02 14:34 ` [Qemu-devel] [PATCH v2.1 21/21] tcg/README: update README to include information about vector opcodes Kirill Batuzov
2017-02-02 15:25 ` [Qemu-devel] [PATCH v2.1 00/20] Emulate guest vector operations with host vector operations no-reply
2017-02-21 12:19 ` Kirill Batuzov
