* [PATCH RESEND v5 01/57] target/loongarch: Renamed lsx*.c to vec* .c
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-07 16:37 ` Richard Henderson
2023-09-07 8:31 ` [PATCH RESEND v5 02/57] target/loongarch: Implement gvec_*_vl functions Song Gao
` (55 subsequent siblings)
56 siblings, 1 reply; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
Renamed lsx_helper.c to vec_helper.c and trans_lsx.c.inc to trans_vec.c.inc
so that LASX can use them.
Signed-off-by: Song Gao <gaosong@loongson.cn>
---
target/loongarch/translate.c | 2 +-
target/loongarch/{lsx_helper.c => vec_helper.c} | 2 +-
.../loongarch/insn_trans/{trans_lsx.c.inc => trans_vec.c.inc} | 2 +-
target/loongarch/meson.build | 2 +-
4 files changed, 4 insertions(+), 4 deletions(-)
rename target/loongarch/{lsx_helper.c => vec_helper.c} (99%)
rename target/loongarch/insn_trans/{trans_lsx.c.inc => trans_vec.c.inc} (99%)
diff --git a/target/loongarch/translate.c b/target/loongarch/translate.c
index fd393ed76d..288727181b 100644
--- a/target/loongarch/translate.c
+++ b/target/loongarch/translate.c
@@ -261,7 +261,7 @@ static uint64_t make_address_pc(DisasContext *ctx, uint64_t addr)
#include "insn_trans/trans_fmemory.c.inc"
#include "insn_trans/trans_branch.c.inc"
#include "insn_trans/trans_privileged.c.inc"
-#include "insn_trans/trans_lsx.c.inc"
+#include "insn_trans/trans_vec.c.inc"
static void loongarch_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
{
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/vec_helper.c
similarity index 99%
rename from target/loongarch/lsx_helper.c
rename to target/loongarch/vec_helper.c
index 9571f0aef0..73f0974744 100644
--- a/target/loongarch/lsx_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
- * QEMU LoongArch LSX helper functions.
+ * QEMU LoongArch vector helper functions.
*
* Copyright (c) 2022-2023 Loongson Technology Corporation Limited
*/
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
similarity index 99%
rename from target/loongarch/insn_trans/trans_lsx.c.inc
rename to target/loongarch/insn_trans/trans_vec.c.inc
index 5fbf2718f7..aed5bac5bc 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
- * LSX translate functions
+ * LoongArch vector translate functions
* Copyright (c) 2022-2023 Loongson Technology Corporation Limited
*/
diff --git a/target/loongarch/meson.build b/target/loongarch/meson.build
index b7a27df5a9..7fbf045a5d 100644
--- a/target/loongarch/meson.build
+++ b/target/loongarch/meson.build
@@ -11,7 +11,7 @@ loongarch_tcg_ss.add(files(
'op_helper.c',
'translate.c',
'gdbstub.c',
- 'lsx_helper.c',
+ 'vec_helper.c',
))
loongarch_tcg_ss.add(zlib)
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* Re: [PATCH RESEND v5 01/57] target/loongarch: Renamed lsx*.c to vec* .c
2023-09-07 8:31 ` [PATCH RESEND v5 01/57] target/loongarch: Renamed lsx*.c to vec* .c Song Gao
@ 2023-09-07 16:37 ` Richard Henderson
0 siblings, 0 replies; 87+ messages in thread
From: Richard Henderson @ 2023-09-07 16:37 UTC (permalink / raw)
To: Song Gao, qemu-devel; +Cc: maobibo
On 9/7/23 01:31, Song Gao wrote:
> Renamed lsx_helper.c to vec_helper.c and trans_lsx.c.inc to trans_vec.c.inc
> So LASX can used them.
>
> Signed-off-by: Song Gao<gaosong@loongson.cn>
> ---
> target/loongarch/translate.c | 2 +-
> target/loongarch/{lsx_helper.c => vec_helper.c} | 2 +-
> .../loongarch/insn_trans/{trans_lsx.c.inc => trans_vec.c.inc} | 2 +-
> target/loongarch/meson.build | 2 +-
> 4 files changed, 4 insertions(+), 4 deletions(-)
> rename target/loongarch/{lsx_helper.c => vec_helper.c} (99%)
> rename target/loongarch/insn_trans/{trans_lsx.c.inc => trans_vec.c.inc} (99%)
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
r~
^ permalink raw reply [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 02/57] target/loongarch: Implement gvec_*_vl functions
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
2023-09-07 8:31 ` [PATCH RESEND v5 01/57] target/loongarch: Renamed lsx*.c to vec* .c Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-07 17:19 ` Richard Henderson
2023-09-07 8:31 ` [PATCH RESEND v5 03/57] target/loongarch: Use gen_helper_gvec_4_ptr for 4OP + env vector instructions Song Gao
` (54 subsequent siblings)
56 siblings, 1 reply; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
Create gvec_*_vl functions in order to hide oprsz. They are used by the
gvec_v* functions for oprsz 16, and will be used by the gvec_x* functions
for oprsz 32.
Signed-off-by: Song Gao <gaosong@loongson.cn>
---
target/loongarch/insn_trans/trans_vec.c.inc | 68 +++++++++++++--------
1 file changed, 44 insertions(+), 24 deletions(-)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index aed5bac5bc..aeeb2df41c 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -76,34 +76,58 @@ static bool gen_cv(DisasContext *ctx, arg_cv *a,
return true;
}
+static bool gvec_vvv_vl(DisasContext *ctx, arg_vvv *a,
+ uint32_t oprsz, MemOp mop,
+ void (*func)(unsigned, uint32_t, uint32_t,
+ uint32_t, uint32_t, uint32_t))
+{
+ uint32_t vd_ofs = vec_full_offset(a->vd);
+ uint32_t vj_ofs = vec_full_offset(a->vj);
+ uint32_t vk_ofs = vec_full_offset(a->vk);
+
+ func(mop, vd_ofs, vj_ofs, vk_ofs, oprsz, ctx->vl / 8);
+ return true;
+}
+
static bool gvec_vvv(DisasContext *ctx, arg_vvv *a, MemOp mop,
void (*func)(unsigned, uint32_t, uint32_t,
uint32_t, uint32_t, uint32_t))
{
- uint32_t vd_ofs, vj_ofs, vk_ofs;
-
CHECK_SXE;
+ return gvec_vvv_vl(ctx, a, 16, mop, func);
+}
- vd_ofs = vec_full_offset(a->vd);
- vj_ofs = vec_full_offset(a->vj);
- vk_ofs = vec_full_offset(a->vk);
- func(mop, vd_ofs, vj_ofs, vk_ofs, 16, ctx->vl/8);
+static bool gvec_vv_vl(DisasContext *ctx, arg_vv *a,
+ uint32_t oprsz, MemOp mop,
+ void (*func)(unsigned, uint32_t, uint32_t,
+ uint32_t, uint32_t))
+{
+ uint32_t vd_ofs = vec_full_offset(a->vd);
+ uint32_t vj_ofs = vec_full_offset(a->vj);
+
+ func(mop, vd_ofs, vj_ofs, oprsz, ctx->vl / 8);
return true;
}
+
static bool gvec_vv(DisasContext *ctx, arg_vv *a, MemOp mop,
void (*func)(unsigned, uint32_t, uint32_t,
uint32_t, uint32_t))
{
- uint32_t vd_ofs, vj_ofs;
-
CHECK_SXE;
+ return gvec_vv_vl(ctx, a, 16, mop, func);
+}
- vd_ofs = vec_full_offset(a->vd);
- vj_ofs = vec_full_offset(a->vj);
+static bool gvec_vv_i_vl(DisasContext *ctx, arg_vv_i *a,
+ uint32_t oprsz, MemOp mop,
+ void (*func)(unsigned, uint32_t, uint32_t,
+ int64_t, uint32_t, uint32_t))
+{
+ uint32_t vd_ofs = vec_full_offset(a->vd);
+ uint32_t vj_ofs = vec_full_offset(a->vj);
- func(mop, vd_ofs, vj_ofs, 16, ctx->vl/8);
+ func(mop, vd_ofs, vj_ofs, a->imm, oprsz, ctx->vl / 8);
return true;
}
@@ -111,28 +135,24 @@ static bool gvec_vv_i(DisasContext *ctx, arg_vv_i *a, MemOp mop,
void (*func)(unsigned, uint32_t, uint32_t,
int64_t, uint32_t, uint32_t))
{
- uint32_t vd_ofs, vj_ofs;
-
CHECK_SXE;
+ return gvec_vv_i_vl(ctx, a, 16, mop, func);
+}
- vd_ofs = vec_full_offset(a->vd);
- vj_ofs = vec_full_offset(a->vj);
+static bool gvec_subi_vl(DisasContext *ctx, arg_vv_i *a,
+ uint32_t oprsz, MemOp mop)
+{
+ uint32_t vd_ofs = vec_full_offset(a->vd);
+ uint32_t vj_ofs = vec_full_offset(a->vj);
- func(mop, vd_ofs, vj_ofs, a->imm , 16, ctx->vl/8);
+ tcg_gen_gvec_addi(mop, vd_ofs, vj_ofs, -a->imm, oprsz, ctx->vl / 8);
return true;
}
static bool gvec_subi(DisasContext *ctx, arg_vv_i *a, MemOp mop)
{
- uint32_t vd_ofs, vj_ofs;
-
CHECK_SXE;
-
- vd_ofs = vec_full_offset(a->vd);
- vj_ofs = vec_full_offset(a->vj);
-
- tcg_gen_gvec_addi(mop, vd_ofs, vj_ofs, -a->imm, 16, ctx->vl/8);
- return true;
+ return gvec_subi_vl(ctx, a, 16, mop);
}
TRANS(vadd_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_add)
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* Re: [PATCH RESEND v5 02/57] target/loongarch: Implement gvec_*_vl functions
2023-09-07 8:31 ` [PATCH RESEND v5 02/57] target/loongarch: Implement gvec_*_vl functions Song Gao
@ 2023-09-07 17:19 ` Richard Henderson
2023-09-08 3:21 ` gaosong
0 siblings, 1 reply; 87+ messages in thread
From: Richard Henderson @ 2023-09-07 17:19 UTC (permalink / raw)
To: Song Gao, qemu-devel; +Cc: maobibo
On 9/7/23 01:31, Song Gao wrote:
> Using gvec_*_vl functions hides oprsz. We can use gvec_v* for oprsz 16.
> and gvec_v* for oprsz 32.
>
> Signed-off-by: Song Gao<gaosong@loongson.cn>
> ---
> target/loongarch/insn_trans/trans_vec.c.inc | 68 +++++++++++++--------
> 1 file changed, 44 insertions(+), 24 deletions(-)
The description above is not quite right. How about:
Create gvec_*_vl functions in order to hide oprsz.
This is used by gvec_v* functions for oprsz 16,
and will be used by gvec_x* functions for oprsz 32.
The code is correct.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
r~
^ permalink raw reply [flat|nested] 87+ messages in thread
* Re: [PATCH RESEND v5 02/57] target/loongarch: Implement gvec_*_vl functions
2023-09-07 17:19 ` Richard Henderson
@ 2023-09-08 3:21 ` gaosong
0 siblings, 0 replies; 87+ messages in thread
From: gaosong @ 2023-09-08 3:21 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: maobibo
在 2023/9/8 上午1:19, Richard Henderson 写道:
> On 9/7/23 01:31, Song Gao wrote:
>> Using gvec_*_vl functions hides oprsz. We can use gvec_v* for oprsz 16.
>> and gvec_v* for oprsz 32.
>>
>> Signed-off-by: Song Gao<gaosong@loongson.cn>
>> ---
>> target/loongarch/insn_trans/trans_vec.c.inc | 68 +++++++++++++--------
>> 1 file changed, 44 insertions(+), 24 deletions(-)
>
> The description above is not quite right. How about:
>
> Create gvec_*_vl functions in order to hide oprsz.
> This is used by gvec_v* functions for oprsz 16,
> and will be used by gvec_x* functions for oprsz 32.
>
Yes, I will correct it.
Thanks.
Song Gao
^ permalink raw reply [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 03/57] target/loongarch: Use gen_helper_gvec_4_ptr for 4OP + env vector instructions
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
2023-09-07 8:31 ` [PATCH RESEND v5 01/57] target/loongarch: Renamed lsx*.c to vec* .c Song Gao
2023-09-07 8:31 ` [PATCH RESEND v5 02/57] target/loongarch: Implement gvec_*_vl functions Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-07 17:34 ` Richard Henderson
2023-09-07 8:31 ` [PATCH RESEND v5 04/57] target/loongarch: Use gen_helper_gvec_4 for 4OP " Song Gao
` (53 subsequent siblings)
56 siblings, 1 reply; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
Signed-off-by: Song Gao <gaosong@loongson.cn>
---
target/loongarch/helper.h | 16 +++++-----
target/loongarch/vec_helper.c | 12 +++----
target/loongarch/insn_trans/trans_vec.c.inc | 35 ++++++++++++++++-----
3 files changed, 41 insertions(+), 22 deletions(-)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index ffb1e0b0bf..ead16567c2 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -528,14 +528,14 @@ DEF_HELPER_4(vfmul_d, void, env, i32, i32, i32)
DEF_HELPER_4(vfdiv_s, void, env, i32, i32, i32)
DEF_HELPER_4(vfdiv_d, void, env, i32, i32, i32)
-DEF_HELPER_5(vfmadd_s, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(vfmadd_d, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(vfmsub_s, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(vfmsub_d, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(vfnmadd_s, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(vfnmadd_d, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(vfnmsub_s, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(vfnmsub_d, void, env, i32, i32, i32, i32)
+DEF_HELPER_FLAGS_6(vfmadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_6(vfmadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_6(vfmsub_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_6(vfmsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_6(vfnmadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_6(vfnmadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_6(vfnmsub_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_6(vfnmsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_4(vfmax_s, void, env, i32, i32, i32)
DEF_HELPER_4(vfmax_d, void, env, i32, i32, i32)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index 73f0974744..3a7a620227 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -2129,14 +2129,14 @@ DO_3OP_F(vfmina_s, 32, UW, float32_minnummag)
DO_3OP_F(vfmina_d, 64, UD, float64_minnummag)
#define DO_4OP_F(NAME, BIT, E, FN, flags) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t vk, uint32_t va) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, void *va, \
+ CPULoongArchState *env, uint32_t desc) \
{ \
int i; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- VReg *Vk = &(env->fpr[vk].vreg); \
- VReg *Va = &(env->fpr[va].vreg); \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ VReg *Va = (VReg *)va; \
\
vec_clear_cause(env); \
for (i = 0; i < LSX_LEN/BIT; i++) { \
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index aeeb2df41c..85bc8670a7 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -15,6 +15,25 @@
#define CHECK_SXE
#endif
+static bool gen_vvvv_ptr_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz,
+ gen_helper_gvec_4_ptr *fn)
+{
+ tcg_gen_gvec_4_ptr(vec_full_offset(a->vd),
+ vec_full_offset(a->vj),
+ vec_full_offset(a->vk),
+ vec_full_offset(a->va),
+ cpu_env,
+ oprsz, ctx->vl / 8, oprsz, fn);
+ return true;
+}
+
+static bool gen_vvvv_ptr(DisasContext *ctx, arg_vvvv *a,
+ gen_helper_gvec_4_ptr *fn)
+{
+ CHECK_SXE;
+ return gen_vvvv_ptr_vl(ctx, a, 16, fn);
+}
+
static bool gen_vvvv(DisasContext *ctx, arg_vvvv *a,
void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32,
TCGv_i32, TCGv_i32))
@@ -3634,14 +3653,14 @@ TRANS(vfmul_d, LSX, gen_vvv, gen_helper_vfmul_d)
TRANS(vfdiv_s, LSX, gen_vvv, gen_helper_vfdiv_s)
TRANS(vfdiv_d, LSX, gen_vvv, gen_helper_vfdiv_d)
-TRANS(vfmadd_s, LSX, gen_vvvv, gen_helper_vfmadd_s)
-TRANS(vfmadd_d, LSX, gen_vvvv, gen_helper_vfmadd_d)
-TRANS(vfmsub_s, LSX, gen_vvvv, gen_helper_vfmsub_s)
-TRANS(vfmsub_d, LSX, gen_vvvv, gen_helper_vfmsub_d)
-TRANS(vfnmadd_s, LSX, gen_vvvv, gen_helper_vfnmadd_s)
-TRANS(vfnmadd_d, LSX, gen_vvvv, gen_helper_vfnmadd_d)
-TRANS(vfnmsub_s, LSX, gen_vvvv, gen_helper_vfnmsub_s)
-TRANS(vfnmsub_d, LSX, gen_vvvv, gen_helper_vfnmsub_d)
+TRANS(vfmadd_s, LSX, gen_vvvv_ptr, gen_helper_vfmadd_s)
+TRANS(vfmadd_d, LSX, gen_vvvv_ptr, gen_helper_vfmadd_d)
+TRANS(vfmsub_s, LSX, gen_vvvv_ptr, gen_helper_vfmsub_s)
+TRANS(vfmsub_d, LSX, gen_vvvv_ptr, gen_helper_vfmsub_d)
+TRANS(vfnmadd_s, LSX, gen_vvvv_ptr, gen_helper_vfnmadd_s)
+TRANS(vfnmadd_d, LSX, gen_vvvv_ptr, gen_helper_vfnmadd_d)
+TRANS(vfnmsub_s, LSX, gen_vvvv_ptr, gen_helper_vfnmsub_s)
+TRANS(vfnmsub_d, LSX, gen_vvvv_ptr, gen_helper_vfnmsub_d)
TRANS(vfmax_s, LSX, gen_vvv, gen_helper_vfmax_s)
TRANS(vfmax_d, LSX, gen_vvv, gen_helper_vfmax_d)
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* Re: [PATCH RESEND v5 03/57] target/loongarch: Use gen_helper_gvec_4_ptr for 4OP + env vector instructions
2023-09-07 8:31 ` [PATCH RESEND v5 03/57] target/loongarch: Use gen_helper_gvec_4_ptr for 4OP + env vector instructions Song Gao
@ 2023-09-07 17:34 ` Richard Henderson
2023-09-08 3:22 ` gaosong
0 siblings, 1 reply; 87+ messages in thread
From: Richard Henderson @ 2023-09-07 17:34 UTC (permalink / raw)
To: Song Gao, qemu-devel; +Cc: maobibo
On 9/7/23 01:31, Song Gao wrote:
> +static bool gen_vvvv_ptr_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz,
> + gen_helper_gvec_4_ptr *fn)
> +{
> + tcg_gen_gvec_4_ptr(vec_full_offset(a->vd),
> + vec_full_offset(a->vj),
> + vec_full_offset(a->vk),
> + vec_full_offset(a->va),
> + cpu_env,
> + oprsz, ctx->vl / 8, oprsz, fn);
^^^^^
This next-to-last argument is 'data', which is unused in this case.
Just pass 0 here.
Otherwise,
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
r~
^ permalink raw reply [flat|nested] 87+ messages in thread
* Re: [PATCH RESEND v5 03/57] target/loongarch: Use gen_helper_gvec_4_ptr for 4OP + env vector instructions
2023-09-07 17:34 ` Richard Henderson
@ 2023-09-08 3:22 ` gaosong
0 siblings, 0 replies; 87+ messages in thread
From: gaosong @ 2023-09-08 3:22 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: maobibo
在 2023/9/8 上午1:34, Richard Henderson 写道:
> On 9/7/23 01:31, Song Gao wrote:
>> +static bool gen_vvvv_ptr_vl(DisasContext *ctx, arg_vvvv *a, uint32_t
>> oprsz,
>> + gen_helper_gvec_4_ptr *fn)
>> +{
>> + tcg_gen_gvec_4_ptr(vec_full_offset(a->vd),
>> + vec_full_offset(a->vj),
>> + vec_full_offset(a->vk),
>> + vec_full_offset(a->va),
>> + cpu_env,
>> + oprsz, ctx->vl / 8, oprsz, fn);
> ^^^^^
>
> This next to last argument is 'data', which is unused for this case.
> Just use 0 here.
>
Got it, I will correct the other 6 similar patches.
Thanks.
Song Gao
^ permalink raw reply [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 04/57] target/loongarch: Use gen_helper_gvec_4 for 4OP vector instructions
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (2 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 03/57] target/loongarch: Use gen_helper_gvec_4_ptr for 4OP + env vector instructions Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-10 0:47 ` Richard Henderson
2023-09-07 8:31 ` [PATCH RESEND v5 05/57] target/loongarch: Use gen_helper_gvec_3_ptr for 3OP + env " Song Gao
` (52 subsequent siblings)
56 siblings, 1 reply; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
Signed-off-by: Song Gao <gaosong@loongson.cn>
---
target/loongarch/helper.h | 2 +-
target/loongarch/vec_helper.c | 11 +++++------
target/loongarch/insn_trans/trans_vec.c.inc | 22 ++++++++++++---------
3 files changed, 19 insertions(+), 16 deletions(-)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index ead16567c2..727ccfb32c 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -682,7 +682,7 @@ DEF_HELPER_4(vilvh_h, void, env, i32, i32, i32)
DEF_HELPER_4(vilvh_w, void, env, i32, i32, i32)
DEF_HELPER_4(vilvh_d, void, env, i32, i32, i32)
-DEF_HELPER_5(vshuf_b, void, env, i32, i32, i32, i32)
+DEF_HELPER_FLAGS_5(vshuf_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_4(vshuf_h, void, env, i32, i32, i32)
DEF_HELPER_4(vshuf_w, void, env, i32, i32, i32)
DEF_HELPER_4(vshuf_d, void, env, i32, i32, i32)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index 3a7a620227..7078c4c845 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -2899,15 +2899,14 @@ VILVH(vilvh_h, 32, H)
VILVH(vilvh_w, 64, W)
VILVH(vilvh_d, 128, D)
-void HELPER(vshuf_b)(CPULoongArchState *env,
- uint32_t vd, uint32_t vj, uint32_t vk, uint32_t va)
+void HELPER(vshuf_b)(void *vd, void *vj, void *vk, void *va, uint32_t desc)
{
int i, m;
VReg temp;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
- VReg *Vk = &(env->fpr[vk].vreg);
- VReg *Va = &(env->fpr[va].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ VReg *Vk = (VReg *)vk;
+ VReg *Va = (VReg *)va;
m = LSX_LEN/8;
for (i = 0; i < m ; i++) {
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index 85bc8670a7..6f45296987 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -34,18 +34,22 @@ static bool gen_vvvv_ptr(DisasContext *ctx, arg_vvvv *a,
return gen_vvvv_ptr_vl(ctx, a, 16, fn);
}
-static bool gen_vvvv(DisasContext *ctx, arg_vvvv *a,
- void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32,
- TCGv_i32, TCGv_i32))
+static bool gen_vvvv_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz,
+ gen_helper_gvec_4 *fn)
{
- TCGv_i32 vd = tcg_constant_i32(a->vd);
- TCGv_i32 vj = tcg_constant_i32(a->vj);
- TCGv_i32 vk = tcg_constant_i32(a->vk);
- TCGv_i32 va = tcg_constant_i32(a->va);
+ tcg_gen_gvec_4_ool(vec_full_offset(a->vd),
+ vec_full_offset(a->vj),
+ vec_full_offset(a->vk),
+ vec_full_offset(a->va),
+ oprsz, ctx->vl / 8, oprsz, fn);
+ return true;
+}
+static bool gen_vvvv(DisasContext *ctx, arg_vvvv *a,
+ gen_helper_gvec_4 *fn)
+{
CHECK_SXE;
- func(cpu_env, vd, vj, vk, va);
- return true;
+ return gen_vvvv_vl(ctx, a, 16, fn);
}
static bool gen_vvv(DisasContext *ctx, arg_vvv *a,
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 05/57] target/loongarch: Use gen_helper_gvec_3_ptr for 3OP + env vector instructions
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (3 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 04/57] target/loongarch: Use gen_helper_gvec_4 for 4OP " Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-10 0:51 ` Richard Henderson
2023-09-07 8:31 ` [PATCH RESEND v5 06/57] target/loongarch: Use gen_helper_gvec_3 for 3OP " Song Gao
` (51 subsequent siblings)
56 siblings, 1 reply; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
Signed-off-by: Song Gao <gaosong@loongson.cn>
---
target/loongarch/helper.h | 48 +++++++--------
target/loongarch/vec_helper.c | 50 ++++++++--------
target/loongarch/insn_trans/trans_vec.c.inc | 66 +++++++++++++--------
3 files changed, 91 insertions(+), 73 deletions(-)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index 727ccfb32c..bcf82597aa 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -519,14 +519,14 @@ DEF_HELPER_4(vfrstp_h, void, env, i32, i32, i32)
DEF_HELPER_4(vfrstpi_b, void, env, i32, i32, i32)
DEF_HELPER_4(vfrstpi_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vfadd_s, void, env, i32, i32, i32)
-DEF_HELPER_4(vfadd_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vfsub_s, void, env, i32, i32, i32)
-DEF_HELPER_4(vfsub_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vfmul_s, void, env, i32, i32, i32)
-DEF_HELPER_4(vfmul_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vfdiv_s, void, env, i32, i32, i32)
-DEF_HELPER_4(vfdiv_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_5(vfadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfsub_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfmul_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfmul_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfdiv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfdiv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_FLAGS_6(vfmadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_FLAGS_6(vfmadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
@@ -537,15 +537,15 @@ DEF_HELPER_FLAGS_6(vfnmadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i3
DEF_HELPER_FLAGS_6(vfnmsub_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_FLAGS_6(vfnmsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, env, i32)
-DEF_HELPER_4(vfmax_s, void, env, i32, i32, i32)
-DEF_HELPER_4(vfmax_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vfmin_s, void, env, i32, i32, i32)
-DEF_HELPER_4(vfmin_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_5(vfmax_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfmax_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfmin_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfmin_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
-DEF_HELPER_4(vfmaxa_s, void, env, i32, i32, i32)
-DEF_HELPER_4(vfmaxa_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vfmina_s, void, env, i32, i32, i32)
-DEF_HELPER_4(vfmina_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_5(vfmaxa_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfmaxa_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfmina_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfmina_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_3(vflogb_s, void, env, i32, i32)
DEF_HELPER_3(vflogb_d, void, env, i32, i32)
@@ -564,8 +564,8 @@ DEF_HELPER_3(vfcvtl_s_h, void, env, i32, i32)
DEF_HELPER_3(vfcvth_s_h, void, env, i32, i32)
DEF_HELPER_3(vfcvtl_d_s, void, env, i32, i32)
DEF_HELPER_3(vfcvth_d_s, void, env, i32, i32)
-DEF_HELPER_4(vfcvt_h_s, void, env, i32, i32, i32)
-DEF_HELPER_4(vfcvt_s_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_5(vfcvt_h_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vfcvt_s_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_3(vfrintrne_s, void, env, i32, i32)
DEF_HELPER_3(vfrintrne_d, void, env, i32, i32)
@@ -592,11 +592,11 @@ DEF_HELPER_3(vftintrz_wu_s, void, env, i32, i32)
DEF_HELPER_3(vftintrz_lu_d, void, env, i32, i32)
DEF_HELPER_3(vftint_wu_s, void, env, i32, i32)
DEF_HELPER_3(vftint_lu_d, void, env, i32, i32)
-DEF_HELPER_4(vftintrne_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vftintrz_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vftintrp_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vftintrm_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vftint_w_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_5(vftintrne_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vftintrz_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vftintrp_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vftintrm_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_5(vftint_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_3(vftintrnel_l_s, void, env, i32, i32)
DEF_HELPER_3(vftintrneh_l_s, void, env, i32, i32)
DEF_HELPER_3(vftintrzl_l_s, void, env, i32, i32)
@@ -614,7 +614,7 @@ DEF_HELPER_3(vffint_s_wu, void, env, i32, i32)
DEF_HELPER_3(vffint_d_lu, void, env, i32, i32)
DEF_HELPER_3(vffintl_d_w, void, env, i32, i32)
DEF_HELPER_3(vffinth_d_w, void, env, i32, i32)
-DEF_HELPER_4(vffint_s_l, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_5(vffint_s_l, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_FLAGS_4(vseqi_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(vseqi_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index 7078c4c845..eab94a8b76 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -2096,13 +2096,13 @@ static inline void vec_clear_cause(CPULoongArchState *env)
}
#define DO_3OP_F(NAME, BIT, E, FN) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t vk) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, \
+ CPULoongArchState *env, uint32_t desc) \
{ \
int i; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- VReg *Vk = &(env->fpr[vk].vreg); \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
\
vec_clear_cause(env); \
for (i = 0; i < LSX_LEN/BIT; i++) { \
@@ -2326,14 +2326,14 @@ void HELPER(vfcvth_d_s)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
*Vd = temp;
}
-void HELPER(vfcvt_h_s)(CPULoongArchState *env,
- uint32_t vd, uint32_t vj, uint32_t vk)
+void HELPER(vfcvt_h_s)(void *vd, void *vj, void *vk,
+ CPULoongArchState *env, uint32_t desc)
{
int i;
VReg temp;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
- VReg *Vk = &(env->fpr[vk].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ VReg *Vk = (VReg *)vk;
vec_clear_cause(env);
for(i = 0; i < LSX_LEN/32; i++) {
@@ -2344,14 +2344,14 @@ void HELPER(vfcvt_h_s)(CPULoongArchState *env,
*Vd = temp;
}
-void HELPER(vfcvt_s_d)(CPULoongArchState *env,
- uint32_t vd, uint32_t vj, uint32_t vk)
+void HELPER(vfcvt_s_d)(void *vd, void *vj, void *vk,
+ CPULoongArchState *env, uint32_t desc)
{
int i;
VReg temp;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
- VReg *Vk = &(env->fpr[vk].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ VReg *Vk = (VReg *)vk;
vec_clear_cause(env);
for(i = 0; i < LSX_LEN/64; i++) {
@@ -2482,14 +2482,14 @@ FTINT(rz_w_d, float64, int32, uint64_t, uint32_t, float_round_to_zero)
FTINT(rne_w_d, float64, int32, uint64_t, uint32_t, float_round_nearest_even)
#define FTINT_W_D(NAME, FN) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t vk) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, \
+ CPULoongArchState *env, uint32_t desc) \
{ \
int i; \
VReg temp; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- VReg *Vk = &(env->fpr[vk].vreg); \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
\
vec_clear_cause(env); \
for (i = 0; i < 2; i++) { \
@@ -2606,14 +2606,14 @@ void HELPER(vffinth_d_w)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
*Vd = temp;
}
-void HELPER(vffint_s_l)(CPULoongArchState *env,
- uint32_t vd, uint32_t vj, uint32_t vk)
+void HELPER(vffint_s_l)(void *vd, void *vj, void *vk,
+ CPULoongArchState *env, uint32_t desc)
{
int i;
VReg temp;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
- VReg *Vk = &(env->fpr[vk].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ VReg *Vk = (VReg *)vk;
vec_clear_cause(env);
for (i = 0; i < 2; i++) {
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index 6f45296987..eae1929f44 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -52,6 +52,24 @@ static bool gen_vvvv(DisasContext *ctx, arg_vvvv *a,
return gen_vvvv_vl(ctx, a, 16, fn);
}
+static bool gen_vvv_ptr_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
+ gen_helper_gvec_3_ptr *fn)
+{
+ tcg_gen_gvec_3_ptr(vec_full_offset(a->vd),
+ vec_full_offset(a->vj),
+ vec_full_offset(a->vk),
+ cpu_env,
+ oprsz, ctx->vl / 8, oprsz, fn);
+ return true;
+}
+
+static bool gen_vvv_ptr(DisasContext *ctx, arg_vvv *a,
+ gen_helper_gvec_3_ptr *fn)
+{
+ CHECK_SXE;
+ return gen_vvv_ptr_vl(ctx, a, 16, fn);
+}
+
static bool gen_vvv(DisasContext *ctx, arg_vvv *a,
void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
{
@@ -3648,14 +3666,14 @@ TRANS(vfrstp_h, LSX, gen_vvv, gen_helper_vfrstp_h)
TRANS(vfrstpi_b, LSX, gen_vv_i, gen_helper_vfrstpi_b)
TRANS(vfrstpi_h, LSX, gen_vv_i, gen_helper_vfrstpi_h)
-TRANS(vfadd_s, LSX, gen_vvv, gen_helper_vfadd_s)
-TRANS(vfadd_d, LSX, gen_vvv, gen_helper_vfadd_d)
-TRANS(vfsub_s, LSX, gen_vvv, gen_helper_vfsub_s)
-TRANS(vfsub_d, LSX, gen_vvv, gen_helper_vfsub_d)
-TRANS(vfmul_s, LSX, gen_vvv, gen_helper_vfmul_s)
-TRANS(vfmul_d, LSX, gen_vvv, gen_helper_vfmul_d)
-TRANS(vfdiv_s, LSX, gen_vvv, gen_helper_vfdiv_s)
-TRANS(vfdiv_d, LSX, gen_vvv, gen_helper_vfdiv_d)
+TRANS(vfadd_s, LSX, gen_vvv_ptr, gen_helper_vfadd_s)
+TRANS(vfadd_d, LSX, gen_vvv_ptr, gen_helper_vfadd_d)
+TRANS(vfsub_s, LSX, gen_vvv_ptr, gen_helper_vfsub_s)
+TRANS(vfsub_d, LSX, gen_vvv_ptr, gen_helper_vfsub_d)
+TRANS(vfmul_s, LSX, gen_vvv_ptr, gen_helper_vfmul_s)
+TRANS(vfmul_d, LSX, gen_vvv_ptr, gen_helper_vfmul_d)
+TRANS(vfdiv_s, LSX, gen_vvv_ptr, gen_helper_vfdiv_s)
+TRANS(vfdiv_d, LSX, gen_vvv_ptr, gen_helper_vfdiv_d)
TRANS(vfmadd_s, LSX, gen_vvvv_ptr, gen_helper_vfmadd_s)
TRANS(vfmadd_d, LSX, gen_vvvv_ptr, gen_helper_vfmadd_d)
@@ -3666,15 +3684,15 @@ TRANS(vfnmadd_d, LSX, gen_vvvv_ptr, gen_helper_vfnmadd_d)
TRANS(vfnmsub_s, LSX, gen_vvvv_ptr, gen_helper_vfnmsub_s)
TRANS(vfnmsub_d, LSX, gen_vvvv_ptr, gen_helper_vfnmsub_d)
-TRANS(vfmax_s, LSX, gen_vvv, gen_helper_vfmax_s)
-TRANS(vfmax_d, LSX, gen_vvv, gen_helper_vfmax_d)
-TRANS(vfmin_s, LSX, gen_vvv, gen_helper_vfmin_s)
-TRANS(vfmin_d, LSX, gen_vvv, gen_helper_vfmin_d)
+TRANS(vfmax_s, LSX, gen_vvv_ptr, gen_helper_vfmax_s)
+TRANS(vfmax_d, LSX, gen_vvv_ptr, gen_helper_vfmax_d)
+TRANS(vfmin_s, LSX, gen_vvv_ptr, gen_helper_vfmin_s)
+TRANS(vfmin_d, LSX, gen_vvv_ptr, gen_helper_vfmin_d)
-TRANS(vfmaxa_s, LSX, gen_vvv, gen_helper_vfmaxa_s)
-TRANS(vfmaxa_d, LSX, gen_vvv, gen_helper_vfmaxa_d)
-TRANS(vfmina_s, LSX, gen_vvv, gen_helper_vfmina_s)
-TRANS(vfmina_d, LSX, gen_vvv, gen_helper_vfmina_d)
+TRANS(vfmaxa_s, LSX, gen_vvv_ptr, gen_helper_vfmaxa_s)
+TRANS(vfmaxa_d, LSX, gen_vvv_ptr, gen_helper_vfmaxa_d)
+TRANS(vfmina_s, LSX, gen_vvv_ptr, gen_helper_vfmina_s)
+TRANS(vfmina_d, LSX, gen_vvv_ptr, gen_helper_vfmina_d)
TRANS(vflogb_s, LSX, gen_vv, gen_helper_vflogb_s)
TRANS(vflogb_d, LSX, gen_vv, gen_helper_vflogb_d)
@@ -3693,8 +3711,8 @@ TRANS(vfcvtl_s_h, LSX, gen_vv, gen_helper_vfcvtl_s_h)
TRANS(vfcvth_s_h, LSX, gen_vv, gen_helper_vfcvth_s_h)
TRANS(vfcvtl_d_s, LSX, gen_vv, gen_helper_vfcvtl_d_s)
TRANS(vfcvth_d_s, LSX, gen_vv, gen_helper_vfcvth_d_s)
-TRANS(vfcvt_h_s, LSX, gen_vvv, gen_helper_vfcvt_h_s)
-TRANS(vfcvt_s_d, LSX, gen_vvv, gen_helper_vfcvt_s_d)
+TRANS(vfcvt_h_s, LSX, gen_vvv_ptr, gen_helper_vfcvt_h_s)
+TRANS(vfcvt_s_d, LSX, gen_vvv_ptr, gen_helper_vfcvt_s_d)
TRANS(vfrintrne_s, LSX, gen_vv, gen_helper_vfrintrne_s)
TRANS(vfrintrne_d, LSX, gen_vv, gen_helper_vfrintrne_d)
@@ -3721,11 +3739,11 @@ TRANS(vftintrz_wu_s, LSX, gen_vv, gen_helper_vftintrz_wu_s)
TRANS(vftintrz_lu_d, LSX, gen_vv, gen_helper_vftintrz_lu_d)
TRANS(vftint_wu_s, LSX, gen_vv, gen_helper_vftint_wu_s)
TRANS(vftint_lu_d, LSX, gen_vv, gen_helper_vftint_lu_d)
-TRANS(vftintrne_w_d, LSX, gen_vvv, gen_helper_vftintrne_w_d)
-TRANS(vftintrz_w_d, LSX, gen_vvv, gen_helper_vftintrz_w_d)
-TRANS(vftintrp_w_d, LSX, gen_vvv, gen_helper_vftintrp_w_d)
-TRANS(vftintrm_w_d, LSX, gen_vvv, gen_helper_vftintrm_w_d)
-TRANS(vftint_w_d, LSX, gen_vvv, gen_helper_vftint_w_d)
+TRANS(vftintrne_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrne_w_d)
+TRANS(vftintrz_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrz_w_d)
+TRANS(vftintrp_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrp_w_d)
+TRANS(vftintrm_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrm_w_d)
+TRANS(vftint_w_d, LSX, gen_vvv_ptr, gen_helper_vftint_w_d)
TRANS(vftintrnel_l_s, LSX, gen_vv, gen_helper_vftintrnel_l_s)
TRANS(vftintrneh_l_s, LSX, gen_vv, gen_helper_vftintrneh_l_s)
TRANS(vftintrzl_l_s, LSX, gen_vv, gen_helper_vftintrzl_l_s)
@@ -3743,7 +3761,7 @@ TRANS(vffint_s_wu, LSX, gen_vv, gen_helper_vffint_s_wu)
TRANS(vffint_d_lu, LSX, gen_vv, gen_helper_vffint_d_lu)
TRANS(vffintl_d_w, LSX, gen_vv, gen_helper_vffintl_d_w)
TRANS(vffinth_d_w, LSX, gen_vv, gen_helper_vffinth_d_w)
-TRANS(vffint_s_l, LSX, gen_vvv, gen_helper_vffint_s_l)
+TRANS(vffint_s_l, LSX, gen_vvv_ptr, gen_helper_vffint_s_l)
static bool do_cmp(DisasContext *ctx, arg_vvv *a, MemOp mop, TCGCond cond)
{
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 06/57] target/loongarch: Use gen_helper_gvec_3 for 3OP vector instructions
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (4 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 05/57] target/loongarch: Use gen_helper_gvec_3_ptr for 3OP + env " Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-07 8:31 ` [PATCH RESEND v5 07/57] target/loongarch: Use gen_helper_gvec_2_ptr for 2OP + env " Song Gao
` (50 subsequent siblings)
56 siblings, 0 replies; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
Signed-off-by: Song Gao <gaosong@loongson.cn>
---
target/loongarch/helper.h | 214 +++++-----
target/loongarch/vec_helper.c | 444 +++++++++-----------
target/loongarch/insn_trans/trans_vec.c.inc | 19 +-
3 files changed, 326 insertions(+), 351 deletions(-)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index bcf82597aa..4b681e948f 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -133,22 +133,22 @@ DEF_HELPER_1(idle, void, env)
#endif
/* LoongArch LSX */
-DEF_HELPER_4(vhaddw_h_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vhaddw_w_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vhaddw_d_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vhaddw_q_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vhaddw_hu_bu, void, env, i32, i32, i32)
-DEF_HELPER_4(vhaddw_wu_hu, void, env, i32, i32, i32)
-DEF_HELPER_4(vhaddw_du_wu, void, env, i32, i32, i32)
-DEF_HELPER_4(vhaddw_qu_du, void, env, i32, i32, i32)
-DEF_HELPER_4(vhsubw_h_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vhsubw_w_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vhsubw_d_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vhsubw_q_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vhsubw_hu_bu, void, env, i32, i32, i32)
-DEF_HELPER_4(vhsubw_wu_hu, void, env, i32, i32, i32)
-DEF_HELPER_4(vhsubw_du_wu, void, env, i32, i32, i32)
-DEF_HELPER_4(vhsubw_qu_du, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vhaddw_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhaddw_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhaddw_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhaddw_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhaddw_hu_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhaddw_wu_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhaddw_du_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhaddw_qu_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhsubw_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhsubw_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhsubw_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhsubw_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhsubw_hu_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhsubw_wu_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhsubw_du_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vhsubw_qu_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vaddwev_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vaddwev_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
@@ -305,22 +305,22 @@ DEF_HELPER_FLAGS_4(vmaddwod_h_bu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vmaddwod_w_hu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vmaddwod_d_wu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_4(vdiv_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vdiv_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vdiv_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vdiv_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vdiv_bu, void, env, i32, i32, i32)
-DEF_HELPER_4(vdiv_hu, void, env, i32, i32, i32)
-DEF_HELPER_4(vdiv_wu, void, env, i32, i32, i32)
-DEF_HELPER_4(vdiv_du, void, env, i32, i32, i32)
-DEF_HELPER_4(vmod_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vmod_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vmod_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vmod_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vmod_bu, void, env, i32, i32, i32)
-DEF_HELPER_4(vmod_hu, void, env, i32, i32, i32)
-DEF_HELPER_4(vmod_wu, void, env, i32, i32, i32)
-DEF_HELPER_4(vmod_du, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vdiv_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vdiv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vdiv_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vdiv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vdiv_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vdiv_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vdiv_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vdiv_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmod_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmod_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmod_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmod_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmod_bu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmod_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmod_wu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vmod_du, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vsat_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(vsat_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
@@ -363,30 +363,30 @@ DEF_HELPER_4(vsllwil_wu_hu, void, env, i32, i32, i32)
DEF_HELPER_4(vsllwil_du_wu, void, env, i32, i32, i32)
DEF_HELPER_3(vextl_qu_du, void, env, i32, i32)
-DEF_HELPER_4(vsrlr_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlr_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlr_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlr_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vsrlr_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrlr_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrlr_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrlr_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_4(vsrlri_b, void, env, i32, i32, i32)
DEF_HELPER_4(vsrlri_h, void, env, i32, i32, i32)
DEF_HELPER_4(vsrlri_w, void, env, i32, i32, i32)
DEF_HELPER_4(vsrlri_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrar_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrar_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrar_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrar_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vsrar_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrar_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrar_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrar_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_4(vsrari_b, void, env, i32, i32, i32)
DEF_HELPER_4(vsrari_h, void, env, i32, i32, i32)
DEF_HELPER_4(vsrari_w, void, env, i32, i32, i32)
DEF_HELPER_4(vsrari_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrln_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrln_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrln_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vsran_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsran_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsran_w_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vsrln_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrln_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrln_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsran_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsran_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsran_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_4(vsrlni_b_h, void, env, i32, i32, i32)
DEF_HELPER_4(vsrlni_h_w, void, env, i32, i32, i32)
@@ -397,12 +397,12 @@ DEF_HELPER_4(vsrani_h_w, void, env, i32, i32, i32)
DEF_HELPER_4(vsrani_w_d, void, env, i32, i32, i32)
DEF_HELPER_4(vsrani_d_q, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlrn_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlrn_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlrn_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrarn_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrarn_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrarn_w_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vsrlrn_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrlrn_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrlrn_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrarn_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrarn_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vsrarn_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_4(vsrlrni_b_h, void, env, i32, i32, i32)
DEF_HELPER_4(vsrlrni_h_w, void, env, i32, i32, i32)
@@ -413,18 +413,18 @@ DEF_HELPER_4(vsrarni_h_w, void, env, i32, i32, i32)
DEF_HELPER_4(vsrarni_w_d, void, env, i32, i32, i32)
DEF_HELPER_4(vsrarni_d_q, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrln_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrln_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrln_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssran_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssran_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssran_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrln_bu_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrln_hu_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrln_wu_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssran_bu_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssran_hu_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssran_wu_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vssrln_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrln_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrln_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssran_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssran_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssran_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrln_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrln_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrln_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssran_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssran_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssran_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_4(vssrlni_b_h, void, env, i32, i32, i32)
DEF_HELPER_4(vssrlni_h_w, void, env, i32, i32, i32)
@@ -443,18 +443,18 @@ DEF_HELPER_4(vssrani_hu_w, void, env, i32, i32, i32)
DEF_HELPER_4(vssrani_wu_d, void, env, i32, i32, i32)
DEF_HELPER_4(vssrani_du_q, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrn_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrn_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrn_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarn_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarn_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarn_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrn_bu_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrn_hu_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrn_wu_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarn_bu_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarn_hu_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarn_wu_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vssrlrn_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrlrn_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrlrn_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrarn_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrarn_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrarn_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrlrn_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrlrn_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrlrn_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrarn_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrarn_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vssrarn_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_4(vssrlrni_b_h, void, env, i32, i32, i32)
DEF_HELPER_4(vssrlrni_h_w, void, env, i32, i32, i32)
@@ -514,8 +514,8 @@ DEF_HELPER_FLAGS_4(vbitrevi_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(vbitrevi_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(vbitrevi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
-DEF_HELPER_4(vfrstp_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vfrstp_h, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vfrstp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vfrstp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_4(vfrstpi_b, void, env, i32, i32, i32)
DEF_HELPER_4(vfrstpi_h, void, env, i32, i32, i32)
@@ -655,37 +655,37 @@ DEF_HELPER_3(vsetallnez_h, void, env, i32, i32)
DEF_HELPER_3(vsetallnez_w, void, env, i32, i32)
DEF_HELPER_3(vsetallnez_d, void, env, i32, i32)
-DEF_HELPER_4(vpackev_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vpackev_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vpackev_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vpackev_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vpackod_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vpackod_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vpackod_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vpackod_d, void, env, i32, i32, i32)
-
-DEF_HELPER_4(vpickev_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vpickev_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vpickev_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vpickev_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vpickod_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vpickod_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vpickod_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vpickod_d, void, env, i32, i32, i32)
-
-DEF_HELPER_4(vilvl_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vilvl_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vilvl_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vilvl_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vilvh_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vilvh_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vilvh_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vilvh_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vpackev_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpackev_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpackev_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpackev_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpackod_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpackod_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpackod_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpackod_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(vpickev_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpickev_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpickev_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpickev_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpickod_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpickod_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpickod_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vpickod_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(vilvl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vilvl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vilvl_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vilvl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vilvh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vilvh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vilvh_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vilvh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(vshuf_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_4(vshuf_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vshuf_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vshuf_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vshuf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vshuf_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(vshuf_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_4(vshuf4i_b, void, env, i32, i32, i32)
DEF_HELPER_4(vshuf4i_h, void, env, i32, i32, i32)
DEF_HELPER_4(vshuf4i_w, void, env, i32, i32, i32)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index eab94a8b76..15b361c6b3 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -17,13 +17,12 @@
#define DO_SUB(a, b) (a - b)
#define DO_ODD_EVEN(NAME, BIT, E1, E2, DO_OP) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t vk) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
int i; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- VReg *Vk = &(env->fpr[vk].vreg); \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
typedef __typeof(Vd->E1(0)) TD; \
\
for (i = 0; i < LSX_LEN/BIT; i++) { \
@@ -35,12 +34,11 @@ DO_ODD_EVEN(vhaddw_h_b, 16, H, B, DO_ADD)
DO_ODD_EVEN(vhaddw_w_h, 32, W, H, DO_ADD)
DO_ODD_EVEN(vhaddw_d_w, 64, D, W, DO_ADD)
-void HELPER(vhaddw_q_d)(CPULoongArchState *env,
- uint32_t vd, uint32_t vj, uint32_t vk)
+void HELPER(vhaddw_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
{
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
- VReg *Vk = &(env->fpr[vk].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ VReg *Vk = (VReg *)vk;
Vd->Q(0) = int128_add(int128_makes64(Vj->D(1)), int128_makes64(Vk->D(0)));
}
@@ -49,12 +47,11 @@ DO_ODD_EVEN(vhsubw_h_b, 16, H, B, DO_SUB)
DO_ODD_EVEN(vhsubw_w_h, 32, W, H, DO_SUB)
DO_ODD_EVEN(vhsubw_d_w, 64, D, W, DO_SUB)
-void HELPER(vhsubw_q_d)(CPULoongArchState *env,
- uint32_t vd, uint32_t vj, uint32_t vk)
+void HELPER(vhsubw_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
{
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
- VReg *Vk = &(env->fpr[vk].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ VReg *Vk = (VReg *)vk;
Vd->Q(0) = int128_sub(int128_makes64(Vj->D(1)), int128_makes64(Vk->D(0)));
}
@@ -63,12 +60,11 @@ DO_ODD_EVEN(vhaddw_hu_bu, 16, UH, UB, DO_ADD)
DO_ODD_EVEN(vhaddw_wu_hu, 32, UW, UH, DO_ADD)
DO_ODD_EVEN(vhaddw_du_wu, 64, UD, UW, DO_ADD)
-void HELPER(vhaddw_qu_du)(CPULoongArchState *env,
- uint32_t vd, uint32_t vj, uint32_t vk)
+void HELPER(vhaddw_qu_du)(void *vd, void *vj, void *vk, uint32_t desc)
{
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
- VReg *Vk = &(env->fpr[vk].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ VReg *Vk = (VReg *)vk;
Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(1)),
int128_make64((uint64_t)Vk->D(0)));
@@ -78,12 +74,11 @@ DO_ODD_EVEN(vhsubw_hu_bu, 16, UH, UB, DO_SUB)
DO_ODD_EVEN(vhsubw_wu_hu, 32, UW, UH, DO_SUB)
DO_ODD_EVEN(vhsubw_du_wu, 64, UD, UW, DO_SUB)
-void HELPER(vhsubw_qu_du)(CPULoongArchState *env,
- uint32_t vd, uint32_t vj, uint32_t vk)
+void HELPER(vhsubw_qu_du)(void *vd, void *vj, void *vk, uint32_t desc)
{
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
- VReg *Vk = &(env->fpr[vk].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ VReg *Vk = (VReg *)vk;
Vd->Q(0) = int128_sub(int128_make64((uint64_t)Vj->D(1)),
int128_make64((uint64_t)Vk->D(0)));
@@ -564,17 +559,16 @@ VMADDWOD_U_S(vmaddwod_d_wu_w, 64, D, UD, W, UW, DO_MUL)
#define DO_REM(N, M) (unlikely(M == 0) ? 0 :\
unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
-#define VDIV(NAME, BIT, E, DO_OP) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t vk) \
-{ \
- int i; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- VReg *Vk = &(env->fpr[vk].vreg); \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)); \
- } \
+#define VDIV(NAME, BIT, E, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
+ Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)); \
+ } \
}
VDIV(vdiv_b, 8, B, DO_DIV)
@@ -854,13 +848,12 @@ do_vsrlr(W, uint32_t)
do_vsrlr(D, uint64_t)
#define VSRLR(NAME, BIT, T, E) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t vk) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
int i; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- VReg *Vk = &(env->fpr[vk].vreg); \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
\
for (i = 0; i < LSX_LEN/BIT; i++) { \
Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), ((T)Vk->E(i))%BIT); \
@@ -906,13 +899,12 @@ do_vsrar(W, int32_t)
do_vsrar(D, int64_t)
#define VSRAR(NAME, BIT, T, E) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t vk) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
int i; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- VReg *Vk = &(env->fpr[vk].vreg); \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
\
for (i = 0; i < LSX_LEN/BIT; i++) { \
Vd->E(i) = do_vsrar_ ## E(Vj->E(i), ((T)Vk->E(i))%BIT); \
@@ -945,13 +937,12 @@ VSRARI(vsrari_d, 64, D)
#define R_SHIFT(a, b) (a >> b)
#define VSRLN(NAME, BIT, T, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t vk) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
int i; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- VReg *Vk = &(env->fpr[vk].vreg); \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
\
for (i = 0; i < LSX_LEN/BIT; i++) { \
Vd->E1(i) = R_SHIFT((T)Vj->E2(i),((T)Vk->E2(i)) % BIT); \
@@ -963,19 +954,18 @@ VSRLN(vsrln_b_h, 16, uint16_t, B, H)
VSRLN(vsrln_h_w, 32, uint32_t, H, W)
VSRLN(vsrln_w_d, 64, uint64_t, W, D)
-#define VSRAN(NAME, BIT, T, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t vk) \
-{ \
- int i; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- VReg *Vk = &(env->fpr[vk].vreg); \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E1(i) = R_SHIFT(Vj->E2(i), ((T)Vk->E2(i)) % BIT); \
- } \
- Vd->D(1) = 0; \
+#define VSRAN(NAME, BIT, T, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ \
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
+ Vd->E1(i) = R_SHIFT(Vj->E2(i), ((T)Vk->E2(i)) % BIT); \
+ } \
+ Vd->D(1) = 0; \
}
VSRAN(vsran_b_h, 16, uint16_t, B, H)
@@ -1057,13 +1047,12 @@ VSRANI(vsrani_h_w, 32, H, W)
VSRANI(vsrani_w_d, 64, W, D)
#define VSRLRN(NAME, BIT, T, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t vk) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
int i; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- VReg *Vk = &(env->fpr[vk].vreg); \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
\
for (i = 0; i < LSX_LEN/BIT; i++) { \
Vd->E1(i) = do_vsrlr_ ## E2(Vj->E2(i), ((T)Vk->E2(i))%BIT); \
@@ -1076,13 +1065,12 @@ VSRLRN(vsrlrn_h_w, 32, uint32_t, H, W)
VSRLRN(vsrlrn_w_d, 64, uint64_t, W, D)
#define VSRARN(NAME, BIT, T, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t vk) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
int i; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- VReg *Vk = &(env->fpr[vk].vreg); \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
\
for (i = 0; i < LSX_LEN/BIT; i++) { \
Vd->E1(i) = do_vsrar_ ## E2(Vj->E2(i), ((T)Vk->E2(i))%BIT); \
@@ -1205,13 +1193,12 @@ SSRLNS(H, uint32_t, int32_t, uint16_t)
SSRLNS(W, uint64_t, int64_t, uint32_t)
#define VSSRLN(NAME, BIT, T, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t vk) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
int i; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- VReg *Vk = &(env->fpr[vk].vreg); \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
\
for (i = 0; i < LSX_LEN/BIT; i++) { \
Vd->E1(i) = do_ssrlns_ ## E1(Vj->E2(i), (T)Vk->E2(i)% BIT, BIT/2 -1); \
@@ -1248,13 +1235,12 @@ SSRANS(H, int32_t, int16_t)
SSRANS(W, int64_t, int32_t)
#define VSSRAN(NAME, BIT, T, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t vk) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
int i; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- VReg *Vk = &(env->fpr[vk].vreg); \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
\
for (i = 0; i < LSX_LEN/BIT; i++) { \
Vd->E1(i) = do_ssrans_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \
@@ -1289,13 +1275,12 @@ SSRLNU(H, uint32_t, uint16_t, int32_t)
SSRLNU(W, uint64_t, uint32_t, int64_t)
#define VSSRLNU(NAME, BIT, T, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t vk) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
int i; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- VReg *Vk = &(env->fpr[vk].vreg); \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
\
for (i = 0; i < LSX_LEN/BIT; i++) { \
Vd->E1(i) = do_ssrlnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \
@@ -1333,13 +1318,12 @@ SSRANU(H, uint32_t, uint16_t, int32_t)
SSRANU(W, uint64_t, uint32_t, int64_t)
#define VSSRANU(NAME, BIT, T, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t vk) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
int i; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- VReg *Vk = &(env->fpr[vk].vreg); \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
\
for (i = 0; i < LSX_LEN/BIT; i++) { \
Vd->E1(i) = do_ssranu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \
@@ -1581,13 +1565,12 @@ SSRLRNS(H, W, uint32_t, int32_t, uint16_t)
SSRLRNS(W, D, uint64_t, int64_t, uint32_t)
#define VSSRLRN(NAME, BIT, T, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t vk) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
int i; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- VReg *Vk = &(env->fpr[vk].vreg); \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
\
for (i = 0; i < LSX_LEN/BIT; i++) { \
Vd->E1(i) = do_ssrlrns_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \
@@ -1621,13 +1604,12 @@ SSRARNS(H, W, int32_t, int16_t)
SSRARNS(W, D, int64_t, int32_t)
#define VSSRARN(NAME, BIT, T, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t vk) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
int i; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- VReg *Vk = &(env->fpr[vk].vreg); \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
\
for (i = 0; i < LSX_LEN/BIT; i++) { \
Vd->E1(i) = do_ssrarns_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \
@@ -1660,13 +1642,12 @@ SSRLRNU(H, W, uint32_t, uint16_t, int32_t)
SSRLRNU(W, D, uint64_t, uint32_t, int64_t)
#define VSSRLRNU(NAME, BIT, T, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t vk) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
int i; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- VReg *Vk = &(env->fpr[vk].vreg); \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
\
for (i = 0; i < LSX_LEN/BIT; i++) { \
Vd->E1(i) = do_ssrlrnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \
@@ -1702,13 +1683,12 @@ SSRARNU(H, W, uint32_t, uint16_t, int32_t)
SSRARNU(W, D, uint64_t, uint32_t, int64_t)
#define VSSRARNU(NAME, BIT, T, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t vk) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
int i; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- VReg *Vk = &(env->fpr[vk].vreg); \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
\
for (i = 0; i < LSX_LEN/BIT; i++) { \
Vd->E1(i) = do_ssrarnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \
@@ -2023,22 +2003,21 @@ DO_BITI(vbitrevi_h, 16, UH, DO_BITREV)
DO_BITI(vbitrevi_w, 32, UW, DO_BITREV)
DO_BITI(vbitrevi_d, 64, UD, DO_BITREV)
-#define VFRSTP(NAME, BIT, MASK, E) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t vk) \
-{ \
- int i, m; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- VReg *Vk = &(env->fpr[vk].vreg); \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- if (Vj->E(i) < 0) { \
- break; \
- } \
- } \
- m = Vk->E(0) & MASK; \
- Vd->E(m) = i; \
+#define VFRSTP(NAME, BIT, MASK, E) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, m; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ \
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
+ if (Vj->E(i) < 0) { \
+ break; \
+ } \
+ } \
+ m = Vk->E(0) & MASK; \
+ Vd->E(m) = i; \
}
VFRSTP(vfrstp_b, 8, 0xf, B)
@@ -2767,21 +2746,20 @@ SETALLNEZ(vsetallnez_h, MO_16)
SETALLNEZ(vsetallnez_w, MO_32)
SETALLNEZ(vsetallnez_d, MO_64)
-#define VPACKEV(NAME, BIT, E) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t vk) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- VReg *Vk = &(env->fpr[vk].vreg); \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- temp.E(2 * i + 1) = Vj->E(2 * i); \
- temp.E(2 *i) = Vk->E(2 * i); \
- } \
- *Vd = temp; \
+#define VPACKEV(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i; \
+ VReg temp; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ \
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
+ temp.E(2 * i + 1) = Vj->E(2 * i); \
+ temp.E(2 *i) = Vk->E(2 * i); \
+ } \
+ *Vd = temp; \
}
VPACKEV(vpackev_b, 16, B)
@@ -2789,21 +2767,20 @@ VPACKEV(vpackev_h, 32, H)
VPACKEV(vpackev_w, 64, W)
VPACKEV(vpackev_d, 128, D)
-#define VPACKOD(NAME, BIT, E) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t vk) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- VReg *Vk = &(env->fpr[vk].vreg); \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- temp.E(2 * i + 1) = Vj->E(2 * i + 1); \
- temp.E(2 * i) = Vk->E(2 * i + 1); \
- } \
- *Vd = temp; \
+#define VPACKOD(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i; \
+ VReg temp; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ \
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
+ temp.E(2 * i + 1) = Vj->E(2 * i + 1); \
+ temp.E(2 * i) = Vk->E(2 * i + 1); \
+ } \
+ *Vd = temp; \
}
VPACKOD(vpackod_b, 16, B)
@@ -2811,21 +2788,20 @@ VPACKOD(vpackod_h, 32, H)
VPACKOD(vpackod_w, 64, W)
VPACKOD(vpackod_d, 128, D)
-#define VPICKEV(NAME, BIT, E) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t vk) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- VReg *Vk = &(env->fpr[vk].vreg); \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- temp.E(i + LSX_LEN/BIT) = Vj->E(2 * i); \
- temp.E(i) = Vk->E(2 * i); \
- } \
- *Vd = temp; \
+#define VPICKEV(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i; \
+ VReg temp; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ \
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
+ temp.E(i + LSX_LEN/BIT) = Vj->E(2 * i); \
+ temp.E(i) = Vk->E(2 * i); \
+ } \
+ *Vd = temp; \
}
VPICKEV(vpickev_b, 16, B)
@@ -2833,21 +2809,20 @@ VPICKEV(vpickev_h, 32, H)
VPICKEV(vpickev_w, 64, W)
VPICKEV(vpickev_d, 128, D)
-#define VPICKOD(NAME, BIT, E) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t vk) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- VReg *Vk = &(env->fpr[vk].vreg); \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- temp.E(i + LSX_LEN/BIT) = Vj->E(2 * i + 1); \
- temp.E(i) = Vk->E(2 * i + 1); \
- } \
- *Vd = temp; \
+#define VPICKOD(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i; \
+ VReg temp; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ \
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
+ temp.E(i + LSX_LEN/BIT) = Vj->E(2 * i + 1); \
+ temp.E(i) = Vk->E(2 * i + 1); \
+ } \
+ *Vd = temp; \
}
VPICKOD(vpickod_b, 16, B)
@@ -2855,21 +2830,20 @@ VPICKOD(vpickod_h, 32, H)
VPICKOD(vpickod_w, 64, W)
VPICKOD(vpickod_d, 128, D)
-#define VILVL(NAME, BIT, E) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t vk) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- VReg *Vk = &(env->fpr[vk].vreg); \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- temp.E(2 * i + 1) = Vj->E(i); \
- temp.E(2 * i) = Vk->E(i); \
- } \
- *Vd = temp; \
+#define VILVL(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i; \
+ VReg temp; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ \
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
+ temp.E(2 * i + 1) = Vj->E(i); \
+ temp.E(2 * i) = Vk->E(i); \
+ } \
+ *Vd = temp; \
}
VILVL(vilvl_b, 16, B)
@@ -2877,21 +2851,20 @@ VILVL(vilvl_h, 32, H)
VILVL(vilvl_w, 64, W)
VILVL(vilvl_d, 128, D)
-#define VILVH(NAME, BIT, E) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t vk) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- VReg *Vk = &(env->fpr[vk].vreg); \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- temp.E(2 * i + 1) = Vj->E(i + LSX_LEN/BIT); \
- temp.E(2 * i) = Vk->E(i + LSX_LEN/BIT); \
- } \
- *Vd = temp; \
+#define VILVH(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i; \
+ VReg temp; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ \
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
+ temp.E(2 * i + 1) = Vj->E(i + LSX_LEN/BIT); \
+ temp.E(2 * i) = Vk->E(i + LSX_LEN/BIT); \
+ } \
+ *Vd = temp; \
}
VILVH(vilvh_b, 16, B)
@@ -2916,22 +2889,21 @@ void HELPER(vshuf_b)(void *vd, void *vj, void *vk, void *va, uint32_t desc)
*Vd = temp;
}
-#define VSHUF(NAME, BIT, E) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t vk) \
-{ \
- int i, m; \
- VReg temp; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- VReg *Vk = &(env->fpr[vk].vreg); \
- \
- m = LSX_LEN/BIT; \
- for (i = 0; i < m; i++) { \
- uint64_t k = ((uint8_t) Vd->E(i)) % (2 * m); \
- temp.E(i) = k < m ? Vk->E(k) : Vj->E(k - m); \
- } \
- *Vd = temp; \
+#define VSHUF(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, m; \
+ VReg temp; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ \
+ m = LSX_LEN/BIT; \
+ for (i = 0; i < m; i++) { \
+ uint64_t k = ((uint8_t) Vd->E(i)) % (2 * m); \
+ temp.E(i) = k < m ? Vk->E(k) : Vj->E(k - m); \
+ } \
+ *Vd = temp; \
}
VSHUF(vshuf_h, 16, H)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index eae1929f44..6ead8fb4c5 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -70,17 +70,20 @@ static bool gen_vvv_ptr(DisasContext *ctx, arg_vvv *a,
return gen_vvv_ptr_vl(ctx, a, 16, fn);
}
-static bool gen_vvv(DisasContext *ctx, arg_vvv *a,
- void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
+static bool gen_vvv_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
+ gen_helper_gvec_3 *fn)
{
- TCGv_i32 vd = tcg_constant_i32(a->vd);
- TCGv_i32 vj = tcg_constant_i32(a->vj);
- TCGv_i32 vk = tcg_constant_i32(a->vk);
+ tcg_gen_gvec_3_ool(vec_full_offset(a->vd),
+ vec_full_offset(a->vj),
+ vec_full_offset(a->vk),
+ oprsz, ctx->vl / 8, oprsz, fn);
+ return true;
+}
+static bool gen_vvv(DisasContext *ctx, arg_vvv *a, gen_helper_gvec_3 *fn)
+{
CHECK_SXE;
-
- func(cpu_env, vd, vj, vk);
- return true;
+ return gen_vvv_vl(ctx, a, 16, fn);
}
static bool gen_vv(DisasContext *ctx, arg_vv *a,
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 07/57] target/loongarch: Use gen_helper_gvec_2_ptr for 2OP + env vector instructions
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (5 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 06/57] target/loongarch: Use gen_helper_gvec_3 for 3OP " Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-10 0:59 ` Richard Henderson
2023-09-07 8:31 ` [PATCH RESEND v5 08/57] target/loongarch: Use gen_helper_gvec_2 for 2OP " Song Gao
` (49 subsequent siblings)
56 siblings, 1 reply; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
Signed-off-by: Song Gao <gaosong@loongson.cn>
---
target/loongarch/helper.h | 118 +++++++-------
target/loongarch/vec_helper.c | 161 +++++++++++---------
target/loongarch/insn_trans/trans_vec.c.inc | 129 +++++++++-------
3 files changed, 219 insertions(+), 189 deletions(-)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index 4b681e948f..0752cc7212 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -547,73 +547,73 @@ DEF_HELPER_FLAGS_5(vfmaxa_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_FLAGS_5(vfmina_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_FLAGS_5(vfmina_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
-DEF_HELPER_3(vflogb_s, void, env, i32, i32)
-DEF_HELPER_3(vflogb_d, void, env, i32, i32)
-
-DEF_HELPER_3(vfclass_s, void, env, i32, i32)
-DEF_HELPER_3(vfclass_d, void, env, i32, i32)
-
-DEF_HELPER_3(vfsqrt_s, void, env, i32, i32)
-DEF_HELPER_3(vfsqrt_d, void, env, i32, i32)
-DEF_HELPER_3(vfrecip_s, void, env, i32, i32)
-DEF_HELPER_3(vfrecip_d, void, env, i32, i32)
-DEF_HELPER_3(vfrsqrt_s, void, env, i32, i32)
-DEF_HELPER_3(vfrsqrt_d, void, env, i32, i32)
-
-DEF_HELPER_3(vfcvtl_s_h, void, env, i32, i32)
-DEF_HELPER_3(vfcvth_s_h, void, env, i32, i32)
-DEF_HELPER_3(vfcvtl_d_s, void, env, i32, i32)
-DEF_HELPER_3(vfcvth_d_s, void, env, i32, i32)
+DEF_HELPER_FLAGS_4(vflogb_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vflogb_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+
+DEF_HELPER_FLAGS_4(vfclass_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vfclass_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+
+DEF_HELPER_FLAGS_4(vfsqrt_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vfsqrt_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vfrecip_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vfrecip_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vfrsqrt_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vfrsqrt_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+
+DEF_HELPER_FLAGS_4(vfcvtl_s_h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vfcvth_s_h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vfcvtl_d_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vfcvth_d_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
DEF_HELPER_FLAGS_5(vfcvt_h_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_FLAGS_5(vfcvt_s_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
-DEF_HELPER_3(vfrintrne_s, void, env, i32, i32)
-DEF_HELPER_3(vfrintrne_d, void, env, i32, i32)
-DEF_HELPER_3(vfrintrz_s, void, env, i32, i32)
-DEF_HELPER_3(vfrintrz_d, void, env, i32, i32)
-DEF_HELPER_3(vfrintrp_s, void, env, i32, i32)
-DEF_HELPER_3(vfrintrp_d, void, env, i32, i32)
-DEF_HELPER_3(vfrintrm_s, void, env, i32, i32)
-DEF_HELPER_3(vfrintrm_d, void, env, i32, i32)
-DEF_HELPER_3(vfrint_s, void, env, i32, i32)
-DEF_HELPER_3(vfrint_d, void, env, i32, i32)
-
-DEF_HELPER_3(vftintrne_w_s, void, env, i32, i32)
-DEF_HELPER_3(vftintrne_l_d, void, env, i32, i32)
-DEF_HELPER_3(vftintrz_w_s, void, env, i32, i32)
-DEF_HELPER_3(vftintrz_l_d, void, env, i32, i32)
-DEF_HELPER_3(vftintrp_w_s, void, env, i32, i32)
-DEF_HELPER_3(vftintrp_l_d, void, env, i32, i32)
-DEF_HELPER_3(vftintrm_w_s, void, env, i32, i32)
-DEF_HELPER_3(vftintrm_l_d, void, env, i32, i32)
-DEF_HELPER_3(vftint_w_s, void, env, i32, i32)
-DEF_HELPER_3(vftint_l_d, void, env, i32, i32)
-DEF_HELPER_3(vftintrz_wu_s, void, env, i32, i32)
-DEF_HELPER_3(vftintrz_lu_d, void, env, i32, i32)
-DEF_HELPER_3(vftint_wu_s, void, env, i32, i32)
-DEF_HELPER_3(vftint_lu_d, void, env, i32, i32)
+DEF_HELPER_FLAGS_4(vfrintrne_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vfrintrne_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vfrintrz_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vfrintrz_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vfrintrp_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vfrintrp_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vfrintrm_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vfrintrm_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vfrint_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vfrint_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+
+DEF_HELPER_FLAGS_4(vftintrne_w_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftintrne_l_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftintrz_w_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftintrz_l_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftintrp_w_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftintrp_l_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftintrm_w_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftintrm_l_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftint_w_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftint_l_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftintrz_wu_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftintrz_lu_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftint_wu_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftint_lu_d, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
DEF_HELPER_FLAGS_5(vftintrne_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_FLAGS_5(vftintrz_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_FLAGS_5(vftintrp_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_FLAGS_5(vftintrm_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_FLAGS_5(vftint_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
-DEF_HELPER_3(vftintrnel_l_s, void, env, i32, i32)
-DEF_HELPER_3(vftintrneh_l_s, void, env, i32, i32)
-DEF_HELPER_3(vftintrzl_l_s, void, env, i32, i32)
-DEF_HELPER_3(vftintrzh_l_s, void, env, i32, i32)
-DEF_HELPER_3(vftintrpl_l_s, void, env, i32, i32)
-DEF_HELPER_3(vftintrph_l_s, void, env, i32, i32)
-DEF_HELPER_3(vftintrml_l_s, void, env, i32, i32)
-DEF_HELPER_3(vftintrmh_l_s, void, env, i32, i32)
-DEF_HELPER_3(vftintl_l_s, void, env, i32, i32)
-DEF_HELPER_3(vftinth_l_s, void, env, i32, i32)
-
-DEF_HELPER_3(vffint_s_w, void, env, i32, i32)
-DEF_HELPER_3(vffint_d_l, void, env, i32, i32)
-DEF_HELPER_3(vffint_s_wu, void, env, i32, i32)
-DEF_HELPER_3(vffint_d_lu, void, env, i32, i32)
-DEF_HELPER_3(vffintl_d_w, void, env, i32, i32)
-DEF_HELPER_3(vffinth_d_w, void, env, i32, i32)
+DEF_HELPER_FLAGS_4(vftintrnel_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftintrneh_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftintrzl_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftintrzh_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftintrpl_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftintrph_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftintrml_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftintrmh_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftintl_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vftinth_l_s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+
+DEF_HELPER_FLAGS_4(vffint_s_w, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vffint_d_l, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vffint_s_wu, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vffint_d_lu, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vffintl_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_4(vffinth_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
DEF_HELPER_FLAGS_5(vffint_s_l, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_FLAGS_4(vseqi_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index 15b361c6b3..2898ae06ce 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -2135,17 +2135,18 @@ DO_4OP_F(vfnmsub_s, 32, UW, float32_muladd,
DO_4OP_F(vfnmsub_d, 64, UD, float64_muladd,
float_muladd_negate_c | float_muladd_negate_result)
-#define DO_2OP_F(NAME, BIT, E, FN) \
-void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
-{ \
- int i; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- \
- vec_clear_cause(env); \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E(i) = FN(env, Vj->E(i)); \
- } \
+#define DO_2OP_F(NAME, BIT, E, FN) \
+void HELPER(NAME)(void *vd, void *vj, \
+ CPULoongArchState *env, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ \
+ vec_clear_cause(env); \
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
+ Vd->E(i) = FN(env, Vj->E(i)); \
+ } \
}
#define FLOGB(BIT, T) \
@@ -2166,16 +2167,17 @@ static T do_flogb_## BIT(CPULoongArchState *env, T fj) \
FLOGB(32, uint32_t)
FLOGB(64, uint64_t)
-#define FCLASS(NAME, BIT, E, FN) \
-void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
-{ \
- int i; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E(i) = FN(env, Vj->E(i)); \
- } \
+#define FCLASS(NAME, BIT, E, FN) \
+void HELPER(NAME)(void *vd, void *vj, \
+ CPULoongArchState *env, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ \
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
+ Vd->E(i) = FN(env, Vj->E(i)); \
+ } \
}
FCLASS(vfclass_s, 32, UW, helper_fclass_s)
@@ -2245,12 +2247,13 @@ static uint32_t float64_cvt_float32(uint64_t d, float_status *status)
return float64_to_float32(d, status);
}
-void HELPER(vfcvtl_s_h)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
+void HELPER(vfcvtl_s_h)(void *vd, void *vj,
+ CPULoongArchState *env, uint32_t desc)
{
int i;
VReg temp;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
vec_clear_cause(env);
for (i = 0; i < LSX_LEN/32; i++) {
@@ -2260,12 +2263,13 @@ void HELPER(vfcvtl_s_h)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
*Vd = temp;
}
-void HELPER(vfcvtl_d_s)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
+void HELPER(vfcvtl_d_s)(void *vd, void *vj,
+ CPULoongArchState *env, uint32_t desc)
{
int i;
VReg temp;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
vec_clear_cause(env);
for (i = 0; i < LSX_LEN/64; i++) {
@@ -2275,12 +2279,13 @@ void HELPER(vfcvtl_d_s)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
*Vd = temp;
}
-void HELPER(vfcvth_s_h)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
+void HELPER(vfcvth_s_h)(void *vd, void *vj,
+ CPULoongArchState *env, uint32_t desc)
{
int i;
VReg temp;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
vec_clear_cause(env);
for (i = 0; i < LSX_LEN/32; i++) {
@@ -2290,12 +2295,13 @@ void HELPER(vfcvth_s_h)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
*Vd = temp;
}
-void HELPER(vfcvth_d_s)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
+void HELPER(vfcvth_d_s)(void *vd, void *vj,
+ CPULoongArchState *env, uint32_t desc)
{
int i;
VReg temp;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
vec_clear_cause(env);
for (i = 0; i < LSX_LEN/64; i++) {
@@ -2341,11 +2347,12 @@ void HELPER(vfcvt_s_d)(void *vd, void *vj, void *vk,
*Vd = temp;
}
-void HELPER(vfrint_s)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
+void HELPER(vfrint_s)(void *vd, void *vj,
+ CPULoongArchState *env, uint32_t desc)
{
int i;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
vec_clear_cause(env);
for (i = 0; i < 4; i++) {
@@ -2354,11 +2361,12 @@ void HELPER(vfrint_s)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
}
}
-void HELPER(vfrint_d)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
+void HELPER(vfrint_d)(void *vd, void *vj,
+ CPULoongArchState *env, uint32_t desc)
{
int i;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
vec_clear_cause(env);
for (i = 0; i < 2; i++) {
@@ -2368,11 +2376,12 @@ void HELPER(vfrint_d)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
}
#define FCVT_2OP(NAME, BIT, E, MODE) \
-void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
+void HELPER(NAME)(void *vd, void *vj, \
+ CPULoongArchState *env, uint32_t desc) \
{ \
int i; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
\
vec_clear_cause(env); \
for (i = 0; i < LSX_LEN/BIT; i++) { \
@@ -2493,19 +2502,20 @@ FTINT(rph_l_s, float32, int64, uint32_t, uint64_t, float_round_up)
FTINT(rzh_l_s, float32, int64, uint32_t, uint64_t, float_round_to_zero)
FTINT(rneh_l_s, float32, int64, uint32_t, uint64_t, float_round_nearest_even)
-#define FTINTL_L_S(NAME, FN) \
-void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- \
- vec_clear_cause(env); \
- for (i = 0; i < 2; i++) { \
- temp.D(i) = FN(env, Vj->UW(i)); \
- } \
- *Vd = temp; \
+#define FTINTL_L_S(NAME, FN) \
+void HELPER(NAME)(void *vd, void *vj, \
+ CPULoongArchState *env, uint32_t desc) \
+{ \
+ int i; \
+ VReg temp; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ \
+ vec_clear_cause(env); \
+ for (i = 0; i < 2; i++) { \
+ temp.D(i) = FN(env, Vj->UW(i)); \
+ } \
+ *Vd = temp; \
}
FTINTL_L_S(vftintl_l_s, do_float32_to_int64)
@@ -2514,19 +2524,20 @@ FTINTL_L_S(vftintrpl_l_s, do_ftintrpl_l_s)
FTINTL_L_S(vftintrzl_l_s, do_ftintrzl_l_s)
FTINTL_L_S(vftintrnel_l_s, do_ftintrnel_l_s)
-#define FTINTH_L_S(NAME, FN) \
-void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- \
- vec_clear_cause(env); \
- for (i = 0; i < 2; i++) { \
- temp.D(i) = FN(env, Vj->UW(i + 2)); \
- } \
- *Vd = temp; \
+#define FTINTH_L_S(NAME, FN) \
+void HELPER(NAME)(void *vd, void *vj, \
+ CPULoongArchState *env, uint32_t desc) \
+{ \
+ int i; \
+ VReg temp; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ \
+ vec_clear_cause(env); \
+ for (i = 0; i < 2; i++) { \
+ temp.D(i) = FN(env, Vj->UW(i + 2)); \
+ } \
+ *Vd = temp; \
}
FTINTH_L_S(vftinth_l_s, do_float32_to_int64)
@@ -2555,12 +2566,13 @@ DO_2OP_F(vffint_d_l, 64, D, do_ffint_d_l)
DO_2OP_F(vffint_s_wu, 32, UW, do_ffint_s_wu)
DO_2OP_F(vffint_d_lu, 64, UD, do_ffint_d_lu)
-void HELPER(vffintl_d_w)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
+void HELPER(vffintl_d_w)(void *vd, void *vj,
+ CPULoongArchState *env, uint32_t desc)
{
int i;
VReg temp;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
vec_clear_cause(env);
for (i = 0; i < 2; i++) {
@@ -2570,12 +2582,13 @@ void HELPER(vffintl_d_w)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
*Vd = temp;
}
-void HELPER(vffinth_d_w)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
+void HELPER(vffinth_d_w)(void *vd, void *vj,
+ CPULoongArchState *env, uint32_t desc)
{
int i;
VReg temp;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
vec_clear_cause(env);
for (i = 0; i < 2; i++) {
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index 6ead8fb4c5..11d7158809 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -86,6 +86,23 @@ static bool gen_vvv(DisasContext *ctx, arg_vvv *a, gen_helper_gvec_3 *fn)
return gen_vvv_vl(ctx, a, 16, fn);
}
+static bool gen_vv_ptr_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz,
+ gen_helper_gvec_2_ptr *fn)
+{
+ tcg_gen_gvec_2_ptr(vec_full_offset(a->vd),
+ vec_full_offset(a->vj),
+ cpu_env,
+ oprsz, ctx->vl / 8, oprsz, fn);
+ return true;
+}
+
+static bool gen_vv_ptr(DisasContext *ctx, arg_vv *a,
+ gen_helper_gvec_2_ptr *fn)
+{
+ CHECK_SXE;
+ return gen_vv_ptr_vl(ctx, a, 16, fn);
+}
+
static bool gen_vv(DisasContext *ctx, arg_vv *a,
void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32))
{
@@ -3697,73 +3714,73 @@ TRANS(vfmaxa_d, LSX, gen_vvv_ptr, gen_helper_vfmaxa_d)
TRANS(vfmina_s, LSX, gen_vvv_ptr, gen_helper_vfmina_s)
TRANS(vfmina_d, LSX, gen_vvv_ptr, gen_helper_vfmina_d)
-TRANS(vflogb_s, LSX, gen_vv, gen_helper_vflogb_s)
-TRANS(vflogb_d, LSX, gen_vv, gen_helper_vflogb_d)
+TRANS(vflogb_s, LSX, gen_vv_ptr, gen_helper_vflogb_s)
+TRANS(vflogb_d, LSX, gen_vv_ptr, gen_helper_vflogb_d)
-TRANS(vfclass_s, LSX, gen_vv, gen_helper_vfclass_s)
-TRANS(vfclass_d, LSX, gen_vv, gen_helper_vfclass_d)
+TRANS(vfclass_s, LSX, gen_vv_ptr, gen_helper_vfclass_s)
+TRANS(vfclass_d, LSX, gen_vv_ptr, gen_helper_vfclass_d)
-TRANS(vfsqrt_s, LSX, gen_vv, gen_helper_vfsqrt_s)
-TRANS(vfsqrt_d, LSX, gen_vv, gen_helper_vfsqrt_d)
-TRANS(vfrecip_s, LSX, gen_vv, gen_helper_vfrecip_s)
-TRANS(vfrecip_d, LSX, gen_vv, gen_helper_vfrecip_d)
-TRANS(vfrsqrt_s, LSX, gen_vv, gen_helper_vfrsqrt_s)
-TRANS(vfrsqrt_d, LSX, gen_vv, gen_helper_vfrsqrt_d)
+TRANS(vfsqrt_s, LSX, gen_vv_ptr, gen_helper_vfsqrt_s)
+TRANS(vfsqrt_d, LSX, gen_vv_ptr, gen_helper_vfsqrt_d)
+TRANS(vfrecip_s, LSX, gen_vv_ptr, gen_helper_vfrecip_s)
+TRANS(vfrecip_d, LSX, gen_vv_ptr, gen_helper_vfrecip_d)
+TRANS(vfrsqrt_s, LSX, gen_vv_ptr, gen_helper_vfrsqrt_s)
+TRANS(vfrsqrt_d, LSX, gen_vv_ptr, gen_helper_vfrsqrt_d)
-TRANS(vfcvtl_s_h, LSX, gen_vv, gen_helper_vfcvtl_s_h)
-TRANS(vfcvth_s_h, LSX, gen_vv, gen_helper_vfcvth_s_h)
-TRANS(vfcvtl_d_s, LSX, gen_vv, gen_helper_vfcvtl_d_s)
-TRANS(vfcvth_d_s, LSX, gen_vv, gen_helper_vfcvth_d_s)
+TRANS(vfcvtl_s_h, LSX, gen_vv_ptr, gen_helper_vfcvtl_s_h)
+TRANS(vfcvth_s_h, LSX, gen_vv_ptr, gen_helper_vfcvth_s_h)
+TRANS(vfcvtl_d_s, LSX, gen_vv_ptr, gen_helper_vfcvtl_d_s)
+TRANS(vfcvth_d_s, LSX, gen_vv_ptr, gen_helper_vfcvth_d_s)
TRANS(vfcvt_h_s, LSX, gen_vvv_ptr, gen_helper_vfcvt_h_s)
TRANS(vfcvt_s_d, LSX, gen_vvv_ptr, gen_helper_vfcvt_s_d)
-TRANS(vfrintrne_s, LSX, gen_vv, gen_helper_vfrintrne_s)
-TRANS(vfrintrne_d, LSX, gen_vv, gen_helper_vfrintrne_d)
-TRANS(vfrintrz_s, LSX, gen_vv, gen_helper_vfrintrz_s)
-TRANS(vfrintrz_d, LSX, gen_vv, gen_helper_vfrintrz_d)
-TRANS(vfrintrp_s, LSX, gen_vv, gen_helper_vfrintrp_s)
-TRANS(vfrintrp_d, LSX, gen_vv, gen_helper_vfrintrp_d)
-TRANS(vfrintrm_s, LSX, gen_vv, gen_helper_vfrintrm_s)
-TRANS(vfrintrm_d, LSX, gen_vv, gen_helper_vfrintrm_d)
-TRANS(vfrint_s, LSX, gen_vv, gen_helper_vfrint_s)
-TRANS(vfrint_d, LSX, gen_vv, gen_helper_vfrint_d)
-
-TRANS(vftintrne_w_s, LSX, gen_vv, gen_helper_vftintrne_w_s)
-TRANS(vftintrne_l_d, LSX, gen_vv, gen_helper_vftintrne_l_d)
-TRANS(vftintrz_w_s, LSX, gen_vv, gen_helper_vftintrz_w_s)
-TRANS(vftintrz_l_d, LSX, gen_vv, gen_helper_vftintrz_l_d)
-TRANS(vftintrp_w_s, LSX, gen_vv, gen_helper_vftintrp_w_s)
-TRANS(vftintrp_l_d, LSX, gen_vv, gen_helper_vftintrp_l_d)
-TRANS(vftintrm_w_s, LSX, gen_vv, gen_helper_vftintrm_w_s)
-TRANS(vftintrm_l_d, LSX, gen_vv, gen_helper_vftintrm_l_d)
-TRANS(vftint_w_s, LSX, gen_vv, gen_helper_vftint_w_s)
-TRANS(vftint_l_d, LSX, gen_vv, gen_helper_vftint_l_d)
-TRANS(vftintrz_wu_s, LSX, gen_vv, gen_helper_vftintrz_wu_s)
-TRANS(vftintrz_lu_d, LSX, gen_vv, gen_helper_vftintrz_lu_d)
-TRANS(vftint_wu_s, LSX, gen_vv, gen_helper_vftint_wu_s)
-TRANS(vftint_lu_d, LSX, gen_vv, gen_helper_vftint_lu_d)
+TRANS(vfrintrne_s, LSX, gen_vv_ptr, gen_helper_vfrintrne_s)
+TRANS(vfrintrne_d, LSX, gen_vv_ptr, gen_helper_vfrintrne_d)
+TRANS(vfrintrz_s, LSX, gen_vv_ptr, gen_helper_vfrintrz_s)
+TRANS(vfrintrz_d, LSX, gen_vv_ptr, gen_helper_vfrintrz_d)
+TRANS(vfrintrp_s, LSX, gen_vv_ptr, gen_helper_vfrintrp_s)
+TRANS(vfrintrp_d, LSX, gen_vv_ptr, gen_helper_vfrintrp_d)
+TRANS(vfrintrm_s, LSX, gen_vv_ptr, gen_helper_vfrintrm_s)
+TRANS(vfrintrm_d, LSX, gen_vv_ptr, gen_helper_vfrintrm_d)
+TRANS(vfrint_s, LSX, gen_vv_ptr, gen_helper_vfrint_s)
+TRANS(vfrint_d, LSX, gen_vv_ptr, gen_helper_vfrint_d)
+
+TRANS(vftintrne_w_s, LSX, gen_vv_ptr, gen_helper_vftintrne_w_s)
+TRANS(vftintrne_l_d, LSX, gen_vv_ptr, gen_helper_vftintrne_l_d)
+TRANS(vftintrz_w_s, LSX, gen_vv_ptr, gen_helper_vftintrz_w_s)
+TRANS(vftintrz_l_d, LSX, gen_vv_ptr, gen_helper_vftintrz_l_d)
+TRANS(vftintrp_w_s, LSX, gen_vv_ptr, gen_helper_vftintrp_w_s)
+TRANS(vftintrp_l_d, LSX, gen_vv_ptr, gen_helper_vftintrp_l_d)
+TRANS(vftintrm_w_s, LSX, gen_vv_ptr, gen_helper_vftintrm_w_s)
+TRANS(vftintrm_l_d, LSX, gen_vv_ptr, gen_helper_vftintrm_l_d)
+TRANS(vftint_w_s, LSX, gen_vv_ptr, gen_helper_vftint_w_s)
+TRANS(vftint_l_d, LSX, gen_vv_ptr, gen_helper_vftint_l_d)
+TRANS(vftintrz_wu_s, LSX, gen_vv_ptr, gen_helper_vftintrz_wu_s)
+TRANS(vftintrz_lu_d, LSX, gen_vv_ptr, gen_helper_vftintrz_lu_d)
+TRANS(vftint_wu_s, LSX, gen_vv_ptr, gen_helper_vftint_wu_s)
+TRANS(vftint_lu_d, LSX, gen_vv_ptr, gen_helper_vftint_lu_d)
TRANS(vftintrne_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrne_w_d)
TRANS(vftintrz_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrz_w_d)
TRANS(vftintrp_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrp_w_d)
TRANS(vftintrm_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrm_w_d)
TRANS(vftint_w_d, LSX, gen_vvv_ptr, gen_helper_vftint_w_d)
-TRANS(vftintrnel_l_s, LSX, gen_vv, gen_helper_vftintrnel_l_s)
-TRANS(vftintrneh_l_s, LSX, gen_vv, gen_helper_vftintrneh_l_s)
-TRANS(vftintrzl_l_s, LSX, gen_vv, gen_helper_vftintrzl_l_s)
-TRANS(vftintrzh_l_s, LSX, gen_vv, gen_helper_vftintrzh_l_s)
-TRANS(vftintrpl_l_s, LSX, gen_vv, gen_helper_vftintrpl_l_s)
-TRANS(vftintrph_l_s, LSX, gen_vv, gen_helper_vftintrph_l_s)
-TRANS(vftintrml_l_s, LSX, gen_vv, gen_helper_vftintrml_l_s)
-TRANS(vftintrmh_l_s, LSX, gen_vv, gen_helper_vftintrmh_l_s)
-TRANS(vftintl_l_s, LSX, gen_vv, gen_helper_vftintl_l_s)
-TRANS(vftinth_l_s, LSX, gen_vv, gen_helper_vftinth_l_s)
-
-TRANS(vffint_s_w, LSX, gen_vv, gen_helper_vffint_s_w)
-TRANS(vffint_d_l, LSX, gen_vv, gen_helper_vffint_d_l)
-TRANS(vffint_s_wu, LSX, gen_vv, gen_helper_vffint_s_wu)
-TRANS(vffint_d_lu, LSX, gen_vv, gen_helper_vffint_d_lu)
-TRANS(vffintl_d_w, LSX, gen_vv, gen_helper_vffintl_d_w)
-TRANS(vffinth_d_w, LSX, gen_vv, gen_helper_vffinth_d_w)
+TRANS(vftintrnel_l_s, LSX, gen_vv_ptr, gen_helper_vftintrnel_l_s)
+TRANS(vftintrneh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrneh_l_s)
+TRANS(vftintrzl_l_s, LSX, gen_vv_ptr, gen_helper_vftintrzl_l_s)
+TRANS(vftintrzh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrzh_l_s)
+TRANS(vftintrpl_l_s, LSX, gen_vv_ptr, gen_helper_vftintrpl_l_s)
+TRANS(vftintrph_l_s, LSX, gen_vv_ptr, gen_helper_vftintrph_l_s)
+TRANS(vftintrml_l_s, LSX, gen_vv_ptr, gen_helper_vftintrml_l_s)
+TRANS(vftintrmh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrmh_l_s)
+TRANS(vftintl_l_s, LSX, gen_vv_ptr, gen_helper_vftintl_l_s)
+TRANS(vftinth_l_s, LSX, gen_vv_ptr, gen_helper_vftinth_l_s)
+
+TRANS(vffint_s_w, LSX, gen_vv_ptr, gen_helper_vffint_s_w)
+TRANS(vffint_d_l, LSX, gen_vv_ptr, gen_helper_vffint_d_l)
+TRANS(vffint_s_wu, LSX, gen_vv_ptr, gen_helper_vffint_s_wu)
+TRANS(vffint_d_lu, LSX, gen_vv_ptr, gen_helper_vffint_d_lu)
+TRANS(vffintl_d_w, LSX, gen_vv_ptr, gen_helper_vffintl_d_w)
+TRANS(vffinth_d_w, LSX, gen_vv_ptr, gen_helper_vffinth_d_w)
TRANS(vffint_s_l, LSX, gen_vvv_ptr, gen_helper_vffint_s_l)
static bool do_cmp(DisasContext *ctx, arg_vvv *a, MemOp mop, TCGCond cond)
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 08/57] target/loongarch: Use gen_helper_gvec_2 for 2OP vector instructions
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (6 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 07/57] target/loongarch: Use gen_helper_gvec_2_ptr for 2OP + env " Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-10 1:01 ` Richard Henderson
2023-09-07 8:31 ` [PATCH RESEND v5 09/57] target/loongarch: Use gen_helper_gvec_2i for 2OP + imm " Song Gao
` (48 subsequent siblings)
56 siblings, 1 reply; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
Signed-off-by: Song Gao <gaosong@loongson.cn>
---
target/loongarch/helper.h | 58 ++++-----
target/loongarch/vec_helper.c | 124 ++++++++++----------
target/loongarch/insn_trans/trans_vec.c.inc | 16 ++-
3 files changed, 101 insertions(+), 97 deletions(-)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index 0752cc7212..523591035d 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -331,37 +331,37 @@ DEF_HELPER_FLAGS_4(vsat_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(vsat_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(vsat_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
-DEF_HELPER_3(vexth_h_b, void, env, i32, i32)
-DEF_HELPER_3(vexth_w_h, void, env, i32, i32)
-DEF_HELPER_3(vexth_d_w, void, env, i32, i32)
-DEF_HELPER_3(vexth_q_d, void, env, i32, i32)
-DEF_HELPER_3(vexth_hu_bu, void, env, i32, i32)
-DEF_HELPER_3(vexth_wu_hu, void, env, i32, i32)
-DEF_HELPER_3(vexth_du_wu, void, env, i32, i32)
-DEF_HELPER_3(vexth_qu_du, void, env, i32, i32)
+DEF_HELPER_FLAGS_3(vexth_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vexth_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vexth_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vexth_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vexth_hu_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vexth_wu_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vexth_du_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vexth_qu_du, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vsigncov_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vsigncov_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vsigncov_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vsigncov_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_3(vmskltz_b, void, env, i32, i32)
-DEF_HELPER_3(vmskltz_h, void, env, i32, i32)
-DEF_HELPER_3(vmskltz_w, void, env, i32, i32)
-DEF_HELPER_3(vmskltz_d, void, env, i32, i32)
-DEF_HELPER_3(vmskgez_b, void, env, i32, i32)
-DEF_HELPER_3(vmsknz_b, void, env, i32,i32)
+DEF_HELPER_FLAGS_3(vmskltz_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vmskltz_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vmskltz_w, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vmskltz_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vmskgez_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vmsknz_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vnori_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_4(vsllwil_h_b, void, env, i32, i32, i32)
DEF_HELPER_4(vsllwil_w_h, void, env, i32, i32, i32)
DEF_HELPER_4(vsllwil_d_w, void, env, i32, i32, i32)
-DEF_HELPER_3(vextl_q_d, void, env, i32, i32)
+DEF_HELPER_FLAGS_3(vextl_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_4(vsllwil_hu_bu, void, env, i32, i32, i32)
DEF_HELPER_4(vsllwil_wu_hu, void, env, i32, i32, i32)
DEF_HELPER_4(vsllwil_du_wu, void, env, i32, i32, i32)
-DEF_HELPER_3(vextl_qu_du, void, env, i32, i32)
+DEF_HELPER_FLAGS_3(vextl_qu_du, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vsrlr_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vsrlr_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
@@ -473,19 +473,19 @@ DEF_HELPER_4(vssrarni_hu_w, void, env, i32, i32, i32)
DEF_HELPER_4(vssrarni_wu_d, void, env, i32, i32, i32)
DEF_HELPER_4(vssrarni_du_q, void, env, i32, i32, i32)
-DEF_HELPER_3(vclo_b, void, env, i32, i32)
-DEF_HELPER_3(vclo_h, void, env, i32, i32)
-DEF_HELPER_3(vclo_w, void, env, i32, i32)
-DEF_HELPER_3(vclo_d, void, env, i32, i32)
-DEF_HELPER_3(vclz_b, void, env, i32, i32)
-DEF_HELPER_3(vclz_h, void, env, i32, i32)
-DEF_HELPER_3(vclz_w, void, env, i32, i32)
-DEF_HELPER_3(vclz_d, void, env, i32, i32)
-
-DEF_HELPER_3(vpcnt_b, void, env, i32, i32)
-DEF_HELPER_3(vpcnt_h, void, env, i32, i32)
-DEF_HELPER_3(vpcnt_w, void, env, i32, i32)
-DEF_HELPER_3(vpcnt_d, void, env, i32, i32)
+DEF_HELPER_FLAGS_3(vclo_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vclo_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vclo_w, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vclo_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vclz_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vclz_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vclz_w, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vclz_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(vpcnt_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vpcnt_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vpcnt_w, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vpcnt_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vbitclr_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vbitclr_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index 2898ae06ce..fd38b47c28 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -625,30 +625,30 @@ VSAT_U(vsat_hu, 16, UH)
VSAT_U(vsat_wu, 32, UW)
VSAT_U(vsat_du, 64, UD)
-#define VEXTH(NAME, BIT, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
-{ \
- int i; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E1(i) = Vj->E2(i + LSX_LEN/BIT); \
- } \
+#define VEXTH(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ \
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
+ Vd->E1(i) = Vj->E2(i + LSX_LEN/BIT); \
+ } \
}
-void HELPER(vexth_q_d)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
+void HELPER(vexth_q_d)(void *vd, void *vj, uint32_t desc)
{
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
Vd->Q(0) = int128_makes64(Vj->D(1));
}
-void HELPER(vexth_qu_du)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
+void HELPER(vexth_qu_du)(void *vd, void *vj, uint32_t desc)
{
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
Vd->Q(0) = int128_make64((uint64_t)Vj->D(1));
}
@@ -677,11 +677,11 @@ static uint64_t do_vmskltz_b(int64_t val)
return c >> 56;
}
-void HELPER(vmskltz_b)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
+void HELPER(vmskltz_b)(void *vd, void *vj, uint32_t desc)
{
uint16_t temp = 0;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
temp = do_vmskltz_b(Vj->D(0));
temp |= (do_vmskltz_b(Vj->D(1)) << 8);
@@ -698,11 +698,11 @@ static uint64_t do_vmskltz_h(int64_t val)
return c >> 60;
}
-void HELPER(vmskltz_h)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
+void HELPER(vmskltz_h)(void *vd, void *vj, uint32_t desc)
{
uint16_t temp = 0;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
temp = do_vmskltz_h(Vj->D(0));
temp |= (do_vmskltz_h(Vj->D(1)) << 4);
@@ -718,11 +718,11 @@ static uint64_t do_vmskltz_w(int64_t val)
return c >> 62;
}
-void HELPER(vmskltz_w)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
+void HELPER(vmskltz_w)(void *vd, void *vj, uint32_t desc)
{
uint16_t temp = 0;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
temp = do_vmskltz_w(Vj->D(0));
temp |= (do_vmskltz_w(Vj->D(1)) << 2);
@@ -734,11 +734,11 @@ static uint64_t do_vmskltz_d(int64_t val)
{
return (uint64_t)val >> 63;
}
-void HELPER(vmskltz_d)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
+void HELPER(vmskltz_d)(void *vd, void *vj, uint32_t desc)
{
uint16_t temp = 0;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
temp = do_vmskltz_d(Vj->D(0));
temp |= (do_vmskltz_d(Vj->D(1)) << 1);
@@ -746,11 +746,11 @@ void HELPER(vmskltz_d)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
Vd->D(1) = 0;
}
-void HELPER(vmskgez_b)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
+void HELPER(vmskgez_b)(void *vd, void *vj, uint32_t desc)
{
uint16_t temp = 0;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
temp = do_vmskltz_b(Vj->D(0));
temp |= (do_vmskltz_b(Vj->D(1)) << 8);
@@ -768,11 +768,11 @@ static uint64_t do_vmskez_b(uint64_t a)
return c >> 56;
}
-void HELPER(vmsknz_b)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
+void HELPER(vmsknz_b)(void *vd, void *vj, uint32_t desc)
{
uint16_t temp = 0;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
temp = do_vmskez_b(Vj->D(0));
temp |= (do_vmskez_b(Vj->D(1)) << 8);
@@ -809,18 +809,18 @@ void HELPER(NAME)(CPULoongArchState *env, \
*Vd = temp; \
}
-void HELPER(vextl_q_d)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
+void HELPER(vextl_q_d)(void *vd, void *vj, uint32_t desc)
{
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
Vd->Q(0) = int128_makes64(Vj->D(0));
}
-void HELPER(vextl_qu_du)(CPULoongArchState *env, uint32_t vd, uint32_t vj)
+void HELPER(vextl_qu_du)(void *vd, void *vj, uint32_t desc)
{
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
Vd->Q(0) = int128_make64(Vj->D(0));
}
@@ -1899,17 +1899,17 @@ VSSRARNUI(vssrarni_bu_h, 16, B, H)
VSSRARNUI(vssrarni_hu_w, 32, H, W)
VSSRARNUI(vssrarni_wu_d, 64, W, D)
-#define DO_2OP(NAME, BIT, E, DO_OP) \
-void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
-{ \
- int i; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) \
- { \
- Vd->E(i) = DO_OP(Vj->E(i)); \
- } \
+#define DO_2OP(NAME, BIT, E, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ \
+ for (i = 0; i < LSX_LEN/BIT; i++) \
+ { \
+ Vd->E(i) = DO_OP(Vj->E(i)); \
+ } \
}
#define DO_CLO_B(N) (clz32(~N & 0xff) - 24)
@@ -1930,17 +1930,17 @@ DO_2OP(vclz_h, 16, UH, DO_CLZ_H)
DO_2OP(vclz_w, 32, UW, DO_CLZ_W)
DO_2OP(vclz_d, 64, UD, DO_CLZ_D)
-#define VPCNT(NAME, BIT, E, FN) \
-void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \
-{ \
- int i; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) \
- { \
- Vd->E(i) = FN(Vj->E(i)); \
- } \
+#define VPCNT(NAME, BIT, E, FN) \
+void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ \
+ for (i = 0; i < LSX_LEN/BIT; i++) \
+ { \
+ Vd->E(i) = FN(Vj->E(i)); \
+ } \
}
VPCNT(vpcnt_b, 8, UB, ctpop8)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index 11d7158809..4c3d206df1 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -103,15 +103,19 @@ static bool gen_vv_ptr(DisasContext *ctx, arg_vv *a,
return gen_vv_ptr_vl(ctx, a, 16, fn);
}
-static bool gen_vv(DisasContext *ctx, arg_vv *a,
- void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32))
+static bool gen_vv_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz,
+ gen_helper_gvec_2 *fn)
{
- TCGv_i32 vd = tcg_constant_i32(a->vd);
- TCGv_i32 vj = tcg_constant_i32(a->vj);
+ tcg_gen_gvec_2_ool(vec_full_offset(a->vd),
+ vec_full_offset(a->vj),
+ oprsz, ctx->vl / 8, oprsz, fn);
+ return true;
+}
+static bool gen_vv(DisasContext *ctx, arg_vv *a, gen_helper_gvec_2 *fn)
+{
CHECK_SXE;
- func(cpu_env, vd, vj);
- return true;
+ return gen_vv_vl(ctx, a, 16, fn);
}
static bool gen_vv_i(DisasContext *ctx, arg_vv_i *a,
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 09/57] target/loongarch: Use gen_helper_gvec_2i for 2OP + imm vector instructions
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (7 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 08/57] target/loongarch: Use gen_helper_gvec_2 for 2OP " Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-10 1:03 ` Richard Henderson
2023-09-07 8:31 ` [PATCH RESEND v5 10/57] target/loongarch: Replace CHECK_SXE to check_vec(ctx, 16) Song Gao
` (47 subsequent siblings)
56 siblings, 1 reply; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
Signed-off-by: Song Gao <gaosong@loongson.cn>
---
target/loongarch/helper.h | 146 +++----
target/loongarch/vec_helper.c | 445 +++++++++-----------
target/loongarch/insn_trans/trans_vec.c.inc | 18 +-
3 files changed, 291 insertions(+), 318 deletions(-)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index 523591035d..1abd9e1410 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -354,32 +354,32 @@ DEF_HELPER_FLAGS_3(vmsknz_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vnori_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
-DEF_HELPER_4(vsllwil_h_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vsllwil_w_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsllwil_d_w, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vsllwil_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsllwil_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsllwil_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_3(vextl_q_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_4(vsllwil_hu_bu, void, env, i32, i32, i32)
-DEF_HELPER_4(vsllwil_wu_hu, void, env, i32, i32, i32)
-DEF_HELPER_4(vsllwil_du_wu, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vsllwil_hu_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsllwil_wu_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsllwil_du_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_3(vextl_qu_du, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vsrlr_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vsrlr_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vsrlr_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vsrlr_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_4(vsrlri_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlri_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlri_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlri_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vsrlri_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrlri_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrlri_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrlri_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(vsrar_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vsrar_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vsrar_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vsrar_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_4(vsrari_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrari_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrari_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrari_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vsrari_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrari_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrari_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrari_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(vsrln_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vsrln_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
@@ -388,14 +388,14 @@ DEF_HELPER_FLAGS_4(vsran_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vsran_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vsran_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_4(vsrlni_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlni_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlni_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlni_d_q, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrani_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrani_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrani_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrani_d_q, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vsrlni_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrlni_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrlni_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrlni_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrani_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrani_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrani_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrani_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(vsrlrn_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vsrlrn_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
@@ -404,14 +404,14 @@ DEF_HELPER_FLAGS_4(vsrarn_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vsrarn_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vsrarn_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_4(vsrlrni_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlrni_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlrni_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrlrni_d_q, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrarni_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrarni_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrarni_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vsrarni_d_q, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vsrlrni_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrlrni_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrlrni_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrlrni_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrarni_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrarni_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrarni_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vsrarni_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(vssrln_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vssrln_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
@@ -426,22 +426,22 @@ DEF_HELPER_FLAGS_4(vssran_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vssran_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vssran_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_4(vssrlni_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlni_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlni_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlni_d_q, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrani_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrani_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrani_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrani_d_q, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlni_bu_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlni_hu_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlni_wu_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlni_du_q, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrani_bu_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrani_hu_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrani_wu_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrani_du_q, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vssrlni_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlni_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlni_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlni_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrani_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrani_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrani_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrani_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlni_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlni_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlni_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlni_du_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrani_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrani_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrani_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrani_du_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(vssrlrn_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vssrlrn_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
@@ -456,22 +456,22 @@ DEF_HELPER_FLAGS_4(vssrarn_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vssrarn_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vssrarn_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_4(vssrlrni_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrni_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrni_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrni_d_q, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarni_b_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarni_h_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarni_w_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarni_d_q, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrni_bu_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrni_hu_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrni_wu_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrlrni_du_q, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarni_bu_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarni_hu_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarni_wu_d, void, env, i32, i32, i32)
-DEF_HELPER_4(vssrarni_du_q, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vssrlrni_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlrni_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlrni_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlrni_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrarni_b_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrarni_h_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrarni_w_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrarni_d_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlrni_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlrni_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlrni_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrlrni_du_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrarni_bu_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrarni_hu_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrarni_wu_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vssrarni_du_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_3(vclo_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(vclo_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
@@ -516,8 +516,8 @@ DEF_HELPER_FLAGS_4(vbitrevi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(vfrstp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vfrstp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_4(vfrstpi_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vfrstpi_h, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vfrstpi_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vfrstpi_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_5(vfadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_FLAGS_5(vfadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, env, i32)
@@ -686,14 +686,14 @@ DEF_HELPER_FLAGS_5(vshuf_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vshuf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vshuf_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vshuf_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_4(vshuf4i_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vshuf4i_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vshuf4i_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vshuf4i_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vshuf4i_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vshuf4i_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vshuf4i_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vshuf4i_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
-DEF_HELPER_4(vpermi_w, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vpermi_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
-DEF_HELPER_4(vextrins_b, void, env, i32, i32, i32)
-DEF_HELPER_4(vextrins_h, void, env, i32, i32, i32)
-DEF_HELPER_4(vextrins_w, void, env, i32, i32, i32)
-DEF_HELPER_4(vextrins_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(vextrins_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vextrins_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vextrins_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vextrins_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index fd38b47c28..4e10957b90 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -791,22 +791,21 @@ void HELPER(vnori_b)(void *vd, void *vj, uint64_t imm, uint32_t v)
}
}
-#define VSLLWIL(NAME, BIT, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t imm) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- typedef __typeof(temp.E1(0)) TD; \
- \
- temp.D(0) = 0; \
- temp.D(1) = 0; \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- temp.E1(i) = (TD)Vj->E2(i) << (imm % BIT); \
- } \
- *Vd = temp; \
+#define VSLLWIL(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i; \
+ VReg temp; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ typedef __typeof(temp.E1(0)) TD; \
+ \
+ temp.D(0) = 0; \
+ temp.D(1) = 0; \
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
+ temp.E1(i) = (TD)Vj->E2(i) << (imm % BIT); \
+ } \
+ *Vd = temp; \
}
void HELPER(vextl_q_d)(void *vd, void *vj, uint32_t desc)
@@ -865,17 +864,16 @@ VSRLR(vsrlr_h, 16, uint16_t, H)
VSRLR(vsrlr_w, 32, uint32_t, W)
VSRLR(vsrlr_d, 64, uint64_t, D)
-#define VSRLRI(NAME, BIT, E) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t imm) \
-{ \
- int i; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), imm); \
- } \
+#define VSRLRI(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ \
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
+ Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), imm); \
+ } \
}
VSRLRI(vsrlri_b, 8, B)
@@ -916,17 +914,16 @@ VSRAR(vsrar_h, 16, uint16_t, H)
VSRAR(vsrar_w, 32, uint32_t, W)
VSRAR(vsrar_d, 64, uint64_t, D)
-#define VSRARI(NAME, BIT, E) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t imm) \
-{ \
- int i; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E(i) = do_vsrar_ ## E(Vj->E(i), imm); \
- } \
+#define VSRARI(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ \
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
+ Vd->E(i) = do_vsrar_ ## E(Vj->E(i), imm); \
+ } \
}
VSRARI(vsrari_b, 8, B)
@@ -972,31 +969,29 @@ VSRAN(vsran_b_h, 16, uint16_t, B, H)
VSRAN(vsran_h_w, 32, uint32_t, H, W)
VSRAN(vsran_w_d, 64, uint64_t, W, D)
-#define VSRLNI(NAME, BIT, T, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t imm) \
-{ \
- int i, max; \
- VReg temp; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- \
- temp.D(0) = 0; \
- temp.D(1) = 0; \
- max = LSX_LEN/BIT; \
- for (i = 0; i < max; i++) { \
- temp.E1(i) = R_SHIFT((T)Vj->E2(i), imm); \
- temp.E1(i + max) = R_SHIFT((T)Vd->E2(i), imm); \
- } \
- *Vd = temp; \
-}
-
-void HELPER(vsrlni_d_q)(CPULoongArchState *env,
- uint32_t vd, uint32_t vj, uint32_t imm)
+#define VSRLNI(NAME, BIT, T, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, max; \
+ VReg temp; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ \
+ temp.D(0) = 0; \
+ temp.D(1) = 0; \
+ max = LSX_LEN/BIT; \
+ for (i = 0; i < max; i++) { \
+ temp.E1(i) = R_SHIFT((T)Vj->E2(i), imm); \
+ temp.E1(i + max) = R_SHIFT((T)Vd->E2(i), imm); \
+ } \
+ *Vd = temp; \
+}
+
+void HELPER(vsrlni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
VReg temp;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
temp.D(0) = 0;
temp.D(1) = 0;
@@ -1009,31 +1004,29 @@ VSRLNI(vsrlni_b_h, 16, uint16_t, B, H)
VSRLNI(vsrlni_h_w, 32, uint32_t, H, W)
VSRLNI(vsrlni_w_d, 64, uint64_t, W, D)
-#define VSRANI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t imm) \
-{ \
- int i, max; \
- VReg temp; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- \
- temp.D(0) = 0; \
- temp.D(1) = 0; \
- max = LSX_LEN/BIT; \
- for (i = 0; i < max; i++) { \
- temp.E1(i) = R_SHIFT(Vj->E2(i), imm); \
- temp.E1(i + max) = R_SHIFT(Vd->E2(i), imm); \
- } \
- *Vd = temp; \
-}
-
-void HELPER(vsrani_d_q)(CPULoongArchState *env,
- uint32_t vd, uint32_t vj, uint32_t imm)
+#define VSRANI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, max; \
+ VReg temp; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ \
+ temp.D(0) = 0; \
+ temp.D(1) = 0; \
+ max = LSX_LEN/BIT; \
+ for (i = 0; i < max; i++) { \
+ temp.E1(i) = R_SHIFT(Vj->E2(i), imm); \
+ temp.E1(i + max) = R_SHIFT(Vd->E2(i), imm); \
+ } \
+ *Vd = temp; \
+}
+
+void HELPER(vsrani_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
VReg temp;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
temp.D(0) = 0;
temp.D(1) = 0;
@@ -1082,31 +1075,29 @@ VSRARN(vsrarn_b_h, 16, uint8_t, B, H)
VSRARN(vsrarn_h_w, 32, uint16_t, H, W)
VSRARN(vsrarn_w_d, 64, uint32_t, W, D)
-#define VSRLRNI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t imm) \
-{ \
- int i, max; \
- VReg temp; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- \
- temp.D(0) = 0; \
- temp.D(1) = 0; \
- max = LSX_LEN/BIT; \
- for (i = 0; i < max; i++) { \
- temp.E1(i) = do_vsrlr_ ## E2(Vj->E2(i), imm); \
- temp.E1(i + max) = do_vsrlr_ ## E2(Vd->E2(i), imm); \
- } \
- *Vd = temp; \
-}
-
-void HELPER(vsrlrni_d_q)(CPULoongArchState *env,
- uint32_t vd, uint32_t vj, uint32_t imm)
+#define VSRLRNI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, max; \
+ VReg temp; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ \
+ temp.D(0) = 0; \
+ temp.D(1) = 0; \
+ max = LSX_LEN/BIT; \
+ for (i = 0; i < max; i++) { \
+ temp.E1(i) = do_vsrlr_ ## E2(Vj->E2(i), imm); \
+ temp.E1(i + max) = do_vsrlr_ ## E2(Vd->E2(i), imm); \
+ } \
+ *Vd = temp; \
+}
+
+void HELPER(vsrlrni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
VReg temp;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
Int128 r1, r2;
if (imm == 0) {
@@ -1126,31 +1117,29 @@ VSRLRNI(vsrlrni_b_h, 16, B, H)
VSRLRNI(vsrlrni_h_w, 32, H, W)
VSRLRNI(vsrlrni_w_d, 64, W, D)
-#define VSRARNI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t imm) \
-{ \
- int i, max; \
- VReg temp; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- \
- temp.D(0) = 0; \
- temp.D(1) = 0; \
- max = LSX_LEN/BIT; \
- for (i = 0; i < max; i++) { \
- temp.E1(i) = do_vsrar_ ## E2(Vj->E2(i), imm); \
- temp.E1(i + max) = do_vsrar_ ## E2(Vd->E2(i), imm); \
- } \
- *Vd = temp; \
-}
-
-void HELPER(vsrarni_d_q)(CPULoongArchState *env,
- uint32_t vd, uint32_t vj, uint32_t imm)
+#define VSRARNI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, max; \
+ VReg temp; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ \
+ temp.D(0) = 0; \
+ temp.D(1) = 0; \
+ max = LSX_LEN/BIT; \
+ for (i = 0; i < max; i++) { \
+ temp.E1(i) = do_vsrar_ ## E2(Vj->E2(i), imm); \
+ temp.E1(i + max) = do_vsrar_ ## E2(Vd->E2(i), imm); \
+ } \
+ *Vd = temp; \
+}
+
+void HELPER(vsrarni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
VReg temp;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
Int128 r1, r2;
if (imm == 0) {
@@ -1336,13 +1325,12 @@ VSSRANU(vssran_hu_w, 32, uint32_t, H, W)
VSSRANU(vssran_wu_d, 64, uint64_t, W, D)
#define VSSRLNI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t imm) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
{ \
int i; \
VReg temp; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
\
for (i = 0; i < LSX_LEN/BIT; i++) { \
temp.E1(i) = do_ssrlns_ ## E1(Vj->E2(i), imm, BIT/2 -1); \
@@ -1351,12 +1339,11 @@ void HELPER(NAME)(CPULoongArchState *env, \
*Vd = temp; \
}
-void HELPER(vssrlni_d_q)(CPULoongArchState *env,
- uint32_t vd, uint32_t vj, uint32_t imm)
+void HELPER(vssrlni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
Int128 shft_res1, shft_res2, mask;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
if (imm == 0) {
shft_res1 = Vj->Q(0);
@@ -1385,13 +1372,12 @@ VSSRLNI(vssrlni_h_w, 32, H, W)
VSSRLNI(vssrlni_w_d, 64, W, D)
#define VSSRANI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t imm) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
{ \
int i; \
VReg temp; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
\
for (i = 0; i < LSX_LEN/BIT; i++) { \
temp.E1(i) = do_ssrans_ ## E1(Vj->E2(i), imm, BIT/2 -1); \
@@ -1400,12 +1386,11 @@ void HELPER(NAME)(CPULoongArchState *env, \
*Vd = temp; \
}
-void HELPER(vssrani_d_q)(CPULoongArchState *env,
- uint32_t vd, uint32_t vj, uint32_t imm)
+void HELPER(vssrani_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
Int128 shft_res1, shft_res2, mask, min;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
if (imm == 0) {
shft_res1 = Vj->Q(0);
@@ -1439,13 +1424,12 @@ VSSRANI(vssrani_h_w, 32, H, W)
VSSRANI(vssrani_w_d, 64, W, D)
#define VSSRLNUI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t imm) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
{ \
int i; \
VReg temp; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
\
for (i = 0; i < LSX_LEN/BIT; i++) { \
temp.E1(i) = do_ssrlnu_ ## E1(Vj->E2(i), imm, BIT/2); \
@@ -1454,12 +1438,11 @@ void HELPER(NAME)(CPULoongArchState *env, \
*Vd = temp; \
}
-void HELPER(vssrlni_du_q)(CPULoongArchState *env,
- uint32_t vd, uint32_t vj, uint32_t imm)
+void HELPER(vssrlni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
Int128 shft_res1, shft_res2, mask;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
if (imm == 0) {
shft_res1 = Vj->Q(0);
@@ -1488,13 +1471,12 @@ VSSRLNUI(vssrlni_hu_w, 32, H, W)
VSSRLNUI(vssrlni_wu_d, 64, W, D)
#define VSSRANUI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t imm) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
{ \
int i; \
VReg temp; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
\
for (i = 0; i < LSX_LEN/BIT; i++) { \
temp.E1(i) = do_ssranu_ ## E1(Vj->E2(i), imm, BIT/2); \
@@ -1503,12 +1485,11 @@ void HELPER(NAME)(CPULoongArchState *env, \
*Vd = temp; \
}
-void HELPER(vssrani_du_q)(CPULoongArchState *env,
- uint32_t vd, uint32_t vj, uint32_t imm)
+void HELPER(vssrani_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
Int128 shft_res1, shft_res2, mask;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
if (imm == 0) {
shft_res1 = Vj->Q(0);
@@ -1701,13 +1682,12 @@ VSSRARNU(vssrarn_hu_w, 32, uint32_t, H, W)
VSSRARNU(vssrarn_wu_d, 64, uint64_t, W, D)
#define VSSRLRNI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t imm) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
{ \
int i; \
VReg temp; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
\
for (i = 0; i < LSX_LEN/BIT; i++) { \
temp.E1(i) = do_ssrlrns_ ## E1(Vj->E2(i), imm, BIT/2 -1); \
@@ -1717,12 +1697,11 @@ void HELPER(NAME)(CPULoongArchState *env, \
}
#define VSSRLRNI_Q(NAME, sh) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t imm) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
{ \
Int128 shft_res1, shft_res2, mask, r1, r2; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
\
if (imm == 0) { \
shft_res1 = Vj->Q(0); \
@@ -1756,13 +1735,12 @@ VSSRLRNI(vssrlrni_w_d, 64, W, D)
VSSRLRNI_Q(vssrlrni_d_q, 63)
#define VSSRARNI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t imm) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
{ \
int i; \
VReg temp; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
\
for (i = 0; i < LSX_LEN/BIT; i++) { \
temp.E1(i) = do_ssrarns_ ## E1(Vj->E2(i), imm, BIT/2 -1); \
@@ -1771,12 +1749,11 @@ void HELPER(NAME)(CPULoongArchState *env,
*Vd = temp; \
}
-void HELPER(vssrarni_d_q)(CPULoongArchState *env,
- uint32_t vd, uint32_t vj, uint32_t imm)
+void HELPER(vssrarni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
Int128 shft_res1, shft_res2, mask1, mask2, r1, r2;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
if (imm == 0) {
shft_res1 = Vj->Q(0);
@@ -1814,13 +1791,12 @@ VSSRARNI(vssrarni_h_w, 32, H, W)
VSSRARNI(vssrarni_w_d, 64, W, D)
#define VSSRLRNUI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t imm) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
{ \
int i; \
VReg temp; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
\
for (i = 0; i < LSX_LEN/BIT; i++) { \
temp.E1(i) = do_ssrlrnu_ ## E1(Vj->E2(i), imm, BIT/2); \
@@ -1835,13 +1811,12 @@ VSSRLRNUI(vssrlrni_wu_d, 64, W, D)
VSSRLRNI_Q(vssrlrni_du_q, 64)
#define VSSRARNUI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t imm) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
{ \
int i; \
VReg temp; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
\
for (i = 0; i < LSX_LEN/BIT; i++) { \
temp.E1(i) = do_ssrarnu_ ## E1(Vj->E2(i), imm, BIT/2); \
@@ -1850,12 +1825,11 @@ void HELPER(NAME)(CPULoongArchState *env, \
*Vd = temp; \
}
-void HELPER(vssrarni_du_q)(CPULoongArchState *env,
- uint32_t vd, uint32_t vj, uint32_t imm)
+void HELPER(vssrarni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
Int128 shft_res1, shft_res2, mask1, mask2, r1, r2;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
if (imm == 0) {
shft_res1 = Vj->Q(0);
@@ -2023,21 +1997,20 @@ void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
VFRSTP(vfrstp_b, 8, 0xf, B)
VFRSTP(vfrstp_h, 16, 0x7, H)
-#define VFRSTPI(NAME, BIT, E) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t imm) \
-{ \
- int i, m; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- if (Vj->E(i) < 0) { \
- break; \
- } \
- } \
- m = imm % (LSX_LEN/BIT); \
- Vd->E(m) = i; \
+#define VFRSTPI(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, m; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ \
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
+ if (Vj->E(i) < 0) { \
+ break; \
+ } \
+ } \
+ m = imm % (LSX_LEN/BIT); \
+ Vd->E(m) = i; \
}
VFRSTPI(vfrstpi_b, 8, B)
@@ -2923,31 +2896,29 @@ VSHUF(vshuf_h, 16, H)
VSHUF(vshuf_w, 32, W)
VSHUF(vshuf_d, 64, D)
-#define VSHUF4I(NAME, BIT, E) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t imm) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- temp.E(i) = Vj->E(((i) & 0xfc) + (((imm) >> \
- (2 * ((i) & 0x03))) & 0x03)); \
- } \
- *Vd = temp; \
+#define VSHUF4I(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i; \
+ VReg temp; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ \
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
+ temp.E(i) = Vj->E(((i) & 0xfc) + (((imm) >> \
+ (2 * ((i) & 0x03))) & 0x03)); \
+ } \
+ *Vd = temp; \
}
VSHUF4I(vshuf4i_b, 8, B)
VSHUF4I(vshuf4i_h, 16, H)
VSHUF4I(vshuf4i_w, 32, W)
-void HELPER(vshuf4i_d)(CPULoongArchState *env,
- uint32_t vd, uint32_t vj, uint32_t imm)
+void HELPER(vshuf4i_d)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
VReg temp;
temp.D(0) = (imm & 2 ? Vj : Vd)->D(imm & 1);
@@ -2955,12 +2926,11 @@ void HELPER(vshuf4i_d)(CPULoongArchState *env,
*Vd = temp;
}
-void HELPER(vpermi_w)(CPULoongArchState *env,
- uint32_t vd, uint32_t vj, uint32_t imm)
+void HELPER(vpermi_w)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
VReg temp;
- VReg *Vd = &(env->fpr[vd].vreg);
- VReg *Vj = &(env->fpr[vj].vreg);
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
temp.W(0) = Vj->W(imm & 0x3);
temp.W(1) = Vj->W((imm >> 2) & 0x3);
@@ -2969,17 +2939,16 @@ void HELPER(vpermi_w)(CPULoongArchState *env,
*Vd = temp;
}
-#define VEXTRINS(NAME, BIT, E, MASK) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t vd, uint32_t vj, uint32_t imm) \
-{ \
- int ins, extr; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- \
- ins = (imm >> 4) & MASK; \
- extr = imm & MASK; \
- Vd->E(ins) = Vj->E(extr); \
+#define VEXTRINS(NAME, BIT, E, MASK) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int ins, extr; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ \
+ ins = (imm >> 4) & MASK; \
+ extr = imm & MASK; \
+ Vd->E(ins) = Vj->E(extr); \
}
VEXTRINS(vextrins_b, 8, B, 0xf)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index 4c3d206df1..41c2996e90 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -118,16 +118,20 @@ static bool gen_vv(DisasContext *ctx, arg_vv *a, gen_helper_gvec_2 *fn)
return gen_vv_vl(ctx, a, 16, fn);
}
-static bool gen_vv_i(DisasContext *ctx, arg_vv_i *a,
- void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
+static bool gen_vv_i_vl(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz,
+ gen_helper_gvec_2i *fn)
{
- TCGv_i32 vd = tcg_constant_i32(a->vd);
- TCGv_i32 vj = tcg_constant_i32(a->vj);
- TCGv_i32 imm = tcg_constant_i32(a->imm);
+ tcg_gen_gvec_2i_ool(vec_full_offset(a->vd),
+ vec_full_offset(a->vj),
+ tcg_constant_i64(a->imm),
+ oprsz, ctx->vl / 8, oprsz, fn);
+ return true;
+}
+static bool gen_vv_i(DisasContext *ctx, arg_vv_i *a, gen_helper_gvec_2i *fn)
+{
CHECK_SXE;
- func(cpu_env, vd, vj, imm);
- return true;
+ return gen_vv_i_vl(ctx, a, 16, fn);
}
static bool gen_cv(DisasContext *ctx, arg_cv *a,
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 10/57] target/loongarch: Replace CHECK_SXE to check_vec(ctx, 16)
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (8 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 09/57] target/loongarch: Use gen_helper_gvec_2i for 2OP + imm " Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-10 1:04 ` Richard Henderson
2023-09-07 8:31 ` [PATCH RESEND v5 11/57] target/loongarch: Add LASX data support Song Gao
` (46 subsequent siblings)
56 siblings, 1 reply; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
Introduce a new function check_vec to replace CHECK_SXE.
Signed-off-by: Song Gao <gaosong@loongson.cn>
---
target/loongarch/insn_trans/trans_vec.c.inc | 248 +++++++++++++++-----
1 file changed, 192 insertions(+), 56 deletions(-)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index 41c2996e90..0985191c70 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -5,14 +5,23 @@
*/
#ifndef CONFIG_USER_ONLY
-#define CHECK_SXE do { \
- if ((ctx->base.tb->flags & HW_FLAGS_EUEN_SXE) == 0) { \
- generate_exception(ctx, EXCCODE_SXD); \
- return true; \
- } \
-} while (0)
+
+static bool check_vec(DisasContext *ctx, uint32_t oprsz)
+{
+ if ((oprsz == 16) && ((ctx->base.tb->flags & HW_FLAGS_EUEN_SXE) == 0)) {
+ generate_exception(ctx, EXCCODE_SXD);
+ return false;
+ }
+ return true;
+}
+
#else
-#define CHECK_SXE
+
+static bool check_vec(DisasContext *ctx, uint32_t oprsz)
+{
+ return true;
+}
+
#endif
static bool gen_vvvv_ptr_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz,
@@ -30,7 +39,10 @@ static bool gen_vvvv_ptr_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz,
static bool gen_vvvv_ptr(DisasContext *ctx, arg_vvvv *a,
gen_helper_gvec_4_ptr *fn)
{
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
return gen_vvvv_ptr_vl(ctx, a, 16, fn);
}
@@ -48,7 +60,10 @@ static bool gen_vvvv_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz,
static bool gen_vvvv(DisasContext *ctx, arg_vvvv *a,
gen_helper_gvec_4 *fn)
{
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
return gen_vvvv_vl(ctx, a, 16, fn);
}
@@ -66,7 +81,10 @@ static bool gen_vvv_ptr_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
static bool gen_vvv_ptr(DisasContext *ctx, arg_vvv *a,
gen_helper_gvec_3_ptr *fn)
{
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
return gen_vvv_ptr_vl(ctx, a, 16, fn);
}
@@ -82,7 +100,10 @@ static bool gen_vvv_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
static bool gen_vvv(DisasContext *ctx, arg_vvv *a, gen_helper_gvec_3 *fn)
{
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
return gen_vvv_vl(ctx, a, 16, fn);
}
@@ -99,7 +120,10 @@ static bool gen_vv_ptr_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz,
static bool gen_vv_ptr(DisasContext *ctx, arg_vv *a,
gen_helper_gvec_2_ptr *fn)
{
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
return gen_vv_ptr_vl(ctx, a, 16, fn);
}
@@ -114,7 +138,10 @@ static bool gen_vv_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz,
static bool gen_vv(DisasContext *ctx, arg_vv *a, gen_helper_gvec_2 *fn)
{
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
return gen_vv_vl(ctx, a, 16, fn);
}
@@ -130,7 +157,10 @@ static bool gen_vv_i_vl(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz,
static bool gen_vv_i(DisasContext *ctx, arg_vv_i *a, gen_helper_gvec_2i *fn)
{
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
return gen_vv_i_vl(ctx, a, 16, fn);
}
@@ -140,7 +170,10 @@ static bool gen_cv(DisasContext *ctx, arg_cv *a,
TCGv_i32 vj = tcg_constant_i32(a->vj);
TCGv_i32 cd = tcg_constant_i32(a->cd);
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
func(cpu_env, cd, vj);
return true;
}
@@ -162,7 +195,10 @@ static bool gvec_vvv(DisasContext *ctx, arg_vvv *a, MemOp mop,
void (*func)(unsigned, uint32_t, uint32_t,
uint32_t, uint32_t, uint32_t))
{
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
return gvec_vvv_vl(ctx, a, 16, mop, func);
}
@@ -184,7 +220,10 @@ static bool gvec_vv(DisasContext *ctx, arg_vv *a, MemOp mop,
void (*func)(unsigned, uint32_t, uint32_t,
uint32_t, uint32_t))
{
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
return gvec_vv_vl(ctx, a, 16, mop, func);
}
@@ -204,7 +243,10 @@ static bool gvec_vv_i(DisasContext *ctx, arg_vv_i *a, MemOp mop,
void (*func)(unsigned, uint32_t, uint32_t,
int64_t, uint32_t, uint32_t))
{
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
return gvec_vv_i_vl(ctx, a, 16, mop, func);
}
@@ -220,7 +262,10 @@ static bool gvec_subi_vl(DisasContext *ctx, arg_vv_i *a,
static bool gvec_subi(DisasContext *ctx, arg_vv_i *a, MemOp mop)
{
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
return gvec_subi_vl(ctx, a, 16, mop);
}
@@ -238,7 +283,9 @@ static bool trans_v## NAME ##_q(DisasContext *ctx, arg_vvv *a) \
return false; \
} \
\
- CHECK_SXE; \
+ if (!check_vec(ctx, 16)) { \
+ return true; \
+ } \
\
rh = tcg_temp_new_i64(); \
rl = tcg_temp_new_i64(); \
@@ -3138,7 +3185,9 @@ static bool trans_vldi(DisasContext *ctx, arg_vldi *a)
return false;
}
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
sel = (a->imm >> 12) & 0x1;
@@ -3168,7 +3217,9 @@ static bool trans_vandn_v(DisasContext *ctx, arg_vvv *a)
return false;
}
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
vd_ofs = vec_full_offset(a->vd);
vj_ofs = vec_full_offset(a->vj);
@@ -3795,7 +3846,9 @@ static bool do_cmp(DisasContext *ctx, arg_vvv *a, MemOp mop, TCGCond cond)
{
uint32_t vd_ofs, vj_ofs, vk_ofs;
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
vd_ofs = vec_full_offset(a->vd);
vj_ofs = vec_full_offset(a->vj);
@@ -3841,7 +3894,9 @@ static bool do_## NAME ##_s(DisasContext *ctx, arg_vv_i *a, MemOp mop) \
{ \
uint32_t vd_ofs, vj_ofs; \
\
- CHECK_SXE; \
+ if (!check_vec(ctx, 16)) { \
+ return true; \
+ } \
\
static const TCGOpcode vecop_list[] = { \
INDEX_op_cmp_vec, 0 \
@@ -3890,7 +3945,9 @@ static bool do_## NAME ##_u(DisasContext *ctx, arg_vv_i *a, MemOp mop) \
{ \
uint32_t vd_ofs, vj_ofs; \
\
- CHECK_SXE; \
+ if (!check_vec(ctx, 16)) { \
+ return true; \
+ } \
\
static const TCGOpcode vecop_list[] = { \
INDEX_op_cmp_vec, 0 \
@@ -3988,7 +4045,9 @@ static bool trans_vfcmp_cond_s(DisasContext *ctx, arg_vvv_fcond *a)
return false;
}
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
fn = (a->fcond & 1 ? gen_helper_vfcmp_s_s : gen_helper_vfcmp_c_s);
flags = get_fcmp_flags(a->fcond >> 1);
@@ -4009,7 +4068,9 @@ static bool trans_vfcmp_cond_d(DisasContext *ctx, arg_vvv_fcond *a)
return false;
}
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
fn = (a->fcond & 1 ? gen_helper_vfcmp_s_d : gen_helper_vfcmp_c_d);
flags = get_fcmp_flags(a->fcond >> 1);
@@ -4024,7 +4085,9 @@ static bool trans_vbitsel_v(DisasContext *ctx, arg_vvvv *a)
return false;
}
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
tcg_gen_gvec_bitsel(MO_64, vec_full_offset(a->vd), vec_full_offset(a->va),
vec_full_offset(a->vk), vec_full_offset(a->vj),
@@ -4050,7 +4113,9 @@ static bool trans_vbitseli_b(DisasContext *ctx, arg_vv_i *a)
return false;
}
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
tcg_gen_gvec_2i(vec_full_offset(a->vd), vec_full_offset(a->vj),
16, ctx->vl/8, a->imm, &op);
@@ -4073,7 +4138,10 @@ static bool trans_## NAME (DisasContext *ctx, arg_cv *a) \
return false; \
} \
\
- CHECK_SXE; \
+ if (!check_vec(ctx, 16)) { \
+ return true; \
+ } \
+ \
tcg_gen_or_i64(t1, al, ah); \
tcg_gen_setcondi_i64(COND, t1, t1, 0); \
tcg_gen_st8_tl(t1, cpu_env, offsetof(CPULoongArchState, cf[a->cd & 0x7])); \
@@ -4101,7 +4169,10 @@ static bool trans_vinsgr2vr_b(DisasContext *ctx, arg_vr_i *a)
return false;
}
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
tcg_gen_st8_i64(src, cpu_env,
offsetof(CPULoongArchState, fpr[a->vd].vreg.B(a->imm)));
return true;
@@ -4115,7 +4186,10 @@ static bool trans_vinsgr2vr_h(DisasContext *ctx, arg_vr_i *a)
return false;
}
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
tcg_gen_st16_i64(src, cpu_env,
offsetof(CPULoongArchState, fpr[a->vd].vreg.H(a->imm)));
return true;
@@ -4129,7 +4203,10 @@ static bool trans_vinsgr2vr_w(DisasContext *ctx, arg_vr_i *a)
return false;
}
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
tcg_gen_st32_i64(src, cpu_env,
offsetof(CPULoongArchState, fpr[a->vd].vreg.W(a->imm)));
return true;
@@ -4143,7 +4220,10 @@ static bool trans_vinsgr2vr_d(DisasContext *ctx, arg_vr_i *a)
return false;
}
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
tcg_gen_st_i64(src, cpu_env,
offsetof(CPULoongArchState, fpr[a->vd].vreg.D(a->imm)));
return true;
@@ -4157,7 +4237,10 @@ static bool trans_vpickve2gr_b(DisasContext *ctx, arg_rv_i *a)
return false;
}
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
tcg_gen_ld8s_i64(dst, cpu_env,
offsetof(CPULoongArchState, fpr[a->vj].vreg.B(a->imm)));
return true;
@@ -4171,7 +4254,10 @@ static bool trans_vpickve2gr_h(DisasContext *ctx, arg_rv_i *a)
return false;
}
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
tcg_gen_ld16s_i64(dst, cpu_env,
offsetof(CPULoongArchState, fpr[a->vj].vreg.H(a->imm)));
return true;
@@ -4185,7 +4271,10 @@ static bool trans_vpickve2gr_w(DisasContext *ctx, arg_rv_i *a)
return false;
}
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
tcg_gen_ld32s_i64(dst, cpu_env,
offsetof(CPULoongArchState, fpr[a->vj].vreg.W(a->imm)));
return true;
@@ -4199,7 +4288,10 @@ static bool trans_vpickve2gr_d(DisasContext *ctx, arg_rv_i *a)
return false;
}
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
tcg_gen_ld_i64(dst, cpu_env,
offsetof(CPULoongArchState, fpr[a->vj].vreg.D(a->imm)));
return true;
@@ -4213,7 +4305,10 @@ static bool trans_vpickve2gr_bu(DisasContext *ctx, arg_rv_i *a)
return false;
}
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
tcg_gen_ld8u_i64(dst, cpu_env,
offsetof(CPULoongArchState, fpr[a->vj].vreg.B(a->imm)));
return true;
@@ -4227,7 +4322,10 @@ static bool trans_vpickve2gr_hu(DisasContext *ctx, arg_rv_i *a)
return false;
}
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
tcg_gen_ld16u_i64(dst, cpu_env,
offsetof(CPULoongArchState, fpr[a->vj].vreg.H(a->imm)));
return true;
@@ -4241,7 +4339,10 @@ static bool trans_vpickve2gr_wu(DisasContext *ctx, arg_rv_i *a)
return false;
}
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
tcg_gen_ld32u_i64(dst, cpu_env,
offsetof(CPULoongArchState, fpr[a->vj].vreg.W(a->imm)));
return true;
@@ -4255,7 +4356,10 @@ static bool trans_vpickve2gr_du(DisasContext *ctx, arg_rv_i *a)
return false;
}
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
tcg_gen_ld_i64(dst, cpu_env,
offsetof(CPULoongArchState, fpr[a->vj].vreg.D(a->imm)));
return true;
@@ -4269,7 +4373,9 @@ static bool gvec_dup(DisasContext *ctx, arg_vr *a, MemOp mop)
return false;
}
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
tcg_gen_gvec_dup_i64(mop, vec_full_offset(a->vd),
16, ctx->vl/8, src);
@@ -4287,7 +4393,10 @@ static bool trans_vreplvei_b(DisasContext *ctx, arg_vv_i *a)
return false;
}
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
tcg_gen_gvec_dup_mem(MO_8,vec_full_offset(a->vd),
offsetof(CPULoongArchState,
fpr[a->vj].vreg.B((a->imm))),
@@ -4301,7 +4410,10 @@ static bool trans_vreplvei_h(DisasContext *ctx, arg_vv_i *a)
return false;
}
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
tcg_gen_gvec_dup_mem(MO_16, vec_full_offset(a->vd),
offsetof(CPULoongArchState,
fpr[a->vj].vreg.H((a->imm))),
@@ -4314,7 +4426,10 @@ static bool trans_vreplvei_w(DisasContext *ctx, arg_vv_i *a)
return false;
}
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
tcg_gen_gvec_dup_mem(MO_32, vec_full_offset(a->vd),
offsetof(CPULoongArchState,
fpr[a->vj].vreg.W((a->imm))),
@@ -4327,7 +4442,10 @@ static bool trans_vreplvei_d(DisasContext *ctx, arg_vv_i *a)
return false;
}
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
tcg_gen_gvec_dup_mem(MO_64, vec_full_offset(a->vd),
offsetof(CPULoongArchState,
fpr[a->vj].vreg.D((a->imm))),
@@ -4346,7 +4464,9 @@ static bool gen_vreplve(DisasContext *ctx, arg_vvr *a, int vece, int bit,
return false;
}
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
tcg_gen_andi_i64(t0, gpr_src(ctx, a->rk, EXT_NONE), (LSX_LEN/bit) -1);
tcg_gen_shli_i64(t0, t0, vece);
@@ -4376,7 +4496,9 @@ static bool trans_vbsll_v(DisasContext *ctx, arg_vv_i *a)
return false;
}
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
desthigh = tcg_temp_new_i64();
destlow = tcg_temp_new_i64();
@@ -4410,7 +4532,9 @@ static bool trans_vbsrl_v(DisasContext *ctx, arg_vv_i *a)
return false;
}
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
desthigh = tcg_temp_new_i64();
destlow = tcg_temp_new_i64();
@@ -4488,7 +4612,9 @@ static bool trans_vld(DisasContext *ctx, arg_vr_i *a)
return false;
}
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
addr = gpr_src(ctx, a->rj, EXT_NONE);
val = tcg_temp_new_i128();
@@ -4515,7 +4641,9 @@ static bool trans_vst(DisasContext *ctx, arg_vr_i *a)
return false;
}
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
addr = gpr_src(ctx, a->rj, EXT_NONE);
val = tcg_temp_new_i128();
@@ -4542,7 +4670,9 @@ static bool trans_vldx(DisasContext *ctx, arg_vrr *a)
return false;
}
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
src1 = gpr_src(ctx, a->rj, EXT_NONE);
src2 = gpr_src(ctx, a->rk, EXT_NONE);
@@ -4569,7 +4699,9 @@ static bool trans_vstx(DisasContext *ctx, arg_vrr *a)
return false;
}
- CHECK_SXE;
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
src1 = gpr_src(ctx, a->rj, EXT_NONE);
src2 = gpr_src(ctx, a->rk, EXT_NONE);
@@ -4596,7 +4728,9 @@ static bool trans_## NAME (DisasContext *ctx, arg_vr_i *a) \
return false; \
} \
\
- CHECK_SXE; \
+ if (!check_vec(ctx, 16)) { \
+ return true; \
+ } \
\
addr = gpr_src(ctx, a->rj, EXT_NONE); \
val = tcg_temp_new_i64(); \
@@ -4624,7 +4758,9 @@ static bool trans_## NAME (DisasContext *ctx, arg_vr_ii *a) \
return false; \
} \
\
- CHECK_SXE; \
+ if (!check_vec(ctx, 16)) { \
+ return true; \
+ } \
\
addr = gpr_src(ctx, a->rj, EXT_NONE); \
val = tcg_temp_new_i64(); \
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 11/57] target/loongarch: Add LASX data support
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (9 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 10/57] target/loongarch: Replace CHECK_SXE to check_vec(ctx, 16) Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-07 8:31 ` [PATCH RESEND v5 12/57] target/loongarch: check_vec support check LASX instructions Song Gao
` (45 subsequent siblings)
56 siblings, 0 replies; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/cpu.h | 24 ++++++++++++----------
target/loongarch/internals.h | 22 --------------------
target/loongarch/vec.h | 33 ++++++++++++++++++++++++++++++
linux-user/loongarch64/signal.c | 1 +
target/loongarch/cpu.c | 1 +
target/loongarch/gdbstub.c | 1 +
target/loongarch/machine.c | 36 ++++++++++++++++++++++++++++++++-
target/loongarch/translate.c | 1 +
target/loongarch/vec_helper.c | 1 +
9 files changed, 86 insertions(+), 34 deletions(-)
create mode 100644 target/loongarch/vec.h
diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h
index 4d7201995a..347ad1c8a9 100644
--- a/target/loongarch/cpu.h
+++ b/target/loongarch/cpu.h
@@ -251,18 +251,20 @@ FIELD(TLB_MISC, ASID, 1, 10)
FIELD(TLB_MISC, VPPN, 13, 35)
FIELD(TLB_MISC, PS, 48, 6)
-#define LSX_LEN (128)
+#define LSX_LEN (128)
+#define LASX_LEN (256)
+
typedef union VReg {
- int8_t B[LSX_LEN / 8];
- int16_t H[LSX_LEN / 16];
- int32_t W[LSX_LEN / 32];
- int64_t D[LSX_LEN / 64];
- uint8_t UB[LSX_LEN / 8];
- uint16_t UH[LSX_LEN / 16];
- uint32_t UW[LSX_LEN / 32];
- uint64_t UD[LSX_LEN / 64];
- Int128 Q[LSX_LEN / 128];
-}VReg;
+ int8_t B[LASX_LEN / 8];
+ int16_t H[LASX_LEN / 16];
+ int32_t W[LASX_LEN / 32];
+ int64_t D[LASX_LEN / 64];
+ uint8_t UB[LASX_LEN / 8];
+ uint16_t UH[LASX_LEN / 16];
+ uint32_t UW[LASX_LEN / 32];
+ uint64_t UD[LASX_LEN / 64];
+ Int128 Q[LASX_LEN / 128];
+} VReg;
typedef union fpr_t fpr_t;
union fpr_t {
diff --git a/target/loongarch/internals.h b/target/loongarch/internals.h
index 7b0f29c942..c492863cc5 100644
--- a/target/loongarch/internals.h
+++ b/target/loongarch/internals.h
@@ -21,28 +21,6 @@
/* Global bit for huge page */
#define LOONGARCH_HGLOBAL_SHIFT 12
-#if HOST_BIG_ENDIAN
-#define B(x) B[15 - (x)]
-#define H(x) H[7 - (x)]
-#define W(x) W[3 - (x)]
-#define D(x) D[1 - (x)]
-#define UB(x) UB[15 - (x)]
-#define UH(x) UH[7 - (x)]
-#define UW(x) UW[3 - (x)]
-#define UD(x) UD[1 -(x)]
-#define Q(x) Q[x]
-#else
-#define B(x) B[x]
-#define H(x) H[x]
-#define W(x) W[x]
-#define D(x) D[x]
-#define UB(x) UB[x]
-#define UH(x) UH[x]
-#define UW(x) UW[x]
-#define UD(x) UD[x]
-#define Q(x) Q[x]
-#endif
-
void loongarch_translate_init(void);
void loongarch_cpu_dump_state(CPUState *cpu, FILE *f, int flags);
diff --git a/target/loongarch/vec.h b/target/loongarch/vec.h
new file mode 100644
index 0000000000..2f23cae7d7
--- /dev/null
+++ b/target/loongarch/vec.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * QEMU LoongArch vector utilities
+ *
+ * Copyright (c) 2023 Loongson Technology Corporation Limited
+ */
+
+#ifndef LOONGARCH_VEC_H
+#define LOONGARCH_VEC_H
+
+#if HOST_BIG_ENDIAN
+#define B(x) B[(x) ^ 15]
+#define H(x) H[(x) ^ 7]
+#define W(x) W[(x) ^ 3]
+#define D(x) D[(x) ^ 1]
+#define UB(x) UB[(x) ^ 15]
+#define UH(x) UH[(x) ^ 7]
+#define UW(x) UW[(x) ^ 3]
+#define UD(x) UD[(x) ^ 1]
+#define Q(x) Q[x]
+#else
+#define B(x) B[x]
+#define H(x) H[x]
+#define W(x) W[x]
+#define D(x) D[x]
+#define UB(x) UB[x]
+#define UH(x) UH[x]
+#define UW(x) UW[x]
+#define UD(x) UD[x]
+#define Q(x) Q[x]
+#endif /* HOST_BIG_ENDIAN */
+
+#endif /* LOONGARCH_VEC_H */
diff --git a/linux-user/loongarch64/signal.c b/linux-user/loongarch64/signal.c
index bb8efb1172..39572c1190 100644
--- a/linux-user/loongarch64/signal.c
+++ b/linux-user/loongarch64/signal.c
@@ -12,6 +12,7 @@
#include "linux-user/trace.h"
#include "target/loongarch/internals.h"
+#include "target/loongarch/vec.h"
/* FP context was used */
#define SC_USED_FP (1 << 0)
diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index 65f9320e34..4d72e905aa 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -19,6 +19,7 @@
#include "cpu-csr.h"
#include "sysemu/reset.h"
#include "tcg/tcg.h"
+#include "vec.h"
const char * const regnames[32] = {
"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
diff --git a/target/loongarch/gdbstub.c b/target/loongarch/gdbstub.c
index b09804b62f..5fc2f19e96 100644
--- a/target/loongarch/gdbstub.c
+++ b/target/loongarch/gdbstub.c
@@ -11,6 +11,7 @@
#include "internals.h"
#include "exec/gdbstub.h"
#include "gdbstub/helpers.h"
+#include "vec.h"
uint64_t read_fcc(CPULoongArchState *env)
{
diff --git a/target/loongarch/machine.c b/target/loongarch/machine.c
index d8ac99c9a4..1c4e01d076 100644
--- a/target/loongarch/machine.c
+++ b/target/loongarch/machine.c
@@ -8,7 +8,7 @@
#include "qemu/osdep.h"
#include "cpu.h"
#include "migration/cpu.h"
-#include "internals.h"
+#include "vec.h"
static const VMStateDescription vmstate_fpu_reg = {
.name = "fpu_reg",
@@ -76,6 +76,39 @@ static const VMStateDescription vmstate_lsx = {
},
};
+static const VMStateDescription vmstate_lasxh_reg = {
+ .name = "lasxh_reg",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT64(UD(2), VReg),
+ VMSTATE_UINT64(UD(3), VReg),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+#define VMSTATE_LASXH_REGS(_field, _state, _start) \
+ VMSTATE_STRUCT_SUB_ARRAY(_field, _state, _start, 32, 0, \
+ vmstate_lasxh_reg, fpr_t)
+
+static bool lasx_needed(void *opaque)
+{
+ LoongArchCPU *cpu = opaque;
+
+ return FIELD_EX64(cpu->env.cpucfg[2], CPUCFG2, LASX);
+}
+
+static const VMStateDescription vmstate_lasx = {
+ .name = "cpu/lasx",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = lasx_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_LASXH_REGS(env.fpr, LoongArchCPU, 0),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
/* TLB state */
const VMStateDescription vmstate_tlb = {
.name = "cpu/tlb",
@@ -163,6 +196,7 @@ const VMStateDescription vmstate_loongarch_cpu = {
.subsections = (const VMStateDescription*[]) {
&vmstate_fpu,
&vmstate_lsx,
+ &vmstate_lasx,
NULL
}
};
diff --git a/target/loongarch/translate.c b/target/loongarch/translate.c
index 288727181b..7f3958a1f4 100644
--- a/target/loongarch/translate.c
+++ b/target/loongarch/translate.c
@@ -18,6 +18,7 @@
#include "fpu/softfloat.h"
#include "translate.h"
#include "internals.h"
+#include "vec.h"
/* Global register indices */
TCGv cpu_gpr[32], cpu_pc;
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index 4e10957b90..c784f98ab2 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -12,6 +12,7 @@
#include "fpu/softfloat.h"
#include "internals.h"
#include "tcg/tcg.h"
+#include "vec.h"
#define DO_ADD(a, b) (a + b)
#define DO_SUB(a, b) (a - b)
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 12/57] target/loongarch: check_vec support check LASX instructions
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (10 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 11/57] target/loongarch: Add LASX data support Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-07 8:31 ` [PATCH RESEND v5 13/57] target/loongarch: Add avail_LASX to " Song Gao
` (44 subsequent siblings)
56 siblings, 0 replies; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Song Gao <gaosong@loongson.cn>
---
target/loongarch/cpu.h | 2 ++
target/loongarch/cpu.c | 2 ++
target/loongarch/insn_trans/trans_vec.c.inc | 6 ++++++
3 files changed, 10 insertions(+)
diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h
index 347ad1c8a9..f125a8e49b 100644
--- a/target/loongarch/cpu.h
+++ b/target/loongarch/cpu.h
@@ -462,6 +462,7 @@ static inline void set_pc(CPULoongArchState *env, uint64_t value)
#define HW_FLAGS_CRMD_PG R_CSR_CRMD_PG_MASK /* 0x10 */
#define HW_FLAGS_EUEN_FPE 0x04
#define HW_FLAGS_EUEN_SXE 0x08
+#define HW_FLAGS_EUEN_ASXE 0x10
#define HW_FLAGS_VA32 0x20
static inline void cpu_get_tb_cpu_state(CPULoongArchState *env, vaddr *pc,
@@ -472,6 +473,7 @@ static inline void cpu_get_tb_cpu_state(CPULoongArchState *env, vaddr *pc,
*flags = env->CSR_CRMD & (R_CSR_CRMD_PLV_MASK | R_CSR_CRMD_PG_MASK);
*flags |= FIELD_EX64(env->CSR_EUEN, CSR_EUEN, FPE) * HW_FLAGS_EUEN_FPE;
*flags |= FIELD_EX64(env->CSR_EUEN, CSR_EUEN, SXE) * HW_FLAGS_EUEN_SXE;
+ *flags |= FIELD_EX64(env->CSR_EUEN, CSR_EUEN, ASXE) * HW_FLAGS_EUEN_ASXE;
*flags |= is_va32(env) * HW_FLAGS_VA32;
}
diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index 4d72e905aa..a1d3f680d8 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -55,6 +55,7 @@ static const char * const excp_names[] = {
[EXCCODE_DBP] = "Debug breakpoint",
[EXCCODE_BCE] = "Bound Check Exception",
[EXCCODE_SXD] = "128 bit vector instructions Disable exception",
+ [EXCCODE_ASXD] = "256 bit vector instructions Disable exception",
};
const char *loongarch_exception_name(int32_t exception)
@@ -190,6 +191,7 @@ static void loongarch_cpu_do_interrupt(CPUState *cs)
case EXCCODE_FPD:
case EXCCODE_FPE:
case EXCCODE_SXD:
+ case EXCCODE_ASXD:
env->CSR_BADV = env->pc;
QEMU_FALLTHROUGH;
case EXCCODE_BCE:
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index 0985191c70..a90afd3b82 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -12,6 +12,12 @@ static bool check_vec(DisasContext *ctx, uint32_t oprsz)
generate_exception(ctx, EXCCODE_SXD);
return false;
}
+
+ if ((oprsz == 32) && ((ctx->base.tb->flags & HW_FLAGS_EUEN_ASXE) == 0)) {
+ generate_exception(ctx, EXCCODE_ASXD);
+ return false;
+ }
+
return true;
}
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 13/57] target/loongarch: Add avail_LASX to check LASX instructions
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (11 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 12/57] target/loongarch: check_vec support check LASX instructions Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-07 8:31 ` [PATCH RESEND v5 14/57] target/loongarch: Implement xvadd/xvsub Song Gao
` (43 subsequent siblings)
56 siblings, 0 replies; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/translate.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/target/loongarch/translate.h b/target/loongarch/translate.h
index 89b49a859e..195f53573a 100644
--- a/target/loongarch/translate.h
+++ b/target/loongarch/translate.h
@@ -23,6 +23,7 @@
#define avail_LSPW(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LSPW))
#define avail_LAM(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LAM))
#define avail_LSX(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LSX))
+#define avail_LASX(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LASX))
#define avail_IOCSR(C) (FIELD_EX32((C)->cpucfg1, CPUCFG1, IOCSR))
/*
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 14/57] target/loongarch: Implement xvadd/xvsub
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (12 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 13/57] target/loongarch: Add avail_LASX to " Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-07 12:13 ` gaosong
2023-09-10 1:44 ` Richard Henderson
2023-09-07 8:31 ` [PATCH RESEND v5 15/57] target/loongarch: Implement xvreplgr2vr Song Gao
` (42 subsequent siblings)
56 siblings, 2 replies; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVADD.{B/H/W/D/Q};
- XVSUB.{B/H/W/D/Q}.
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/insns.decode | 14 +++
target/loongarch/disas.c | 23 +++++
target/loongarch/translate.c | 4 +
target/loongarch/insn_trans/trans_vec.c.inc | 106 +++++++++++++-------
4 files changed, 112 insertions(+), 35 deletions(-)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index c9c3bc2c73..bcc18fb6c5 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1296,3 +1296,17 @@ vstelm_d 0011 00010001 0 . ........ ..... ..... @vr_i8i1
vstelm_w 0011 00010010 .. ........ ..... ..... @vr_i8i2
vstelm_h 0011 0001010 ... ........ ..... ..... @vr_i8i3
vstelm_b 0011 000110 .... ........ ..... ..... @vr_i8i4
+
+#
+# LoongArch LASX instructions
+#
+xvadd_b 0111 01000000 10100 ..... ..... ..... @vvv
+xvadd_h 0111 01000000 10101 ..... ..... ..... @vvv
+xvadd_w 0111 01000000 10110 ..... ..... ..... @vvv
+xvadd_d 0111 01000000 10111 ..... ..... ..... @vvv
+xvadd_q 0111 01010010 11010 ..... ..... ..... @vvv
+xvsub_b 0111 01000000 11000 ..... ..... ..... @vvv
+xvsub_h 0111 01000000 11001 ..... ..... ..... @vvv
+xvsub_w 0111 01000000 11010 ..... ..... ..... @vvv
+xvsub_d 0111 01000000 11011 ..... ..... ..... @vvv
+xvsub_q 0111 01010010 11011 ..... ..... ..... @vvv
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 5c402d944d..d8b62ba532 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1695,3 +1695,26 @@ INSN_LSX(vstelm_d, vr_ii)
INSN_LSX(vstelm_w, vr_ii)
INSN_LSX(vstelm_h, vr_ii)
INSN_LSX(vstelm_b, vr_ii)
+
+#define INSN_LASX(insn, type) \
+static bool trans_##insn(DisasContext *ctx, arg_##type * a) \
+{ \
+ output_##type ## _x(ctx, a, #insn); \
+ return true; \
+}
+
+static void output_vvv_x(DisasContext *ctx, arg_vvv * a, const char *mnemonic)
+{
+ output(ctx, mnemonic, "x%d, x%d, x%d", a->vd, a->vj, a->vk);
+}
+
+INSN_LASX(xvadd_b, vvv)
+INSN_LASX(xvadd_h, vvv)
+INSN_LASX(xvadd_w, vvv)
+INSN_LASX(xvadd_d, vvv)
+INSN_LASX(xvadd_q, vvv)
+INSN_LASX(xvsub_b, vvv)
+INSN_LASX(xvsub_h, vvv)
+INSN_LASX(xvsub_w, vvv)
+INSN_LASX(xvsub_d, vvv)
+INSN_LASX(xvsub_q, vvv)
diff --git a/target/loongarch/translate.c b/target/loongarch/translate.c
index 7f3958a1f4..10e2fe8ff6 100644
--- a/target/loongarch/translate.c
+++ b/target/loongarch/translate.c
@@ -124,6 +124,10 @@ static void loongarch_tr_init_disas_context(DisasContextBase *dcbase,
ctx->vl = LSX_LEN;
}
+ if (FIELD_EX64(env->cpucfg[2], CPUCFG2, LASX)) {
+ ctx->vl = LASX_LEN;
+ }
+
ctx->la64 = is_la64(env);
ctx->va32 = (ctx->base.tb->flags & HW_FLAGS_VA32) != 0;
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index a90afd3b82..47cf053e0a 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -208,6 +208,16 @@ static bool gvec_vvv(DisasContext *ctx, arg_vvv *a, MemOp mop,
return gvec_vvv_vl(ctx, a, 16, mop, func);
}
+static bool gvec_xxx(DisasContext *ctx, arg_vvv *a, MemOp mop,
+ void (*func)(unsigned, uint32_t, uint32_t,
+ uint32_t, uint32_t, uint32_t))
+{
+ if (!check_vec(ctx, 32)) {
+ return true;
+ }
+
+ return gvec_vvv_vl(ctx, a, 32, mop, func);
+}
static bool gvec_vv_vl(DisasContext *ctx, arg_vv *a,
uint32_t oprsz, MemOp mop,
@@ -279,47 +289,73 @@ TRANS(vadd_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_add)
TRANS(vadd_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_add)
TRANS(vadd_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_add)
TRANS(vadd_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_add)
+TRANS(xvadd_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_add)
+TRANS(xvadd_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_add)
+TRANS(xvadd_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_add)
+TRANS(xvadd_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_add)
+
+static bool gen_vaddsub_q_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
+ void (*func)(TCGv_i64, TCGv_i64, TCGv_i64,
+ TCGv_i64, TCGv_i64, TCGv_i64))
+{
+ int i;
+ TCGv_i64 rh, rl, ah, al, bh, bl;
+
+ rh = tcg_temp_new_i64();
+ rl = tcg_temp_new_i64();
+ ah = tcg_temp_new_i64();
+ al = tcg_temp_new_i64();
+ bh = tcg_temp_new_i64();
+ bl = tcg_temp_new_i64();
+
+ for (i = 0; i < oprsz / 16; i++) {
+ get_vreg64(ah, a->vj, 1 + i * 2);
+ get_vreg64(al, a->vj, i * 2);
+ get_vreg64(bh, a->vk, 1 + i * 2);
+ get_vreg64(bl, a->vk, i * 2);
+
+ func(rl, rh, al, ah, bl, bh);
-#define VADDSUB_Q(NAME) \
-static bool trans_v## NAME ##_q(DisasContext *ctx, arg_vvv *a) \
-{ \
- TCGv_i64 rh, rl, ah, al, bh, bl; \
- \
- if (!avail_LSX(ctx)) { \
- return false; \
- } \
- \
- if (!check_vec(ctx, 16)) { \
- return true; \
- } \
- \
- rh = tcg_temp_new_i64(); \
- rl = tcg_temp_new_i64(); \
- ah = tcg_temp_new_i64(); \
- al = tcg_temp_new_i64(); \
- bh = tcg_temp_new_i64(); \
- bl = tcg_temp_new_i64(); \
- \
- get_vreg64(ah, a->vj, 1); \
- get_vreg64(al, a->vj, 0); \
- get_vreg64(bh, a->vk, 1); \
- get_vreg64(bl, a->vk, 0); \
- \
- tcg_gen_## NAME ##2_i64(rl, rh, al, ah, bl, bh); \
- \
- set_vreg64(rh, a->vd, 1); \
- set_vreg64(rl, a->vd, 0); \
- \
- return true; \
-}
-
-VADDSUB_Q(add)
-VADDSUB_Q(sub)
+ set_vreg64(rh, a->vd, 1 + i * 2);
+ set_vreg64(rl, a->vd, i * 2);
+ }
+ return true;
+}
+
+static bool gen_vaddsub_q(DisasContext *ctx, arg_vvv *a,
+ void (*func)(TCGv_i64, TCGv_i64, TCGv_i64,
+ TCGv_i64, TCGv_i64, TCGv_i64))
+{
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
+ return gen_vaddsub_q_vl(ctx, a, 16, func);
+}
+
+static bool gen_xvaddsub_q(DisasContext *ctx, arg_vvv *a,
+ void (*func)(TCGv_i64, TCGv_i64, TCGv_i64,
+ TCGv_i64, TCGv_i64, TCGv_i64))
+{
+ if (!check_vec(ctx, 32)) {
+ return true;
+ }
+ return gen_vaddsub_q_vl(ctx, a, 16, func);
+}
TRANS(vsub_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_sub)
TRANS(vsub_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_sub)
TRANS(vsub_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_sub)
TRANS(vsub_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_sub)
+TRANS(xvsub_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_sub)
+TRANS(xvsub_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_sub)
+TRANS(xvsub_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_sub)
+TRANS(xvsub_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_sub)
+
+TRANS(vadd_q, LSX, gen_vaddsub_q, tcg_gen_add2_i64)
+TRANS(vsub_q, LSX, gen_vaddsub_q, tcg_gen_sub2_i64)
+TRANS(xvadd_q, LASX, gen_xvaddsub_q, tcg_gen_add2_i64)
+TRANS(xvsub_q, LASX, gen_xvaddsub_q, tcg_gen_sub2_i64)
TRANS(vaddi_bu, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_addi)
TRANS(vaddi_hu, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_addi)
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* Re: [PATCH RESEND v5 14/57] target/loongarch: Implement xvadd/xvsub
2023-09-07 8:31 ` [PATCH RESEND v5 14/57] target/loongarch: Implement xvadd/xvsub Song Gao
@ 2023-09-07 12:13 ` gaosong
2023-09-10 1:44 ` Richard Henderson
1 sibling, 0 replies; 87+ messages in thread
From: gaosong @ 2023-09-07 12:13 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
在 2023/9/7 下午4:31, Song Gao 写道:
> +static bool gen_xvaddsub_q(DisasContext *ctx, arg_vvv *a,
> + void (*func)(TCGv_i64, TCGv_i64, TCGv_i64,
> + TCGv_i64, TCGv_i64, TCGv_i64))
> +{
> + if (!check_vec(ctx, 32)) {
> + return true;
> + }
> + return gen_vaddsub_q_vl(ctx, a, 16, func);
> +}
Typo: the 16 passed to gen_vaddsub_q_vl should be 32. I will correct it in v6.
Thanks.
Song Gao
^ permalink raw reply [flat|nested] 87+ messages in thread
* Re: [PATCH RESEND v5 14/57] target/loongarch: Implement xvadd/xvsub
2023-09-07 8:31 ` [PATCH RESEND v5 14/57] target/loongarch: Implement xvadd/xvsub Song Gao
2023-09-07 12:13 ` gaosong
@ 2023-09-10 1:44 ` Richard Henderson
2023-09-11 12:27 ` gaosong
1 sibling, 1 reply; 87+ messages in thread
From: Richard Henderson @ 2023-09-10 1:44 UTC (permalink / raw)
To: Song Gao, qemu-devel; +Cc: maobibo
On 9/7/23 01:31, Song Gao wrote:
> --- a/target/loongarch/insn_trans/trans_vec.c.inc
> +++ b/target/loongarch/insn_trans/trans_vec.c.inc
> @@ -208,6 +208,16 @@ static bool gvec_vvv(DisasContext *ctx, arg_vvv *a, MemOp mop,
> return gvec_vvv_vl(ctx, a, 16, mop, func);
> }
>
> +static bool gvec_xxx(DisasContext *ctx, arg_vvv *a, MemOp mop,
> + void (*func)(unsigned, uint32_t, uint32_t,
> + uint32_t, uint32_t, uint32_t))
> +{
> + if (!check_vec(ctx, 32)) {
> + return true;
> + }
> +
> + return gvec_vvv_vl(ctx, a, 32, mop, func);
> +}
You can move check_vec into gvec_vvv_vl, removing it from gvec_vvv.
> +static bool gen_vaddsub_q_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
> + void (*func)(TCGv_i64, TCGv_i64, TCGv_i64,
> + TCGv_i64, TCGv_i64, TCGv_i64))
> +{
> + int i;
> + TCGv_i64 rh, rl, ah, al, bh, bl;
Have check_vec here ...
> +static bool gen_vaddsub_q(DisasContext *ctx, arg_vvv *a,
> + void (*func)(TCGv_i64, TCGv_i64, TCGv_i64,
> + TCGv_i64, TCGv_i64, TCGv_i64))
> +{
> + if (!check_vec(ctx, 16)) {
> + return true;
> + }
> +
> + return gen_vaddsub_q_vl(ctx, a, 16, func);
> +}
> +
> +static bool gen_xvaddsub_q(DisasContext *ctx, arg_vvv *a,
> + void (*func)(TCGv_i64, TCGv_i64, TCGv_i64,
> + TCGv_i64, TCGv_i64, TCGv_i64))
> +{
> + if (!check_vec(ctx, 32)) {
> + return true;
> + }
> + return gen_vaddsub_q_vl(ctx, a, 16, func);
> +}
... instead of these two places.
r~
^ permalink raw reply [flat|nested] 87+ messages in thread
* Re: [PATCH RESEND v5 14/57] target/loongarch: Implement xvadd/xvsub
2023-09-10 1:44 ` Richard Henderson
@ 2023-09-11 12:27 ` gaosong
0 siblings, 0 replies; 87+ messages in thread
From: gaosong @ 2023-09-11 12:27 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: maobibo
在 2023/9/10 上午9:44, Richard Henderson 写道:
> On 9/7/23 01:31, Song Gao wrote:
>> --- a/target/loongarch/insn_trans/trans_vec.c.inc
>> +++ b/target/loongarch/insn_trans/trans_vec.c.inc
>> @@ -208,6 +208,16 @@ static bool gvec_vvv(DisasContext *ctx, arg_vvv
>> *a, MemOp mop,
>> return gvec_vvv_vl(ctx, a, 16, mop, func);
>> }
>> +static bool gvec_xxx(DisasContext *ctx, arg_vvv *a, MemOp mop,
>> + void (*func)(unsigned, uint32_t, uint32_t,
>> + uint32_t, uint32_t, uint32_t))
>> +{
>> + if (!check_vec(ctx, 32)) {
>> + return true;
>> + }
>> +
>> + return gvec_vvv_vl(ctx, a, 32, mop, func);
>> +}
>
> You can move check_vec into gvec_vvv_vl, removing it from gvec_vvv.
>
>> +static bool gen_vaddsub_q_vl(DisasContext *ctx, arg_vvv *a, uint32_t
>> oprsz,
>> + void (*func)(TCGv_i64, TCGv_i64, TCGv_i64,
>> + TCGv_i64, TCGv_i64, TCGv_i64))
>> +{
>> + int i;
>> + TCGv_i64 rh, rl, ah, al, bh, bl;
>
> Have check_vec here ...
>
>> +static bool gen_vaddsub_q(DisasContext *ctx, arg_vvv *a,
>> + void (*func)(TCGv_i64, TCGv_i64, TCGv_i64,
>> + TCGv_i64, TCGv_i64, TCGv_i64))
>> +{
>> + if (!check_vec(ctx, 16)) {
>> + return true;
>> + }
>> +
>> + return gen_vaddsub_q_vl(ctx, a, 16, func);
>> +}
>> +
>> +static bool gen_xvaddsub_q(DisasContext *ctx, arg_vvv *a,
>> + void (*func)(TCGv_i64, TCGv_i64, TCGv_i64,
>> + TCGv_i64, TCGv_i64, TCGv_i64))
>> +{
>> + if (!check_vec(ctx, 32)) {
>> + return true;
>> + }
>> + return gen_vaddsub_q_vl(ctx, a, 16, func);
>> +}
>
> ... instead of these two places.
>
>
Ok, I will correct all similar patches.
Thanks.
Song Gao
^ permalink raw reply [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 15/57] target/loongarch: Implement xvreplgr2vr
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (13 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 14/57] target/loongarch: Implement xvadd/xvsub Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-10 1:46 ` Richard Henderson
2023-09-07 8:31 ` [PATCH RESEND v5 16/57] target/loongarch: Implement xvaddi/xvsubi Song Gao
` (41 subsequent siblings)
56 siblings, 1 reply; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVREPLGR2VR.{B/H/W/D}.
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/insns.decode | 5 ++++
target/loongarch/disas.c | 10 +++++++
target/loongarch/insn_trans/trans_vec.c.inc | 29 ++++++++++++++++-----
3 files changed, 37 insertions(+), 7 deletions(-)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index bcc18fb6c5..04bd238995 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1310,3 +1310,8 @@ xvsub_h 0111 01000000 11001 ..... ..... ..... @vvv
xvsub_w 0111 01000000 11010 ..... ..... ..... @vvv
xvsub_d 0111 01000000 11011 ..... ..... ..... @vvv
xvsub_q 0111 01010010 11011 ..... ..... ..... @vvv
+
+xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
+xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
+xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
+xvreplgr2vr_d 0111 01101001 11110 00011 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index d8b62ba532..c47f455ed0 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1708,6 +1708,11 @@ static void output_vvv_x(DisasContext *ctx, arg_vvv * a, const char *mnemonic)
output(ctx, mnemonic, "x%d, x%d, x%d", a->vd, a->vj, a->vk);
}
+static void output_vr_x(DisasContext *ctx, arg_vr *a, const char *mnemonic)
+{
+ output(ctx, mnemonic, "x%d, r%d", a->vd, a->rj);
+}
+
INSN_LASX(xvadd_b, vvv)
INSN_LASX(xvadd_h, vvv)
INSN_LASX(xvadd_w, vvv)
@@ -1718,3 +1723,8 @@ INSN_LASX(xvsub_h, vvv)
INSN_LASX(xvsub_w, vvv)
INSN_LASX(xvsub_d, vvv)
INSN_LASX(xvsub_q, vvv)
+
+INSN_LASX(xvreplgr2vr_b, vr)
+INSN_LASX(xvreplgr2vr_h, vr)
+INSN_LASX(xvreplgr2vr_w, vr)
+INSN_LASX(xvreplgr2vr_d, vr)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index 47cf053e0a..a7323e0490 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -4407,27 +4407,42 @@ static bool trans_vpickve2gr_du(DisasContext *ctx, arg_rv_i *a)
return true;
}
-static bool gvec_dup(DisasContext *ctx, arg_vr *a, MemOp mop)
+static bool gvec_dup_vl(DisasContext *ctx, arg_vr *a,
+ uint32_t oprsz, MemOp mop)
{
TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
- if (!avail_LSX(ctx)) {
- return false;
- }
+ tcg_gen_gvec_dup_i64(mop, vec_full_offset(a->vd),
+ oprsz, ctx->vl/8, src);
+ return true;
+}
+static bool gvec_dup(DisasContext *ctx, arg_vr *a, MemOp mop)
+{
if (!check_vec(ctx, 16)) {
return true;
}
- tcg_gen_gvec_dup_i64(mop, vec_full_offset(a->vd),
- 16, ctx->vl/8, src);
- return true;
+ return gvec_dup_vl(ctx, a, 16, mop);
+}
+
+static bool gvec_dupx(DisasContext *ctx, arg_vr *a, MemOp mop)
+{
+ if (!check_vec(ctx, 32)) {
+ return true;
+ }
+
+ return gvec_dup_vl(ctx, a, 32, mop);
}
TRANS(vreplgr2vr_b, LSX, gvec_dup, MO_8)
TRANS(vreplgr2vr_h, LSX, gvec_dup, MO_16)
TRANS(vreplgr2vr_w, LSX, gvec_dup, MO_32)
TRANS(vreplgr2vr_d, LSX, gvec_dup, MO_64)
+TRANS(xvreplgr2vr_b, LASX, gvec_dupx, MO_8)
+TRANS(xvreplgr2vr_h, LASX, gvec_dupx, MO_16)
+TRANS(xvreplgr2vr_w, LASX, gvec_dupx, MO_32)
+TRANS(xvreplgr2vr_d, LASX, gvec_dupx, MO_64)
static bool trans_vreplvei_b(DisasContext *ctx, arg_vv_i *a)
{
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* Re: [PATCH RESEND v5 15/57] target/loongarch: Implement xvreplgr2vr
2023-09-07 8:31 ` [PATCH RESEND v5 15/57] target/loongarch: Implement xvreplgr2vr Song Gao
@ 2023-09-10 1:46 ` Richard Henderson
0 siblings, 0 replies; 87+ messages in thread
From: Richard Henderson @ 2023-09-10 1:46 UTC (permalink / raw)
To: Song Gao, qemu-devel; +Cc: maobibo
On 9/7/23 01:31, Song Gao wrote:
> --- a/target/loongarch/insn_trans/trans_vec.c.inc
> +++ b/target/loongarch/insn_trans/trans_vec.c.inc
> @@ -4407,27 +4407,42 @@ static bool trans_vpickve2gr_du(DisasContext *ctx, arg_rv_i *a)
> return true;
> }
>
> -static bool gvec_dup(DisasContext *ctx, arg_vr *a, MemOp mop)
> +static bool gvec_dup_vl(DisasContext *ctx, arg_vr *a,
> + uint32_t oprsz, MemOp mop)
> {
> TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
>
> - if (!avail_LSX(ctx)) {
> - return false;
> - }
> + tcg_gen_gvec_dup_i64(mop, vec_full_offset(a->vd),
> + oprsz, ctx->vl/8, src);
> + return true;
> +}
>
> +static bool gvec_dup(DisasContext *ctx, arg_vr *a, MemOp mop)
> +{
> if (!check_vec(ctx, 16)) {
> return true;
> }
check_vec in gvec_dup_vl instead.
Otherwise,
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
r~
^ permalink raw reply [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 16/57] target/loongarch: Implement xvaddi/xvsubi
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (14 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 15/57] target/loongarch: Implement xvreplgr2vr Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-10 1:50 ` Richard Henderson
2023-09-07 8:31 ` [PATCH RESEND v5 17/57] target/loongarch: Implement xvneg Song Gao
` (40 subsequent siblings)
56 siblings, 1 reply; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVADDI.{B/H/W/D}U;
- XVSUBI.{B/H/W/D}U.
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/insns.decode | 9 +++++++
target/loongarch/disas.c | 14 +++++++++++
target/loongarch/insn_trans/trans_vec.c.inc | 28 +++++++++++++++++++++
3 files changed, 51 insertions(+)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 04bd238995..c48dca70b8 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1311,6 +1311,15 @@ xvsub_w 0111 01000000 11010 ..... ..... ..... @vvv
xvsub_d 0111 01000000 11011 ..... ..... ..... @vvv
xvsub_q 0111 01010010 11011 ..... ..... ..... @vvv
+xvaddi_bu 0111 01101000 10100 ..... ..... ..... @vv_ui5
+xvaddi_hu 0111 01101000 10101 ..... ..... ..... @vv_ui5
+xvaddi_wu 0111 01101000 10110 ..... ..... ..... @vv_ui5
+xvaddi_du 0111 01101000 10111 ..... ..... ..... @vv_ui5
+xvsubi_bu 0111 01101000 11000 ..... ..... ..... @vv_ui5
+xvsubi_hu 0111 01101000 11001 ..... ..... ..... @vv_ui5
+xvsubi_wu 0111 01101000 11010 ..... ..... ..... @vv_ui5
+xvsubi_du 0111 01101000 11011 ..... ..... ..... @vv_ui5
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index c47f455ed0..20df9c7c99 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1713,6 +1713,11 @@ static void output_vr_x(DisasContext *ctx, arg_vr *a, const char *mnemonic)
output(ctx, mnemonic, "x%d, r%d", a->vd, a->rj);
}
+static void output_vv_i_x(DisasContext *ctx, arg_vv_i *a, const char *mnemonic)
+{
+ output(ctx, mnemonic, "x%d, x%d, 0x%x", a->vd, a->vj, a->imm);
+}
+
INSN_LASX(xvadd_b, vvv)
INSN_LASX(xvadd_h, vvv)
INSN_LASX(xvadd_w, vvv)
@@ -1724,6 +1729,15 @@ INSN_LASX(xvsub_w, vvv)
INSN_LASX(xvsub_d, vvv)
INSN_LASX(xvsub_q, vvv)
+INSN_LASX(xvaddi_bu, vv_i)
+INSN_LASX(xvaddi_hu, vv_i)
+INSN_LASX(xvaddi_wu, vv_i)
+INSN_LASX(xvaddi_du, vv_i)
+INSN_LASX(xvsubi_bu, vv_i)
+INSN_LASX(xvsubi_hu, vv_i)
+INSN_LASX(xvsubi_wu, vv_i)
+INSN_LASX(xvsubi_du, vv_i)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index a7323e0490..610a492d0c 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -266,6 +266,17 @@ static bool gvec_vv_i(DisasContext *ctx, arg_vv_i *a, MemOp mop,
return gvec_vv_i_vl(ctx, a, 16, mop, func);
}
+static bool gvec_xx_i(DisasContext *ctx, arg_vv_i *a, MemOp mop,
+ void (*func)(unsigned, uint32_t, uint32_t,
+ int64_t, uint32_t, uint32_t))
+{
+ if (!check_vec(ctx, 32)) {
+ return true;
+ }
+
+ return gvec_vv_i_vl(ctx,a, 32, mop, func);
+}
+
static bool gvec_subi_vl(DisasContext *ctx, arg_vv_i *a,
uint32_t oprsz, MemOp mop)
{
@@ -285,6 +296,15 @@ static bool gvec_subi(DisasContext *ctx, arg_vv_i *a, MemOp mop)
return gvec_subi_vl(ctx, a, 16, mop);
}
+static bool gvec_xsubi(DisasContext *ctx, arg_vv_i *a, MemOp mop)
+{
+ if (!check_vec(ctx, 32)) {
+ return true;
+ }
+
+ return gvec_subi_vl(ctx, a, 32, mop);
+}
+
TRANS(vadd_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_add)
TRANS(vadd_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_add)
TRANS(vadd_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_add)
@@ -365,6 +385,14 @@ TRANS(vsubi_bu, LSX, gvec_subi, MO_8)
TRANS(vsubi_hu, LSX, gvec_subi, MO_16)
TRANS(vsubi_wu, LSX, gvec_subi, MO_32)
TRANS(vsubi_du, LSX, gvec_subi, MO_64)
+TRANS(xvaddi_bu, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_addi)
+TRANS(xvaddi_hu, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_addi)
+TRANS(xvaddi_wu, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_addi)
+TRANS(xvaddi_du, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_addi)
+TRANS(xvsubi_bu, LASX, gvec_xsubi, MO_8)
+TRANS(xvsubi_hu, LASX, gvec_xsubi, MO_16)
+TRANS(xvsubi_wu, LASX, gvec_xsubi, MO_32)
+TRANS(xvsubi_du, LASX, gvec_xsubi, MO_64)
TRANS(vneg_b, LSX, gvec_vv, MO_8, tcg_gen_gvec_neg)
TRANS(vneg_h, LSX, gvec_vv, MO_16, tcg_gen_gvec_neg)
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* Re: [PATCH RESEND v5 16/57] target/loongarch: Implement xvaddi/xvsubi
2023-09-07 8:31 ` [PATCH RESEND v5 16/57] target/loongarch: Implement xvaddi/xvsubi Song Gao
@ 2023-09-10 1:50 ` Richard Henderson
0 siblings, 0 replies; 87+ messages in thread
From: Richard Henderson @ 2023-09-10 1:50 UTC (permalink / raw)
To: Song Gao, qemu-devel; +Cc: maobibo
On 9/7/23 01:31, Song Gao wrote:
> +static bool gvec_xx_i(DisasContext *ctx, arg_vv_i *a, MemOp mop,
> + void (*func)(unsigned, uint32_t, uint32_t,
> + int64_t, uint32_t, uint32_t))
> +{
> + if (!check_vec(ctx, 32)) {
> + return true;
> + }
> +
> + return gvec_vv_i_vl(ctx,a, 32, mop, func);
Move check_vec into gvec_vv_i_vl.
> +static bool gvec_xsubi(DisasContext *ctx, arg_vv_i *a, MemOp mop)
> +{
> + if (!check_vec(ctx, 32)) {
> + return true;
> + }
> +
> + return gvec_subi_vl(ctx, a, 32, mop);
Likewise.
r~
^ permalink raw reply [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 17/57] target/loongarch: Implement xvneg
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (15 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 16/57] target/loongarch: Implement xvaddi/xvsubi Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-10 1:51 ` Richard Henderson
2023-09-07 8:31 ` [PATCH RESEND v5 18/57] target/loongarch: Implement xvsadd/xvssub Song Gao
` (39 subsequent siblings)
56 siblings, 1 reply; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVNEG.{B/H/W/D}.
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/insns.decode | 5 +++++
target/loongarch/disas.c | 10 ++++++++++
target/loongarch/insn_trans/trans_vec.c.inc | 15 +++++++++++++++
3 files changed, 30 insertions(+)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index c48dca70b8..759172628f 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1320,6 +1320,11 @@ xvsubi_hu 0111 01101000 11001 ..... ..... ..... @vv_ui5
xvsubi_wu 0111 01101000 11010 ..... ..... ..... @vv_ui5
xvsubi_du 0111 01101000 11011 ..... ..... ..... @vv_ui5
+xvneg_b 0111 01101001 11000 01100 ..... ..... @vv
+xvneg_h 0111 01101001 11000 01101 ..... ..... @vv
+xvneg_w 0111 01101001 11000 01110 ..... ..... @vv
+xvneg_d 0111 01101001 11000 01111 ..... ..... @vv
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 20df9c7c99..a7455840a0 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1718,6 +1718,11 @@ static void output_vv_i_x(DisasContext *ctx, arg_vv_i *a, const char *mnemonic)
output(ctx, mnemonic, "x%d, x%d, 0x%x", a->vd, a->vj, a->imm);
}
+static void output_vv_x(DisasContext *ctx, arg_vv *a, const char *mnemonic)
+{
+ output(ctx, mnemonic, "x%d, x%d", a->vd, a->vj);
+}
+
INSN_LASX(xvadd_b, vvv)
INSN_LASX(xvadd_h, vvv)
INSN_LASX(xvadd_w, vvv)
@@ -1738,6 +1743,11 @@ INSN_LASX(xvsubi_hu, vv_i)
INSN_LASX(xvsubi_wu, vv_i)
INSN_LASX(xvsubi_du, vv_i)
+INSN_LASX(xvneg_b, vv)
+INSN_LASX(xvneg_h, vv)
+INSN_LASX(xvneg_w, vv)
+INSN_LASX(xvneg_d, vv)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index 610a492d0c..7230181071 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -243,6 +243,17 @@ static bool gvec_vv(DisasContext *ctx, arg_vv *a, MemOp mop,
return gvec_vv_vl(ctx, a, 16, mop, func);
}
+static bool gvec_xx(DisasContext *ctx, arg_vv *a, MemOp mop,
+ void (*func)(unsigned, uint32_t, uint32_t,
+ uint32_t, uint32_t))
+{
+ if (!check_vec(ctx, 32)) {
+ return true;
+ }
+
+ return gvec_vv_vl(ctx, a, 32, mop, func);
+}
+
static bool gvec_vv_i_vl(DisasContext *ctx, arg_vv_i *a,
uint32_t oprsz, MemOp mop,
void (*func)(unsigned, uint32_t, uint32_t,
@@ -398,6 +409,10 @@ TRANS(vneg_b, LSX, gvec_vv, MO_8, tcg_gen_gvec_neg)
TRANS(vneg_h, LSX, gvec_vv, MO_16, tcg_gen_gvec_neg)
TRANS(vneg_w, LSX, gvec_vv, MO_32, tcg_gen_gvec_neg)
TRANS(vneg_d, LSX, gvec_vv, MO_64, tcg_gen_gvec_neg)
+TRANS(xvneg_b, LASX, gvec_xx, MO_8, tcg_gen_gvec_neg)
+TRANS(xvneg_h, LASX, gvec_xx, MO_16, tcg_gen_gvec_neg)
+TRANS(xvneg_w, LASX, gvec_xx, MO_32, tcg_gen_gvec_neg)
+TRANS(xvneg_d, LASX, gvec_xx, MO_64, tcg_gen_gvec_neg)
TRANS(vsadd_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_ssadd)
TRANS(vsadd_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_ssadd)
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* Re: [PATCH RESEND v5 17/57] target/loongarch: Implement xvneg
2023-09-07 8:31 ` [PATCH RESEND v5 17/57] target/loongarch: Implement xvneg Song Gao
@ 2023-09-10 1:51 ` Richard Henderson
0 siblings, 0 replies; 87+ messages in thread
From: Richard Henderson @ 2023-09-10 1:51 UTC (permalink / raw)
To: Song Gao, qemu-devel; +Cc: maobibo
On 9/7/23 01:31, Song Gao wrote:
> +static bool gvec_xx(DisasContext *ctx, arg_vv *a, MemOp mop,
> + void (*func)(unsigned, uint32_t, uint32_t,
> + uint32_t, uint32_t))
> +{
> + if (!check_vec(ctx, 32)) {
> + return true;
> + }
> +
> + return gvec_vv_vl(ctx, a, 32, mop, func);
Move check_vec into gvec_vv_vl.
r~
^ permalink raw reply [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 18/57] target/loongarch: Implement xvsadd/xvssub
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (16 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 17/57] target/loongarch: Implement xvneg Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-07 8:31 ` [PATCH RESEND v5 19/57] target/loongarch: Implement xvhaddw/xvhsubw Song Gao
` (38 subsequent siblings)
56 siblings, 0 replies; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVSADD.{B/H/W/D}[U];
- XVSSUB.{B/H/W/D}[U].
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/insns.decode | 18 ++++++++++++++++++
target/loongarch/disas.c | 17 +++++++++++++++++
target/loongarch/insn_trans/trans_vec.c.inc | 17 +++++++++++++++++
3 files changed, 52 insertions(+)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 759172628f..32f857ff7c 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1325,6 +1325,24 @@ xvneg_h 0111 01101001 11000 01101 ..... ..... @vv
xvneg_w 0111 01101001 11000 01110 ..... ..... @vv
xvneg_d 0111 01101001 11000 01111 ..... ..... @vv
+xvsadd_b 0111 01000100 01100 ..... ..... ..... @vvv
+xvsadd_h 0111 01000100 01101 ..... ..... ..... @vvv
+xvsadd_w 0111 01000100 01110 ..... ..... ..... @vvv
+xvsadd_d 0111 01000100 01111 ..... ..... ..... @vvv
+xvsadd_bu 0111 01000100 10100 ..... ..... ..... @vvv
+xvsadd_hu 0111 01000100 10101 ..... ..... ..... @vvv
+xvsadd_wu 0111 01000100 10110 ..... ..... ..... @vvv
+xvsadd_du 0111 01000100 10111 ..... ..... ..... @vvv
+
+xvssub_b 0111 01000100 10000 ..... ..... ..... @vvv
+xvssub_h 0111 01000100 10001 ..... ..... ..... @vvv
+xvssub_w 0111 01000100 10010 ..... ..... ..... @vvv
+xvssub_d 0111 01000100 10011 ..... ..... ..... @vvv
+xvssub_bu 0111 01000100 11000 ..... ..... ..... @vvv
+xvssub_hu 0111 01000100 11001 ..... ..... ..... @vvv
+xvssub_wu 0111 01000100 11010 ..... ..... ..... @vvv
+xvssub_du 0111 01000100 11011 ..... ..... ..... @vvv
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index a7455840a0..4ba4fbfc64 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1748,6 +1748,23 @@ INSN_LASX(xvneg_h, vv)
INSN_LASX(xvneg_w, vv)
INSN_LASX(xvneg_d, vv)
+INSN_LASX(xvsadd_b, vvv)
+INSN_LASX(xvsadd_h, vvv)
+INSN_LASX(xvsadd_w, vvv)
+INSN_LASX(xvsadd_d, vvv)
+INSN_LASX(xvsadd_bu, vvv)
+INSN_LASX(xvsadd_hu, vvv)
+INSN_LASX(xvsadd_wu, vvv)
+INSN_LASX(xvsadd_du, vvv)
+INSN_LASX(xvssub_b, vvv)
+INSN_LASX(xvssub_h, vvv)
+INSN_LASX(xvssub_w, vvv)
+INSN_LASX(xvssub_d, vvv)
+INSN_LASX(xvssub_bu, vvv)
+INSN_LASX(xvssub_hu, vvv)
+INSN_LASX(xvssub_wu, vvv)
+INSN_LASX(xvssub_du, vvv)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index 7230181071..fd18f4cef7 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -431,6 +431,23 @@ TRANS(vssub_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_ussub)
TRANS(vssub_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_ussub)
TRANS(vssub_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_ussub)
+TRANS(xvsadd_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_ssadd)
+TRANS(xvsadd_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_ssadd)
+TRANS(xvsadd_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_ssadd)
+TRANS(xvsadd_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_ssadd)
+TRANS(xvsadd_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_usadd)
+TRANS(xvsadd_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_usadd)
+TRANS(xvsadd_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_usadd)
+TRANS(xvsadd_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_usadd)
+TRANS(xvssub_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_sssub)
+TRANS(xvssub_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_sssub)
+TRANS(xvssub_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_sssub)
+TRANS(xvssub_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_sssub)
+TRANS(xvssub_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_ussub)
+TRANS(xvssub_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_ussub)
+TRANS(xvssub_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_ussub)
+TRANS(xvssub_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_ussub)
+
TRANS(vhaddw_h_b, LSX, gen_vvv, gen_helper_vhaddw_h_b)
TRANS(vhaddw_w_h, LSX, gen_vvv, gen_helper_vhaddw_w_h)
TRANS(vhaddw_d_w, LSX, gen_vvv, gen_helper_vhaddw_d_w)
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 19/57] target/loongarch: Implement xvhaddw/xvhsubw
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (17 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 18/57] target/loongarch: Implement xvsadd/xvssub Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-11 21:20 ` Richard Henderson
2023-09-07 8:31 ` [PATCH RESEND v5 20/57] target/loongarch: Implement xvaddw/xvsubw Song Gao
` (37 subsequent siblings)
56 siblings, 1 reply; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVHADDW.{H.B/W.H/D.W/Q.D/HU.BU/WU.HU/DU.WU/QU.DU};
- XVHSUBW.{H.B/W.H/D.W/Q.D/HU.BU/WU.HU/DU.WU/QU.DU}.
Signed-off-by: Song Gao <gaosong@loongson.cn>
---
target/loongarch/insns.decode | 18 +++++++++++
target/loongarch/disas.c | 17 +++++++++++
target/loongarch/vec_helper.c | 34 ++++++++++++++++-----
target/loongarch/insn_trans/trans_vec.c.inc | 26 ++++++++++++++++
4 files changed, 88 insertions(+), 7 deletions(-)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 32f857ff7c..ba0b36f4a7 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1343,6 +1343,24 @@ xvssub_hu 0111 01000100 11001 ..... ..... ..... @vvv
xvssub_wu 0111 01000100 11010 ..... ..... ..... @vvv
xvssub_du 0111 01000100 11011 ..... ..... ..... @vvv
+xvhaddw_h_b 0111 01000101 01000 ..... ..... ..... @vvv
+xvhaddw_w_h 0111 01000101 01001 ..... ..... ..... @vvv
+xvhaddw_d_w 0111 01000101 01010 ..... ..... ..... @vvv
+xvhaddw_q_d 0111 01000101 01011 ..... ..... ..... @vvv
+xvhaddw_hu_bu 0111 01000101 10000 ..... ..... ..... @vvv
+xvhaddw_wu_hu 0111 01000101 10001 ..... ..... ..... @vvv
+xvhaddw_du_wu 0111 01000101 10010 ..... ..... ..... @vvv
+xvhaddw_qu_du 0111 01000101 10011 ..... ..... ..... @vvv
+
+xvhsubw_h_b 0111 01000101 01100 ..... ..... ..... @vvv
+xvhsubw_w_h 0111 01000101 01101 ..... ..... ..... @vvv
+xvhsubw_d_w 0111 01000101 01110 ..... ..... ..... @vvv
+xvhsubw_q_d 0111 01000101 01111 ..... ..... ..... @vvv
+xvhsubw_hu_bu 0111 01000101 10100 ..... ..... ..... @vvv
+xvhsubw_wu_hu 0111 01000101 10101 ..... ..... ..... @vvv
+xvhsubw_du_wu 0111 01000101 10110 ..... ..... ..... @vvv
+xvhsubw_qu_du 0111 01000101 10111 ..... ..... ..... @vvv
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 4ba4fbfc64..c810a52f0d 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1765,6 +1765,23 @@ INSN_LASX(xvssub_hu, vvv)
INSN_LASX(xvssub_wu, vvv)
INSN_LASX(xvssub_du, vvv)
+INSN_LASX(xvhaddw_h_b, vvv)
+INSN_LASX(xvhaddw_w_h, vvv)
+INSN_LASX(xvhaddw_d_w, vvv)
+INSN_LASX(xvhaddw_q_d, vvv)
+INSN_LASX(xvhaddw_hu_bu, vvv)
+INSN_LASX(xvhaddw_wu_hu, vvv)
+INSN_LASX(xvhaddw_du_wu, vvv)
+INSN_LASX(xvhaddw_qu_du, vvv)
+INSN_LASX(xvhsubw_h_b, vvv)
+INSN_LASX(xvhsubw_w_h, vvv)
+INSN_LASX(xvhsubw_d_w, vvv)
+INSN_LASX(xvhsubw_q_d, vvv)
+INSN_LASX(xvhsubw_hu_bu, vvv)
+INSN_LASX(xvhsubw_wu_hu, vvv)
+INSN_LASX(xvhsubw_du_wu, vvv)
+INSN_LASX(xvhsubw_qu_du, vvv)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index c784f98ab2..2ce0ca41a7 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -13,6 +13,7 @@
#include "internals.h"
#include "tcg/tcg.h"
#include "vec.h"
+#include "tcg/tcg-gvec-desc.h"
#define DO_ADD(a, b) (a + b)
#define DO_SUB(a, b) (a - b)
@@ -25,8 +26,9 @@ void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
VReg *Vj = (VReg *)vj; \
VReg *Vk = (VReg *)vk; \
typedef __typeof(Vd->E1(0)) TD; \
+ int oprsz = simd_oprsz(desc); \
\
- for (i = 0; i < LSX_LEN/BIT; i++) { \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i + 1), (TD)Vk->E2(2 * i)); \
} \
}
@@ -37,11 +39,16 @@ DO_ODD_EVEN(vhaddw_d_w, 64, D, W, DO_ADD)
void HELPER(vhaddw_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
{
+ int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);
- Vd->Q(0) = int128_add(int128_makes64(Vj->D(1)), int128_makes64(Vk->D(0)));
+ for (i = 0; i < oprsz / 16 ; i++) {
+ Vd->Q(i) = int128_add(int128_makes64(Vj->D(2 * i + 1)),
+ int128_makes64(Vk->D(2 * i)));
+ }
}
DO_ODD_EVEN(vhsubw_h_b, 16, H, B, DO_SUB)
@@ -50,11 +57,16 @@ DO_ODD_EVEN(vhsubw_d_w, 64, D, W, DO_SUB)
void HELPER(vhsubw_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
{
+ int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);
- Vd->Q(0) = int128_sub(int128_makes64(Vj->D(1)), int128_makes64(Vk->D(0)));
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_sub(int128_makes64(Vj->D(2 * i + 1)),
+ int128_makes64(Vk->D(2 * i)));
+ }
}
DO_ODD_EVEN(vhaddw_hu_bu, 16, UH, UB, DO_ADD)
@@ -63,12 +75,16 @@ DO_ODD_EVEN(vhaddw_du_wu, 64, UD, UW, DO_ADD)
void HELPER(vhaddw_qu_du)(void *vd, void *vj, void *vk, uint32_t desc)
{
+ int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);
- Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(1)),
- int128_make64((uint64_t)Vk->D(0)));
+ for (i = 0; i < oprsz / 16; i ++) {
+ Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i + 1)),
+ int128_make64(Vk->UD(2 * i)));
+ }
}
DO_ODD_EVEN(vhsubw_hu_bu, 16, UH, UB, DO_SUB)
@@ -77,12 +93,16 @@ DO_ODD_EVEN(vhsubw_du_wu, 64, UD, UW, DO_SUB)
void HELPER(vhsubw_qu_du)(void *vd, void *vj, void *vk, uint32_t desc)
{
+ int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);
- Vd->Q(0) = int128_sub(int128_make64((uint64_t)Vj->D(1)),
- int128_make64((uint64_t)Vk->D(0)));
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_sub(int128_make64(Vj->UD(2 * i + 1)),
+ int128_make64(Vk->UD(2 * i)));
+ }
}
#define DO_EVEN(NAME, BIT, E1, E2, DO_OP) \
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index fd18f4cef7..b2bc11fed1 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -113,6 +113,15 @@ static bool gen_vvv(DisasContext *ctx, arg_vvv *a, gen_helper_gvec_3 *fn)
return gen_vvv_vl(ctx, a, 16, fn);
}
+static bool gen_xxx(DisasContext *ctx, arg_vvv *a, gen_helper_gvec_3 *fn)
+{
+ if (!check_vec(ctx, 32)) {
+ return true;
+ }
+
+ return gen_vvv_vl(ctx, a, 32, fn);
+}
+
static bool gen_vv_ptr_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz,
gen_helper_gvec_2_ptr *fn)
{
@@ -465,6 +474,23 @@ TRANS(vhsubw_wu_hu, LSX, gen_vvv, gen_helper_vhsubw_wu_hu)
TRANS(vhsubw_du_wu, LSX, gen_vvv, gen_helper_vhsubw_du_wu)
TRANS(vhsubw_qu_du, LSX, gen_vvv, gen_helper_vhsubw_qu_du)
+TRANS(xvhaddw_h_b, LASX, gen_xxx, gen_helper_vhaddw_h_b)
+TRANS(xvhaddw_w_h, LASX, gen_xxx, gen_helper_vhaddw_w_h)
+TRANS(xvhaddw_d_w, LASX, gen_xxx, gen_helper_vhaddw_d_w)
+TRANS(xvhaddw_q_d, LASX, gen_xxx, gen_helper_vhaddw_q_d)
+TRANS(xvhaddw_hu_bu, LASX, gen_xxx, gen_helper_vhaddw_hu_bu)
+TRANS(xvhaddw_wu_hu, LASX, gen_xxx, gen_helper_vhaddw_wu_hu)
+TRANS(xvhaddw_du_wu, LASX, gen_xxx, gen_helper_vhaddw_du_wu)
+TRANS(xvhaddw_qu_du, LASX, gen_xxx, gen_helper_vhaddw_qu_du)
+TRANS(xvhsubw_h_b, LASX, gen_xxx, gen_helper_vhsubw_h_b)
+TRANS(xvhsubw_w_h, LASX, gen_xxx, gen_helper_vhsubw_w_h)
+TRANS(xvhsubw_d_w, LASX, gen_xxx, gen_helper_vhsubw_d_w)
+TRANS(xvhsubw_q_d, LASX, gen_xxx, gen_helper_vhsubw_q_d)
+TRANS(xvhsubw_hu_bu, LASX, gen_xxx, gen_helper_vhsubw_hu_bu)
+TRANS(xvhsubw_wu_hu, LASX, gen_xxx, gen_helper_vhsubw_wu_hu)
+TRANS(xvhsubw_du_wu, LASX, gen_xxx, gen_helper_vhsubw_du_wu)
+TRANS(xvhsubw_qu_du, LASX, gen_xxx, gen_helper_vhsubw_qu_du)
+
static void gen_vaddwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
TCGv_vec t1, t2;
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* Re: [PATCH RESEND v5 19/57] target/loongarch: Implement xvhaddw/xvhsubw
2023-09-07 8:31 ` [PATCH RESEND v5 19/57] target/loongarch: Implement xvhaddw/xvhsubw Song Gao
@ 2023-09-11 21:20 ` Richard Henderson
0 siblings, 0 replies; 87+ messages in thread
From: Richard Henderson @ 2023-09-11 21:20 UTC (permalink / raw)
To: Song Gao, qemu-devel; +Cc: maobibo
On 9/7/23 01:31, Song Gao wrote:
> This patch includes:
> - XVHADDW.{H.B/W.H/D.W/Q.D/HU.BU/WU.HU/DU.WU/QU.DU};
> - XVHSUBW.{H.B/W.H/D.W/Q.D/HU.BU/WU.HU/DU.WU/QU.DU}.
>
> Signed-off-by: Song Gao<gaosong@loongson.cn>
> ---
> target/loongarch/insns.decode | 18 +++++++++++
> target/loongarch/disas.c | 17 +++++++++++
> target/loongarch/vec_helper.c | 34 ++++++++++++++++-----
> target/loongarch/insn_trans/trans_vec.c.inc | 26 ++++++++++++++++
> 4 files changed, 88 insertions(+), 7 deletions(-)
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
r~
^ permalink raw reply [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 20/57] target/loongarch: Implement xvaddw/xvsubw
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (18 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 19/57] target/loongarch: Implement xvhaddw/xvhsubw Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-11 21:25 ` Richard Henderson
2023-09-07 8:31 ` [PATCH RESEND v5 21/57] target/loongarch: Implement xavg/xvagr Song Gao
` (36 subsequent siblings)
56 siblings, 1 reply; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVADDW{EV/OD}.{H.B/W.H/D.W/Q.D}[U];
- XVSUBW{EV/OD}.{H.B/W.H/D.W/Q.D}[U];
- XVADDW{EV/OD}.{H.BU.B/W.HU.H/D.WU.W/Q.DU.D}.
Signed-off-by: Song Gao <gaosong@loongson.cn>
---
target/loongarch/insns.decode | 45 ++++++++
target/loongarch/disas.c | 43 +++++++
target/loongarch/vec_helper.c | 120 ++++++++++++++------
target/loongarch/insn_trans/trans_vec.c.inc | 41 +++++++
4 files changed, 215 insertions(+), 34 deletions(-)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index ba0b36f4a7..e1d8b30179 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1361,6 +1361,51 @@ xvhsubw_wu_hu 0111 01000101 10101 ..... ..... ..... @vvv
xvhsubw_du_wu 0111 01000101 10110 ..... ..... ..... @vvv
xvhsubw_qu_du 0111 01000101 10111 ..... ..... ..... @vvv
+xvaddwev_h_b 0111 01000001 11100 ..... ..... ..... @vvv
+xvaddwev_w_h 0111 01000001 11101 ..... ..... ..... @vvv
+xvaddwev_d_w 0111 01000001 11110 ..... ..... ..... @vvv
+xvaddwev_q_d 0111 01000001 11111 ..... ..... ..... @vvv
+xvaddwod_h_b 0111 01000010 00100 ..... ..... ..... @vvv
+xvaddwod_w_h 0111 01000010 00101 ..... ..... ..... @vvv
+xvaddwod_d_w 0111 01000010 00110 ..... ..... ..... @vvv
+xvaddwod_q_d 0111 01000010 00111 ..... ..... ..... @vvv
+
+xvsubwev_h_b 0111 01000010 00000 ..... ..... ..... @vvv
+xvsubwev_w_h 0111 01000010 00001 ..... ..... ..... @vvv
+xvsubwev_d_w 0111 01000010 00010 ..... ..... ..... @vvv
+xvsubwev_q_d 0111 01000010 00011 ..... ..... ..... @vvv
+xvsubwod_h_b 0111 01000010 01000 ..... ..... ..... @vvv
+xvsubwod_w_h 0111 01000010 01001 ..... ..... ..... @vvv
+xvsubwod_d_w 0111 01000010 01010 ..... ..... ..... @vvv
+xvsubwod_q_d 0111 01000010 01011 ..... ..... ..... @vvv
+
+xvaddwev_h_bu 0111 01000010 11100 ..... ..... ..... @vvv
+xvaddwev_w_hu 0111 01000010 11101 ..... ..... ..... @vvv
+xvaddwev_d_wu 0111 01000010 11110 ..... ..... ..... @vvv
+xvaddwev_q_du 0111 01000010 11111 ..... ..... ..... @vvv
+xvaddwod_h_bu 0111 01000011 00100 ..... ..... ..... @vvv
+xvaddwod_w_hu 0111 01000011 00101 ..... ..... ..... @vvv
+xvaddwod_d_wu 0111 01000011 00110 ..... ..... ..... @vvv
+xvaddwod_q_du 0111 01000011 00111 ..... ..... ..... @vvv
+
+xvsubwev_h_bu 0111 01000011 00000 ..... ..... ..... @vvv
+xvsubwev_w_hu 0111 01000011 00001 ..... ..... ..... @vvv
+xvsubwev_d_wu 0111 01000011 00010 ..... ..... ..... @vvv
+xvsubwev_q_du 0111 01000011 00011 ..... ..... ..... @vvv
+xvsubwod_h_bu 0111 01000011 01000 ..... ..... ..... @vvv
+xvsubwod_w_hu 0111 01000011 01001 ..... ..... ..... @vvv
+xvsubwod_d_wu 0111 01000011 01010 ..... ..... ..... @vvv
+xvsubwod_q_du 0111 01000011 01011 ..... ..... ..... @vvv
+
+xvaddwev_h_bu_b 0111 01000011 11100 ..... ..... ..... @vvv
+xvaddwev_w_hu_h 0111 01000011 11101 ..... ..... ..... @vvv
+xvaddwev_d_wu_w 0111 01000011 11110 ..... ..... ..... @vvv
+xvaddwev_q_du_d 0111 01000011 11111 ..... ..... ..... @vvv
+xvaddwod_h_bu_b 0111 01000100 00000 ..... ..... ..... @vvv
+xvaddwod_w_hu_h 0111 01000100 00001 ..... ..... ..... @vvv
+xvaddwod_d_wu_w 0111 01000100 00010 ..... ..... ..... @vvv
+xvaddwod_q_du_d 0111 01000100 00011 ..... ..... ..... @vvv
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index c810a52f0d..e3e57e1d05 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1782,6 +1782,49 @@ INSN_LASX(xvhsubw_wu_hu, vvv)
INSN_LASX(xvhsubw_du_wu, vvv)
INSN_LASX(xvhsubw_qu_du, vvv)
+INSN_LASX(xvaddwev_h_b, vvv)
+INSN_LASX(xvaddwev_w_h, vvv)
+INSN_LASX(xvaddwev_d_w, vvv)
+INSN_LASX(xvaddwev_q_d, vvv)
+INSN_LASX(xvaddwod_h_b, vvv)
+INSN_LASX(xvaddwod_w_h, vvv)
+INSN_LASX(xvaddwod_d_w, vvv)
+INSN_LASX(xvaddwod_q_d, vvv)
+INSN_LASX(xvsubwev_h_b, vvv)
+INSN_LASX(xvsubwev_w_h, vvv)
+INSN_LASX(xvsubwev_d_w, vvv)
+INSN_LASX(xvsubwev_q_d, vvv)
+INSN_LASX(xvsubwod_h_b, vvv)
+INSN_LASX(xvsubwod_w_h, vvv)
+INSN_LASX(xvsubwod_d_w, vvv)
+INSN_LASX(xvsubwod_q_d, vvv)
+
+INSN_LASX(xvaddwev_h_bu, vvv)
+INSN_LASX(xvaddwev_w_hu, vvv)
+INSN_LASX(xvaddwev_d_wu, vvv)
+INSN_LASX(xvaddwev_q_du, vvv)
+INSN_LASX(xvaddwod_h_bu, vvv)
+INSN_LASX(xvaddwod_w_hu, vvv)
+INSN_LASX(xvaddwod_d_wu, vvv)
+INSN_LASX(xvaddwod_q_du, vvv)
+INSN_LASX(xvsubwev_h_bu, vvv)
+INSN_LASX(xvsubwev_w_hu, vvv)
+INSN_LASX(xvsubwev_d_wu, vvv)
+INSN_LASX(xvsubwev_q_du, vvv)
+INSN_LASX(xvsubwod_h_bu, vvv)
+INSN_LASX(xvsubwod_w_hu, vvv)
+INSN_LASX(xvsubwod_d_wu, vvv)
+INSN_LASX(xvsubwod_q_du, vvv)
+
+INSN_LASX(xvaddwev_h_bu_b, vvv)
+INSN_LASX(xvaddwev_w_hu_h, vvv)
+INSN_LASX(xvaddwev_d_wu_w, vvv)
+INSN_LASX(xvaddwev_q_du_d, vvv)
+INSN_LASX(xvaddwod_h_bu_b, vvv)
+INSN_LASX(xvaddwod_w_hu_h, vvv)
+INSN_LASX(xvaddwod_d_wu_w, vvv)
+INSN_LASX(xvaddwod_q_du_d, vvv)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index 2ce0ca41a7..fc3b07e8d2 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -106,133 +106,173 @@ void HELPER(vhsubw_qu_du)(void *vd, void *vj, void *vk, uint32_t desc)
}
#define DO_EVEN(NAME, BIT, E1, E2, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
int i; \
VReg *Vd = (VReg *)vd; \
VReg *Vj = (VReg *)vj; \
VReg *Vk = (VReg *)vk; \
typedef __typeof(Vd->E1(0)) TD; \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i) ,(TD)Vk->E2(2 * i)); \
} \
}
#define DO_ODD(NAME, BIT, E1, E2, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
int i; \
VReg *Vd = (VReg *)vd; \
VReg *Vj = (VReg *)vj; \
VReg *Vk = (VReg *)vk; \
typedef __typeof(Vd->E1(0)) TD; \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i + 1), (TD)Vk->E2(2 * i + 1)); \
} \
}
-void HELPER(vaddwev_q_d)(void *vd, void *vj, void *vk, uint32_t v)
+void HELPER(vaddwev_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
{
+ int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);
- Vd->Q(0) = int128_add(int128_makes64(Vj->D(0)), int128_makes64(Vk->D(0)));
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_add(int128_makes64(Vj->D(2 * i)),
+ int128_makes64(Vk->D(2 * i)));
+ }
}
DO_EVEN(vaddwev_h_b, 16, H, B, DO_ADD)
DO_EVEN(vaddwev_w_h, 32, W, H, DO_ADD)
DO_EVEN(vaddwev_d_w, 64, D, W, DO_ADD)
-void HELPER(vaddwod_q_d)(void *vd, void *vj, void *vk, uint32_t v)
+void HELPER(vaddwod_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
{
+ int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);
- Vd->Q(0) = int128_add(int128_makes64(Vj->D(1)), int128_makes64(Vk->D(1)));
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_add(int128_makes64(Vj->D(2 * i +1)),
+ int128_makes64(Vk->D(2 * i +1)));
+ }
}
DO_ODD(vaddwod_h_b, 16, H, B, DO_ADD)
DO_ODD(vaddwod_w_h, 32, W, H, DO_ADD)
DO_ODD(vaddwod_d_w, 64, D, W, DO_ADD)
-void HELPER(vsubwev_q_d)(void *vd, void *vj, void *vk, uint32_t v)
+void HELPER(vsubwev_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
{
+ int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);
- Vd->Q(0) = int128_sub(int128_makes64(Vj->D(0)), int128_makes64(Vk->D(0)));
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_sub(int128_makes64(Vj->D(2 * i)),
+ int128_makes64(Vk->D(2 * i)));
+ }
}
DO_EVEN(vsubwev_h_b, 16, H, B, DO_SUB)
DO_EVEN(vsubwev_w_h, 32, W, H, DO_SUB)
DO_EVEN(vsubwev_d_w, 64, D, W, DO_SUB)
-void HELPER(vsubwod_q_d)(void *vd, void *vj, void *vk, uint32_t v)
+void HELPER(vsubwod_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
{
+ int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);
- Vd->Q(0) = int128_sub(int128_makes64(Vj->D(1)), int128_makes64(Vk->D(1)));
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_sub(int128_makes64(Vj->D(2 * i + 1)),
+ int128_makes64(Vk->D(2 * i + 1)));
+ }
}
DO_ODD(vsubwod_h_b, 16, H, B, DO_SUB)
DO_ODD(vsubwod_w_h, 32, W, H, DO_SUB)
DO_ODD(vsubwod_d_w, 64, D, W, DO_SUB)
-void HELPER(vaddwev_q_du)(void *vd, void *vj, void *vk, uint32_t v)
+void HELPER(vaddwev_q_du)(void *vd, void *vj, void *vk, uint32_t desc)
{
+ int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);
- Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(0)),
- int128_make64((uint64_t)Vk->D(0)));
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i)),
+ int128_make64(Vk->UD(2 * i)));
+ }
}
DO_EVEN(vaddwev_h_bu, 16, UH, UB, DO_ADD)
DO_EVEN(vaddwev_w_hu, 32, UW, UH, DO_ADD)
DO_EVEN(vaddwev_d_wu, 64, UD, UW, DO_ADD)
-void HELPER(vaddwod_q_du)(void *vd, void *vj, void *vk, uint32_t v)
+void HELPER(vaddwod_q_du)(void *vd, void *vj, void *vk, uint32_t desc)
{
+ int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);
- Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(1)),
- int128_make64((uint64_t)Vk->D(1)));
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i + 1)),
+ int128_make64(Vk->UD(2 * i + 1)));
+ }
}
DO_ODD(vaddwod_h_bu, 16, UH, UB, DO_ADD)
DO_ODD(vaddwod_w_hu, 32, UW, UH, DO_ADD)
DO_ODD(vaddwod_d_wu, 64, UD, UW, DO_ADD)
-void HELPER(vsubwev_q_du)(void *vd, void *vj, void *vk, uint32_t v)
+void HELPER(vsubwev_q_du)(void *vd, void *vj, void *vk, uint32_t desc)
{
+ int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);
- Vd->Q(0) = int128_sub(int128_make64((uint64_t)Vj->D(0)),
- int128_make64((uint64_t)Vk->D(0)));
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_sub(int128_make64(Vj->UD(2 * i)),
+ int128_make64(Vk->UD(2 * i)));
+ }
}
DO_EVEN(vsubwev_h_bu, 16, UH, UB, DO_SUB)
DO_EVEN(vsubwev_w_hu, 32, UW, UH, DO_SUB)
DO_EVEN(vsubwev_d_wu, 64, UD, UW, DO_SUB)
-void HELPER(vsubwod_q_du)(void *vd, void *vj, void *vk, uint32_t v)
+void HELPER(vsubwod_q_du)(void *vd, void *vj, void *vk, uint32_t desc)
{
+ int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);
- Vd->Q(0) = int128_sub(int128_make64((uint64_t)Vj->D(1)),
- int128_make64((uint64_t)Vk->D(1)));
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_sub(int128_make64(Vj->UD(2 * i + 1)),
+ int128_make64(Vk->UD(2 * i + 1)));
+ }
}
DO_ODD(vsubwod_h_bu, 16, UH, UB, DO_SUB)
@@ -240,7 +280,7 @@ DO_ODD(vsubwod_w_hu, 32, UW, UH, DO_SUB)
DO_ODD(vsubwod_d_wu, 64, UD, UW, DO_SUB)
#define DO_EVEN_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
int i; \
VReg *Vd = (VReg *)vd; \
@@ -248,13 +288,15 @@ void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
VReg *Vk = (VReg *)vk; \
typedef __typeof(Vd->ES1(0)) TDS; \
typedef __typeof(Vd->EU1(0)) TDU; \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
Vd->ES1(i) = DO_OP((TDU)Vj->EU2(2 * i) ,(TDS)Vk->ES2(2 * i)); \
} \
}
#define DO_ODD_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
int i; \
VReg *Vd = (VReg *)vd; \
@@ -262,33 +304,43 @@ void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
VReg *Vk = (VReg *)vk; \
typedef __typeof(Vd->ES1(0)) TDS; \
typedef __typeof(Vd->EU1(0)) TDU; \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
Vd->ES1(i) = DO_OP((TDU)Vj->EU2(2 * i + 1), (TDS)Vk->ES2(2 * i + 1)); \
} \
}
-void HELPER(vaddwev_q_du_d)(void *vd, void *vj, void *vk, uint32_t v)
+void HELPER(vaddwev_q_du_d)(void *vd, void *vj, void *vk, uint32_t desc)
{
+ int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);
- Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(0)),
- int128_makes64(Vk->D(0)));
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i)),
+ int128_makes64(Vk->D(2 * i)));
+ }
}
DO_EVEN_U_S(vaddwev_h_bu_b, 16, H, UH, B, UB, DO_ADD)
DO_EVEN_U_S(vaddwev_w_hu_h, 32, W, UW, H, UH, DO_ADD)
DO_EVEN_U_S(vaddwev_d_wu_w, 64, D, UD, W, UW, DO_ADD)
-void HELPER(vaddwod_q_du_d)(void *vd, void *vj, void *vk, uint32_t v)
+void HELPER(vaddwod_q_du_d)(void *vd, void *vj, void *vk, uint32_t desc)
{
+ int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);
- Vd->Q(0) = int128_add(int128_make64((uint64_t)Vj->D(1)),
- int128_makes64(Vk->D(1)));
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i + 1)),
+ int128_makes64(Vk->D(2 * i + 1)));
+ }
}
DO_ODD_U_S(vaddwod_h_bu_b, 16, H, UH, B, UB, DO_ADD)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index b2bc11fed1..8234d4670a 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -573,6 +573,10 @@ TRANS(vaddwev_h_b, LSX, gvec_vvv, MO_8, do_vaddwev_s)
TRANS(vaddwev_w_h, LSX, gvec_vvv, MO_16, do_vaddwev_s)
TRANS(vaddwev_d_w, LSX, gvec_vvv, MO_32, do_vaddwev_s)
TRANS(vaddwev_q_d, LSX, gvec_vvv, MO_64, do_vaddwev_s)
+TRANS(xvaddwev_h_b, LASX, gvec_xxx, MO_8, do_vaddwev_s)
+TRANS(xvaddwev_w_h, LASX, gvec_xxx, MO_16, do_vaddwev_s)
+TRANS(xvaddwev_d_w, LASX, gvec_xxx, MO_32, do_vaddwev_s)
+TRANS(xvaddwev_q_d, LASX, gvec_xxx, MO_64, do_vaddwev_s)
static void gen_vaddwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
{
@@ -652,6 +656,11 @@ TRANS(vaddwod_h_b, LSX, gvec_vvv, MO_8, do_vaddwod_s)
TRANS(vaddwod_w_h, LSX, gvec_vvv, MO_16, do_vaddwod_s)
TRANS(vaddwod_d_w, LSX, gvec_vvv, MO_32, do_vaddwod_s)
TRANS(vaddwod_q_d, LSX, gvec_vvv, MO_64, do_vaddwod_s)
+TRANS(xvaddwod_h_b, LASX, gvec_xxx, MO_8, do_vaddwod_s)
+TRANS(xvaddwod_w_h, LASX, gvec_xxx, MO_16, do_vaddwod_s)
+TRANS(xvaddwod_d_w, LASX, gvec_xxx, MO_32, do_vaddwod_s)
+TRANS(xvaddwod_q_d, LASX, gvec_xxx, MO_64, do_vaddwod_s)
+
static void gen_vsubwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
@@ -735,6 +744,10 @@ TRANS(vsubwev_h_b, LSX, gvec_vvv, MO_8, do_vsubwev_s)
TRANS(vsubwev_w_h, LSX, gvec_vvv, MO_16, do_vsubwev_s)
TRANS(vsubwev_d_w, LSX, gvec_vvv, MO_32, do_vsubwev_s)
TRANS(vsubwev_q_d, LSX, gvec_vvv, MO_64, do_vsubwev_s)
+TRANS(xvsubwev_h_b, LASX, gvec_xxx, MO_8, do_vsubwev_s)
+TRANS(xvsubwev_w_h, LASX, gvec_xxx, MO_16, do_vsubwev_s)
+TRANS(xvsubwev_d_w, LASX, gvec_xxx, MO_32, do_vsubwev_s)
+TRANS(xvsubwev_q_d, LASX, gvec_xxx, MO_64, do_vsubwev_s)
static void gen_vsubwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
@@ -814,6 +827,10 @@ TRANS(vsubwod_h_b, LSX, gvec_vvv, MO_8, do_vsubwod_s)
TRANS(vsubwod_w_h, LSX, gvec_vvv, MO_16, do_vsubwod_s)
TRANS(vsubwod_d_w, LSX, gvec_vvv, MO_32, do_vsubwod_s)
TRANS(vsubwod_q_d, LSX, gvec_vvv, MO_64, do_vsubwod_s)
+TRANS(xvsubwod_h_b, LASX, gvec_xxx, MO_8, do_vsubwod_s)
+TRANS(xvsubwod_w_h, LASX, gvec_xxx, MO_16, do_vsubwod_s)
+TRANS(xvsubwod_d_w, LASX, gvec_xxx, MO_32, do_vsubwod_s)
+TRANS(xvsubwod_q_d, LASX, gvec_xxx, MO_64, do_vsubwod_s)
static void gen_vaddwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
@@ -889,6 +906,10 @@ TRANS(vaddwev_h_bu, LSX, gvec_vvv, MO_8, do_vaddwev_u)
TRANS(vaddwev_w_hu, LSX, gvec_vvv, MO_16, do_vaddwev_u)
TRANS(vaddwev_d_wu, LSX, gvec_vvv, MO_32, do_vaddwev_u)
TRANS(vaddwev_q_du, LSX, gvec_vvv, MO_64, do_vaddwev_u)
+TRANS(xvaddwev_h_bu, LASX, gvec_xxx, MO_8, do_vaddwev_u)
+TRANS(xvaddwev_w_hu, LASX, gvec_xxx, MO_16, do_vaddwev_u)
+TRANS(xvaddwev_d_wu, LASX, gvec_xxx, MO_32, do_vaddwev_u)
+TRANS(xvaddwev_q_du, LASX, gvec_xxx, MO_64, do_vaddwev_u)
static void gen_vaddwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
@@ -968,6 +989,10 @@ TRANS(vaddwod_h_bu, LSX, gvec_vvv, MO_8, do_vaddwod_u)
TRANS(vaddwod_w_hu, LSX, gvec_vvv, MO_16, do_vaddwod_u)
TRANS(vaddwod_d_wu, LSX, gvec_vvv, MO_32, do_vaddwod_u)
TRANS(vaddwod_q_du, LSX, gvec_vvv, MO_64, do_vaddwod_u)
+TRANS(xvaddwod_h_bu, LASX, gvec_xxx, MO_8, do_vaddwod_u)
+TRANS(xvaddwod_w_hu, LASX, gvec_xxx, MO_16, do_vaddwod_u)
+TRANS(xvaddwod_d_wu, LASX, gvec_xxx, MO_32, do_vaddwod_u)
+TRANS(xvaddwod_q_du, LASX, gvec_xxx, MO_64, do_vaddwod_u)
static void gen_vsubwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
@@ -1043,6 +1068,10 @@ TRANS(vsubwev_h_bu, LSX, gvec_vvv, MO_8, do_vsubwev_u)
TRANS(vsubwev_w_hu, LSX, gvec_vvv, MO_16, do_vsubwev_u)
TRANS(vsubwev_d_wu, LSX, gvec_vvv, MO_32, do_vsubwev_u)
TRANS(vsubwev_q_du, LSX, gvec_vvv, MO_64, do_vsubwev_u)
+TRANS(xvsubwev_h_bu, LASX, gvec_xxx, MO_8, do_vsubwev_u)
+TRANS(xvsubwev_w_hu, LASX, gvec_xxx, MO_16, do_vsubwev_u)
+TRANS(xvsubwev_d_wu, LASX, gvec_xxx, MO_32, do_vsubwev_u)
+TRANS(xvsubwev_q_du, LASX, gvec_xxx, MO_64, do_vsubwev_u)
static void gen_vsubwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
@@ -1122,6 +1151,10 @@ TRANS(vsubwod_h_bu, LSX, gvec_vvv, MO_8, do_vsubwod_u)
TRANS(vsubwod_w_hu, LSX, gvec_vvv, MO_16, do_vsubwod_u)
TRANS(vsubwod_d_wu, LSX, gvec_vvv, MO_32, do_vsubwod_u)
TRANS(vsubwod_q_du, LSX, gvec_vvv, MO_64, do_vsubwod_u)
+TRANS(xvsubwod_h_bu, LASX, gvec_xxx, MO_8, do_vsubwod_u)
+TRANS(xvsubwod_w_hu, LASX, gvec_xxx, MO_16, do_vsubwod_u)
+TRANS(xvsubwod_d_wu, LASX, gvec_xxx, MO_32, do_vsubwod_u)
+TRANS(xvsubwod_q_du, LASX, gvec_xxx, MO_64, do_vsubwod_u)
static void gen_vaddwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
@@ -1205,6 +1238,10 @@ TRANS(vaddwev_h_bu_b, LSX, gvec_vvv, MO_8, do_vaddwev_u_s)
TRANS(vaddwev_w_hu_h, LSX, gvec_vvv, MO_16, do_vaddwev_u_s)
TRANS(vaddwev_d_wu_w, LSX, gvec_vvv, MO_32, do_vaddwev_u_s)
TRANS(vaddwev_q_du_d, LSX, gvec_vvv, MO_64, do_vaddwev_u_s)
+TRANS(xvaddwev_h_bu_b, LASX, gvec_xxx, MO_8, do_vaddwev_u_s)
+TRANS(xvaddwev_w_hu_h, LASX, gvec_xxx, MO_16, do_vaddwev_u_s)
+TRANS(xvaddwev_d_wu_w, LASX, gvec_xxx, MO_32, do_vaddwev_u_s)
+TRANS(xvaddwev_q_du_d, LASX, gvec_xxx, MO_64, do_vaddwev_u_s)
static void gen_vaddwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
@@ -1285,6 +1322,10 @@ TRANS(vaddwod_h_bu_b, LSX, gvec_vvv, MO_8, do_vaddwod_u_s)
TRANS(vaddwod_w_hu_h, LSX, gvec_vvv, MO_16, do_vaddwod_u_s)
TRANS(vaddwod_d_wu_w, LSX, gvec_vvv, MO_32, do_vaddwod_u_s)
TRANS(vaddwod_q_du_d, LSX, gvec_vvv, MO_64, do_vaddwod_u_s)
+TRANS(xvaddwod_h_bu_b, LASX, gvec_xxx, MO_8, do_vaddwod_u_s)
+TRANS(xvaddwod_w_hu_h, LASX, gvec_xxx, MO_16, do_vaddwod_u_s)
+TRANS(xvaddwod_d_wu_w, LASX, gvec_xxx, MO_32, do_vaddwod_u_s)
+TRANS(xvaddwod_q_du_d, LASX, gvec_xxx, MO_64, do_vaddwod_u_s)
static void do_vavg(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b,
void (*gen_shr_vec)(unsigned, TCGv_vec,
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* Re: [PATCH RESEND v5 20/57] target/loongarch: Implement xvaddw/xvsubw
2023-09-07 8:31 ` [PATCH RESEND v5 20/57] target/loongarch: Implement xvaddw/xvsubw Song Gao
@ 2023-09-11 21:25 ` Richard Henderson
0 siblings, 0 replies; 87+ messages in thread
From: Richard Henderson @ 2023-09-11 21:25 UTC (permalink / raw)
To: Song Gao, qemu-devel; +Cc: maobibo
On 9/7/23 01:31, Song Gao wrote:
> This patch includes:
> - XVADDW{EV/OD}.{H.B/W.H/D.W/Q.D}[U];
> - XVSUBW{EV/OD}.{H.B/W.H/D.W/Q.D}[U];
> - XVADDW{EV/OD}.{H.BU.B/W.HU.H/D.WU.W/Q.DU.D}.
>
> Signed-off-by: Song Gao<gaosong@loongson.cn>
> ---
> target/loongarch/insns.decode | 45 ++++++++
> target/loongarch/disas.c | 43 +++++++
> target/loongarch/vec_helper.c | 120 ++++++++++++++------
> target/loongarch/insn_trans/trans_vec.c.inc | 41 +++++++
> 4 files changed, 215 insertions(+), 34 deletions(-)
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
r~
^ permalink raw reply [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 21/57] target/loongarch: Implement xavg/xvagr
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (19 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 20/57] target/loongarch: Implement xvaddw/xvsubw Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-11 21:27 ` Richard Henderson
2023-09-07 8:31 ` [PATCH RESEND v5 22/57] target/loongarch: Implement xvabsd Song Gao
` (35 subsequent siblings)
56 siblings, 1 reply; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVAVG.{B/H/W/D}[U];
- XVAVGR.{B/H/W/D}[U].
Signed-off-by: Song Gao <gaosong@loongson.cn>
---
target/loongarch/insns.decode | 17 ++++++++++++++++
target/loongarch/disas.c | 17 ++++++++++++++++
target/loongarch/vec_helper.c | 22 +++++++++++----------
target/loongarch/insn_trans/trans_vec.c.inc | 16 +++++++++++++++
4 files changed, 62 insertions(+), 10 deletions(-)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index e1d8b30179..a2cb39750d 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1406,6 +1406,23 @@ xvaddwod_w_hu_h 0111 01000100 00001 ..... ..... ..... @vvv
xvaddwod_d_wu_w 0111 01000100 00010 ..... ..... ..... @vvv
xvaddwod_q_du_d 0111 01000100 00011 ..... ..... ..... @vvv
+xvavg_b 0111 01000110 01000 ..... ..... ..... @vvv
+xvavg_h 0111 01000110 01001 ..... ..... ..... @vvv
+xvavg_w 0111 01000110 01010 ..... ..... ..... @vvv
+xvavg_d 0111 01000110 01011 ..... ..... ..... @vvv
+xvavg_bu 0111 01000110 01100 ..... ..... ..... @vvv
+xvavg_hu 0111 01000110 01101 ..... ..... ..... @vvv
+xvavg_wu 0111 01000110 01110 ..... ..... ..... @vvv
+xvavg_du 0111 01000110 01111 ..... ..... ..... @vvv
+xvavgr_b 0111 01000110 10000 ..... ..... ..... @vvv
+xvavgr_h 0111 01000110 10001 ..... ..... ..... @vvv
+xvavgr_w 0111 01000110 10010 ..... ..... ..... @vvv
+xvavgr_d 0111 01000110 10011 ..... ..... ..... @vvv
+xvavgr_bu 0111 01000110 10100 ..... ..... ..... @vvv
+xvavgr_hu 0111 01000110 10101 ..... ..... ..... @vvv
+xvavgr_wu 0111 01000110 10110 ..... ..... ..... @vvv
+xvavgr_du 0111 01000110 10111 ..... ..... ..... @vvv
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index e3e57e1d05..f9d9583fcc 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1825,6 +1825,23 @@ INSN_LASX(xvaddwod_w_hu_h, vvv)
INSN_LASX(xvaddwod_d_wu_w, vvv)
INSN_LASX(xvaddwod_q_du_d, vvv)
+INSN_LASX(xvavg_b, vvv)
+INSN_LASX(xvavg_h, vvv)
+INSN_LASX(xvavg_w, vvv)
+INSN_LASX(xvavg_d, vvv)
+INSN_LASX(xvavg_bu, vvv)
+INSN_LASX(xvavg_hu, vvv)
+INSN_LASX(xvavg_wu, vvv)
+INSN_LASX(xvavg_du, vvv)
+INSN_LASX(xvavgr_b, vvv)
+INSN_LASX(xvavgr_h, vvv)
+INSN_LASX(xvavgr_w, vvv)
+INSN_LASX(xvavgr_d, vvv)
+INSN_LASX(xvavgr_bu, vvv)
+INSN_LASX(xvavgr_hu, vvv)
+INSN_LASX(xvavgr_wu, vvv)
+INSN_LASX(xvavgr_du, vvv)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index fc3b07e8d2..35b207aae1 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -350,16 +350,18 @@ DO_ODD_U_S(vaddwod_d_wu_w, 64, D, UD, W, UW, DO_ADD)
#define DO_VAVG(a, b) ((a >> 1) + (b >> 1) + (a & b & 1))
#define DO_VAVGR(a, b) ((a >> 1) + (b >> 1) + ((a | b) & 1))
-#define DO_3OP(NAME, BIT, E, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)); \
- } \
+#define DO_3OP(NAME, BIT, E, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)); \
+ } \
}
DO_3OP(vavg_b, 8, B, DO_VAVG)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index 8234d4670a..270dd2a08f 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -1442,6 +1442,14 @@ TRANS(vavg_bu, LSX, gvec_vvv, MO_8, do_vavg_u)
TRANS(vavg_hu, LSX, gvec_vvv, MO_16, do_vavg_u)
TRANS(vavg_wu, LSX, gvec_vvv, MO_32, do_vavg_u)
TRANS(vavg_du, LSX, gvec_vvv, MO_64, do_vavg_u)
+TRANS(xvavg_b, LASX, gvec_xxx, MO_8, do_vavg_s)
+TRANS(xvavg_h, LASX, gvec_xxx, MO_16, do_vavg_s)
+TRANS(xvavg_w, LASX, gvec_xxx, MO_32, do_vavg_s)
+TRANS(xvavg_d, LASX, gvec_xxx, MO_64, do_vavg_s)
+TRANS(xvavg_bu, LASX, gvec_xxx, MO_8, do_vavg_u)
+TRANS(xvavg_hu, LASX, gvec_xxx, MO_16, do_vavg_u)
+TRANS(xvavg_wu, LASX, gvec_xxx, MO_32, do_vavg_u)
+TRANS(xvavg_du, LASX, gvec_xxx, MO_64, do_vavg_u)
static void do_vavgr_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
@@ -1523,6 +1531,14 @@ TRANS(vavgr_bu, LSX, gvec_vvv, MO_8, do_vavgr_u)
TRANS(vavgr_hu, LSX, gvec_vvv, MO_16, do_vavgr_u)
TRANS(vavgr_wu, LSX, gvec_vvv, MO_32, do_vavgr_u)
TRANS(vavgr_du, LSX, gvec_vvv, MO_64, do_vavgr_u)
+TRANS(xvavgr_b, LASX, gvec_xxx, MO_8, do_vavgr_s)
+TRANS(xvavgr_h, LASX, gvec_xxx, MO_16, do_vavgr_s)
+TRANS(xvavgr_w, LASX, gvec_xxx, MO_32, do_vavgr_s)
+TRANS(xvavgr_d, LASX, gvec_xxx, MO_64, do_vavgr_s)
+TRANS(xvavgr_bu, LASX, gvec_xxx, MO_8, do_vavgr_u)
+TRANS(xvavgr_hu, LASX, gvec_xxx, MO_16, do_vavgr_u)
+TRANS(xvavgr_wu, LASX, gvec_xxx, MO_32, do_vavgr_u)
+TRANS(xvavgr_du, LASX, gvec_xxx, MO_64, do_vavgr_u)
static void gen_vabsd_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* Re: [PATCH RESEND v5 21/57] target/loongarch: Implement xavg/xvagr
2023-09-07 8:31 ` [PATCH RESEND v5 21/57] target/loongarch: Implement xavg/xvagr Song Gao
@ 2023-09-11 21:27 ` Richard Henderson
0 siblings, 0 replies; 87+ messages in thread
From: Richard Henderson @ 2023-09-11 21:27 UTC (permalink / raw)
To: Song Gao, qemu-devel; +Cc: maobibo
On 9/7/23 01:31, Song Gao wrote:
> This patch includes:
> - XVAVG.{B/H/W/D/}[U];
> - XVAVGR.{B/H/W/D}[U].
>
> Signed-off-by: Song Gao<gaosong@loongson.cn>
> ---
> target/loongarch/insns.decode | 17 ++++++++++++++++
> target/loongarch/disas.c | 17 ++++++++++++++++
> target/loongarch/vec_helper.c | 22 +++++++++++----------
> target/loongarch/insn_trans/trans_vec.c.inc | 16 +++++++++++++++
> 4 files changed, 62 insertions(+), 10 deletions(-)
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
r~
^ permalink raw reply [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 22/57] target/loongarch: Implement xvabsd
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (20 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 21/57] target/loongarch: Implement xavg/xvagr Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-07 8:31 ` [PATCH RESEND v5 23/57] target/loongarch: Implement xvadda Song Gao
` (34 subsequent siblings)
56 siblings, 0 replies; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVABSD.{B/H/W/D}[U].
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/insns.decode | 9 +++++++++
target/loongarch/disas.c | 9 +++++++++
target/loongarch/insn_trans/trans_vec.c.inc | 8 ++++++++
3 files changed, 26 insertions(+)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index a2cb39750d..c086ee9b22 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1423,6 +1423,15 @@ xvavgr_hu 0111 01000110 10101 ..... ..... ..... @vvv
xvavgr_wu 0111 01000110 10110 ..... ..... ..... @vvv
xvavgr_du 0111 01000110 10111 ..... ..... ..... @vvv
+xvabsd_b 0111 01000110 00000 ..... ..... ..... @vvv
+xvabsd_h 0111 01000110 00001 ..... ..... ..... @vvv
+xvabsd_w 0111 01000110 00010 ..... ..... ..... @vvv
+xvabsd_d 0111 01000110 00011 ..... ..... ..... @vvv
+xvabsd_bu 0111 01000110 00100 ..... ..... ..... @vvv
+xvabsd_hu 0111 01000110 00101 ..... ..... ..... @vvv
+xvabsd_wu 0111 01000110 00110 ..... ..... ..... @vvv
+xvabsd_du 0111 01000110 00111 ..... ..... ..... @vvv
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index f9d9583fcc..bbe7ad8322 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1842,6 +1842,15 @@ INSN_LASX(xvavgr_hu, vvv)
INSN_LASX(xvavgr_wu, vvv)
INSN_LASX(xvavgr_du, vvv)
+INSN_LASX(xvabsd_b, vvv)
+INSN_LASX(xvabsd_h, vvv)
+INSN_LASX(xvabsd_w, vvv)
+INSN_LASX(xvabsd_d, vvv)
+INSN_LASX(xvabsd_bu, vvv)
+INSN_LASX(xvabsd_hu, vvv)
+INSN_LASX(xvabsd_wu, vvv)
+INSN_LASX(xvabsd_du, vvv)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index 270dd2a08f..b86a6f36c1 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -1634,6 +1634,14 @@ TRANS(vabsd_bu, LSX, gvec_vvv, MO_8, do_vabsd_u)
TRANS(vabsd_hu, LSX, gvec_vvv, MO_16, do_vabsd_u)
TRANS(vabsd_wu, LSX, gvec_vvv, MO_32, do_vabsd_u)
TRANS(vabsd_du, LSX, gvec_vvv, MO_64, do_vabsd_u)
+TRANS(xvabsd_b, LASX, gvec_xxx, MO_8, do_vabsd_s)
+TRANS(xvabsd_h, LASX, gvec_xxx, MO_16, do_vabsd_s)
+TRANS(xvabsd_w, LASX, gvec_xxx, MO_32, do_vabsd_s)
+TRANS(xvabsd_d, LASX, gvec_xxx, MO_64, do_vabsd_s)
+TRANS(xvabsd_bu, LASX, gvec_xxx, MO_8, do_vabsd_u)
+TRANS(xvabsd_hu, LASX, gvec_xxx, MO_16, do_vabsd_u)
+TRANS(xvabsd_wu, LASX, gvec_xxx, MO_32, do_vabsd_u)
+TRANS(xvabsd_du, LASX, gvec_xxx, MO_64, do_vabsd_u)
static void gen_vadda(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 23/57] target/loongarch: Implement xvadda
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (21 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 22/57] target/loongarch: Implement xvabsd Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-07 8:31 ` [PATCH RESEND v5 24/57] target/loongarch: Implement xvmax/xvmin Song Gao
` (33 subsequent siblings)
56 siblings, 0 replies; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVADDA.{B/H/W/D}.
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/insns.decode | 5 ++++
target/loongarch/disas.c | 5 ++++
target/loongarch/vec_helper.c | 30 +++++++++++----------
target/loongarch/insn_trans/trans_vec.c.inc | 4 +++
4 files changed, 30 insertions(+), 14 deletions(-)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index c086ee9b22..f3722e3aa7 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1432,6 +1432,11 @@ xvabsd_hu 0111 01000110 00101 ..... ..... ..... @vvv
xvabsd_wu 0111 01000110 00110 ..... ..... ..... @vvv
xvabsd_du 0111 01000110 00111 ..... ..... ..... @vvv
+xvadda_b 0111 01000101 11000 ..... ..... ..... @vvv
+xvadda_h 0111 01000101 11001 ..... ..... ..... @vvv
+xvadda_w 0111 01000101 11010 ..... ..... ..... @vvv
+xvadda_d 0111 01000101 11011 ..... ..... ..... @vvv
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index bbe7ad8322..51fbd78279 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1851,6 +1851,11 @@ INSN_LASX(xvabsd_hu, vvv)
INSN_LASX(xvabsd_wu, vvv)
INSN_LASX(xvabsd_du, vvv)
+INSN_LASX(xvadda_b, vvv)
+INSN_LASX(xvadda_h, vvv)
+INSN_LASX(xvadda_w, vvv)
+INSN_LASX(xvadda_d, vvv)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index 35b207aae1..ec6d86cc83 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -394,22 +394,24 @@ DO_3OP(vabsd_du, 64, UD, DO_VABSD)
#define DO_VABS(a) ((a < 0) ? (-a) : (a))
-#define DO_VADDA(NAME, BIT, E, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E(i) = DO_OP(Vj->E(i)) + DO_OP(Vk->E(i)); \
- } \
+#define DO_VADDA(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->E(i) = DO_VABS(Vj->E(i)) + DO_VABS(Vk->E(i)); \
+ } \
}
-DO_VADDA(vadda_b, 8, B, DO_VABS)
-DO_VADDA(vadda_h, 16, H, DO_VABS)
-DO_VADDA(vadda_w, 32, W, DO_VABS)
-DO_VADDA(vadda_d, 64, D, DO_VABS)
+DO_VADDA(vadda_b, 8, B)
+DO_VADDA(vadda_h, 16, H)
+DO_VADDA(vadda_w, 32, W)
+DO_VADDA(vadda_d, 64, D)
#define DO_MIN(a, b) (a < b ? a : b)
#define DO_MAX(a, b) (a > b ? a : b)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index b86a6f36c1..6b4c85ce0b 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -1695,6 +1695,10 @@ TRANS(vadda_b, LSX, gvec_vvv, MO_8, do_vadda)
TRANS(vadda_h, LSX, gvec_vvv, MO_16, do_vadda)
TRANS(vadda_w, LSX, gvec_vvv, MO_32, do_vadda)
TRANS(vadda_d, LSX, gvec_vvv, MO_64, do_vadda)
+TRANS(xvadda_b, LASX, gvec_xxx, MO_8, do_vadda)
+TRANS(xvadda_h, LASX, gvec_xxx, MO_16, do_vadda)
+TRANS(xvadda_w, LASX, gvec_xxx, MO_32, do_vadda)
+TRANS(xvadda_d, LASX, gvec_xxx, MO_64, do_vadda)
TRANS(vmax_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_smax)
TRANS(vmax_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_smax)
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 24/57] target/loongarch: Implement xvmax/xvmin
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (22 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 23/57] target/loongarch: Implement xvadda Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-07 8:31 ` [PATCH RESEND v5 25/57] target/loongarch: Implement xvmul/xvmuh/xvmulw{ev/od} Song Gao
` (32 subsequent siblings)
56 siblings, 0 replies; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVMAX[I].{B/H/W/D}[U];
- XVMIN[I].{B/H/W/D}[U].
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/insns.decode | 36 +++++++++++++++++++++
target/loongarch/disas.c | 34 +++++++++++++++++++
target/loongarch/vec_helper.c | 23 ++++++-------
target/loongarch/insn_trans/trans_vec.c.inc | 32 ++++++++++++++++++
4 files changed, 114 insertions(+), 11 deletions(-)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index f3722e3aa7..99aefcb651 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1437,6 +1437,42 @@ xvadda_h 0111 01000101 11001 ..... ..... ..... @vvv
xvadda_w 0111 01000101 11010 ..... ..... ..... @vvv
xvadda_d 0111 01000101 11011 ..... ..... ..... @vvv
+xvmax_b 0111 01000111 00000 ..... ..... ..... @vvv
+xvmax_h 0111 01000111 00001 ..... ..... ..... @vvv
+xvmax_w 0111 01000111 00010 ..... ..... ..... @vvv
+xvmax_d 0111 01000111 00011 ..... ..... ..... @vvv
+xvmax_bu 0111 01000111 01000 ..... ..... ..... @vvv
+xvmax_hu 0111 01000111 01001 ..... ..... ..... @vvv
+xvmax_wu 0111 01000111 01010 ..... ..... ..... @vvv
+xvmax_du 0111 01000111 01011 ..... ..... ..... @vvv
+
+xvmaxi_b 0111 01101001 00000 ..... ..... ..... @vv_i5
+xvmaxi_h 0111 01101001 00001 ..... ..... ..... @vv_i5
+xvmaxi_w 0111 01101001 00010 ..... ..... ..... @vv_i5
+xvmaxi_d 0111 01101001 00011 ..... ..... ..... @vv_i5
+xvmaxi_bu 0111 01101001 01000 ..... ..... ..... @vv_ui5
+xvmaxi_hu 0111 01101001 01001 ..... ..... ..... @vv_ui5
+xvmaxi_wu 0111 01101001 01010 ..... ..... ..... @vv_ui5
+xvmaxi_du 0111 01101001 01011 ..... ..... ..... @vv_ui5
+
+xvmin_b 0111 01000111 00100 ..... ..... ..... @vvv
+xvmin_h 0111 01000111 00101 ..... ..... ..... @vvv
+xvmin_w 0111 01000111 00110 ..... ..... ..... @vvv
+xvmin_d 0111 01000111 00111 ..... ..... ..... @vvv
+xvmin_bu 0111 01000111 01100 ..... ..... ..... @vvv
+xvmin_hu 0111 01000111 01101 ..... ..... ..... @vvv
+xvmin_wu 0111 01000111 01110 ..... ..... ..... @vvv
+xvmin_du 0111 01000111 01111 ..... ..... ..... @vvv
+
+xvmini_b 0111 01101001 00100 ..... ..... ..... @vv_i5
+xvmini_h 0111 01101001 00101 ..... ..... ..... @vv_i5
+xvmini_w 0111 01101001 00110 ..... ..... ..... @vv_i5
+xvmini_d 0111 01101001 00111 ..... ..... ..... @vv_i5
+xvmini_bu 0111 01101001 01100 ..... ..... ..... @vv_ui5
+xvmini_hu 0111 01101001 01101 ..... ..... ..... @vv_ui5
+xvmini_wu 0111 01101001 01110 ..... ..... ..... @vv_ui5
+xvmini_du 0111 01101001 01111 ..... ..... ..... @vv_ui5
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 51fbd78279..ef2c78147e 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1856,6 +1856,40 @@ INSN_LASX(xvadda_h, vvv)
INSN_LASX(xvadda_w, vvv)
INSN_LASX(xvadda_d, vvv)
+INSN_LASX(xvmax_b, vvv)
+INSN_LASX(xvmax_h, vvv)
+INSN_LASX(xvmax_w, vvv)
+INSN_LASX(xvmax_d, vvv)
+INSN_LASX(xvmin_b, vvv)
+INSN_LASX(xvmin_h, vvv)
+INSN_LASX(xvmin_w, vvv)
+INSN_LASX(xvmin_d, vvv)
+INSN_LASX(xvmax_bu, vvv)
+INSN_LASX(xvmax_hu, vvv)
+INSN_LASX(xvmax_wu, vvv)
+INSN_LASX(xvmax_du, vvv)
+INSN_LASX(xvmin_bu, vvv)
+INSN_LASX(xvmin_hu, vvv)
+INSN_LASX(xvmin_wu, vvv)
+INSN_LASX(xvmin_du, vvv)
+
+INSN_LASX(xvmaxi_b, vv_i)
+INSN_LASX(xvmaxi_h, vv_i)
+INSN_LASX(xvmaxi_w, vv_i)
+INSN_LASX(xvmaxi_d, vv_i)
+INSN_LASX(xvmini_b, vv_i)
+INSN_LASX(xvmini_h, vv_i)
+INSN_LASX(xvmini_w, vv_i)
+INSN_LASX(xvmini_d, vv_i)
+INSN_LASX(xvmaxi_bu, vv_i)
+INSN_LASX(xvmaxi_hu, vv_i)
+INSN_LASX(xvmaxi_wu, vv_i)
+INSN_LASX(xvmaxi_du, vv_i)
+INSN_LASX(xvmini_bu, vv_i)
+INSN_LASX(xvmini_hu, vv_i)
+INSN_LASX(xvmini_wu, vv_i)
+INSN_LASX(xvmini_du, vv_i)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index ec6d86cc83..fdf8b3dd64 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -416,17 +416,18 @@ DO_VADDA(vadda_d, 64, D)
#define DO_MIN(a, b) (a < b ? a : b)
#define DO_MAX(a, b) (a > b ? a : b)
-#define VMINMAXI(NAME, BIT, E, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t v) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- typedef __typeof(Vd->E(0)) TD; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E(i) = DO_OP(Vj->E(i), (TD)imm); \
- } \
+#define VMINMAXI(NAME, BIT, E, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ typedef __typeof(Vd->E(0)) TD; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->E(i) = DO_OP(Vj->E(i), (TD)imm); \
+ } \
}
VMINMAXI(vmini_b, 8, B, DO_MIN)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index 6b4c85ce0b..bf93d0750b 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -1708,6 +1708,14 @@ TRANS(vmax_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_umax)
TRANS(vmax_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_umax)
TRANS(vmax_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_umax)
TRANS(vmax_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_umax)
+TRANS(xvmax_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_smax)
+TRANS(xvmax_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_smax)
+TRANS(xvmax_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_smax)
+TRANS(xvmax_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_smax)
+TRANS(xvmax_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_umax)
+TRANS(xvmax_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_umax)
+TRANS(xvmax_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_umax)
+TRANS(xvmax_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_umax)
TRANS(vmin_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_smin)
TRANS(vmin_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_smin)
@@ -1717,6 +1725,14 @@ TRANS(vmin_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_umin)
TRANS(vmin_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_umin)
TRANS(vmin_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_umin)
TRANS(vmin_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_umin)
+TRANS(xvmin_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_smin)
+TRANS(xvmin_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_smin)
+TRANS(xvmin_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_smin)
+TRANS(xvmin_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_smin)
+TRANS(xvmin_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_umin)
+TRANS(xvmin_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_umin)
+TRANS(xvmin_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_umin)
+TRANS(xvmin_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_umin)
static void gen_vmini_s(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
{
@@ -1818,6 +1834,14 @@ TRANS(vmini_bu, LSX, gvec_vv_i, MO_8, do_vmini_u)
TRANS(vmini_hu, LSX, gvec_vv_i, MO_16, do_vmini_u)
TRANS(vmini_wu, LSX, gvec_vv_i, MO_32, do_vmini_u)
TRANS(vmini_du, LSX, gvec_vv_i, MO_64, do_vmini_u)
+TRANS(xvmini_b, LASX, gvec_xx_i, MO_8, do_vmini_s)
+TRANS(xvmini_h, LASX, gvec_xx_i, MO_16, do_vmini_s)
+TRANS(xvmini_w, LASX, gvec_xx_i, MO_32, do_vmini_s)
+TRANS(xvmini_d, LASX, gvec_xx_i, MO_64, do_vmini_s)
+TRANS(xvmini_bu, LASX, gvec_xx_i, MO_8, do_vmini_u)
+TRANS(xvmini_hu, LASX, gvec_xx_i, MO_16, do_vmini_u)
+TRANS(xvmini_wu, LASX, gvec_xx_i, MO_32, do_vmini_u)
+TRANS(xvmini_du, LASX, gvec_xx_i, MO_64, do_vmini_u)
static void do_vmaxi_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
int64_t imm, uint32_t oprsz, uint32_t maxsz)
@@ -1899,6 +1923,14 @@ TRANS(vmaxi_bu, LSX, gvec_vv_i, MO_8, do_vmaxi_u)
TRANS(vmaxi_hu, LSX, gvec_vv_i, MO_16, do_vmaxi_u)
TRANS(vmaxi_wu, LSX, gvec_vv_i, MO_32, do_vmaxi_u)
TRANS(vmaxi_du, LSX, gvec_vv_i, MO_64, do_vmaxi_u)
+TRANS(xvmaxi_b, LASX, gvec_xx_i, MO_8, do_vmaxi_s)
+TRANS(xvmaxi_h, LASX, gvec_xx_i, MO_16, do_vmaxi_s)
+TRANS(xvmaxi_w, LASX, gvec_xx_i, MO_32, do_vmaxi_s)
+TRANS(xvmaxi_d, LASX, gvec_xx_i, MO_64, do_vmaxi_s)
+TRANS(xvmaxi_bu, LASX, gvec_xx_i, MO_8, do_vmaxi_u)
+TRANS(xvmaxi_hu, LASX, gvec_xx_i, MO_16, do_vmaxi_u)
+TRANS(xvmaxi_wu, LASX, gvec_xx_i, MO_32, do_vmaxi_u)
+TRANS(xvmaxi_du, LASX, gvec_xx_i, MO_64, do_vmaxi_u)
TRANS(vmul_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_mul)
TRANS(vmul_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_mul)
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 25/57] target/loongarch: Implement xvmul/xvmuh/xvmulw{ev/od}
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (23 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 24/57] target/loongarch: Implement xvmax/xvmin Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-07 8:31 ` [PATCH RESEND v5 26/57] target/loongarch: Implement xvmadd/xvmsub/xvmaddw{ev/od} Song Gao
` (31 subsequent siblings)
56 siblings, 0 replies; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVMUL.{B/H/W/D};
- XVMUH.{B/H/W/D}[U];
- XVMULW{EV/OD}.{H.B/W.H/D.W/Q.D}[U];
- XVMULW{EV/OD}.{H.BU.B/W.HU.H/D.WU.W/Q.DU.D}.
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/insns.decode | 38 ++++++++
target/loongarch/disas.c | 38 ++++++++
target/loongarch/vec_helper.c | 55 ++++++------
target/loongarch/insn_trans/trans_vec.c.inc | 96 +++++++++++++++++++--
4 files changed, 195 insertions(+), 32 deletions(-)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 99aefcb651..0f9ebe641f 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1473,6 +1473,44 @@ xvmini_hu 0111 01101001 01101 ..... ..... ..... @vv_ui5
xvmini_wu 0111 01101001 01110 ..... ..... ..... @vv_ui5
xvmini_du 0111 01101001 01111 ..... ..... ..... @vv_ui5
+xvmul_b 0111 01001000 01000 ..... ..... ..... @vvv
+xvmul_h 0111 01001000 01001 ..... ..... ..... @vvv
+xvmul_w 0111 01001000 01010 ..... ..... ..... @vvv
+xvmul_d 0111 01001000 01011 ..... ..... ..... @vvv
+xvmuh_b 0111 01001000 01100 ..... ..... ..... @vvv
+xvmuh_h 0111 01001000 01101 ..... ..... ..... @vvv
+xvmuh_w 0111 01001000 01110 ..... ..... ..... @vvv
+xvmuh_d 0111 01001000 01111 ..... ..... ..... @vvv
+xvmuh_bu 0111 01001000 10000 ..... ..... ..... @vvv
+xvmuh_hu 0111 01001000 10001 ..... ..... ..... @vvv
+xvmuh_wu 0111 01001000 10010 ..... ..... ..... @vvv
+xvmuh_du 0111 01001000 10011 ..... ..... ..... @vvv
+
+xvmulwev_h_b 0111 01001001 00000 ..... ..... ..... @vvv
+xvmulwev_w_h 0111 01001001 00001 ..... ..... ..... @vvv
+xvmulwev_d_w 0111 01001001 00010 ..... ..... ..... @vvv
+xvmulwev_q_d 0111 01001001 00011 ..... ..... ..... @vvv
+xvmulwod_h_b 0111 01001001 00100 ..... ..... ..... @vvv
+xvmulwod_w_h 0111 01001001 00101 ..... ..... ..... @vvv
+xvmulwod_d_w 0111 01001001 00110 ..... ..... ..... @vvv
+xvmulwod_q_d 0111 01001001 00111 ..... ..... ..... @vvv
+xvmulwev_h_bu 0111 01001001 10000 ..... ..... ..... @vvv
+xvmulwev_w_hu 0111 01001001 10001 ..... ..... ..... @vvv
+xvmulwev_d_wu 0111 01001001 10010 ..... ..... ..... @vvv
+xvmulwev_q_du 0111 01001001 10011 ..... ..... ..... @vvv
+xvmulwod_h_bu 0111 01001001 10100 ..... ..... ..... @vvv
+xvmulwod_w_hu 0111 01001001 10101 ..... ..... ..... @vvv
+xvmulwod_d_wu 0111 01001001 10110 ..... ..... ..... @vvv
+xvmulwod_q_du 0111 01001001 10111 ..... ..... ..... @vvv
+xvmulwev_h_bu_b 0111 01001010 00000 ..... ..... ..... @vvv
+xvmulwev_w_hu_h 0111 01001010 00001 ..... ..... ..... @vvv
+xvmulwev_d_wu_w 0111 01001010 00010 ..... ..... ..... @vvv
+xvmulwev_q_du_d 0111 01001010 00011 ..... ..... ..... @vvv
+xvmulwod_h_bu_b 0111 01001010 00100 ..... ..... ..... @vvv
+xvmulwod_w_hu_h 0111 01001010 00101 ..... ..... ..... @vvv
+xvmulwod_d_wu_w 0111 01001010 00110 ..... ..... ..... @vvv
+xvmulwod_q_du_d 0111 01001010 00111 ..... ..... ..... @vvv
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index ef2c78147e..f839373a7a 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1890,6 +1890,44 @@ INSN_LASX(xvmini_hu, vv_i)
INSN_LASX(xvmini_wu, vv_i)
INSN_LASX(xvmini_du, vv_i)
+INSN_LASX(xvmul_b, vvv)
+INSN_LASX(xvmul_h, vvv)
+INSN_LASX(xvmul_w, vvv)
+INSN_LASX(xvmul_d, vvv)
+INSN_LASX(xvmuh_b, vvv)
+INSN_LASX(xvmuh_h, vvv)
+INSN_LASX(xvmuh_w, vvv)
+INSN_LASX(xvmuh_d, vvv)
+INSN_LASX(xvmuh_bu, vvv)
+INSN_LASX(xvmuh_hu, vvv)
+INSN_LASX(xvmuh_wu, vvv)
+INSN_LASX(xvmuh_du, vvv)
+
+INSN_LASX(xvmulwev_h_b, vvv)
+INSN_LASX(xvmulwev_w_h, vvv)
+INSN_LASX(xvmulwev_d_w, vvv)
+INSN_LASX(xvmulwev_q_d, vvv)
+INSN_LASX(xvmulwod_h_b, vvv)
+INSN_LASX(xvmulwod_w_h, vvv)
+INSN_LASX(xvmulwod_d_w, vvv)
+INSN_LASX(xvmulwod_q_d, vvv)
+INSN_LASX(xvmulwev_h_bu, vvv)
+INSN_LASX(xvmulwev_w_hu, vvv)
+INSN_LASX(xvmulwev_d_wu, vvv)
+INSN_LASX(xvmulwev_q_du, vvv)
+INSN_LASX(xvmulwod_h_bu, vvv)
+INSN_LASX(xvmulwod_w_hu, vvv)
+INSN_LASX(xvmulwod_d_wu, vvv)
+INSN_LASX(xvmulwod_q_du, vvv)
+INSN_LASX(xvmulwev_h_bu_b, vvv)
+INSN_LASX(xvmulwev_w_hu_h, vvv)
+INSN_LASX(xvmulwev_d_wu_w, vvv)
+INSN_LASX(xvmulwev_q_du_d, vvv)
+INSN_LASX(xvmulwod_h_bu_b, vvv)
+INSN_LASX(xvmulwod_w_hu_h, vvv)
+INSN_LASX(xvmulwod_d_wu_w, vvv)
+INSN_LASX(xvmulwod_q_du_d, vvv)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index fdf8b3dd64..e152998094 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -447,50 +447,53 @@ VMINMAXI(vmaxi_hu, 16, UH, DO_MAX)
VMINMAXI(vmaxi_wu, 32, UW, DO_MAX)
VMINMAXI(vmaxi_du, 64, UD, DO_MAX)
-#define DO_VMUH(NAME, BIT, E1, E2, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- typedef __typeof(Vd->E1(0)) T; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E2(i) = ((T)Vj->E2(i)) * ((T)Vk->E2(i)) >> BIT; \
- } \
+#define DO_VMUH(NAME, BIT, E1, E2, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ typedef __typeof(Vd->E1(0)) T; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->E2(i) = ((T)Vj->E2(i)) * ((T)Vk->E2(i)) >> BIT; \
+ } \
}
-void HELPER(vmuh_d)(void *vd, void *vj, void *vk, uint32_t v)
+void HELPER(vmuh_d)(void *vd, void *vj, void *vk, uint32_t desc)
{
- uint64_t l, h1, h2;
+ int i;
+ uint64_t l, h;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);
- muls64(&l, &h1, Vj->D(0), Vk->D(0));
- muls64(&l, &h2, Vj->D(1), Vk->D(1));
-
- Vd->D(0) = h1;
- Vd->D(1) = h2;
+ for (i = 0; i < oprsz / 8; i++) {
+ muls64(&l, &h, Vj->D(i), Vk->D(i));
+ Vd->D(i) = h;
+ }
}
DO_VMUH(vmuh_b, 8, H, B, DO_MUH)
DO_VMUH(vmuh_h, 16, W, H, DO_MUH)
DO_VMUH(vmuh_w, 32, D, W, DO_MUH)
-void HELPER(vmuh_du)(void *vd, void *vj, void *vk, uint32_t v)
+void HELPER(vmuh_du)(void *vd, void *vj, void *vk, uint32_t desc)
{
- uint64_t l, h1, h2;
+ int i;
+ uint64_t l, h;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);
- mulu64(&l, &h1, Vj->D(0), Vk->D(0));
- mulu64(&l, &h2, Vj->D(1), Vk->D(1));
-
- Vd->D(0) = h1;
- Vd->D(1) = h2;
+ for (i = 0; i < oprsz / 8; i++) {
+ mulu64(&l, &h, Vj->D(i), Vk->D(i));
+ Vd->D(i) = h;
+ }
}
DO_VMUH(vmuh_bu, 8, UH, UB, DO_MUH)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index bf93d0750b..d5b2a73ca8 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -1936,6 +1936,10 @@ TRANS(vmul_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_mul)
TRANS(vmul_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_mul)
TRANS(vmul_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_mul)
TRANS(vmul_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_mul)
+TRANS(xvmul_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_mul)
+TRANS(xvmul_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_mul)
+TRANS(xvmul_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_mul)
+TRANS(xvmul_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_mul)
static void gen_vmuh_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
{
@@ -1980,6 +1984,10 @@ TRANS(vmuh_b, LSX, gvec_vvv, MO_8, do_vmuh_s)
TRANS(vmuh_h, LSX, gvec_vvv, MO_16, do_vmuh_s)
TRANS(vmuh_w, LSX, gvec_vvv, MO_32, do_vmuh_s)
TRANS(vmuh_d, LSX, gvec_vvv, MO_64, do_vmuh_s)
+TRANS(xvmuh_b, LASX, gvec_xxx, MO_8, do_vmuh_s)
+TRANS(xvmuh_h, LASX, gvec_xxx, MO_16, do_vmuh_s)
+TRANS(xvmuh_w, LASX, gvec_xxx, MO_32, do_vmuh_s)
+TRANS(xvmuh_d, LASX, gvec_xxx, MO_64, do_vmuh_s)
static void gen_vmuh_wu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
{
@@ -2024,6 +2032,10 @@ TRANS(vmuh_bu, LSX, gvec_vvv, MO_8, do_vmuh_u)
TRANS(vmuh_hu, LSX, gvec_vvv, MO_16, do_vmuh_u)
TRANS(vmuh_wu, LSX, gvec_vvv, MO_32, do_vmuh_u)
TRANS(vmuh_du, LSX, gvec_vvv, MO_64, do_vmuh_u)
+TRANS(xvmuh_bu, LASX, gvec_xxx, MO_8, do_vmuh_u)
+TRANS(xvmuh_hu, LASX, gvec_xxx, MO_16, do_vmuh_u)
+TRANS(xvmuh_wu, LASX, gvec_xxx, MO_32, do_vmuh_u)
+TRANS(xvmuh_du, LASX, gvec_xxx, MO_64, do_vmuh_u)
static void gen_vmulwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
@@ -2096,6 +2108,9 @@ static void do_vmulwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
TRANS(vmulwev_h_b, LSX, gvec_vvv, MO_8, do_vmulwev_s)
TRANS(vmulwev_w_h, LSX, gvec_vvv, MO_16, do_vmulwev_s)
TRANS(vmulwev_d_w, LSX, gvec_vvv, MO_32, do_vmulwev_s)
+TRANS(xvmulwev_h_b, LASX, gvec_xxx, MO_8, do_vmulwev_s)
+TRANS(xvmulwev_w_h, LASX, gvec_xxx, MO_16, do_vmulwev_s)
+TRANS(xvmulwev_d_w, LASX, gvec_xxx, MO_32, do_vmulwev_s)
static void tcg_gen_mulus2_i64(TCGv_i64 rl, TCGv_i64 rh,
TCGv_i64 arg1, TCGv_i64 arg2)
@@ -2128,12 +2143,66 @@ static bool trans_## NAME (DisasContext *ctx, arg_vvv *a) \
return true; \
}
-VMUL_Q(vmulwev_q_d, muls2, 0, 0)
-VMUL_Q(vmulwod_q_d, muls2, 1, 1)
-VMUL_Q(vmulwev_q_du, mulu2, 0, 0)
-VMUL_Q(vmulwod_q_du, mulu2, 1, 1)
-VMUL_Q(vmulwev_q_du_d, mulus2, 0, 0)
-VMUL_Q(vmulwod_q_du_d, mulus2, 1, 1)
+static bool gen_vmul_q_vl(DisasContext *ctx,
+ arg_vvv *a, uint32_t oprsz, int idx1, int idx2,
+ void (*func)(TCGv_i64, TCGv_i64,
+ TCGv_i64, TCGv_i64))
+{
+ TCGv_i64 rh, rl, arg1, arg2;
+ int i;
+
+ rh = tcg_temp_new_i64();
+ rl = tcg_temp_new_i64();
+ arg1 = tcg_temp_new_i64();
+ arg2 = tcg_temp_new_i64();
+
+ for (i = 0; i < oprsz / 16; i++) {
+ get_vreg64(arg1, a->vj, 2 * i + idx1);
+ get_vreg64(arg2, a->vk, 2 * i + idx2);
+
+ func(rl, rh, arg1, arg2);
+
+ set_vreg64(rh, a->vd, 2 * i + 1);
+ set_vreg64(rl, a->vd, 2 * i);
+ }
+
+ return true;
+}
+
+static bool gen_vmul_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2,
+ void (*func)(TCGv_i64, TCGv_i64,
+ TCGv_i64, TCGv_i64))
+{
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
+ return gen_vmul_q_vl(ctx, a, 16, idx1, idx2, func);
+}
+
+static bool gen_xvmul_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2,
+ void (*func)(TCGv_i64, TCGv_i64,
+ TCGv_i64, TCGv_i64))
+{
+ if (!check_vec(ctx, 32)) {
+ return true;
+ }
+
+ return gen_vmul_q_vl(ctx, a, 32, idx1, idx2, func);
+}
+
+TRANS(vmulwev_q_d, LSX, gen_vmul_q, 0, 0, tcg_gen_muls2_i64)
+TRANS(vmulwod_q_d, LSX, gen_vmul_q, 1, 1, tcg_gen_muls2_i64)
+TRANS(vmulwev_q_du, LSX, gen_vmul_q, 0, 0, tcg_gen_mulu2_i64)
+TRANS(vmulwod_q_du, LSX, gen_vmul_q, 1, 1, tcg_gen_mulu2_i64)
+TRANS(vmulwev_q_du_d, LSX, gen_vmul_q, 0, 0, tcg_gen_mulus2_i64)
+TRANS(vmulwod_q_du_d, LSX, gen_vmul_q, 1, 1, tcg_gen_mulus2_i64)
+TRANS(xvmulwev_q_d, LASX, gen_xvmul_q, 0, 0, tcg_gen_muls2_i64)
+TRANS(xvmulwod_q_d, LASX, gen_xvmul_q, 1, 1, tcg_gen_muls2_i64)
+TRANS(xvmulwev_q_du, LASX, gen_xvmul_q, 0, 0, tcg_gen_mulu2_i64)
+TRANS(xvmulwod_q_du, LASX, gen_xvmul_q, 1, 1, tcg_gen_mulu2_i64)
+TRANS(xvmulwev_q_du_d, LASX, gen_xvmul_q, 0, 0, tcg_gen_mulus2_i64)
+TRANS(xvmulwod_q_du_d, LASX, gen_xvmul_q, 1, 1, tcg_gen_mulus2_i64)
static void gen_vmulwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
@@ -2204,6 +2273,9 @@ static void do_vmulwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
TRANS(vmulwod_h_b, LSX, gvec_vvv, MO_8, do_vmulwod_s)
TRANS(vmulwod_w_h, LSX, gvec_vvv, MO_16, do_vmulwod_s)
TRANS(vmulwod_d_w, LSX, gvec_vvv, MO_32, do_vmulwod_s)
+TRANS(xvmulwod_h_b, LASX, gvec_xxx, MO_8, do_vmulwod_s)
+TRANS(xvmulwod_w_h, LASX, gvec_xxx, MO_16, do_vmulwod_s)
+TRANS(xvmulwod_d_w, LASX, gvec_xxx, MO_32, do_vmulwod_s)
static void gen_vmulwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
@@ -2274,6 +2346,9 @@ static void do_vmulwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
TRANS(vmulwev_h_bu, LSX, gvec_vvv, MO_8, do_vmulwev_u)
TRANS(vmulwev_w_hu, LSX, gvec_vvv, MO_16, do_vmulwev_u)
TRANS(vmulwev_d_wu, LSX, gvec_vvv, MO_32, do_vmulwev_u)
+TRANS(xvmulwev_h_bu, LASX, gvec_xxx, MO_8, do_vmulwev_u)
+TRANS(xvmulwev_w_hu, LASX, gvec_xxx, MO_16, do_vmulwev_u)
+TRANS(xvmulwev_d_wu, LASX, gvec_xxx, MO_32, do_vmulwev_u)
static void gen_vmulwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
@@ -2344,6 +2419,9 @@ static void do_vmulwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
TRANS(vmulwod_h_bu, LSX, gvec_vvv, MO_8, do_vmulwod_u)
TRANS(vmulwod_w_hu, LSX, gvec_vvv, MO_16, do_vmulwod_u)
TRANS(vmulwod_d_wu, LSX, gvec_vvv, MO_32, do_vmulwod_u)
+TRANS(xvmulwod_h_bu, LASX, gvec_xxx, MO_8, do_vmulwod_u)
+TRANS(xvmulwod_w_hu, LASX, gvec_xxx, MO_16, do_vmulwod_u)
+TRANS(xvmulwod_d_wu, LASX, gvec_xxx, MO_32, do_vmulwod_u)
static void gen_vmulwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
@@ -2416,6 +2494,9 @@ static void do_vmulwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
TRANS(vmulwev_h_bu_b, LSX, gvec_vvv, MO_8, do_vmulwev_u_s)
TRANS(vmulwev_w_hu_h, LSX, gvec_vvv, MO_16, do_vmulwev_u_s)
TRANS(vmulwev_d_wu_w, LSX, gvec_vvv, MO_32, do_vmulwev_u_s)
+TRANS(xvmulwev_h_bu_b, LASX, gvec_xxx, MO_8, do_vmulwev_u_s)
+TRANS(xvmulwev_w_hu_h, LASX, gvec_xxx, MO_16, do_vmulwev_u_s)
+TRANS(xvmulwev_d_wu_w, LASX, gvec_xxx, MO_32, do_vmulwev_u_s)
static void gen_vmulwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
@@ -2485,6 +2566,9 @@ static void do_vmulwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
TRANS(vmulwod_h_bu_b, LSX, gvec_vvv, MO_8, do_vmulwod_u_s)
TRANS(vmulwod_w_hu_h, LSX, gvec_vvv, MO_16, do_vmulwod_u_s)
TRANS(vmulwod_d_wu_w, LSX, gvec_vvv, MO_32, do_vmulwod_u_s)
+TRANS(xvmulwod_h_bu_b, LASX, gvec_xxx, MO_8, do_vmulwod_u_s)
+TRANS(xvmulwod_w_hu_h, LASX, gvec_xxx, MO_16, do_vmulwod_u_s)
+TRANS(xvmulwod_d_wu_w, LASX, gvec_xxx, MO_32, do_vmulwod_u_s)
static void gen_vmadd(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 26/57] target/loongarch: Implement xvmadd/xvmsub/xvmaddw{ev/od}
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (24 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 25/57] target/loongarch: Implement xvmul/xvmuh/xvmulw{ev/od} Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-07 8:31 ` [PATCH RESEND v5 27/57] target/loongarch: Implement xvdiv/xvmod Song Gao
` (30 subsequent siblings)
56 siblings, 0 replies; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVMADD.{B/H/W/D};
- XVMSUB.{B/H/W/D};
- XVMADDW{EV/OD}.{H.B/W.H/D.W/Q.D}[U];
- XVMADDW{EV/OD}.{H.BU.B/W.HU.H/D.WU.W/Q.DU.D}.
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/insns.decode | 34 ++++++
target/loongarch/disas.c | 34 ++++++
target/loongarch/vec_helper.c | 112 +++++++++---------
target/loongarch/insn_trans/trans_vec.c.inc | 121 ++++++++++++++------
4 files changed, 214 insertions(+), 87 deletions(-)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 0f9ebe641f..d6fb51ae64 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1511,6 +1511,40 @@ xvmulwod_w_hu_h 0111 01001010 00101 ..... ..... ..... @vvv
xvmulwod_d_wu_w 0111 01001010 00110 ..... ..... ..... @vvv
xvmulwod_q_du_d 0111 01001010 00111 ..... ..... ..... @vvv
+xvmadd_b 0111 01001010 10000 ..... ..... ..... @vvv
+xvmadd_h 0111 01001010 10001 ..... ..... ..... @vvv
+xvmadd_w 0111 01001010 10010 ..... ..... ..... @vvv
+xvmadd_d 0111 01001010 10011 ..... ..... ..... @vvv
+xvmsub_b 0111 01001010 10100 ..... ..... ..... @vvv
+xvmsub_h 0111 01001010 10101 ..... ..... ..... @vvv
+xvmsub_w 0111 01001010 10110 ..... ..... ..... @vvv
+xvmsub_d 0111 01001010 10111 ..... ..... ..... @vvv
+
+xvmaddwev_h_b 0111 01001010 11000 ..... ..... ..... @vvv
+xvmaddwev_w_h 0111 01001010 11001 ..... ..... ..... @vvv
+xvmaddwev_d_w 0111 01001010 11010 ..... ..... ..... @vvv
+xvmaddwev_q_d 0111 01001010 11011 ..... ..... ..... @vvv
+xvmaddwod_h_b 0111 01001010 11100 ..... ..... ..... @vvv
+xvmaddwod_w_h 0111 01001010 11101 ..... ..... ..... @vvv
+xvmaddwod_d_w 0111 01001010 11110 ..... ..... ..... @vvv
+xvmaddwod_q_d 0111 01001010 11111 ..... ..... ..... @vvv
+xvmaddwev_h_bu 0111 01001011 01000 ..... ..... ..... @vvv
+xvmaddwev_w_hu 0111 01001011 01001 ..... ..... ..... @vvv
+xvmaddwev_d_wu 0111 01001011 01010 ..... ..... ..... @vvv
+xvmaddwev_q_du 0111 01001011 01011 ..... ..... ..... @vvv
+xvmaddwod_h_bu 0111 01001011 01100 ..... ..... ..... @vvv
+xvmaddwod_w_hu 0111 01001011 01101 ..... ..... ..... @vvv
+xvmaddwod_d_wu 0111 01001011 01110 ..... ..... ..... @vvv
+xvmaddwod_q_du 0111 01001011 01111 ..... ..... ..... @vvv
+xvmaddwev_h_bu_b 0111 01001011 11000 ..... ..... ..... @vvv
+xvmaddwev_w_hu_h 0111 01001011 11001 ..... ..... ..... @vvv
+xvmaddwev_d_wu_w 0111 01001011 11010 ..... ..... ..... @vvv
+xvmaddwev_q_du_d 0111 01001011 11011 ..... ..... ..... @vvv
+xvmaddwod_h_bu_b 0111 01001011 11100 ..... ..... ..... @vvv
+xvmaddwod_w_hu_h 0111 01001011 11101 ..... ..... ..... @vvv
+xvmaddwod_d_wu_w 0111 01001011 11110 ..... ..... ..... @vvv
+xvmaddwod_q_du_d 0111 01001011 11111 ..... ..... ..... @vvv
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index f839373a7a..e4369fd08b 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1928,6 +1928,40 @@ INSN_LASX(xvmulwod_w_hu_h, vvv)
INSN_LASX(xvmulwod_d_wu_w, vvv)
INSN_LASX(xvmulwod_q_du_d, vvv)
+INSN_LASX(xvmadd_b, vvv)
+INSN_LASX(xvmadd_h, vvv)
+INSN_LASX(xvmadd_w, vvv)
+INSN_LASX(xvmadd_d, vvv)
+INSN_LASX(xvmsub_b, vvv)
+INSN_LASX(xvmsub_h, vvv)
+INSN_LASX(xvmsub_w, vvv)
+INSN_LASX(xvmsub_d, vvv)
+
+INSN_LASX(xvmaddwev_h_b, vvv)
+INSN_LASX(xvmaddwev_w_h, vvv)
+INSN_LASX(xvmaddwev_d_w, vvv)
+INSN_LASX(xvmaddwev_q_d, vvv)
+INSN_LASX(xvmaddwod_h_b, vvv)
+INSN_LASX(xvmaddwod_w_h, vvv)
+INSN_LASX(xvmaddwod_d_w, vvv)
+INSN_LASX(xvmaddwod_q_d, vvv)
+INSN_LASX(xvmaddwev_h_bu, vvv)
+INSN_LASX(xvmaddwev_w_hu, vvv)
+INSN_LASX(xvmaddwev_d_wu, vvv)
+INSN_LASX(xvmaddwev_q_du, vvv)
+INSN_LASX(xvmaddwod_h_bu, vvv)
+INSN_LASX(xvmaddwod_w_hu, vvv)
+INSN_LASX(xvmaddwod_d_wu, vvv)
+INSN_LASX(xvmaddwod_q_du, vvv)
+INSN_LASX(xvmaddwev_h_bu_b, vvv)
+INSN_LASX(xvmaddwev_w_hu_h, vvv)
+INSN_LASX(xvmaddwev_d_wu_w, vvv)
+INSN_LASX(xvmaddwev_q_du_d, vvv)
+INSN_LASX(xvmaddwod_h_bu_b, vvv)
+INSN_LASX(xvmaddwod_w_hu_h, vvv)
+INSN_LASX(xvmaddwod_d_wu_w, vvv)
+INSN_LASX(xvmaddwod_q_du_d, vvv)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index e152998094..a800554159 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -529,16 +529,18 @@ DO_ODD_U_S(vmulwod_d_wu_w, 64, D, UD, W, UW, DO_MUL)
#define DO_MADD(a, b, c) (a + b * c)
#define DO_MSUB(a, b, c) (a - b * c)
-#define VMADDSUB(NAME, BIT, E, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E(i) = DO_OP(Vd->E(i), Vj->E(i) ,Vk->E(i)); \
- } \
+#define VMADDSUB(NAME, BIT, E, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->E(i) = DO_OP(Vd->E(i), Vj->E(i) ,Vk->E(i)); \
+ } \
}
VMADDSUB(vmadd_b, 8, B, DO_MADD)
@@ -551,15 +553,16 @@ VMADDSUB(vmsub_w, 32, W, DO_MSUB)
VMADDSUB(vmsub_d, 64, D, DO_MSUB)
#define VMADDWEV(NAME, BIT, E1, E2, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
int i; \
VReg *Vd = (VReg *)vd; \
VReg *Vj = (VReg *)vj; \
VReg *Vk = (VReg *)vk; \
typedef __typeof(Vd->E1(0)) TD; \
+ int oprsz = simd_oprsz(desc); \
\
- for (i = 0; i < LSX_LEN/BIT; i++) { \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
Vd->E1(i) += DO_OP((TD)Vj->E2(2 * i), (TD)Vk->E2(2 * i)); \
} \
}
@@ -571,19 +574,20 @@ VMADDWEV(vmaddwev_h_bu, 16, UH, UB, DO_MUL)
VMADDWEV(vmaddwev_w_hu, 32, UW, UH, DO_MUL)
VMADDWEV(vmaddwev_d_wu, 64, UD, UW, DO_MUL)
-#define VMADDWOD(NAME, BIT, E1, E2, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- typedef __typeof(Vd->E1(0)) TD; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E1(i) += DO_OP((TD)Vj->E2(2 * i + 1), \
- (TD)Vk->E2(2 * i + 1)); \
- } \
+#define VMADDWOD(NAME, BIT, E1, E2, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ typedef __typeof(Vd->E1(0)) TD; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->E1(i) += DO_OP((TD)Vj->E2(2 * i + 1), \
+ (TD)Vk->E2(2 * i + 1)); \
+ } \
}
VMADDWOD(vmaddwod_h_b, 16, H, B, DO_MUL)
@@ -593,40 +597,42 @@ VMADDWOD(vmaddwod_h_bu, 16, UH, UB, DO_MUL)
VMADDWOD(vmaddwod_w_hu, 32, UW, UH, DO_MUL)
VMADDWOD(vmaddwod_d_wu, 64, UD, UW, DO_MUL)
-#define VMADDWEV_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- typedef __typeof(Vd->ES1(0)) TS1; \
- typedef __typeof(Vd->EU1(0)) TU1; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i), \
- (TS1)Vk->ES2(2 * i)); \
- } \
+#define VMADDWEV_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ typedef __typeof(Vd->ES1(0)) TS1; \
+ typedef __typeof(Vd->EU1(0)) TU1; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i), \
+ (TS1)Vk->ES2(2 * i)); \
+ } \
}
VMADDWEV_U_S(vmaddwev_h_bu_b, 16, H, UH, B, UB, DO_MUL)
VMADDWEV_U_S(vmaddwev_w_hu_h, 32, W, UW, H, UH, DO_MUL)
VMADDWEV_U_S(vmaddwev_d_wu_w, 64, D, UD, W, UW, DO_MUL)
-#define VMADDWOD_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- typedef __typeof(Vd->ES1(0)) TS1; \
- typedef __typeof(Vd->EU1(0)) TU1; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i + 1), \
- (TS1)Vk->ES2(2 * i + 1)); \
- } \
+#define VMADDWOD_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ typedef __typeof(Vd->ES1(0)) TS1; \
+ typedef __typeof(Vd->EU1(0)) TU1; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i + 1), \
+ (TS1)Vk->ES2(2 * i + 1)); \
+ } \
}
VMADDWOD_U_S(vmaddwod_h_bu_b, 16, H, UH, B, UB, DO_MUL)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index d5b2a73ca8..4d2d3c3f82 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -2643,6 +2643,10 @@ TRANS(vmadd_b, LSX, gvec_vvv, MO_8, do_vmadd)
TRANS(vmadd_h, LSX, gvec_vvv, MO_16, do_vmadd)
TRANS(vmadd_w, LSX, gvec_vvv, MO_32, do_vmadd)
TRANS(vmadd_d, LSX, gvec_vvv, MO_64, do_vmadd)
+TRANS(xvmadd_b, LASX, gvec_xxx, MO_8, do_vmadd)
+TRANS(xvmadd_h, LASX, gvec_xxx, MO_16, do_vmadd)
+TRANS(xvmadd_w, LASX, gvec_xxx, MO_32, do_vmadd)
+TRANS(xvmadd_d, LASX, gvec_xxx, MO_64, do_vmadd)
static void gen_vmsub(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
@@ -2717,6 +2721,10 @@ TRANS(vmsub_b, LSX, gvec_vvv, MO_8, do_vmsub)
TRANS(vmsub_h, LSX, gvec_vvv, MO_16, do_vmsub)
TRANS(vmsub_w, LSX, gvec_vvv, MO_32, do_vmsub)
TRANS(vmsub_d, LSX, gvec_vvv, MO_64, do_vmsub)
+TRANS(xvmsub_b, LASX, gvec_xxx, MO_8, do_vmsub)
+TRANS(xvmsub_h, LASX, gvec_xxx, MO_16, do_vmsub)
+TRANS(xvmsub_w, LASX, gvec_xxx, MO_32, do_vmsub)
+TRANS(xvmsub_d, LASX, gvec_xxx, MO_64, do_vmsub)
static void gen_vmaddwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
@@ -2791,43 +2799,73 @@ static void do_vmaddwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
TRANS(vmaddwev_h_b, LSX, gvec_vvv, MO_8, do_vmaddwev_s)
TRANS(vmaddwev_w_h, LSX, gvec_vvv, MO_16, do_vmaddwev_s)
TRANS(vmaddwev_d_w, LSX, gvec_vvv, MO_32, do_vmaddwev_s)
+TRANS(xvmaddwev_h_b, LASX, gvec_xxx, MO_8, do_vmaddwev_s)
+TRANS(xvmaddwev_w_h, LASX, gvec_xxx, MO_16, do_vmaddwev_s)
+TRANS(xvmaddwev_d_w, LASX, gvec_xxx, MO_32, do_vmaddwev_s)
-#define VMADD_Q(NAME, FN, idx1, idx2) \
-static bool trans_## NAME (DisasContext *ctx, arg_vvv *a) \
-{ \
- TCGv_i64 rh, rl, arg1, arg2, th, tl; \
- \
- if (!avail_LSX(ctx)) { \
- return false; \
- } \
- \
- rh = tcg_temp_new_i64(); \
- rl = tcg_temp_new_i64(); \
- arg1 = tcg_temp_new_i64(); \
- arg2 = tcg_temp_new_i64(); \
- th = tcg_temp_new_i64(); \
- tl = tcg_temp_new_i64(); \
- \
- get_vreg64(arg1, a->vj, idx1); \
- get_vreg64(arg2, a->vk, idx2); \
- get_vreg64(rh, a->vd, 1); \
- get_vreg64(rl, a->vd, 0); \
- \
- tcg_gen_## FN ##_i64(tl, th, arg1, arg2); \
- tcg_gen_add2_i64(rl, rh, rl, rh, tl, th); \
- \
- set_vreg64(rh, a->vd, 1); \
- set_vreg64(rl, a->vd, 0); \
- \
- return true; \
+static bool gen_vmadd_q_vl(DisasContext * ctx,
+ arg_vvv *a, uint32_t oprsz, int idx1, int idx2,
+ void (*func)(TCGv_i64, TCGv_i64,
+ TCGv_i64, TCGv_i64))
+{
+ TCGv_i64 rh, rl, arg1, arg2, th, tl;
+ int i;
+
+ rh = tcg_temp_new_i64();
+ rl = tcg_temp_new_i64();
+ arg1 = tcg_temp_new_i64();
+ arg2 = tcg_temp_new_i64();
+ th = tcg_temp_new_i64();
+ tl = tcg_temp_new_i64();
+
+ for (i = 0; i < oprsz / 16; i++) {
+ get_vreg64(arg1, a->vj, 2 * i + idx1);
+ get_vreg64(arg2, a->vk, 2 * i + idx2);
+ get_vreg64(rh, a->vd, 2 * i + 1);
+ get_vreg64(rl, a->vd, 2 * i);
+
+ func(tl, th, arg1, arg2);
+ tcg_gen_add2_i64(rl, rh, rl, rh, tl, th);
+
+ set_vreg64(rh, a->vd, 2 * i + 1);
+ set_vreg64(rl, a->vd, 2 * i);
+ }
+
+ return true;
+}
+
+static bool gen_vmadd_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2,
+ void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
+{
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
+ return gen_vmadd_q_vl(ctx, a, 16, idx1, idx2, func);
+}
+
+static bool gen_xvmadd_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2,
+ void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
+{
+ if (!check_vec(ctx, 32)) {
+ return true;
+ }
+
+ return gen_vmadd_q_vl(ctx, a, 32, idx1, idx2, func);
}
-VMADD_Q(vmaddwev_q_d, muls2, 0, 0)
-VMADD_Q(vmaddwod_q_d, muls2, 1, 1)
-VMADD_Q(vmaddwev_q_du, mulu2, 0, 0)
-VMADD_Q(vmaddwod_q_du, mulu2, 1, 1)
-VMADD_Q(vmaddwev_q_du_d, mulus2, 0, 0)
-VMADD_Q(vmaddwod_q_du_d, mulus2, 1, 1)
+TRANS(vmaddwev_q_d, LSX, gen_vmadd_q, 0, 0, tcg_gen_muls2_i64)
+TRANS(vmaddwod_q_d, LSX, gen_vmadd_q, 1, 1, tcg_gen_muls2_i64)
+TRANS(vmaddwev_q_du, LSX, gen_vmadd_q, 0, 0, tcg_gen_mulu2_i64)
+TRANS(vmaddwod_q_du, LSX, gen_vmadd_q, 1, 1, tcg_gen_mulu2_i64)
+TRANS(vmaddwev_q_du_d, LSX, gen_vmadd_q, 0, 0, tcg_gen_mulus2_i64)
+TRANS(vmaddwod_q_du_d, LSX, gen_vmadd_q, 1, 1, tcg_gen_mulus2_i64)
+TRANS(xvmaddwev_q_d, LASX, gen_xvmadd_q, 0, 0, tcg_gen_muls2_i64)
+TRANS(xvmaddwod_q_d, LASX, gen_xvmadd_q, 1, 1, tcg_gen_muls2_i64)
+TRANS(xvmaddwev_q_du, LASX, gen_xvmadd_q, 0, 0, tcg_gen_mulu2_i64)
+TRANS(xvmaddwod_q_du, LASX, gen_xvmadd_q, 1, 1, tcg_gen_mulu2_i64)
+TRANS(xvmaddwev_q_du_d, LASX, gen_xvmadd_q, 0, 0, tcg_gen_mulus2_i64)
+TRANS(xvmaddwod_q_du_d, LASX, gen_xvmadd_q, 1, 1, tcg_gen_mulus2_i64)
static void gen_vmaddwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
@@ -2899,6 +2937,9 @@ static void do_vmaddwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
TRANS(vmaddwod_h_b, LSX, gvec_vvv, MO_8, do_vmaddwod_s)
TRANS(vmaddwod_w_h, LSX, gvec_vvv, MO_16, do_vmaddwod_s)
TRANS(vmaddwod_d_w, LSX, gvec_vvv, MO_32, do_vmaddwod_s)
+TRANS(xvmaddwod_h_b, LASX, gvec_xxx, MO_8, do_vmaddwod_s)
+TRANS(xvmaddwod_w_h, LASX, gvec_xxx, MO_16, do_vmaddwod_s)
+TRANS(xvmaddwod_d_w, LASX, gvec_xxx, MO_32, do_vmaddwod_s)
static void gen_vmaddwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
@@ -2969,6 +3010,9 @@ static void do_vmaddwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
TRANS(vmaddwev_h_bu, LSX, gvec_vvv, MO_8, do_vmaddwev_u)
TRANS(vmaddwev_w_hu, LSX, gvec_vvv, MO_16, do_vmaddwev_u)
TRANS(vmaddwev_d_wu, LSX, gvec_vvv, MO_32, do_vmaddwev_u)
+TRANS(xvmaddwev_h_bu, LASX, gvec_xxx, MO_8, do_vmaddwev_u)
+TRANS(xvmaddwev_w_hu, LASX, gvec_xxx, MO_16, do_vmaddwev_u)
+TRANS(xvmaddwev_d_wu, LASX, gvec_xxx, MO_32, do_vmaddwev_u)
static void gen_vmaddwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
@@ -3040,6 +3084,9 @@ static void do_vmaddwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
TRANS(vmaddwod_h_bu, LSX, gvec_vvv, MO_8, do_vmaddwod_u)
TRANS(vmaddwod_w_hu, LSX, gvec_vvv, MO_16, do_vmaddwod_u)
TRANS(vmaddwod_d_wu, LSX, gvec_vvv, MO_32, do_vmaddwod_u)
+TRANS(xvmaddwod_h_bu, LASX, gvec_xxx, MO_8, do_vmaddwod_u)
+TRANS(xvmaddwod_w_hu, LASX, gvec_xxx, MO_16, do_vmaddwod_u)
+TRANS(xvmaddwod_d_wu, LASX, gvec_xxx, MO_32, do_vmaddwod_u)
static void gen_vmaddwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
@@ -3113,6 +3160,9 @@ static void do_vmaddwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
TRANS(vmaddwev_h_bu_b, LSX, gvec_vvv, MO_8, do_vmaddwev_u_s)
TRANS(vmaddwev_w_hu_h, LSX, gvec_vvv, MO_16, do_vmaddwev_u_s)
TRANS(vmaddwev_d_wu_w, LSX, gvec_vvv, MO_32, do_vmaddwev_u_s)
+TRANS(xvmaddwev_h_bu_b, LASX, gvec_xxx, MO_8, do_vmaddwev_u_s)
+TRANS(xvmaddwev_w_hu_h, LASX, gvec_xxx, MO_16, do_vmaddwev_u_s)
+TRANS(xvmaddwev_d_wu_w, LASX, gvec_xxx, MO_32, do_vmaddwev_u_s)
static void gen_vmaddwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
@@ -3185,6 +3235,9 @@ static void do_vmaddwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
TRANS(vmaddwod_h_bu_b, LSX, gvec_vvv, MO_8, do_vmaddwod_u_s)
TRANS(vmaddwod_w_hu_h, LSX, gvec_vvv, MO_16, do_vmaddwod_u_s)
TRANS(vmaddwod_d_wu_w, LSX, gvec_vvv, MO_32, do_vmaddwod_u_s)
+TRANS(xvmaddwod_h_bu_b, LASX, gvec_xxx, MO_8, do_vmaddwod_u_s)
+TRANS(xvmaddwod_w_hu_h, LASX, gvec_xxx, MO_16, do_vmaddwod_u_s)
+TRANS(xvmaddwod_d_wu_w, LASX, gvec_xxx, MO_32, do_vmaddwod_u_s)
TRANS(vdiv_b, LSX, gen_vvv, gen_helper_vdiv_b)
TRANS(vdiv_h, LSX, gen_vvv, gen_helper_vdiv_h)
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 27/57] target/loongarch: Implement xvdiv/xvmod
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (25 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 26/57] target/loongarch: Implement xvmadd/xvmsub/xvmaddw{ev/od} Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-07 8:31 ` [PATCH RESEND v5 28/57] target/loongarch: Implement xvsat Song Gao
` (29 subsequent siblings)
56 siblings, 0 replies; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVDIV.{B/H/W/D}[U];
- XVMOD.{B/H/W/D}[U].
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/insns.decode | 17 +++++++++++++++++
target/loongarch/disas.c | 17 +++++++++++++++++
target/loongarch/vec_helper.c | 4 +++-
target/loongarch/insn_trans/trans_vec.c.inc | 16 ++++++++++++++++
4 files changed, 53 insertions(+), 1 deletion(-)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index d6fb51ae64..fa25c876b4 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1545,6 +1545,23 @@ xvmaddwod_w_hu_h 0111 01001011 11101 ..... ..... ..... @vvv
xvmaddwod_d_wu_w 0111 01001011 11110 ..... ..... ..... @vvv
xvmaddwod_q_du_d 0111 01001011 11111 ..... ..... ..... @vvv
+xvdiv_b 0111 01001110 00000 ..... ..... ..... @vvv
+xvdiv_h 0111 01001110 00001 ..... ..... ..... @vvv
+xvdiv_w 0111 01001110 00010 ..... ..... ..... @vvv
+xvdiv_d 0111 01001110 00011 ..... ..... ..... @vvv
+xvmod_b 0111 01001110 00100 ..... ..... ..... @vvv
+xvmod_h 0111 01001110 00101 ..... ..... ..... @vvv
+xvmod_w 0111 01001110 00110 ..... ..... ..... @vvv
+xvmod_d 0111 01001110 00111 ..... ..... ..... @vvv
+xvdiv_bu 0111 01001110 01000 ..... ..... ..... @vvv
+xvdiv_hu 0111 01001110 01001 ..... ..... ..... @vvv
+xvdiv_wu 0111 01001110 01010 ..... ..... ..... @vvv
+xvdiv_du 0111 01001110 01011 ..... ..... ..... @vvv
+xvmod_bu 0111 01001110 01100 ..... ..... ..... @vvv
+xvmod_hu 0111 01001110 01101 ..... ..... ..... @vvv
+xvmod_wu 0111 01001110 01110 ..... ..... ..... @vvv
+xvmod_du 0111 01001110 01111 ..... ..... ..... @vvv
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index e4369fd08b..d932318b27 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1962,6 +1962,23 @@ INSN_LASX(xvmaddwod_w_hu_h, vvv)
INSN_LASX(xvmaddwod_d_wu_w, vvv)
INSN_LASX(xvmaddwod_q_du_d, vvv)
+INSN_LASX(xvdiv_b, vvv)
+INSN_LASX(xvdiv_h, vvv)
+INSN_LASX(xvdiv_w, vvv)
+INSN_LASX(xvdiv_d, vvv)
+INSN_LASX(xvdiv_bu, vvv)
+INSN_LASX(xvdiv_hu, vvv)
+INSN_LASX(xvdiv_wu, vvv)
+INSN_LASX(xvdiv_du, vvv)
+INSN_LASX(xvmod_b, vvv)
+INSN_LASX(xvmod_h, vvv)
+INSN_LASX(xvmod_w, vvv)
+INSN_LASX(xvmod_d, vvv)
+INSN_LASX(xvmod_bu, vvv)
+INSN_LASX(xvmod_hu, vvv)
+INSN_LASX(xvmod_wu, vvv)
+INSN_LASX(xvmod_du, vvv)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index a800554159..9cf979a4bb 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -653,7 +653,9 @@ void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
VReg *Vd = (VReg *)vd; \
VReg *Vj = (VReg *)vj; \
VReg *Vk = (VReg *)vk; \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)); \
} \
}
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index 4d2d3c3f82..16f3b399ea 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -3255,6 +3255,22 @@ TRANS(vmod_bu, LSX, gen_vvv, gen_helper_vmod_bu)
TRANS(vmod_hu, LSX, gen_vvv, gen_helper_vmod_hu)
TRANS(vmod_wu, LSX, gen_vvv, gen_helper_vmod_wu)
TRANS(vmod_du, LSX, gen_vvv, gen_helper_vmod_du)
+TRANS(xvdiv_b, LASX, gen_xxx, gen_helper_vdiv_b)
+TRANS(xvdiv_h, LASX, gen_xxx, gen_helper_vdiv_h)
+TRANS(xvdiv_w, LASX, gen_xxx, gen_helper_vdiv_w)
+TRANS(xvdiv_d, LASX, gen_xxx, gen_helper_vdiv_d)
+TRANS(xvdiv_bu, LASX, gen_xxx, gen_helper_vdiv_bu)
+TRANS(xvdiv_hu, LASX, gen_xxx, gen_helper_vdiv_hu)
+TRANS(xvdiv_wu, LASX, gen_xxx, gen_helper_vdiv_wu)
+TRANS(xvdiv_du, LASX, gen_xxx, gen_helper_vdiv_du)
+TRANS(xvmod_b, LASX, gen_xxx, gen_helper_vmod_b)
+TRANS(xvmod_h, LASX, gen_xxx, gen_helper_vmod_h)
+TRANS(xvmod_w, LASX, gen_xxx, gen_helper_vmod_w)
+TRANS(xvmod_d, LASX, gen_xxx, gen_helper_vmod_d)
+TRANS(xvmod_bu, LASX, gen_xxx, gen_helper_vmod_bu)
+TRANS(xvmod_hu, LASX, gen_xxx, gen_helper_vmod_hu)
+TRANS(xvmod_wu, LASX, gen_xxx, gen_helper_vmod_wu)
+TRANS(xvmod_du, LASX, gen_xxx, gen_helper_vmod_du)
static void gen_vsat_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec max)
{
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 28/57] target/loongarch: Implement xvsat
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (26 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 27/57] target/loongarch: Implement xvdiv/xvmod Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-07 8:31 ` [PATCH RESEND v5 29/57] target/loongarch: Implement xvexth Song Gao
` (28 subsequent siblings)
56 siblings, 0 replies; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVSAT.{B/H/W/D}[U].
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/insns.decode | 9 ++++
target/loongarch/disas.c | 9 ++++
target/loongarch/vec_helper.c | 48 +++++++++++----------
target/loongarch/insn_trans/trans_vec.c.inc | 8 ++++
4 files changed, 51 insertions(+), 23 deletions(-)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index fa25c876b4..e366cf7615 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1562,6 +1562,15 @@ xvmod_hu 0111 01001110 01101 ..... ..... ..... @vvv
xvmod_wu 0111 01001110 01110 ..... ..... ..... @vvv
xvmod_du 0111 01001110 01111 ..... ..... ..... @vvv
+xvsat_b 0111 01110010 01000 01 ... ..... ..... @vv_ui3
+xvsat_h 0111 01110010 01000 1 .... ..... ..... @vv_ui4
+xvsat_w 0111 01110010 01001 ..... ..... ..... @vv_ui5
+xvsat_d 0111 01110010 0101 ...... ..... ..... @vv_ui6
+xvsat_bu 0111 01110010 10000 01 ... ..... ..... @vv_ui3
+xvsat_hu 0111 01110010 10000 1 .... ..... ..... @vv_ui4
+xvsat_wu 0111 01110010 10001 ..... ..... ..... @vv_ui5
+xvsat_du 0111 01110010 1001 ...... ..... ..... @vv_ui6
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index d932318b27..4e54dcd08a 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1979,6 +1979,15 @@ INSN_LASX(xvmod_hu, vvv)
INSN_LASX(xvmod_wu, vvv)
INSN_LASX(xvmod_du, vvv)
+INSN_LASX(xvsat_b, vv_i)
+INSN_LASX(xvsat_h, vv_i)
+INSN_LASX(xvsat_w, vv_i)
+INSN_LASX(xvsat_d, vv_i)
+INSN_LASX(xvsat_bu, vv_i)
+INSN_LASX(xvsat_hu, vv_i)
+INSN_LASX(xvsat_wu, vv_i)
+INSN_LASX(xvsat_du, vv_i)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index 9cf979a4bb..f2e19343bf 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -677,18 +677,19 @@ VDIV(vmod_hu, 16, UH, DO_REMU)
VDIV(vmod_wu, 32, UW, DO_REMU)
VDIV(vmod_du, 64, UD, DO_REMU)
-#define VSAT_S(NAME, BIT, E) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t v) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- typedef __typeof(Vd->E(0)) TD; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max : \
- Vj->E(i) < (TD)~max ? (TD)~max: Vj->E(i); \
- } \
+#define VSAT_S(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ typedef __typeof(Vd->E(0)) TD; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max : \
+ Vj->E(i) < (TD)~max ? (TD)~max: Vj->E(i); \
+ } \
}
VSAT_S(vsat_b, 8, B)
@@ -696,17 +697,18 @@ VSAT_S(vsat_h, 16, H)
VSAT_S(vsat_w, 32, W)
VSAT_S(vsat_d, 64, D)
-#define VSAT_U(NAME, BIT, E) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t v) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- typedef __typeof(Vd->E(0)) TD; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max : Vj->E(i); \
- } \
+#define VSAT_U(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ typedef __typeof(Vd->E(0)) TD; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max : Vj->E(i); \
+ } \
}
VSAT_U(vsat_bu, 8, UB)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index 16f3b399ea..53fd6ed7a5 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -3323,6 +3323,10 @@ TRANS(vsat_b, LSX, gvec_vv_i, MO_8, do_vsat_s)
TRANS(vsat_h, LSX, gvec_vv_i, MO_16, do_vsat_s)
TRANS(vsat_w, LSX, gvec_vv_i, MO_32, do_vsat_s)
TRANS(vsat_d, LSX, gvec_vv_i, MO_64, do_vsat_s)
+TRANS(xvsat_b, LASX, gvec_xx_i, MO_8, do_vsat_s)
+TRANS(xvsat_h, LASX, gvec_xx_i, MO_16, do_vsat_s)
+TRANS(xvsat_w, LASX, gvec_xx_i, MO_32, do_vsat_s)
+TRANS(xvsat_d, LASX, gvec_xx_i, MO_64, do_vsat_s)
static void gen_vsat_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec max)
{
@@ -3372,6 +3376,10 @@ TRANS(vsat_bu, LSX, gvec_vv_i, MO_8, do_vsat_u)
TRANS(vsat_hu, LSX, gvec_vv_i, MO_16, do_vsat_u)
TRANS(vsat_wu, LSX, gvec_vv_i, MO_32, do_vsat_u)
TRANS(vsat_du, LSX, gvec_vv_i, MO_64, do_vsat_u)
+TRANS(xvsat_bu, LASX, gvec_xx_i, MO_8, do_vsat_u)
+TRANS(xvsat_hu, LASX, gvec_xx_i, MO_16, do_vsat_u)
+TRANS(xvsat_wu, LASX, gvec_xx_i, MO_32, do_vsat_u)
+TRANS(xvsat_du, LASX, gvec_xx_i, MO_64, do_vsat_u)
TRANS(vexth_h_b, LSX, gen_vv, gen_helper_vexth_h_b)
TRANS(vexth_w_h, LSX, gen_vv, gen_helper_vexth_w_h)
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 29/57] target/loongarch: Implement xvexth
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (27 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 28/57] target/loongarch: Implement xvsat Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-07 8:31 ` [PATCH RESEND v5 30/57] target/loongarch: Implement vext2xv Song Gao
` (27 subsequent siblings)
56 siblings, 0 replies; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVEXTH.{H.B/W.H/D.W/Q.D};
- XVEXTH.{HU.BU/WU.HU/DU.WU/QU.DU}.
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/insns.decode | 9 ++++++
target/loongarch/disas.c | 9 ++++++
target/loongarch/vec_helper.c | 36 ++++++++++++++-------
target/loongarch/insn_trans/trans_vec.c.inc | 17 ++++++++++
4 files changed, 59 insertions(+), 12 deletions(-)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index e366cf7615..7491f295a5 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1571,6 +1571,15 @@ xvsat_hu 0111 01110010 10000 1 .... ..... ..... @vv_ui4
xvsat_wu 0111 01110010 10001 ..... ..... ..... @vv_ui5
xvsat_du 0111 01110010 1001 ...... ..... ..... @vv_ui6
+xvexth_h_b 0111 01101001 11101 11000 ..... ..... @vv
+xvexth_w_h 0111 01101001 11101 11001 ..... ..... @vv
+xvexth_d_w 0111 01101001 11101 11010 ..... ..... @vv
+xvexth_q_d 0111 01101001 11101 11011 ..... ..... @vv
+xvexth_hu_bu 0111 01101001 11101 11100 ..... ..... @vv
+xvexth_wu_hu 0111 01101001 11101 11101 ..... ..... @vv
+xvexth_du_wu 0111 01101001 11101 11110 ..... ..... @vv
+xvexth_qu_du 0111 01101001 11101 11111 ..... ..... @vv
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 4e54dcd08a..d4bea69b61 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1988,6 +1988,15 @@ INSN_LASX(xvsat_hu, vv_i)
INSN_LASX(xvsat_wu, vv_i)
INSN_LASX(xvsat_du, vv_i)
+INSN_LASX(xvexth_h_b, vv)
+INSN_LASX(xvexth_w_h, vv)
+INSN_LASX(xvexth_d_w, vv)
+INSN_LASX(xvexth_q_d, vv)
+INSN_LASX(xvexth_hu_bu, vv)
+INSN_LASX(xvexth_wu_hu, vv)
+INSN_LASX(xvexth_du_wu, vv)
+INSN_LASX(xvexth_qu_du, vv)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index f2e19343bf..2eccbc81a7 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -716,32 +716,44 @@ VSAT_U(vsat_hu, 16, UH)
VSAT_U(vsat_wu, 32, UW)
VSAT_U(vsat_du, 64, UD)
-#define VEXTH(NAME, BIT, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E1(i) = Vj->E2(i + LSX_LEN/BIT); \
- } \
+#define VEXTH(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ Vd->E1(j + i * ofs) = Vj->E2(j + ofs + ofs * 2 * i); \
+ } \
+ } \
}
void HELPER(vexth_q_d)(void *vd, void *vj, uint32_t desc)
{
+ int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
- Vd->Q(0) = int128_makes64(Vj->D(1));
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_makes64(Vj->D(2 * i + 1));
+ }
}
void HELPER(vexth_qu_du)(void *vd, void *vj, uint32_t desc)
{
+ int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
- Vd->Q(0) = int128_make64((uint64_t)Vj->D(1));
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_make64(Vj->UD(2 * i + 1));
+ }
}
VEXTH(vexth_h_b, 16, H, B)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index 53fd6ed7a5..db35745e11 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -160,6 +160,15 @@ static bool gen_vv(DisasContext *ctx, arg_vv *a, gen_helper_gvec_2 *fn)
return gen_vv_vl(ctx, a, 16, fn);
}
+static bool gen_xx(DisasContext *ctx, arg_vv *a, gen_helper_gvec_2 *fn)
+{
+ if (!check_vec(ctx, 32)) {
+ return true;
+ }
+
+ return gen_vv_vl(ctx, a, 32, fn);
+}
+
static bool gen_vv_i_vl(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz,
gen_helper_gvec_2i *fn)
{
@@ -3389,6 +3398,14 @@ TRANS(vexth_hu_bu, LSX, gen_vv, gen_helper_vexth_hu_bu)
TRANS(vexth_wu_hu, LSX, gen_vv, gen_helper_vexth_wu_hu)
TRANS(vexth_du_wu, LSX, gen_vv, gen_helper_vexth_du_wu)
TRANS(vexth_qu_du, LSX, gen_vv, gen_helper_vexth_qu_du)
+TRANS(xvexth_h_b, LASX, gen_xx, gen_helper_vexth_h_b)
+TRANS(xvexth_w_h, LASX, gen_xx, gen_helper_vexth_w_h)
+TRANS(xvexth_d_w, LASX, gen_xx, gen_helper_vexth_d_w)
+TRANS(xvexth_q_d, LASX, gen_xx, gen_helper_vexth_q_d)
+TRANS(xvexth_hu_bu, LASX, gen_xx, gen_helper_vexth_hu_bu)
+TRANS(xvexth_wu_hu, LASX, gen_xx, gen_helper_vexth_wu_hu)
+TRANS(xvexth_du_wu, LASX, gen_xx, gen_helper_vexth_du_wu)
+TRANS(xvexth_qu_du, LASX, gen_xx, gen_helper_vexth_qu_du)
static void gen_vsigncov(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 30/57] target/loongarch: Implement vext2xv
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (28 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 29/57] target/loongarch: Implement xvexth Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-07 8:31 ` [PATCH RESEND v5 31/57] target/loongarch: Implement xvsigncov Song Gao
` (26 subsequent siblings)
56 siblings, 0 replies; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- VEXT2XV.{H/W/D}.B, VEXT2XV.{HU/WU/DU}.BU;
- VEXT2XV.{W/D}.H, VEXT2XV.{WU/DU}.HU;
- VEXT2XV.D.W, VEXT2XV.DU.WU.
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/helper.h | 13 ++++++++++
target/loongarch/insns.decode | 13 ++++++++++
target/loongarch/disas.c | 13 ++++++++++
target/loongarch/vec_helper.c | 28 +++++++++++++++++++++
target/loongarch/insn_trans/trans_vec.c.inc | 13 ++++++++++
5 files changed, 80 insertions(+)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index 1abd9e1410..e9c5412267 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -340,6 +340,19 @@ DEF_HELPER_FLAGS_3(vexth_wu_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(vexth_du_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(vexth_qu_du, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vext2xv_h_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vext2xv_w_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vext2xv_d_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vext2xv_w_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vext2xv_d_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vext2xv_d_w, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vext2xv_hu_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vext2xv_wu_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vext2xv_du_bu, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vext2xv_wu_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vext2xv_du_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(vext2xv_du_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_4(vsigncov_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vsigncov_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vsigncov_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 7491f295a5..db1a6689f0 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1580,6 +1580,19 @@ xvexth_wu_hu 0111 01101001 11101 11101 ..... ..... @vv
xvexth_du_wu 0111 01101001 11101 11110 ..... ..... @vv
xvexth_qu_du 0111 01101001 11101 11111 ..... ..... @vv
+vext2xv_h_b 0111 01101001 11110 00100 ..... ..... @vv
+vext2xv_w_b 0111 01101001 11110 00101 ..... ..... @vv
+vext2xv_d_b 0111 01101001 11110 00110 ..... ..... @vv
+vext2xv_w_h 0111 01101001 11110 00111 ..... ..... @vv
+vext2xv_d_h 0111 01101001 11110 01000 ..... ..... @vv
+vext2xv_d_w 0111 01101001 11110 01001 ..... ..... @vv
+vext2xv_hu_bu 0111 01101001 11110 01010 ..... ..... @vv
+vext2xv_wu_bu 0111 01101001 11110 01011 ..... ..... @vv
+vext2xv_du_bu 0111 01101001 11110 01100 ..... ..... @vv
+vext2xv_wu_hu 0111 01101001 11110 01101 ..... ..... @vv
+vext2xv_du_hu 0111 01101001 11110 01110 ..... ..... @vv
+vext2xv_du_wu 0111 01101001 11110 01111 ..... ..... @vv
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index d4bea69b61..714b97e238 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1997,6 +1997,19 @@ INSN_LASX(xvexth_wu_hu, vv)
INSN_LASX(xvexth_du_wu, vv)
INSN_LASX(xvexth_qu_du, vv)
+INSN_LASX(vext2xv_h_b, vv)
+INSN_LASX(vext2xv_w_b, vv)
+INSN_LASX(vext2xv_d_b, vv)
+INSN_LASX(vext2xv_w_h, vv)
+INSN_LASX(vext2xv_d_h, vv)
+INSN_LASX(vext2xv_d_w, vv)
+INSN_LASX(vext2xv_hu_bu, vv)
+INSN_LASX(vext2xv_wu_bu, vv)
+INSN_LASX(vext2xv_du_bu, vv)
+INSN_LASX(vext2xv_wu_hu, vv)
+INSN_LASX(vext2xv_du_hu, vv)
+INSN_LASX(vext2xv_du_wu, vv)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index 2eccbc81a7..3dc20243fd 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -763,6 +763,34 @@ VEXTH(vexth_hu_bu, 16, UH, UB)
VEXTH(vexth_wu_hu, 32, UW, UH)
VEXTH(vexth_du_wu, 64, UD, UW)
+#define VEXT2XV(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \
+{ \
+ int i; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ temp.E1(i) = Vj->E2(i); \
+ } \
+ *Vd = temp; \
+}
+
+VEXT2XV(vext2xv_h_b, 16, H, B)
+VEXT2XV(vext2xv_w_b, 32, W, B)
+VEXT2XV(vext2xv_d_b, 64, D, B)
+VEXT2XV(vext2xv_w_h, 32, W, H)
+VEXT2XV(vext2xv_d_h, 64, D, H)
+VEXT2XV(vext2xv_d_w, 64, D, W)
+VEXT2XV(vext2xv_hu_bu, 16, UH, UB)
+VEXT2XV(vext2xv_wu_bu, 32, UW, UB)
+VEXT2XV(vext2xv_du_bu, 64, UD, UB)
+VEXT2XV(vext2xv_wu_hu, 32, UW, UH)
+VEXT2XV(vext2xv_du_hu, 64, UD, UH)
+VEXT2XV(vext2xv_du_wu, 64, UD, UW)
+
#define DO_SIGNCOV(a, b) (a == 0 ? 0 : a < 0 ? -b : b)
DO_3OP(vsigncov_b, 8, B, DO_SIGNCOV)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index db35745e11..57a1c823cf 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -3407,6 +3407,19 @@ TRANS(xvexth_wu_hu, LASX, gen_xx, gen_helper_vexth_wu_hu)
TRANS(xvexth_du_wu, LASX, gen_xx, gen_helper_vexth_du_wu)
TRANS(xvexth_qu_du, LASX, gen_xx, gen_helper_vexth_qu_du)
+TRANS(vext2xv_h_b, LASX, gen_xx, gen_helper_vext2xv_h_b)
+TRANS(vext2xv_w_b, LASX, gen_xx, gen_helper_vext2xv_w_b)
+TRANS(vext2xv_d_b, LASX, gen_xx, gen_helper_vext2xv_d_b)
+TRANS(vext2xv_w_h, LASX, gen_xx, gen_helper_vext2xv_w_h)
+TRANS(vext2xv_d_h, LASX, gen_xx, gen_helper_vext2xv_d_h)
+TRANS(vext2xv_d_w, LASX, gen_xx, gen_helper_vext2xv_d_w)
+TRANS(vext2xv_hu_bu, LASX, gen_xx, gen_helper_vext2xv_hu_bu)
+TRANS(vext2xv_wu_bu, LASX, gen_xx, gen_helper_vext2xv_wu_bu)
+TRANS(vext2xv_du_bu, LASX, gen_xx, gen_helper_vext2xv_du_bu)
+TRANS(vext2xv_wu_hu, LASX, gen_xx, gen_helper_vext2xv_wu_hu)
+TRANS(vext2xv_du_hu, LASX, gen_xx, gen_helper_vext2xv_du_hu)
+TRANS(vext2xv_du_wu, LASX, gen_xx, gen_helper_vext2xv_du_wu)
+
static void gen_vsigncov(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
TCGv_vec t1, zero;
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 31/57] target/loongarch: Implement xvsigncov
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (29 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 30/57] target/loongarch: Implement vext2xv Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-07 8:31 ` [PATCH RESEND v5 32/57] target/loongarch: Implement xvmskltz/xvmskgez/xvmsknz Song Gao
` (25 subsequent siblings)
56 siblings, 0 replies; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVSIGNCOV.{B/H/W/D}.
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/insns.decode | 5 +++++
target/loongarch/disas.c | 5 +++++
target/loongarch/insn_trans/trans_vec.c.inc | 4 ++++
3 files changed, 14 insertions(+)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index db1a6689f0..7bbda1a142 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1593,6 +1593,11 @@ vext2xv_wu_hu 0111 01101001 11110 01101 ..... ..... @vv
vext2xv_du_hu 0111 01101001 11110 01110 ..... ..... @vv
vext2xv_du_wu 0111 01101001 11110 01111 ..... ..... @vv
+xvsigncov_b 0111 01010010 11100 ..... ..... ..... @vvv
+xvsigncov_h 0111 01010010 11101 ..... ..... ..... @vvv
+xvsigncov_w 0111 01010010 11110 ..... ..... ..... @vvv
+xvsigncov_d 0111 01010010 11111 ..... ..... ..... @vvv
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 714b97e238..1f01ec99d5 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -2010,6 +2010,11 @@ INSN_LASX(vext2xv_wu_hu, vv)
INSN_LASX(vext2xv_du_hu, vv)
INSN_LASX(vext2xv_du_wu, vv)
+INSN_LASX(xvsigncov_b, vvv)
+INSN_LASX(xvsigncov_h, vvv)
+INSN_LASX(xvsigncov_w, vvv)
+INSN_LASX(xvsigncov_d, vvv)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index 57a1c823cf..604e85b654 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -3472,6 +3472,10 @@ TRANS(vsigncov_b, LSX, gvec_vvv, MO_8, do_vsigncov)
TRANS(vsigncov_h, LSX, gvec_vvv, MO_16, do_vsigncov)
TRANS(vsigncov_w, LSX, gvec_vvv, MO_32, do_vsigncov)
TRANS(vsigncov_d, LSX, gvec_vvv, MO_64, do_vsigncov)
+TRANS(xvsigncov_b, LASX, gvec_xxx, MO_8, do_vsigncov)
+TRANS(xvsigncov_h, LASX, gvec_xxx, MO_16, do_vsigncov)
+TRANS(xvsigncov_w, LASX, gvec_xxx, MO_32, do_vsigncov)
+TRANS(xvsigncov_d, LASX, gvec_xxx, MO_64, do_vsigncov)
TRANS(vmskltz_b, LSX, gen_vv, gen_helper_vmskltz_b)
TRANS(vmskltz_h, LSX, gen_vv, gen_helper_vmskltz_h)
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 32/57] target/loongarch: Implement xvmskltz/xvmskgez/xvmsknz
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (30 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 31/57] target/loongarch: Implement xvsigncov Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-07 8:31 ` [PATCH RESEND v5 33/57] target/loongarch: Implement xvldi Song Gao
` (24 subsequent siblings)
56 siblings, 0 replies; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVMSKLTZ.{B/H/W/D};
- XVMSKGEZ.B;
- XVMSKNZ.B.
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/insns.decode | 7 ++
target/loongarch/disas.c | 7 ++
target/loongarch/vec_helper.c | 78 ++++++++++++++-------
target/loongarch/insn_trans/trans_vec.c.inc | 6 ++
4 files changed, 74 insertions(+), 24 deletions(-)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 7bbda1a142..6a161d6d20 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1598,6 +1598,13 @@ xvsigncov_h 0111 01010010 11101 ..... ..... ..... @vvv
xvsigncov_w 0111 01010010 11110 ..... ..... ..... @vvv
xvsigncov_d 0111 01010010 11111 ..... ..... ..... @vvv
+xvmskltz_b 0111 01101001 11000 10000 ..... ..... @vv
+xvmskltz_h 0111 01101001 11000 10001 ..... ..... @vv
+xvmskltz_w 0111 01101001 11000 10010 ..... ..... @vv
+xvmskltz_d 0111 01101001 11000 10011 ..... ..... @vv
+xvmskgez_b 0111 01101001 11000 10100 ..... ..... @vv
+xvmsknz_b 0111 01101001 11000 11000 ..... ..... @vv
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 1f01ec99d5..05710098ad 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -2015,6 +2015,13 @@ INSN_LASX(xvsigncov_h, vvv)
INSN_LASX(xvsigncov_w, vvv)
INSN_LASX(xvsigncov_d, vvv)
+INSN_LASX(xvmskltz_b, vv)
+INSN_LASX(xvmskltz_h, vv)
+INSN_LASX(xvmskltz_w, vv)
+INSN_LASX(xvmskltz_d, vv)
+INSN_LASX(xvmskgez_b, vv)
+INSN_LASX(xvmsknz_b, vv)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index 3dc20243fd..f749800880 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -810,14 +810,19 @@ static uint64_t do_vmskltz_b(int64_t val)
void HELPER(vmskltz_b)(void *vd, void *vj, uint32_t desc)
{
+ int i;
uint16_t temp = 0;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
- temp = do_vmskltz_b(Vj->D(0));
- temp |= (do_vmskltz_b(Vj->D(1)) << 8);
- Vd->D(0) = temp;
- Vd->D(1) = 0;
+ for (i = 0; i < oprsz / 16; i++) {
+ temp = 0;
+ temp = do_vmskltz_b(Vj->D(2 * i));
+ temp |= (do_vmskltz_b(Vj->D(2 * i + 1)) << 8);
+ Vd->D(2 * i) = temp;
+ Vd->D(2 * i + 1) = 0;
+ }
}
static uint64_t do_vmskltz_h(int64_t val)
@@ -831,14 +836,19 @@ static uint64_t do_vmskltz_h(int64_t val)
void HELPER(vmskltz_h)(void *vd, void *vj, uint32_t desc)
{
+ int i;
uint16_t temp = 0;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
- temp = do_vmskltz_h(Vj->D(0));
- temp |= (do_vmskltz_h(Vj->D(1)) << 4);
- Vd->D(0) = temp;
- Vd->D(1) = 0;
+ for (i = 0; i < oprsz / 16; i++) {
+ temp = 0;
+ temp = do_vmskltz_h(Vj->D(2 * i));
+ temp |= (do_vmskltz_h(Vj->D(2 * i + 1)) << 4);
+ Vd->D(2 * i) = temp;
+ Vd->D(2 * i + 1) = 0;
+ }
}
static uint64_t do_vmskltz_w(int64_t val)
@@ -851,14 +861,19 @@ static uint64_t do_vmskltz_w(int64_t val)
void HELPER(vmskltz_w)(void *vd, void *vj, uint32_t desc)
{
+ int i;
uint16_t temp = 0;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
- temp = do_vmskltz_w(Vj->D(0));
- temp |= (do_vmskltz_w(Vj->D(1)) << 2);
- Vd->D(0) = temp;
- Vd->D(1) = 0;
+ for (i = 0; i < oprsz / 16; i++) {
+ temp = 0;
+ temp = do_vmskltz_w(Vj->D(2 * i));
+ temp |= (do_vmskltz_w(Vj->D(2 * i + 1)) << 2);
+ Vd->D(2 * i) = temp;
+ Vd->D(2 * i + 1) = 0;
+ }
}
static uint64_t do_vmskltz_d(int64_t val)
@@ -867,26 +882,36 @@ static uint64_t do_vmskltz_d(int64_t val)
}
void HELPER(vmskltz_d)(void *vd, void *vj, uint32_t desc)
{
+ int i;
uint16_t temp = 0;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
- temp = do_vmskltz_d(Vj->D(0));
- temp |= (do_vmskltz_d(Vj->D(1)) << 1);
- Vd->D(0) = temp;
- Vd->D(1) = 0;
+ for (i = 0; i < oprsz / 16; i++) {
+ temp = 0;
+ temp = do_vmskltz_d(Vj->D(2 * i));
+ temp |= (do_vmskltz_d(Vj->D(2 * i + 1)) << 1);
+ Vd->D(2 * i) = temp;
+ Vd->D(2 * i + 1) = 0;
+ }
}
void HELPER(vmskgez_b)(void *vd, void *vj, uint32_t desc)
{
+ int i;
uint16_t temp = 0;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
- temp = do_vmskltz_b(Vj->D(0));
- temp |= (do_vmskltz_b(Vj->D(1)) << 8);
- Vd->D(0) = (uint16_t)(~temp);
- Vd->D(1) = 0;
+ for (i = 0; i < oprsz / 16; i++) {
+ temp = 0;
+ temp = do_vmskltz_b(Vj->D(2 * i));
+ temp |= (do_vmskltz_b(Vj->D(2 * i + 1)) << 8);
+ Vd->D(2 * i) = (uint16_t)(~temp);
+ Vd->D(2 * i + 1) = 0;
+ }
}
static uint64_t do_vmskez_b(uint64_t a)
@@ -901,14 +926,19 @@ static uint64_t do_vmskez_b(uint64_t a)
void HELPER(vmsknz_b)(void *vd, void *vj, uint32_t desc)
{
+ int i;
uint16_t temp = 0;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
- temp = do_vmskez_b(Vj->D(0));
- temp |= (do_vmskez_b(Vj->D(1)) << 8);
- Vd->D(0) = (uint16_t)(~temp);
- Vd->D(1) = 0;
+ for (i = 0; i < oprsz / 16; i++) {
+ temp = 0;
+ temp = do_vmskez_b(Vj->D(2 * i));
+ temp |= (do_vmskez_b(Vj->D(2 * i + 1)) << 8);
+ Vd->D(2 * i) = (uint16_t)(~temp);
+ Vd->D(2 * i + 1) = 0;
+ }
}
void HELPER(vnori_b)(void *vd, void *vj, uint64_t imm, uint32_t v)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index 604e85b654..b889b6c966 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -3483,6 +3483,12 @@ TRANS(vmskltz_w, LSX, gen_vv, gen_helper_vmskltz_w)
TRANS(vmskltz_d, LSX, gen_vv, gen_helper_vmskltz_d)
TRANS(vmskgez_b, LSX, gen_vv, gen_helper_vmskgez_b)
TRANS(vmsknz_b, LSX, gen_vv, gen_helper_vmsknz_b)
+TRANS(xvmskltz_b, LASX, gen_xx, gen_helper_vmskltz_b)
+TRANS(xvmskltz_h, LASX, gen_xx, gen_helper_vmskltz_h)
+TRANS(xvmskltz_w, LASX, gen_xx, gen_helper_vmskltz_w)
+TRANS(xvmskltz_d, LASX, gen_xx, gen_helper_vmskltz_d)
+TRANS(xvmskgez_b, LASX, gen_xx, gen_helper_vmskgez_b)
+TRANS(xvmsknz_b, LASX, gen_xx, gen_helper_vmsknz_b)
#define EXPAND_BYTE(bit) ((uint64_t)(bit ? 0xff : 0))
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 33/57] target/loongarch: Implement xvldi
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (31 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 32/57] target/loongarch: Implement xvmskltz/xvmskgez/xvmsknz Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-07 8:31 ` [PATCH RESEND v5 34/57] target/loongarch: Implement LASX logic instructions Song Gao
` (23 subsequent siblings)
56 siblings, 0 replies; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVLDI.
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/insns.decode | 2 ++
target/loongarch/disas.c | 7 +++++++
target/loongarch/insn_trans/trans_vec.c.inc | 13 ++++++-------
3 files changed, 15 insertions(+), 7 deletions(-)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 6a161d6d20..edaa756395 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1605,6 +1605,8 @@ xvmskltz_d 0111 01101001 11000 10011 ..... ..... @vv
xvmskgez_b 0111 01101001 11000 10100 ..... ..... @vv
xvmsknz_b 0111 01101001 11000 11000 ..... ..... @vv
+xvldi 0111 01111110 00 ............. ..... @v_i13
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 05710098ad..3f6fbeddd7 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1703,6 +1703,11 @@ static bool trans_##insn(DisasContext *ctx, arg_##type * a) \
return true; \
}
+static void output_v_i_x(DisasContext *ctx, arg_v_i *a, const char *mnemonic)
+{
+ output(ctx, mnemonic, "x%d, 0x%x", a->vd, a->imm);
+}
+
static void output_vvv_x(DisasContext *ctx, arg_vvv * a, const char *mnemonic)
{
output(ctx, mnemonic, "x%d, x%d, x%d", a->vd, a->vj, a->vk);
@@ -2022,6 +2027,8 @@ INSN_LASX(xvmskltz_d, vv)
INSN_LASX(xvmskgez_b, vv)
INSN_LASX(xvmsknz_b, vv)
+INSN_LASX(xvldi, v_i)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index b889b6c966..5dc7fdb47e 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -3606,16 +3606,12 @@ static uint64_t vldi_get_value(DisasContext *ctx, uint32_t imm)
return data;
}
-static bool trans_vldi(DisasContext *ctx, arg_vldi *a)
+static bool gen_vldi(DisasContext *ctx, arg_vldi *a, uint32_t oprsz)
{
int sel, vece;
uint64_t value;
- if (!avail_LSX(ctx)) {
- return false;
- }
-
- if (!check_vec(ctx, 16)) {
+ if (!check_vec(ctx, oprsz)) {
return true;
}
@@ -3629,11 +3625,14 @@ static bool trans_vldi(DisasContext *ctx, arg_vldi *a)
vece = (a->imm >> 10) & 0x3;
}
- tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd), 16, ctx->vl/8,
+ tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd), oprsz, ctx->vl/8,
tcg_constant_i64(value));
return true;
}
+TRANS(vldi, LSX, gen_vldi, 16)
+TRANS(xvldi, LASX, gen_vldi, 32)
+
TRANS(vand_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_and)
TRANS(vor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_or)
TRANS(vxor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_xor)
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 34/57] target/loongarch: Implement LASX logic instructions
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (32 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 33/57] target/loongarch: Implement xvldi Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-07 8:31 ` [PATCH RESEND v5 35/57] target/loongarch: Implement xvsll xvsrl xvsra xvrotr Song Gao
` (22 subsequent siblings)
56 siblings, 0 replies; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XV{AND/OR/XOR/NOR/ANDN/ORN}.V;
- XV{AND/OR/XOR/NOR}I.B.
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/insns.decode | 12 +++++++
target/loongarch/disas.c | 12 +++++++
target/loongarch/vec_helper.c | 4 +--
target/loongarch/insn_trans/trans_vec.c.inc | 38 ++++++++++++---------
4 files changed, 48 insertions(+), 18 deletions(-)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index edaa756395..fb28666577 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1607,6 +1607,18 @@ xvmsknz_b 0111 01101001 11000 11000 ..... ..... @vv
xvldi 0111 01111110 00 ............. ..... @v_i13
+xvand_v 0111 01010010 01100 ..... ..... ..... @vvv
+xvor_v 0111 01010010 01101 ..... ..... ..... @vvv
+xvxor_v 0111 01010010 01110 ..... ..... ..... @vvv
+xvnor_v 0111 01010010 01111 ..... ..... ..... @vvv
+xvandn_v 0111 01010010 10000 ..... ..... ..... @vvv
+xvorn_v 0111 01010010 10001 ..... ..... ..... @vvv
+
+xvandi_b 0111 01111101 00 ........ ..... ..... @vv_ui8
+xvori_b 0111 01111101 01 ........ ..... ..... @vv_ui8
+xvxori_b 0111 01111101 10 ........ ..... ..... @vv_ui8
+xvnori_b 0111 01111101 11 ........ ..... ..... @vv_ui8
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 3f6fbeddd7..e9adc017db 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -2029,6 +2029,18 @@ INSN_LASX(xvmsknz_b, vv)
INSN_LASX(xvldi, v_i)
+INSN_LASX(xvand_v, vvv)
+INSN_LASX(xvor_v, vvv)
+INSN_LASX(xvxor_v, vvv)
+INSN_LASX(xvnor_v, vvv)
+INSN_LASX(xvandn_v, vvv)
+INSN_LASX(xvorn_v, vvv)
+
+INSN_LASX(xvandi_b, vv_i)
+INSN_LASX(xvori_b, vv_i)
+INSN_LASX(xvxori_b, vv_i)
+INSN_LASX(xvnori_b, vv_i)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index f749800880..1a602ee548 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -941,13 +941,13 @@ void HELPER(vmsknz_b)(void *vd, void *vj, uint32_t desc)
}
}
-void HELPER(vnori_b)(void *vd, void *vj, uint64_t imm, uint32_t v)
+void HELPER(vnori_b)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
- for (i = 0; i < LSX_LEN/8; i++) {
+ for (i = 0; i < simd_oprsz(desc); i++) {
Vd->B(i) = ~(Vj->B(i) | (uint8_t)imm);
}
}
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index 5dc7fdb47e..331cf1ad08 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -3633,20 +3633,11 @@ static bool gen_vldi(DisasContext *ctx, arg_vldi *a, uint32_t oprsz)
TRANS(vldi, LSX, gen_vldi, 16)
TRANS(xvldi, LASX, gen_vldi, 32)
-TRANS(vand_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_and)
-TRANS(vor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_or)
-TRANS(vxor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_xor)
-TRANS(vnor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_nor)
-
-static bool trans_vandn_v(DisasContext *ctx, arg_vvv *a)
+static bool gen_vandn_v(DisasContext *ctx, arg_vvv *a, uint32_t oprsz)
{
uint32_t vd_ofs, vj_ofs, vk_ofs;
- if (!avail_LSX(ctx)) {
- return false;
- }
-
- if (!check_vec(ctx, 16)) {
+ if (!check_vec(ctx, oprsz)) {
return true;
}
@@ -3654,13 +3645,9 @@ static bool trans_vandn_v(DisasContext *ctx, arg_vvv *a)
vj_ofs = vec_full_offset(a->vj);
vk_ofs = vec_full_offset(a->vk);
- tcg_gen_gvec_andc(MO_64, vd_ofs, vk_ofs, vj_ofs, 16, ctx->vl/8);
+ tcg_gen_gvec_andc(MO_64, vd_ofs, vk_ofs, vj_ofs, oprsz, ctx->vl / 8);
return true;
}
-TRANS(vorn_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_orc)
-TRANS(vandi_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_andi)
-TRANS(vori_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_ori)
-TRANS(vxori_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_xori)
static void gen_vnori(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
{
@@ -3693,7 +3680,26 @@ static void do_vnori_b(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op);
}
+TRANS(vand_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_and)
+TRANS(vor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_or)
+TRANS(vxor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_xor)
+TRANS(vnor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_nor)
+TRANS(vandn_v, LSX, gen_vandn_v, 16)
+TRANS(vorn_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_orc)
+TRANS(vandi_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_andi)
+TRANS(vori_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_ori)
+TRANS(vxori_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_xori)
TRANS(vnori_b, LSX, gvec_vv_i, MO_8, do_vnori_b)
+TRANS(xvand_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_and)
+TRANS(xvor_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_or)
+TRANS(xvxor_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_xor)
+TRANS(xvnor_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_nor)
+TRANS(xvandn_v, LASX, gen_vandn_v, 32)
+TRANS(xvorn_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_orc)
+TRANS(xvandi_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_andi)
+TRANS(xvori_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_ori)
+TRANS(xvxori_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_xori)
+TRANS(xvnori_b, LASX, gvec_xx_i, MO_8, do_vnori_b)
TRANS(vsll_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_shlv)
TRANS(vsll_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_shlv)
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 35/57] target/loongarch: Implement xvsll xvsrl xvsra xvrotr
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (33 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 34/57] target/loongarch: Implement LASX logic instructions Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-07 8:31 ` [PATCH RESEND v5 36/57] target/loongarch: Implement xvsllwil xvextl Song Gao
` (21 subsequent siblings)
56 siblings, 0 replies; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVSLL[I].{B/H/W/D};
- XVSRL[I].{B/H/W/D};
- XVSRA[I].{B/H/W/D};
- XVROTR[I].{B/H/W/D}.
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/insns.decode | 33 +++++++++++++++++++
target/loongarch/disas.c | 36 +++++++++++++++++++++
target/loongarch/insn_trans/trans_vec.c.inc | 32 ++++++++++++++++++
3 files changed, 101 insertions(+)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index fb28666577..fb7bd9fb34 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1619,6 +1619,39 @@ xvori_b 0111 01111101 01 ........ ..... ..... @vv_ui8
xvxori_b 0111 01111101 10 ........ ..... ..... @vv_ui8
xvnori_b 0111 01111101 11 ........ ..... ..... @vv_ui8
+xvsll_b 0111 01001110 10000 ..... ..... ..... @vvv
+xvsll_h 0111 01001110 10001 ..... ..... ..... @vvv
+xvsll_w 0111 01001110 10010 ..... ..... ..... @vvv
+xvsll_d 0111 01001110 10011 ..... ..... ..... @vvv
+xvslli_b 0111 01110010 11000 01 ... ..... ..... @vv_ui3
+xvslli_h 0111 01110010 11000 1 .... ..... ..... @vv_ui4
+xvslli_w 0111 01110010 11001 ..... ..... ..... @vv_ui5
+xvslli_d 0111 01110010 1101 ...... ..... ..... @vv_ui6
+xvsrl_b 0111 01001110 10100 ..... ..... ..... @vvv
+xvsrl_h 0111 01001110 10101 ..... ..... ..... @vvv
+xvsrl_w 0111 01001110 10110 ..... ..... ..... @vvv
+xvsrl_d 0111 01001110 10111 ..... ..... ..... @vvv
+xvsrli_b 0111 01110011 00000 01 ... ..... ..... @vv_ui3
+xvsrli_h 0111 01110011 00000 1 .... ..... ..... @vv_ui4
+xvsrli_w 0111 01110011 00001 ..... ..... ..... @vv_ui5
+xvsrli_d 0111 01110011 0001 ...... ..... ..... @vv_ui6
+xvsra_b 0111 01001110 11000 ..... ..... ..... @vvv
+xvsra_h 0111 01001110 11001 ..... ..... ..... @vvv
+xvsra_w 0111 01001110 11010 ..... ..... ..... @vvv
+xvsra_d 0111 01001110 11011 ..... ..... ..... @vvv
+xvsrai_b 0111 01110011 01000 01 ... ..... ..... @vv_ui3
+xvsrai_h 0111 01110011 01000 1 .... ..... ..... @vv_ui4
+xvsrai_w 0111 01110011 01001 ..... ..... ..... @vv_ui5
+xvsrai_d 0111 01110011 0101 ...... ..... ..... @vv_ui6
+xvrotr_b 0111 01001110 11100 ..... ..... ..... @vvv
+xvrotr_h 0111 01001110 11101 ..... ..... ..... @vvv
+xvrotr_w 0111 01001110 11110 ..... ..... ..... @vvv
+xvrotr_d 0111 01001110 11111 ..... ..... ..... @vvv
+xvrotri_b 0111 01101010 00000 01 ... ..... ..... @vv_ui3
+xvrotri_h 0111 01101010 00000 1 .... ..... ..... @vv_ui4
+xvrotri_w 0111 01101010 00001 ..... ..... ..... @vv_ui5
+xvrotri_d 0111 01101010 0001 ...... ..... ..... @vv_ui6
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index e9adc017db..209ae230f4 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -2041,6 +2041,42 @@ INSN_LASX(xvori_b, vv_i)
INSN_LASX(xvxori_b, vv_i)
INSN_LASX(xvnori_b, vv_i)
+INSN_LASX(xvsll_b, vvv)
+INSN_LASX(xvsll_h, vvv)
+INSN_LASX(xvsll_w, vvv)
+INSN_LASX(xvsll_d, vvv)
+INSN_LASX(xvslli_b, vv_i)
+INSN_LASX(xvslli_h, vv_i)
+INSN_LASX(xvslli_w, vv_i)
+INSN_LASX(xvslli_d, vv_i)
+
+INSN_LASX(xvsrl_b, vvv)
+INSN_LASX(xvsrl_h, vvv)
+INSN_LASX(xvsrl_w, vvv)
+INSN_LASX(xvsrl_d, vvv)
+INSN_LASX(xvsrli_b, vv_i)
+INSN_LASX(xvsrli_h, vv_i)
+INSN_LASX(xvsrli_w, vv_i)
+INSN_LASX(xvsrli_d, vv_i)
+
+INSN_LASX(xvsra_b, vvv)
+INSN_LASX(xvsra_h, vvv)
+INSN_LASX(xvsra_w, vvv)
+INSN_LASX(xvsra_d, vvv)
+INSN_LASX(xvsrai_b, vv_i)
+INSN_LASX(xvsrai_h, vv_i)
+INSN_LASX(xvsrai_w, vv_i)
+INSN_LASX(xvsrai_d, vv_i)
+
+INSN_LASX(xvrotr_b, vvv)
+INSN_LASX(xvrotr_h, vvv)
+INSN_LASX(xvrotr_w, vvv)
+INSN_LASX(xvrotr_d, vvv)
+INSN_LASX(xvrotri_b, vv_i)
+INSN_LASX(xvrotri_h, vv_i)
+INSN_LASX(xvrotri_w, vv_i)
+INSN_LASX(xvrotri_d, vv_i)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index 331cf1ad08..74cf6e0472 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -3709,6 +3709,14 @@ TRANS(vslli_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_shli)
TRANS(vslli_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_shli)
TRANS(vslli_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_shli)
TRANS(vslli_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_shli)
+TRANS(xvsll_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_shlv)
+TRANS(xvsll_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_shlv)
+TRANS(xvsll_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_shlv)
+TRANS(xvsll_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_shlv)
+TRANS(xvslli_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_shli)
+TRANS(xvslli_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_shli)
+TRANS(xvslli_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_shli)
+TRANS(xvslli_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_shli)
TRANS(vsrl_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_shrv)
TRANS(vsrl_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_shrv)
@@ -3718,6 +3726,14 @@ TRANS(vsrli_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_shri)
TRANS(vsrli_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_shri)
TRANS(vsrli_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_shri)
TRANS(vsrli_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_shri)
+TRANS(xvsrl_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_shrv)
+TRANS(xvsrl_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_shrv)
+TRANS(xvsrl_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_shrv)
+TRANS(xvsrl_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_shrv)
+TRANS(xvsrli_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_shri)
+TRANS(xvsrli_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_shri)
+TRANS(xvsrli_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_shri)
+TRANS(xvsrli_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_shri)
TRANS(vsra_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_sarv)
TRANS(vsra_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_sarv)
@@ -3727,6 +3743,14 @@ TRANS(vsrai_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_sari)
TRANS(vsrai_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_sari)
TRANS(vsrai_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_sari)
TRANS(vsrai_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_sari)
+TRANS(xvsra_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_sarv)
+TRANS(xvsra_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_sarv)
+TRANS(xvsra_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_sarv)
+TRANS(xvsra_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_sarv)
+TRANS(xvsrai_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_sari)
+TRANS(xvsrai_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_sari)
+TRANS(xvsrai_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_sari)
+TRANS(xvsrai_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_sari)
TRANS(vrotr_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_rotrv)
TRANS(vrotr_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_rotrv)
@@ -3736,6 +3760,14 @@ TRANS(vrotri_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_rotri)
TRANS(vrotri_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_rotri)
TRANS(vrotri_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_rotri)
TRANS(vrotri_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_rotri)
+TRANS(xvrotr_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_rotrv)
+TRANS(xvrotr_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_rotrv)
+TRANS(xvrotr_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_rotrv)
+TRANS(xvrotr_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_rotrv)
+TRANS(xvrotri_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_rotri)
+TRANS(xvrotri_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_rotri)
+TRANS(xvrotri_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_rotri)
+TRANS(xvrotri_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_rotri)
TRANS(vsllwil_h_b, LSX, gen_vv_i, gen_helper_vsllwil_h_b)
TRANS(vsllwil_w_h, LSX, gen_vv_i, gen_helper_vsllwil_w_h)
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 36/57] target/loongarch: Implement xvsllwil xvextl
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (34 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 35/57] target/loongarch: Implement xvsll xvsrl xvsra xvrotr Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-07 8:31 ` [PATCH RESEND v5 37/57] target/loongarch: Implement xvsrlr xvsrar Song Gao
` (20 subsequent siblings)
56 siblings, 0 replies; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVSLLWIL.{H.B/W.H/D.W};
- XVSLLWIL.{HU.BU/WU.HU/DU.WU};
- XVEXTL.Q.D, XVEXTL.QU.DU.
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/insns.decode | 9 +++++
target/loongarch/disas.c | 9 +++++
target/loongarch/vec_helper.c | 45 +++++++++++++--------
target/loongarch/insn_trans/trans_vec.c.inc | 17 ++++++++
4 files changed, 63 insertions(+), 17 deletions(-)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index fb7bd9fb34..8a7933eccc 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1652,6 +1652,15 @@ xvrotri_h 0111 01101010 00000 1 .... ..... ..... @vv_ui4
xvrotri_w 0111 01101010 00001 ..... ..... ..... @vv_ui5
xvrotri_d 0111 01101010 0001 ...... ..... ..... @vv_ui6
+xvsllwil_h_b 0111 01110000 10000 01 ... ..... ..... @vv_ui3
+xvsllwil_w_h 0111 01110000 10000 1 .... ..... ..... @vv_ui4
+xvsllwil_d_w 0111 01110000 10001 ..... ..... ..... @vv_ui5
+xvextl_q_d 0111 01110000 10010 00000 ..... ..... @vv
+xvsllwil_hu_bu 0111 01110000 11000 01 ... ..... ..... @vv_ui3
+xvsllwil_wu_hu 0111 01110000 11000 1 .... ..... ..... @vv_ui4
+xvsllwil_du_wu 0111 01110000 11001 ..... ..... ..... @vv_ui5
+xvextl_qu_du 0111 01110000 11010 00000 ..... ..... @vv
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 209ae230f4..d93ecdb60d 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -2077,6 +2077,15 @@ INSN_LASX(xvrotri_h, vv_i)
INSN_LASX(xvrotri_w, vv_i)
INSN_LASX(xvrotri_d, vv_i)
+INSN_LASX(xvsllwil_h_b, vv_i)
+INSN_LASX(xvsllwil_w_h, vv_i)
+INSN_LASX(xvsllwil_d_w, vv_i)
+INSN_LASX(xvextl_q_d, vv)
+INSN_LASX(xvsllwil_hu_bu, vv_i)
+INSN_LASX(xvsllwil_wu_hu, vv_i)
+INSN_LASX(xvsllwil_du_wu, vv_i)
+INSN_LASX(xvextl_qu_du, vv)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index 1a602ee548..a3376439e3 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -952,37 +952,48 @@ void HELPER(vnori_b)(void *vd, void *vj, uint64_t imm, uint32_t desc)
}
}
-#define VSLLWIL(NAME, BIT, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- typedef __typeof(temp.E1(0)) TD; \
- \
- temp.D(0) = 0; \
- temp.D(1) = 0; \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- temp.E1(i) = (TD)Vj->E2(i) << (imm % BIT); \
- } \
- *Vd = temp; \
+#define VSLLWIL(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ typedef __typeof(temp.E1(0)) TD; \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E1(j + ofs * i) = (TD)Vj->E2(j + ofs * 2 * i) << (imm % BIT); \
+ } \
+ } \
+ *Vd = temp; \
}
+
void HELPER(vextl_q_d)(void *vd, void *vj, uint32_t desc)
{
+ int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
- Vd->Q(0) = int128_makes64(Vj->D(0));
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_makes64(Vj->D(2 * i));
+ }
}
void HELPER(vextl_qu_du)(void *vd, void *vj, uint32_t desc)
{
+ int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
- Vd->Q(0) = int128_make64(Vj->D(0));
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_make64(Vj->UD(2 * i));
+ }
}
VSLLWIL(vsllwil_h_b, 16, H, B)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index 74cf6e0472..e6abb2bd16 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -188,6 +188,15 @@ static bool gen_vv_i(DisasContext *ctx, arg_vv_i *a, gen_helper_gvec_2i *fn)
return gen_vv_i_vl(ctx, a, 16, fn);
}
+static bool gen_xx_i(DisasContext *ctx, arg_vv_i *a, gen_helper_gvec_2i *fn)
+{
+ if (!check_vec(ctx, 32)) {
+ return true;
+ }
+
+ return gen_vv_i_vl(ctx, a, 32, fn);
+}
+
static bool gen_cv(DisasContext *ctx, arg_cv *a,
void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32))
{
@@ -3777,6 +3786,14 @@ TRANS(vsllwil_hu_bu, LSX, gen_vv_i, gen_helper_vsllwil_hu_bu)
TRANS(vsllwil_wu_hu, LSX, gen_vv_i, gen_helper_vsllwil_wu_hu)
TRANS(vsllwil_du_wu, LSX, gen_vv_i, gen_helper_vsllwil_du_wu)
TRANS(vextl_qu_du, LSX, gen_vv, gen_helper_vextl_qu_du)
+TRANS(xvsllwil_h_b, LASX, gen_xx_i, gen_helper_vsllwil_h_b)
+TRANS(xvsllwil_w_h, LASX, gen_xx_i, gen_helper_vsllwil_w_h)
+TRANS(xvsllwil_d_w, LASX, gen_xx_i, gen_helper_vsllwil_d_w)
+TRANS(xvextl_q_d, LASX, gen_xx, gen_helper_vextl_q_d)
+TRANS(xvsllwil_hu_bu, LASX, gen_xx_i, gen_helper_vsllwil_hu_bu)
+TRANS(xvsllwil_wu_hu, LASX, gen_xx_i, gen_helper_vsllwil_wu_hu)
+TRANS(xvsllwil_du_wu, LASX, gen_xx_i, gen_helper_vsllwil_du_wu)
+TRANS(xvextl_qu_du, LASX, gen_xx, gen_helper_vextl_qu_du)
TRANS(vsrlr_b, LSX, gen_vvv, gen_helper_vsrlr_b)
TRANS(vsrlr_h, LSX, gen_vvv, gen_helper_vsrlr_h)
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 37/57] target/loongarch: Implement xvsrlr xvsrar
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (35 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 36/57] target/loongarch: Implement xvsllwil xvextl Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-07 8:31 ` [PATCH RESEND v5 38/57] target/loongarch: Implement xvsrln xvsran Song Gao
` (19 subsequent siblings)
56 siblings, 0 replies; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVSRLR[I].{B/H/W/D};
- XVSRAR[I].{B/H/W/D}.
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/insns.decode | 17 +++++++++++++++++
target/loongarch/disas.c | 18 ++++++++++++++++++
target/loongarch/vec_helper.c | 12 ++++++++----
target/loongarch/insn_trans/trans_vec.c.inc | 16 ++++++++++++++++
4 files changed, 59 insertions(+), 4 deletions(-)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 8a7933eccc..ca0951e1cc 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1661,6 +1661,23 @@ xvsllwil_wu_hu 0111 01110000 11000 1 .... ..... ..... @vv_ui4
xvsllwil_du_wu 0111 01110000 11001 ..... ..... ..... @vv_ui5
xvextl_qu_du 0111 01110000 11010 00000 ..... ..... @vv
+xvsrlr_b 0111 01001111 00000 ..... ..... ..... @vvv
+xvsrlr_h 0111 01001111 00001 ..... ..... ..... @vvv
+xvsrlr_w 0111 01001111 00010 ..... ..... ..... @vvv
+xvsrlr_d 0111 01001111 00011 ..... ..... ..... @vvv
+xvsrlri_b 0111 01101010 01000 01 ... ..... ..... @vv_ui3
+xvsrlri_h 0111 01101010 01000 1 .... ..... ..... @vv_ui4
+xvsrlri_w 0111 01101010 01001 ..... ..... ..... @vv_ui5
+xvsrlri_d 0111 01101010 0101 ...... ..... ..... @vv_ui6
+xvsrar_b 0111 01001111 00100 ..... ..... ..... @vvv
+xvsrar_h 0111 01001111 00101 ..... ..... ..... @vvv
+xvsrar_w 0111 01001111 00110 ..... ..... ..... @vvv
+xvsrar_d 0111 01001111 00111 ..... ..... ..... @vvv
+xvsrari_b 0111 01101010 10000 01 ... ..... ..... @vv_ui3
+xvsrari_h 0111 01101010 10000 1 .... ..... ..... @vv_ui4
+xvsrari_w 0111 01101010 10001 ..... ..... ..... @vv_ui5
+xvsrari_d 0111 01101010 1001 ...... ..... ..... @vv_ui6
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index d93ecdb60d..bc5eb82b49 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -2086,6 +2086,24 @@ INSN_LASX(xvsllwil_wu_hu, vv_i)
INSN_LASX(xvsllwil_du_wu, vv_i)
INSN_LASX(xvextl_qu_du, vv)
+INSN_LASX(xvsrlr_b, vvv)
+INSN_LASX(xvsrlr_h, vvv)
+INSN_LASX(xvsrlr_w, vvv)
+INSN_LASX(xvsrlr_d, vvv)
+INSN_LASX(xvsrlri_b, vv_i)
+INSN_LASX(xvsrlri_h, vv_i)
+INSN_LASX(xvsrlri_w, vv_i)
+INSN_LASX(xvsrlri_d, vv_i)
+
+INSN_LASX(xvsrar_b, vvv)
+INSN_LASX(xvsrar_h, vvv)
+INSN_LASX(xvsrar_w, vvv)
+INSN_LASX(xvsrar_d, vvv)
+INSN_LASX(xvsrari_b, vv_i)
+INSN_LASX(xvsrari_h, vv_i)
+INSN_LASX(xvsrari_w, vv_i)
+INSN_LASX(xvsrari_d, vv_i)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index a3376439e3..bb30d24b89 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -1025,8 +1025,9 @@ void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
VReg *Vd = (VReg *)vd; \
VReg *Vj = (VReg *)vj; \
VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
\
- for (i = 0; i < LSX_LEN/BIT; i++) { \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), ((T)Vk->E(i))%BIT); \
} \
}
@@ -1042,8 +1043,9 @@ void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
int i; \
VReg *Vd = (VReg *)vd; \
VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
\
- for (i = 0; i < LSX_LEN/BIT; i++) { \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), imm); \
} \
}
@@ -1075,8 +1077,9 @@ void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
VReg *Vd = (VReg *)vd; \
VReg *Vj = (VReg *)vj; \
VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
\
- for (i = 0; i < LSX_LEN/BIT; i++) { \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
Vd->E(i) = do_vsrar_ ## E(Vj->E(i), ((T)Vk->E(i))%BIT); \
} \
}
@@ -1092,8 +1095,9 @@ void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
int i; \
VReg *Vd = (VReg *)vd; \
VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
\
- for (i = 0; i < LSX_LEN/BIT; i++) { \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
Vd->E(i) = do_vsrar_ ## E(Vj->E(i), imm); \
} \
}
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index e6abb2bd16..9d95c42708 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -3803,6 +3803,14 @@ TRANS(vsrlri_b, LSX, gen_vv_i, gen_helper_vsrlri_b)
TRANS(vsrlri_h, LSX, gen_vv_i, gen_helper_vsrlri_h)
TRANS(vsrlri_w, LSX, gen_vv_i, gen_helper_vsrlri_w)
TRANS(vsrlri_d, LSX, gen_vv_i, gen_helper_vsrlri_d)
+TRANS(xvsrlr_b, LASX, gen_xxx, gen_helper_vsrlr_b)
+TRANS(xvsrlr_h, LASX, gen_xxx, gen_helper_vsrlr_h)
+TRANS(xvsrlr_w, LASX, gen_xxx, gen_helper_vsrlr_w)
+TRANS(xvsrlr_d, LASX, gen_xxx, gen_helper_vsrlr_d)
+TRANS(xvsrlri_b, LASX, gen_xx_i, gen_helper_vsrlri_b)
+TRANS(xvsrlri_h, LASX, gen_xx_i, gen_helper_vsrlri_h)
+TRANS(xvsrlri_w, LASX, gen_xx_i, gen_helper_vsrlri_w)
+TRANS(xvsrlri_d, LASX, gen_xx_i, gen_helper_vsrlri_d)
TRANS(vsrar_b, LSX, gen_vvv, gen_helper_vsrar_b)
TRANS(vsrar_h, LSX, gen_vvv, gen_helper_vsrar_h)
@@ -3812,6 +3820,14 @@ TRANS(vsrari_b, LSX, gen_vv_i, gen_helper_vsrari_b)
TRANS(vsrari_h, LSX, gen_vv_i, gen_helper_vsrari_h)
TRANS(vsrari_w, LSX, gen_vv_i, gen_helper_vsrari_w)
TRANS(vsrari_d, LSX, gen_vv_i, gen_helper_vsrari_d)
+TRANS(xvsrar_b, LASX, gen_xxx, gen_helper_vsrar_b)
+TRANS(xvsrar_h, LASX, gen_xxx, gen_helper_vsrar_h)
+TRANS(xvsrar_w, LASX, gen_xxx, gen_helper_vsrar_w)
+TRANS(xvsrar_d, LASX, gen_xxx, gen_helper_vsrar_d)
+TRANS(xvsrari_b, LASX, gen_xx_i, gen_helper_vsrari_b)
+TRANS(xvsrari_h, LASX, gen_xx_i, gen_helper_vsrari_h)
+TRANS(xvsrari_w, LASX, gen_xx_i, gen_helper_vsrari_w)
+TRANS(xvsrari_d, LASX, gen_xx_i, gen_helper_vsrari_d)
TRANS(vsrln_b_h, LSX, gen_vvv, gen_helper_vsrln_b_h)
TRANS(vsrln_h_w, LSX, gen_vvv, gen_helper_vsrln_h_w)
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 38/57] target/loongarch: Implement xvsrln xvsran
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (36 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 37/57] target/loongarch: Implement xvsrlr xvsrar Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-07 8:31 ` [PATCH RESEND v5 39/57] target/loongarch: Implement xvsrlrn xvsrarn Song Gao
` (18 subsequent siblings)
56 siblings, 0 replies; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVSRLN.{B.H/H.W/W.D};
- XVSRAN.{B.H/H.W/W.D};
- XVSRLNI.{B.H/H.W/W.D/D.Q};
- XVSRANI.{B.H/H.W/W.D/D.Q}.
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/insns.decode | 16 ++
target/loongarch/disas.c | 16 ++
target/loongarch/vec_helper.c | 166 +++++++++++---------
target/loongarch/insn_trans/trans_vec.c.inc | 14 ++
4 files changed, 137 insertions(+), 75 deletions(-)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index ca0951e1cc..204dcfa075 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1678,6 +1678,22 @@ xvsrari_h 0111 01101010 10000 1 .... ..... ..... @vv_ui4
xvsrari_w 0111 01101010 10001 ..... ..... ..... @vv_ui5
xvsrari_d 0111 01101010 1001 ...... ..... ..... @vv_ui6
+xvsrln_b_h 0111 01001111 01001 ..... ..... ..... @vvv
+xvsrln_h_w 0111 01001111 01010 ..... ..... ..... @vvv
+xvsrln_w_d 0111 01001111 01011 ..... ..... ..... @vvv
+xvsran_b_h 0111 01001111 01101 ..... ..... ..... @vvv
+xvsran_h_w 0111 01001111 01110 ..... ..... ..... @vvv
+xvsran_w_d 0111 01001111 01111 ..... ..... ..... @vvv
+
+xvsrlni_b_h 0111 01110100 00000 1 .... ..... ..... @vv_ui4
+xvsrlni_h_w 0111 01110100 00001 ..... ..... ..... @vv_ui5
+xvsrlni_w_d 0111 01110100 0001 ...... ..... ..... @vv_ui6
+xvsrlni_d_q 0111 01110100 001 ....... ..... ..... @vv_ui7
+xvsrani_b_h 0111 01110101 10000 1 .... ..... ..... @vv_ui4
+xvsrani_h_w 0111 01110101 10001 ..... ..... ..... @vv_ui5
+xvsrani_w_d 0111 01110101 1001 ...... ..... ..... @vv_ui6
+xvsrani_d_q 0111 01110101 101 ....... ..... ..... @vv_ui7
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index bc5eb82b49..28e5e16eb2 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -2104,6 +2104,22 @@ INSN_LASX(xvsrari_h, vv_i)
INSN_LASX(xvsrari_w, vv_i)
INSN_LASX(xvsrari_d, vv_i)
+INSN_LASX(xvsrln_b_h, vvv)
+INSN_LASX(xvsrln_h_w, vvv)
+INSN_LASX(xvsrln_w_d, vvv)
+INSN_LASX(xvsran_b_h, vvv)
+INSN_LASX(xvsran_h_w, vvv)
+INSN_LASX(xvsran_w_d, vvv)
+
+INSN_LASX(xvsrlni_b_h, vv_i)
+INSN_LASX(xvsrlni_h_w, vv_i)
+INSN_LASX(xvsrlni_w_d, vv_i)
+INSN_LASX(xvsrlni_d_q, vv_i)
+INSN_LASX(xvsrani_b_h, vv_i)
+INSN_LASX(xvsrani_h_w, vv_i)
+INSN_LASX(xvsrani_w_d, vv_i)
+INSN_LASX(xvsrani_d_q, vv_i)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index bb30d24b89..8c405ce32b 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -1109,105 +1109,121 @@ VSRARI(vsrari_d, 64, D)
#define R_SHIFT(a, b) (a >> b)
-#define VSRLN(NAME, BIT, T, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E1(i) = R_SHIFT((T)Vj->E2(i),((T)Vk->E2(i)) % BIT); \
- } \
- Vd->D(1) = 0; \
+#define VSRLN(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ Vd->E1(j + ofs * 2 * i) = R_SHIFT(Vj->E2(j + ofs * i), \
+ Vk->E2(j + ofs * i) % BIT); \
+ } \
+ Vd->D(2 * i + 1) = 0; \
+ } \
}
-VSRLN(vsrln_b_h, 16, uint16_t, B, H)
-VSRLN(vsrln_h_w, 32, uint32_t, H, W)
-VSRLN(vsrln_w_d, 64, uint64_t, W, D)
+VSRLN(vsrln_b_h, 16, B, UH)
+VSRLN(vsrln_h_w, 32, H, UW)
+VSRLN(vsrln_w_d, 64, W, UD)
-#define VSRAN(NAME, BIT, T, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E1(i) = R_SHIFT(Vj->E2(i), ((T)Vk->E2(i)) % BIT); \
- } \
- Vd->D(1) = 0; \
+#define VSRAN(NAME, BIT, E1, E2, E3) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ Vd->E1(j + ofs * 2 * i) = R_SHIFT(Vj->E2(j + ofs * i), \
+ Vk->E3(j + ofs * i) % BIT); \
+ } \
+ Vd->D(2 * i + 1) = 0; \
+ } \
}
-VSRAN(vsran_b_h, 16, uint16_t, B, H)
-VSRAN(vsran_h_w, 32, uint32_t, H, W)
-VSRAN(vsran_w_d, 64, uint64_t, W, D)
+VSRAN(vsran_b_h, 16, B, H, UH)
+VSRAN(vsran_h_w, 32, H, W, UW)
+VSRAN(vsran_w_d, 64, W, D, UD)
-#define VSRLNI(NAME, BIT, T, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i, max; \
- VReg temp; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- \
- temp.D(0) = 0; \
- temp.D(1) = 0; \
- max = LSX_LEN/BIT; \
- for (i = 0; i < max; i++) { \
- temp.E1(i) = R_SHIFT((T)Vj->E2(i), imm); \
- temp.E1(i + max) = R_SHIFT((T)Vd->E2(i), imm); \
- } \
- *Vd = temp; \
+#define VSRLNI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E1(j + ofs * 2 * i) = R_SHIFT(Vj->E2(j + ofs * i), imm); \
+ temp.E1(j + ofs * (2 * i + 1)) = R_SHIFT(Vd->E2(j + ofs * i), \
+ imm); \
+ } \
+ } \
+ *Vd = temp; \
}
void HELPER(vsrlni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
- VReg temp;
+ int i;
+ VReg temp = {};
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
- temp.D(0) = 0;
- temp.D(1) = 0;
- temp.D(0) = int128_getlo(int128_urshift(Vj->Q(0), imm % 128));
- temp.D(1) = int128_getlo(int128_urshift(Vd->Q(0), imm % 128));
+ for (i = 0; i < simd_oprsz(desc) / 16; i++) { /* per 128-bit lane */
+ temp.D(2 * i) = int128_getlo(int128_urshift(Vj->Q(i), imm % 128));
+ temp.D(2 * i + 1) = int128_getlo(int128_urshift(Vd->Q(i), imm % 128));
+ }
*Vd = temp;
}
-VSRLNI(vsrlni_b_h, 16, uint16_t, B, H)
-VSRLNI(vsrlni_h_w, 32, uint32_t, H, W)
-VSRLNI(vsrlni_w_d, 64, uint64_t, W, D)
+VSRLNI(vsrlni_b_h, 16, B, UH)
+VSRLNI(vsrlni_h_w, 32, H, UW)
+VSRLNI(vsrlni_w_d, 64, W, UD)
-#define VSRANI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i, max; \
- VReg temp; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- \
- temp.D(0) = 0; \
- temp.D(1) = 0; \
- max = LSX_LEN/BIT; \
- for (i = 0; i < max; i++) { \
- temp.E1(i) = R_SHIFT(Vj->E2(i), imm); \
- temp.E1(i + max) = R_SHIFT(Vd->E2(i), imm); \
- } \
- *Vd = temp; \
+#define VSRANI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E1(j + ofs * 2 * i) = R_SHIFT(Vj->E2(j + ofs * i), imm); \
+ temp.E1(j + ofs * (2 * i + 1)) = R_SHIFT(Vd->E2(j + ofs * i), \
+ imm); \
+ } \
+ } \
+ *Vd = temp; \
}
void HELPER(vsrani_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
- VReg temp;
+ int i;
+ VReg temp = {};
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
- temp.D(0) = 0;
- temp.D(1) = 0;
- temp.D(0) = int128_getlo(int128_rshift(Vj->Q(0), imm % 128));
- temp.D(1) = int128_getlo(int128_rshift(Vd->Q(0), imm % 128));
+ for (i = 0; i < simd_oprsz(desc) / 16; i++) { /* per 128-bit lane */
+ temp.D(2 * i) = int128_getlo(int128_rshift(Vj->Q(i), imm % 128));
+ temp.D(2 * i + 1) = int128_getlo(int128_rshift(Vd->Q(i), imm % 128));
+ }
*Vd = temp;
}
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index 9d95c42708..01934e5f97 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -3835,6 +3835,12 @@ TRANS(vsrln_w_d, LSX, gen_vvv, gen_helper_vsrln_w_d)
TRANS(vsran_b_h, LSX, gen_vvv, gen_helper_vsran_b_h)
TRANS(vsran_h_w, LSX, gen_vvv, gen_helper_vsran_h_w)
TRANS(vsran_w_d, LSX, gen_vvv, gen_helper_vsran_w_d)
+TRANS(xvsrln_b_h, LASX, gen_xxx, gen_helper_vsrln_b_h)
+TRANS(xvsrln_h_w, LASX, gen_xxx, gen_helper_vsrln_h_w)
+TRANS(xvsrln_w_d, LASX, gen_xxx, gen_helper_vsrln_w_d)
+TRANS(xvsran_b_h, LASX, gen_xxx, gen_helper_vsran_b_h)
+TRANS(xvsran_h_w, LASX, gen_xxx, gen_helper_vsran_h_w)
+TRANS(xvsran_w_d, LASX, gen_xxx, gen_helper_vsran_w_d)
TRANS(vsrlni_b_h, LSX, gen_vv_i, gen_helper_vsrlni_b_h)
TRANS(vsrlni_h_w, LSX, gen_vv_i, gen_helper_vsrlni_h_w)
@@ -3844,6 +3850,14 @@ TRANS(vsrani_b_h, LSX, gen_vv_i, gen_helper_vsrani_b_h)
TRANS(vsrani_h_w, LSX, gen_vv_i, gen_helper_vsrani_h_w)
TRANS(vsrani_w_d, LSX, gen_vv_i, gen_helper_vsrani_w_d)
TRANS(vsrani_d_q, LSX, gen_vv_i, gen_helper_vsrani_d_q)
+TRANS(xvsrlni_b_h, LASX, gen_xx_i, gen_helper_vsrlni_b_h)
+TRANS(xvsrlni_h_w, LASX, gen_xx_i, gen_helper_vsrlni_h_w)
+TRANS(xvsrlni_w_d, LASX, gen_xx_i, gen_helper_vsrlni_w_d)
+TRANS(xvsrlni_d_q, LASX, gen_xx_i, gen_helper_vsrlni_d_q)
+TRANS(xvsrani_b_h, LASX, gen_xx_i, gen_helper_vsrani_b_h)
+TRANS(xvsrani_h_w, LASX, gen_xx_i, gen_helper_vsrani_h_w)
+TRANS(xvsrani_w_d, LASX, gen_xx_i, gen_helper_vsrani_w_d)
+TRANS(xvsrani_d_q, LASX, gen_xx_i, gen_helper_vsrani_d_q)
TRANS(vsrlrn_b_h, LSX, gen_vvv, gen_helper_vsrlrn_b_h)
TRANS(vsrlrn_h_w, LSX, gen_vvv, gen_helper_vsrlrn_h_w)
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 39/57] target/loongarch: Implement xvsrlrn xvsrarn
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (37 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 38/57] target/loongarch: Implement xvsrln xvsran Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-07 8:31 ` [PATCH RESEND v5 40/57] target/loongarch: Implement xvssrln xvssran Song Gao
` (17 subsequent siblings)
56 siblings, 0 replies; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVSRLRN.{B.H/H.W/W.D};
- XVSRARN.{B.H/H.W/W.D};
- XVSRLRNI.{B.H/H.W/W.D/D.Q};
- XVSRARNI.{B.H/H.W/W.D/D.Q}.
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/insns.decode | 16 ++
target/loongarch/disas.c | 16 ++
target/loongarch/vec_helper.c | 198 +++++++++++---------
target/loongarch/insn_trans/trans_vec.c.inc | 14 ++
4 files changed, 159 insertions(+), 85 deletions(-)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 204dcfa075..d7c50b14ca 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1694,6 +1694,22 @@ xvsrani_h_w 0111 01110101 10001 ..... ..... ..... @vv_ui5
xvsrani_w_d 0111 01110101 1001 ...... ..... ..... @vv_ui6
xvsrani_d_q 0111 01110101 101 ....... ..... ..... @vv_ui7
+xvsrlrn_b_h 0111 01001111 10001 ..... ..... ..... @vvv
+xvsrlrn_h_w 0111 01001111 10010 ..... ..... ..... @vvv
+xvsrlrn_w_d 0111 01001111 10011 ..... ..... ..... @vvv
+xvsrarn_b_h 0111 01001111 10101 ..... ..... ..... @vvv
+xvsrarn_h_w 0111 01001111 10110 ..... ..... ..... @vvv
+xvsrarn_w_d 0111 01001111 10111 ..... ..... ..... @vvv
+
+xvsrlrni_b_h 0111 01110100 01000 1 .... ..... ..... @vv_ui4
+xvsrlrni_h_w 0111 01110100 01001 ..... ..... ..... @vv_ui5
+xvsrlrni_w_d 0111 01110100 0101 ...... ..... ..... @vv_ui6
+xvsrlrni_d_q 0111 01110100 011 ....... ..... ..... @vv_ui7
+xvsrarni_b_h 0111 01110101 11000 1 .... ..... ..... @vv_ui4
+xvsrarni_h_w 0111 01110101 11001 ..... ..... ..... @vv_ui5
+xvsrarni_w_d 0111 01110101 1101 ...... ..... ..... @vv_ui6
+xvsrarni_d_q 0111 01110101 111 ....... ..... ..... @vv_ui7
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 28e5e16eb2..e7b5974eaa 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -2120,6 +2120,22 @@ INSN_LASX(xvsrani_h_w, vv_i)
INSN_LASX(xvsrani_w_d, vv_i)
INSN_LASX(xvsrani_d_q, vv_i)
+INSN_LASX(xvsrlrn_b_h, vvv)
+INSN_LASX(xvsrlrn_h_w, vvv)
+INSN_LASX(xvsrlrn_w_d, vvv)
+INSN_LASX(xvsrarn_b_h, vvv)
+INSN_LASX(xvsrarn_h_w, vvv)
+INSN_LASX(xvsrarn_w_d, vvv)
+
+INSN_LASX(xvsrlrni_b_h, vv_i)
+INSN_LASX(xvsrlrni_h_w, vv_i)
+INSN_LASX(xvsrlrni_w_d, vv_i)
+INSN_LASX(xvsrlrni_d_q, vv_i)
+INSN_LASX(xvsrarni_b_h, vv_i)
+INSN_LASX(xvsrarni_h_w, vv_i)
+INSN_LASX(xvsrarni_w_d, vv_i)
+INSN_LASX(xvsrarni_d_q, vv_i)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index 8c405ce32b..a3f9b396fa 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -1231,76 +1231,95 @@ VSRANI(vsrani_b_h, 16, B, H)
VSRANI(vsrani_h_w, 32, H, W)
VSRANI(vsrani_w_d, 64, W, D)
-#define VSRLRN(NAME, BIT, T, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E1(i) = do_vsrlr_ ## E2(Vj->E2(i), ((T)Vk->E2(i))%BIT); \
- } \
- Vd->D(1) = 0; \
+#define VSRLRN(NAME, BIT, E1, E2, E3) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ Vd->E1(j + ofs * 2 * i) = do_vsrlr_ ##E2(Vj->E2(j + ofs * i), \
+ Vk->E3(j + ofs * i) % BIT); \
+ } \
+ Vd->D(2 * i + 1) = 0; \
+ } \
}
-VSRLRN(vsrlrn_b_h, 16, uint16_t, B, H)
-VSRLRN(vsrlrn_h_w, 32, uint32_t, H, W)
-VSRLRN(vsrlrn_w_d, 64, uint64_t, W, D)
+VSRLRN(vsrlrn_b_h, 16, B, H, UH)
+VSRLRN(vsrlrn_h_w, 32, H, W, UW)
+VSRLRN(vsrlrn_w_d, 64, W, D, UD)
-#define VSRARN(NAME, BIT, T, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E1(i) = do_vsrar_ ## E2(Vj->E2(i), ((T)Vk->E2(i))%BIT); \
- } \
- Vd->D(1) = 0; \
+#define VSRARN(NAME, BIT, E1, E2, E3) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ Vd->E1(j + ofs * 2 * i) = do_vsrar_ ## E2(Vj->E2(j + ofs * i), \
+ Vk->E3(j + ofs * i) % BIT); \
+ } \
+ Vd->D(2 * i + 1) = 0; \
+ } \
}
-VSRARN(vsrarn_b_h, 16, uint8_t, B, H)
-VSRARN(vsrarn_h_w, 32, uint16_t, H, W)
-VSRARN(vsrarn_w_d, 64, uint32_t, W, D)
-
-#define VSRLRNI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i, max; \
- VReg temp; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- \
- temp.D(0) = 0; \
- temp.D(1) = 0; \
- max = LSX_LEN/BIT; \
- for (i = 0; i < max; i++) { \
- temp.E1(i) = do_vsrlr_ ## E2(Vj->E2(i), imm); \
- temp.E1(i + max) = do_vsrlr_ ## E2(Vd->E2(i), imm); \
- } \
- *Vd = temp; \
+VSRARN(vsrarn_b_h, 16, B, H, UH)
+VSRARN(vsrarn_h_w, 32, H, W, UW)
+VSRARN(vsrarn_w_d, 64, W, D, UD)
+
+#define VSRLRNI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E1(j + ofs * 2 * i) = do_vsrlr_ ## E2(Vj->E2(j + ofs * i), imm); \
+ temp.E1(j + ofs * (2 * i + 1)) = do_vsrlr_ ## E2(Vd->E2(j + ofs * i), \
+ imm); \
+ } \
+ } \
+ *Vd = temp; \
}
void HELPER(vsrlrni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
- VReg temp;
+ int i;
+ VReg temp = {};
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
- Int128 r1, r2;
-
- if (imm == 0) {
- temp.D(0) = int128_getlo(Vj->Q(0));
- temp.D(1) = int128_getlo(Vd->Q(0));
- } else {
- r1 = int128_and(int128_urshift(Vj->Q(0), (imm -1)), int128_one());
- r2 = int128_and(int128_urshift(Vd->Q(0), (imm -1)), int128_one());
+ Int128 r[4];
+ int oprsz = simd_oprsz(desc);
- temp.D(0) = int128_getlo(int128_add(int128_urshift(Vj->Q(0), imm), r1));
- temp.D(1) = int128_getlo(int128_add(int128_urshift(Vd->Q(0), imm), r2));
+ for (i = 0; i < oprsz / 16; i++) {
+ if (imm == 0) {
+ temp.D(2 * i) = int128_getlo(Vj->Q(i));
+ temp.D(2 * i + 1) = int128_getlo(Vd->Q(i));
+ } else {
+ r[2 * i] = int128_and(int128_urshift(Vj->Q(i), (imm - 1)),
+ int128_one());
+ r[2 * i + 1] = int128_and(int128_urshift(Vd->Q(i), (imm - 1)),
+ int128_one());
+ temp.D(2 * i) = int128_getlo(int128_add(int128_urshift(Vj->Q(i),
+ imm), r[2 * i]));
+ temp.D(2 * i + 1) = int128_getlo(int128_add(int128_urshift(Vd->Q(i),
+ imm), r[ 2 * i + 1]));
+ }
}
*Vd = temp;
}
@@ -1309,40 +1328,49 @@ VSRLRNI(vsrlrni_b_h, 16, B, H)
VSRLRNI(vsrlrni_h_w, 32, H, W)
VSRLRNI(vsrlrni_w_d, 64, W, D)
-#define VSRARNI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i, max; \
- VReg temp; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- \
- temp.D(0) = 0; \
- temp.D(1) = 0; \
- max = LSX_LEN/BIT; \
- for (i = 0; i < max; i++) { \
- temp.E1(i) = do_vsrar_ ## E2(Vj->E2(i), imm); \
- temp.E1(i + max) = do_vsrar_ ## E2(Vd->E2(i), imm); \
- } \
- *Vd = temp; \
+#define VSRARNI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E1(j + ofs * 2 * i) = do_vsrar_ ## E2(Vj->E2(j + ofs * i), imm); \
+ temp.E1(j + ofs * (2 * i + 1)) = do_vsrar_ ## E2(Vd->E2(j + ofs * i), \
+ imm); \
+ } \
+ } \
+ *Vd = temp; \
}
void HELPER(vsrarni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
- VReg temp;
+ int i;
+ VReg temp = {};
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
- Int128 r1, r2;
-
- if (imm == 0) {
- temp.D(0) = int128_getlo(Vj->Q(0));
- temp.D(1) = int128_getlo(Vd->Q(0));
- } else {
- r1 = int128_and(int128_rshift(Vj->Q(0), (imm -1)), int128_one());
- r2 = int128_and(int128_rshift(Vd->Q(0), (imm -1)), int128_one());
+ Int128 r[4];
+ int oprsz = simd_oprsz(desc);
- temp.D(0) = int128_getlo(int128_add(int128_rshift(Vj->Q(0), imm), r1));
- temp.D(1) = int128_getlo(int128_add(int128_rshift(Vd->Q(0), imm), r2));
+ for (i = 0; i < oprsz / 16; i++) {
+ if (imm == 0) {
+ temp.D(2 * i) = int128_getlo(Vj->Q(i));
+ temp.D(2 * i + 1) = int128_getlo(Vd->Q(i));
+ } else {
+ r[2 * i] = int128_and(int128_rshift(Vj->Q(i), (imm - 1)),
+ int128_one());
+ r[2 * i + 1] = int128_and(int128_rshift(Vd->Q(i), (imm - 1)),
+ int128_one());
+ temp.D(2 * i) = int128_getlo(int128_add(int128_rshift(Vj->Q(i),
+ imm), r[2 * i]));
+ temp.D(2 * i + 1) = int128_getlo(int128_add(int128_rshift(Vd->Q(i),
+ imm), r[2 * i + 1]));
+ }
}
*Vd = temp;
}
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index 01934e5f97..c60a48a16a 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -3865,6 +3865,12 @@ TRANS(vsrlrn_w_d, LSX, gen_vvv, gen_helper_vsrlrn_w_d)
TRANS(vsrarn_b_h, LSX, gen_vvv, gen_helper_vsrarn_b_h)
TRANS(vsrarn_h_w, LSX, gen_vvv, gen_helper_vsrarn_h_w)
TRANS(vsrarn_w_d, LSX, gen_vvv, gen_helper_vsrarn_w_d)
+TRANS(xvsrlrn_b_h, LASX, gen_xxx, gen_helper_vsrlrn_b_h)
+TRANS(xvsrlrn_h_w, LASX, gen_xxx, gen_helper_vsrlrn_h_w)
+TRANS(xvsrlrn_w_d, LASX, gen_xxx, gen_helper_vsrlrn_w_d)
+TRANS(xvsrarn_b_h, LASX, gen_xxx, gen_helper_vsrarn_b_h)
+TRANS(xvsrarn_h_w, LASX, gen_xxx, gen_helper_vsrarn_h_w)
+TRANS(xvsrarn_w_d, LASX, gen_xxx, gen_helper_vsrarn_w_d)
TRANS(vsrlrni_b_h, LSX, gen_vv_i, gen_helper_vsrlrni_b_h)
TRANS(vsrlrni_h_w, LSX, gen_vv_i, gen_helper_vsrlrni_h_w)
@@ -3874,6 +3880,14 @@ TRANS(vsrarni_b_h, LSX, gen_vv_i, gen_helper_vsrarni_b_h)
TRANS(vsrarni_h_w, LSX, gen_vv_i, gen_helper_vsrarni_h_w)
TRANS(vsrarni_w_d, LSX, gen_vv_i, gen_helper_vsrarni_w_d)
TRANS(vsrarni_d_q, LSX, gen_vv_i, gen_helper_vsrarni_d_q)
+TRANS(xvsrlrni_b_h, LASX, gen_xx_i, gen_helper_vsrlrni_b_h)
+TRANS(xvsrlrni_h_w, LASX, gen_xx_i, gen_helper_vsrlrni_h_w)
+TRANS(xvsrlrni_w_d, LASX, gen_xx_i, gen_helper_vsrlrni_w_d)
+TRANS(xvsrlrni_d_q, LASX, gen_xx_i, gen_helper_vsrlrni_d_q)
+TRANS(xvsrarni_b_h, LASX, gen_xx_i, gen_helper_vsrarni_b_h)
+TRANS(xvsrarni_h_w, LASX, gen_xx_i, gen_helper_vsrarni_h_w)
+TRANS(xvsrarni_w_d, LASX, gen_xx_i, gen_helper_vsrarni_w_d)
+TRANS(xvsrarni_d_q, LASX, gen_xx_i, gen_helper_vsrarni_d_q)
TRANS(vssrln_b_h, LSX, gen_vvv, gen_helper_vssrln_b_h)
TRANS(vssrln_h_w, LSX, gen_vvv, gen_helper_vssrln_h_w)
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 40/57] target/loongarch: Implement xvssrln xvssran
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (38 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 39/57] target/loongarch: Implement xvsrlrn xvsrarn Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-11 22:07 ` Richard Henderson
2023-09-07 8:31 ` [PATCH RESEND v5 41/57] target/loongarch: Implement xvssrlrn xvssrarn Song Gao
` (16 subsequent siblings)
56 siblings, 1 reply; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVSSRLN.{B.H/H.W/W.D};
- XVSSRAN.{B.H/H.W/W.D};
- XVSSRLN.{BU.H/HU.W/WU.D};
- XVSSRAN.{BU.H/HU.W/WU.D};
- XVSSRLNI.{B.H/H.W/W.D/D.Q};
- XVSSRANI.{B.H/H.W/W.D/D.Q};
- XVSSRLNI.{BU.H/HU.W/WU.D/DU.Q};
- XVSSRANI.{BU.H/HU.W/WU.D/DU.Q}.
Signed-off-by: Song Gao <gaosong@loongson.cn>
---
target/loongarch/insns.decode | 30 ++
target/loongarch/disas.c | 30 ++
target/loongarch/vec_helper.c | 456 ++++++++++++--------
target/loongarch/insn_trans/trans_vec.c.inc | 28 ++
4 files changed, 353 insertions(+), 191 deletions(-)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index d7c50b14ca..022dd9bfd1 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1710,6 +1710,36 @@ xvsrarni_h_w 0111 01110101 11001 ..... ..... ..... @vv_ui5
xvsrarni_w_d 0111 01110101 1101 ...... ..... ..... @vv_ui6
xvsrarni_d_q 0111 01110101 111 ....... ..... ..... @vv_ui7
+xvssrln_b_h 0111 01001111 11001 ..... ..... ..... @vvv
+xvssrln_h_w 0111 01001111 11010 ..... ..... ..... @vvv
+xvssrln_w_d 0111 01001111 11011 ..... ..... ..... @vvv
+xvssran_b_h 0111 01001111 11101 ..... ..... ..... @vvv
+xvssran_h_w 0111 01001111 11110 ..... ..... ..... @vvv
+xvssran_w_d 0111 01001111 11111 ..... ..... ..... @vvv
+xvssrln_bu_h 0111 01010000 01001 ..... ..... ..... @vvv
+xvssrln_hu_w 0111 01010000 01010 ..... ..... ..... @vvv
+xvssrln_wu_d 0111 01010000 01011 ..... ..... ..... @vvv
+xvssran_bu_h 0111 01010000 01101 ..... ..... ..... @vvv
+xvssran_hu_w 0111 01010000 01110 ..... ..... ..... @vvv
+xvssran_wu_d 0111 01010000 01111 ..... ..... ..... @vvv
+
+xvssrlni_b_h 0111 01110100 10000 1 .... ..... ..... @vv_ui4
+xvssrlni_h_w 0111 01110100 10001 ..... ..... ..... @vv_ui5
+xvssrlni_w_d 0111 01110100 1001 ...... ..... ..... @vv_ui6
+xvssrlni_d_q 0111 01110100 101 ....... ..... ..... @vv_ui7
+xvssrani_b_h 0111 01110110 00000 1 .... ..... ..... @vv_ui4
+xvssrani_h_w 0111 01110110 00001 ..... ..... ..... @vv_ui5
+xvssrani_w_d 0111 01110110 0001 ...... ..... ..... @vv_ui6
+xvssrani_d_q 0111 01110110 001 ....... ..... ..... @vv_ui7
+xvssrlni_bu_h 0111 01110100 11000 1 .... ..... ..... @vv_ui4
+xvssrlni_hu_w 0111 01110100 11001 ..... ..... ..... @vv_ui5
+xvssrlni_wu_d 0111 01110100 1101 ...... ..... ..... @vv_ui6
+xvssrlni_du_q 0111 01110100 111 ....... ..... ..... @vv_ui7
+xvssrani_bu_h 0111 01110110 01000 1 .... ..... ..... @vv_ui4
+xvssrani_hu_w 0111 01110110 01001 ..... ..... ..... @vv_ui5
+xvssrani_wu_d 0111 01110110 0101 ...... ..... ..... @vv_ui6
+xvssrani_du_q 0111 01110110 011 ....... ..... ..... @vv_ui7
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index e7b5974eaa..c02f31019f 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -2136,6 +2136,36 @@ INSN_LASX(xvsrarni_h_w, vv_i)
INSN_LASX(xvsrarni_w_d, vv_i)
INSN_LASX(xvsrarni_d_q, vv_i)
+INSN_LASX(xvssrln_b_h, vvv)
+INSN_LASX(xvssrln_h_w, vvv)
+INSN_LASX(xvssrln_w_d, vvv)
+INSN_LASX(xvssran_b_h, vvv)
+INSN_LASX(xvssran_h_w, vvv)
+INSN_LASX(xvssran_w_d, vvv)
+INSN_LASX(xvssrln_bu_h, vvv)
+INSN_LASX(xvssrln_hu_w, vvv)
+INSN_LASX(xvssrln_wu_d, vvv)
+INSN_LASX(xvssran_bu_h, vvv)
+INSN_LASX(xvssran_hu_w, vvv)
+INSN_LASX(xvssran_wu_d, vvv)
+
+INSN_LASX(xvssrlni_b_h, vv_i)
+INSN_LASX(xvssrlni_h_w, vv_i)
+INSN_LASX(xvssrlni_w_d, vv_i)
+INSN_LASX(xvssrlni_d_q, vv_i)
+INSN_LASX(xvssrani_b_h, vv_i)
+INSN_LASX(xvssrani_h_w, vv_i)
+INSN_LASX(xvssrani_w_d, vv_i)
+INSN_LASX(xvssrani_d_q, vv_i)
+INSN_LASX(xvssrlni_bu_h, vv_i)
+INSN_LASX(xvssrlni_hu_w, vv_i)
+INSN_LASX(xvssrlni_wu_d, vv_i)
+INSN_LASX(xvssrlni_du_q, vv_i)
+INSN_LASX(xvssrani_bu_h, vv_i)
+INSN_LASX(xvssrani_hu_w, vv_i)
+INSN_LASX(xvssrani_wu_d, vv_i)
+INSN_LASX(xvssrani_du_q, vv_i)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index a3f9b396fa..e8dd95eaed 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -1401,23 +1401,29 @@ SSRLNS(B, uint16_t, int16_t, uint8_t)
SSRLNS(H, uint32_t, int32_t, uint16_t)
SSRLNS(W, uint64_t, int64_t, uint32_t)
-#define VSSRLN(NAME, BIT, T, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E1(i) = do_ssrlns_ ## E1(Vj->E2(i), (T)Vk->E2(i)% BIT, BIT/2 -1); \
- } \
- Vd->D(1) = 0; \
+#define VSSRLN(NAME, BIT, E1, E2, E3) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ Vd->E1(j + ofs * 2 * i) = do_ssrlns_ ## E1(Vj->E2(j + ofs * i), \
+ Vk->E3(j + ofs * i) % BIT, \
+ BIT / 2 - 1); \
+ } \
+ Vd->D(2 * i + 1) = 0; \
+ } \
}
-VSSRLN(vssrln_b_h, 16, uint16_t, B, H)
-VSSRLN(vssrln_h_w, 32, uint32_t, H, W)
-VSSRLN(vssrln_w_d, 64, uint64_t, W, D)
+VSSRLN(vssrln_b_h, 16, B, H, UH)
+VSSRLN(vssrln_h_w, 32, H, W, UW)
+VSSRLN(vssrln_w_d, 64, W, D, UD)
#define SSRANS(E, T1, T2) \
static T1 do_ssrans_ ## E(T1 e2, int sa, int sh) \
@@ -1429,10 +1435,10 @@ static T1 do_ssrans_ ## E(T1 e2, int sa, int sh) \
shft_res = e2 >> sa; \
} \
T2 mask; \
- mask = (1ll << sh) -1; \
+ mask = (1ll << sh) - 1; \
if (shft_res > mask) { \
return mask; \
- } else if (shft_res < -(mask +1)) { \
+ } else if (shft_res < -(mask + 1)) { \
return ~mask; \
} else { \
return shft_res; \
@@ -1443,23 +1449,29 @@ SSRANS(B, int16_t, int8_t)
SSRANS(H, int32_t, int16_t)
SSRANS(W, int64_t, int32_t)
-#define VSSRAN(NAME, BIT, T, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E1(i) = do_ssrans_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \
- } \
- Vd->D(1) = 0; \
+#define VSSRAN(NAME, BIT, E1, E2, E3) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ Vd->E1(j + ofs * 2 * i) = do_ssrans_ ## E1(Vj->E2(j + ofs * i), \
+ Vk->E3(j + ofs * i) % BIT, \
+ BIT / 2 - 1); \
+ } \
+ Vd->D(2 * i + 1) = 0; \
+ } \
}
-VSSRAN(vssran_b_h, 16, uint16_t, B, H)
-VSSRAN(vssran_h_w, 32, uint32_t, H, W)
-VSSRAN(vssran_w_d, 64, uint64_t, W, D)
+VSSRAN(vssran_b_h, 16, B, H, UH)
+VSSRAN(vssran_h_w, 32, H, W, UW)
+VSSRAN(vssran_w_d, 64, W, D, UD)
#define SSRLNU(E, T1, T2, T3) \
static T1 do_ssrlnu_ ## E(T3 e2, int sa, int sh) \
@@ -1471,7 +1483,7 @@ static T1 do_ssrlnu_ ## E(T3 e2, int sa, int sh) \
shft_res = (((T1)e2) >> sa); \
} \
T2 mask; \
- mask = (1ull << sh) -1; \
+ mask = (1ull << sh) - 1; \
if (shft_res > mask) { \
return mask; \
} else { \
@@ -1483,23 +1495,29 @@ SSRLNU(B, uint16_t, uint8_t, int16_t)
SSRLNU(H, uint32_t, uint16_t, int32_t)
SSRLNU(W, uint64_t, uint32_t, int64_t)
-#define VSSRLNU(NAME, BIT, T, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E1(i) = do_ssrlnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \
- } \
- Vd->D(1) = 0; \
+#define VSSRLNU(NAME, BIT, E1, E2, E3) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ Vd->E1(j + ofs * 2 * i) = do_ssrlnu_ ## E1(Vj->E2(j + ofs * i), \
+ Vk->E3(j + ofs * i) % BIT, \
+ BIT / 2); \
+ } \
+ Vd->D(2 * i + 1) = 0; \
+ } \
}
-VSSRLNU(vssrln_bu_h, 16, uint16_t, B, H)
-VSSRLNU(vssrln_hu_w, 32, uint32_t, H, W)
-VSSRLNU(vssrln_wu_d, 64, uint64_t, W, D)
+VSSRLNU(vssrln_bu_h, 16, B, H, UH)
+VSSRLNU(vssrln_hu_w, 32, H, W, UW)
+VSSRLNU(vssrln_wu_d, 64, W, D, UD)
#define SSRANU(E, T1, T2, T3) \
static T1 do_ssranu_ ## E(T3 e2, int sa, int sh) \
@@ -1514,7 +1532,7 @@ static T1 do_ssranu_ ## E(T3 e2, int sa, int sh) \
shft_res = 0; \
} \
T2 mask; \
- mask = (1ull << sh) -1; \
+ mask = (1ull << sh) - 1; \
if (shft_res > mask) { \
return mask; \
} else { \
@@ -1526,64 +1544,89 @@ SSRANU(B, uint16_t, uint8_t, int16_t)
SSRANU(H, uint32_t, uint16_t, int32_t)
SSRANU(W, uint64_t, uint32_t, int64_t)
-#define VSSRANU(NAME, BIT, T, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E1(i) = do_ssranu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \
- } \
- Vd->D(1) = 0; \
-}
-
-VSSRANU(vssran_bu_h, 16, uint16_t, B, H)
-VSSRANU(vssran_hu_w, 32, uint32_t, H, W)
-VSSRANU(vssran_wu_d, 64, uint64_t, W, D)
-
-#define VSSRLNI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- temp.E1(i) = do_ssrlns_ ## E1(Vj->E2(i), imm, BIT/2 -1); \
- temp.E1(i + LSX_LEN/BIT) = do_ssrlns_ ## E1(Vd->E2(i), imm, BIT/2 -1);\
- } \
- *Vd = temp; \
+#define VSSRANU(NAME, BIT, E1, E2, E3) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ Vd->E1(j + ofs * 2 * i) = do_ssranu_ ## E1(Vj->E2(j + ofs * i), \
+ Vk->E3(j + ofs * i) % BIT, \
+ BIT / 2); \
+ } \
+ Vd->D(2 * i + 1) = 0; \
+ } \
}
-void HELPER(vssrlni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
-{
- Int128 shft_res1, shft_res2, mask;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
+VSSRANU(vssran_bu_h, 16, B, H, UH)
+VSSRANU(vssran_hu_w, 32, H, W, UW)
+VSSRANU(vssran_wu_d, 64, W, D, UD)
+
+#define VSSRLNI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E1(j + ofs * 2 * i) = do_ssrlns_ ## E1(Vj->E2(j + ofs * i), \
+ imm, BIT / 2 - 1); \
+ temp.E1(j + ofs * (2 * i + 1)) = do_ssrlns_ ## E1(Vd->E2(j + ofs * i), \
+ imm, BIT / 2 - 1); \
+ } \
+ } \
+ *Vd = temp; \
+}
+
+static void do_vssrlni_q(VReg *Vd, VReg *Vj,
+ uint64_t imm, int idx, Int128 mask)
+{
+ Int128 shft_res1, shft_res2;
if (imm == 0) {
- shft_res1 = Vj->Q(0);
- shft_res2 = Vd->Q(0);
+ shft_res1 = Vj->Q(idx);
+ shft_res2 = Vd->Q(idx);
} else {
- shft_res1 = int128_urshift(Vj->Q(0), imm);
- shft_res2 = int128_urshift(Vd->Q(0), imm);
+ shft_res1 = int128_urshift(Vj->Q(idx), imm);
+ shft_res2 = int128_urshift(Vd->Q(idx), imm);
}
- mask = int128_sub(int128_lshift(int128_one(), 63), int128_one());
if (int128_ult(mask, shft_res1)) {
- Vd->D(0) = int128_getlo(mask);
+ Vd->D(idx * 2) = int128_getlo(mask);
}else {
- Vd->D(0) = int128_getlo(shft_res1);
+ Vd->D(idx * 2) = int128_getlo(shft_res1);
}
if (int128_ult(mask, shft_res2)) {
- Vd->D(1) = int128_getlo(mask);
+ Vd->D(idx * 2 + 1) = int128_getlo(mask);
}else {
- Vd->D(1) = int128_getlo(shft_res2);
+ Vd->D(idx * 2 + 1) = int128_getlo(shft_res2);
+ }
+}
+
+void HELPER(vssrlni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+ int i;
+ Int128 mask;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+
+ mask = int128_sub(int128_lshift(int128_one(), 63), int128_one());
+
+ for (i = 0; i < oprsz / 16; i++) {
+ do_vssrlni_q(Vd, Vj, imm, i, mask);
}
}
@@ -1591,98 +1634,111 @@ VSSRLNI(vssrlni_b_h, 16, B, H)
VSSRLNI(vssrlni_h_w, 32, H, W)
VSSRLNI(vssrlni_w_d, 64, W, D)
-#define VSSRANI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- temp.E1(i) = do_ssrans_ ## E1(Vj->E2(i), imm, BIT/2 -1); \
- temp.E1(i + LSX_LEN/BIT) = do_ssrans_ ## E1(Vd->E2(i), imm, BIT/2 -1); \
- } \
- *Vd = temp; \
-}
-
-void HELPER(vssrani_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
-{
- Int128 shft_res1, shft_res2, mask, min;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
+#define VSSRANI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E1(j + ofs * 2 * i) = do_ssrans_ ## E1(Vj->E2(j + ofs * i), \
+ imm, BIT / 2 - 1); \
+ temp.E1(j + ofs * (2 * i + 1)) = do_ssrans_ ## E1(Vd->E2(j + ofs * i), \
+ imm, BIT / 2 - 1); \
+ } \
+ } \
+ *Vd = temp; \
+}
+
+static void do_vssrani_d_q(VReg *Vd, VReg *Vj,
+ uint64_t imm, int idx, Int128 mask, Int128 min)
+{
+ Int128 shft_res1, shft_res2;
if (imm == 0) {
- shft_res1 = Vj->Q(0);
- shft_res2 = Vd->Q(0);
+ shft_res1 = Vj->Q(idx);
+ shft_res2 = Vd->Q(idx);
} else {
- shft_res1 = int128_rshift(Vj->Q(0), imm);
- shft_res2 = int128_rshift(Vd->Q(0), imm);
+ shft_res1 = int128_rshift(Vj->Q(idx), imm);
+ shft_res2 = int128_rshift(Vd->Q(idx), imm);
}
- mask = int128_sub(int128_lshift(int128_one(), 63), int128_one());
- min = int128_lshift(int128_one(), 63);
- if (int128_gt(shft_res1, mask)) {
- Vd->D(0) = int128_getlo(mask);
+ if (int128_gt(shft_res1, mask)) {
+ Vd->D(idx * 2) = int128_getlo(mask);
} else if (int128_lt(shft_res1, int128_neg(min))) {
- Vd->D(0) = int128_getlo(min);
+ Vd->D(idx * 2) = int128_getlo(min);
} else {
- Vd->D(0) = int128_getlo(shft_res1);
+ Vd->D(idx * 2) = int128_getlo(shft_res1);
}
if (int128_gt(shft_res2, mask)) {
- Vd->D(1) = int128_getlo(mask);
+ Vd->D(idx * 2 + 1) = int128_getlo(mask);
} else if (int128_lt(shft_res2, int128_neg(min))) {
- Vd->D(1) = int128_getlo(min);
+ Vd->D(idx * 2 + 1) = int128_getlo(min);
} else {
- Vd->D(1) = int128_getlo(shft_res2);
+ Vd->D(idx * 2 + 1) = int128_getlo(shft_res2);
+ }
+}
+
+void HELPER(vssrani_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+ int i;
+ Int128 mask, min;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+
+ mask = int128_sub(int128_lshift(int128_one(), 63), int128_one());
+ min = int128_lshift(int128_one(), 63);
+
+ for (i = 0; i < oprsz / 16; i++) {
+ do_vssrani_d_q(Vd, Vj, imm, i, mask, min);
}
}
+
VSSRANI(vssrani_b_h, 16, B, H)
VSSRANI(vssrani_h_w, 32, H, W)
VSSRANI(vssrani_w_d, 64, W, D)
-#define VSSRLNUI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- temp.E1(i) = do_ssrlnu_ ## E1(Vj->E2(i), imm, BIT/2); \
- temp.E1(i + LSX_LEN/BIT) = do_ssrlnu_ ## E1(Vd->E2(i), imm, BIT/2); \
- } \
- *Vd = temp; \
+#define VSSRLNUI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E1(j + ofs * 2 * i) = do_ssrlnu_ ## E1(Vj->E2(j + ofs * i), \
+ imm, BIT / 2); \
+ temp.E1(j + ofs * (2 * i + 1)) = do_ssrlnu_ ## E1(Vd->E2(j + ofs * i), \
+ imm, BIT / 2); \
+ } \
+ } \
+ *Vd = temp; \
}
void HELPER(vssrlni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
- Int128 shft_res1, shft_res2, mask;
+ int i;
+ Int128 mask;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
- if (imm == 0) {
- shft_res1 = Vj->Q(0);
- shft_res2 = Vd->Q(0);
- } else {
- shft_res1 = int128_urshift(Vj->Q(0), imm);
- shft_res2 = int128_urshift(Vd->Q(0), imm);
- }
mask = int128_sub(int128_lshift(int128_one(), 64), int128_one());
- if (int128_ult(mask, shft_res1)) {
- Vd->D(0) = int128_getlo(mask);
- }else {
- Vd->D(0) = int128_getlo(shft_res1);
- }
-
- if (int128_ult(mask, shft_res2)) {
- Vd->D(1) = int128_getlo(mask);
- }else {
- Vd->D(1) = int128_getlo(shft_res2);
+ for (i = 0; i < oprsz / 16; i++) {
+ do_vssrlni_q(Vd, Vj, imm, i, mask);
}
}
@@ -1690,55 +1746,73 @@ VSSRLNUI(vssrlni_bu_h, 16, B, H)
VSSRLNUI(vssrlni_hu_w, 32, H, W)
VSSRLNUI(vssrlni_wu_d, 64, W, D)
-#define VSSRANUI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- temp.E1(i) = do_ssranu_ ## E1(Vj->E2(i), imm, BIT/2); \
- temp.E1(i + LSX_LEN/BIT) = do_ssranu_ ## E1(Vd->E2(i), imm, BIT/2); \
- } \
- *Vd = temp; \
-}
-
-void HELPER(vssrani_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
-{
- Int128 shft_res1, shft_res2, mask;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
+#define VSSRANUI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E1(j + ofs * 2 * i) = do_ssranu_ ## E1(Vj->E2(j + ofs * i), \
+ imm, BIT / 2); \
+ temp.E1(j + ofs * (2 * i + 1)) = do_ssranu_ ## E1(Vd->E2(j + ofs * i), \
+ imm, BIT / 2); \
+ } \
+ } \
+ *Vd = temp; \
+}
+
+static void do_vssrani_du_q(VReg *Vd, VReg *Vj,
+ uint64_t imm, int idx, Int128 mask)
+{
+ Int128 shft_res1, shft_res2;
if (imm == 0) {
- shft_res1 = Vj->Q(0);
- shft_res2 = Vd->Q(0);
+ shft_res1 = Vj->Q(idx);
+ shft_res2 = Vd->Q(idx);
} else {
- shft_res1 = int128_rshift(Vj->Q(0), imm);
- shft_res2 = int128_rshift(Vd->Q(0), imm);
+ shft_res1 = int128_rshift(Vj->Q(idx), imm);
+ shft_res2 = int128_rshift(Vd->Q(idx), imm);
}
- if (int128_lt(Vj->Q(0), int128_zero())) {
+ if (int128_lt(Vj->Q(idx), int128_zero())) {
shft_res1 = int128_zero();
}
- if (int128_lt(Vd->Q(0), int128_zero())) {
+ if (int128_lt(Vd->Q(idx), int128_zero())) {
shft_res2 = int128_zero();
}
-
- mask = int128_sub(int128_lshift(int128_one(), 64), int128_one());
-
if (int128_ult(mask, shft_res1)) {
- Vd->D(0) = int128_getlo(mask);
+ Vd->D(idx * 2) = int128_getlo(mask);
}else {
- Vd->D(0) = int128_getlo(shft_res1);
+ Vd->D(idx * 2) = int128_getlo(shft_res1);
}
if (int128_ult(mask, shft_res2)) {
- Vd->D(1) = int128_getlo(mask);
+ Vd->D(idx * 2 + 1) = int128_getlo(mask);
}else {
- Vd->D(1) = int128_getlo(shft_res2);
+ Vd->D(idx * 2 + 1) = int128_getlo(shft_res2);
+ }
+
+}
+
+void HELPER(vssrani_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+ int i;
+ Int128 mask;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+
+ mask = int128_sub(int128_lshift(int128_one(), 64), int128_one());
+
+ for (i = 0; i < oprsz / 16; i++) {
+ do_vssrani_du_q(Vd, Vj, imm, i, mask);
}
}
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index c60a48a16a..952f7fdc46 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -3901,6 +3901,18 @@ TRANS(vssrln_wu_d, LSX, gen_vvv, gen_helper_vssrln_wu_d)
TRANS(vssran_bu_h, LSX, gen_vvv, gen_helper_vssran_bu_h)
TRANS(vssran_hu_w, LSX, gen_vvv, gen_helper_vssran_hu_w)
TRANS(vssran_wu_d, LSX, gen_vvv, gen_helper_vssran_wu_d)
+TRANS(xvssrln_b_h, LASX, gen_xxx, gen_helper_vssrln_b_h)
+TRANS(xvssrln_h_w, LASX, gen_xxx, gen_helper_vssrln_h_w)
+TRANS(xvssrln_w_d, LASX, gen_xxx, gen_helper_vssrln_w_d)
+TRANS(xvssran_b_h, LASX, gen_xxx, gen_helper_vssran_b_h)
+TRANS(xvssran_h_w, LASX, gen_xxx, gen_helper_vssran_h_w)
+TRANS(xvssran_w_d, LASX, gen_xxx, gen_helper_vssran_w_d)
+TRANS(xvssrln_bu_h, LASX, gen_xxx, gen_helper_vssrln_bu_h)
+TRANS(xvssrln_hu_w, LASX, gen_xxx, gen_helper_vssrln_hu_w)
+TRANS(xvssrln_wu_d, LASX, gen_xxx, gen_helper_vssrln_wu_d)
+TRANS(xvssran_bu_h, LASX, gen_xxx, gen_helper_vssran_bu_h)
+TRANS(xvssran_hu_w, LASX, gen_xxx, gen_helper_vssran_hu_w)
+TRANS(xvssran_wu_d, LASX, gen_xxx, gen_helper_vssran_wu_d)
TRANS(vssrlni_b_h, LSX, gen_vv_i, gen_helper_vssrlni_b_h)
TRANS(vssrlni_h_w, LSX, gen_vv_i, gen_helper_vssrlni_h_w)
@@ -3918,6 +3930,22 @@ TRANS(vssrani_bu_h, LSX, gen_vv_i, gen_helper_vssrani_bu_h)
TRANS(vssrani_hu_w, LSX, gen_vv_i, gen_helper_vssrani_hu_w)
TRANS(vssrani_wu_d, LSX, gen_vv_i, gen_helper_vssrani_wu_d)
TRANS(vssrani_du_q, LSX, gen_vv_i, gen_helper_vssrani_du_q)
+TRANS(xvssrlni_b_h, LASX, gen_xx_i, gen_helper_vssrlni_b_h)
+TRANS(xvssrlni_h_w, LASX, gen_xx_i, gen_helper_vssrlni_h_w)
+TRANS(xvssrlni_w_d, LASX, gen_xx_i, gen_helper_vssrlni_w_d)
+TRANS(xvssrlni_d_q, LASX, gen_xx_i, gen_helper_vssrlni_d_q)
+TRANS(xvssrani_b_h, LASX, gen_xx_i, gen_helper_vssrani_b_h)
+TRANS(xvssrani_h_w, LASX, gen_xx_i, gen_helper_vssrani_h_w)
+TRANS(xvssrani_w_d, LASX, gen_xx_i, gen_helper_vssrani_w_d)
+TRANS(xvssrani_d_q, LASX, gen_xx_i, gen_helper_vssrani_d_q)
+TRANS(xvssrlni_bu_h, LASX, gen_xx_i, gen_helper_vssrlni_bu_h)
+TRANS(xvssrlni_hu_w, LASX, gen_xx_i, gen_helper_vssrlni_hu_w)
+TRANS(xvssrlni_wu_d, LASX, gen_xx_i, gen_helper_vssrlni_wu_d)
+TRANS(xvssrlni_du_q, LASX, gen_xx_i, gen_helper_vssrlni_du_q)
+TRANS(xvssrani_bu_h, LASX, gen_xx_i, gen_helper_vssrani_bu_h)
+TRANS(xvssrani_hu_w, LASX, gen_xx_i, gen_helper_vssrani_hu_w)
+TRANS(xvssrani_wu_d, LASX, gen_xx_i, gen_helper_vssrani_wu_d)
+TRANS(xvssrani_du_q, LASX, gen_xx_i, gen_helper_vssrani_du_q)
TRANS(vssrlrn_b_h, LSX, gen_vvv, gen_helper_vssrlrn_b_h)
TRANS(vssrlrn_h_w, LSX, gen_vvv, gen_helper_vssrlrn_h_w)
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* Re: [PATCH RESEND v5 40/57] target/loongarch: Implement xvssrln xvssran
2023-09-07 8:31 ` [PATCH RESEND v5 40/57] target/loongarch: Implement xvssrln xvssran Song Gao
@ 2023-09-11 22:07 ` Richard Henderson
0 siblings, 0 replies; 87+ messages in thread
From: Richard Henderson @ 2023-09-11 22:07 UTC (permalink / raw)
To: Song Gao, qemu-devel; +Cc: maobibo
On 9/7/23 01:31, Song Gao wrote:
> This patch includes:
> - XVSSRLN.{B.H/H.W/W.D};
> - XVSSRAN.{B.H/H.W/W.D};
> - XVSSRLN.{BU.H/HU.W/WU.D};
> - XVSSRAN.{BU.H/HU.W/WU.D};
> - XVSSRLNI.{B.H/H.W/W.D/D.Q};
> - XVSSRANI.{B.H/H.W/W.D/D.Q};
> - XVSSRLNI.{BU.H/HU.W/WU.D/DU.Q};
> - XVSSRANI.{BU.H/HU.W/WU.D/DU.Q}.
>
> Signed-off-by: Song Gao <gaosong@loongson.cn>
> ---
> target/loongarch/insns.decode | 30 ++
> target/loongarch/disas.c | 30 ++
> target/loongarch/vec_helper.c | 456 ++++++++++++--------
> target/loongarch/insn_trans/trans_vec.c.inc | 28 ++
> 4 files changed, 353 insertions(+), 191 deletions(-)
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
r~
^ permalink raw reply [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 41/57] target/loongarch: Implement xvssrlrn xvssrarn
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (39 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 40/57] target/loongarch: Implement xvssrln xvssran Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-11 22:13 ` Richard Henderson
2023-09-07 8:31 ` [PATCH RESEND v5 42/57] target/loongarch: Implement xvclo xvclz Song Gao
` (15 subsequent siblings)
56 siblings, 1 reply; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVSSRLRN.{B.H/H.W/W.D};
- XVSSRARN.{B.H/H.W/W.D};
- XVSSRLRN.{BU.H/HU.W/WU.D};
- XVSSRARN.{BU.H/HU.W/WU.D};
- XVSSRLRNI.{B.H/H.W/W.D/D.Q};
- XVSSRARNI.{B.H/H.W/W.D/D.Q};
- XVSSRLRNI.{BU.H/HU.W/WU.D/DU.Q};
- XVSSRARNI.{BU.H/HU.W/WU.D/DU.Q}.
Signed-off-by: Song Gao <gaosong@loongson.cn>
---
target/loongarch/insns.decode | 30 ++
target/loongarch/disas.c | 30 ++
target/loongarch/vec_helper.c | 489 ++++++++++++--------
target/loongarch/insn_trans/trans_vec.c.inc | 28 ++
4 files changed, 378 insertions(+), 199 deletions(-)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 022dd9bfd1..dc74bae7a5 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1740,6 +1740,36 @@ xvssrani_hu_w 0111 01110110 01001 ..... ..... ..... @vv_ui5
xvssrani_wu_d 0111 01110110 0101 ...... ..... ..... @vv_ui6
xvssrani_du_q 0111 01110110 011 ....... ..... ..... @vv_ui7
+xvssrlrn_b_h 0111 01010000 00001 ..... ..... ..... @vvv
+xvssrlrn_h_w 0111 01010000 00010 ..... ..... ..... @vvv
+xvssrlrn_w_d 0111 01010000 00011 ..... ..... ..... @vvv
+xvssrarn_b_h 0111 01010000 00101 ..... ..... ..... @vvv
+xvssrarn_h_w 0111 01010000 00110 ..... ..... ..... @vvv
+xvssrarn_w_d 0111 01010000 00111 ..... ..... ..... @vvv
+xvssrlrn_bu_h 0111 01010000 10001 ..... ..... ..... @vvv
+xvssrlrn_hu_w 0111 01010000 10010 ..... ..... ..... @vvv
+xvssrlrn_wu_d 0111 01010000 10011 ..... ..... ..... @vvv
+xvssrarn_bu_h 0111 01010000 10101 ..... ..... ..... @vvv
+xvssrarn_hu_w 0111 01010000 10110 ..... ..... ..... @vvv
+xvssrarn_wu_d 0111 01010000 10111 ..... ..... ..... @vvv
+
+xvssrlrni_b_h 0111 01110101 00000 1 .... ..... ..... @vv_ui4
+xvssrlrni_h_w 0111 01110101 00001 ..... ..... ..... @vv_ui5
+xvssrlrni_w_d 0111 01110101 0001 ...... ..... ..... @vv_ui6
+xvssrlrni_d_q 0111 01110101 001 ....... ..... ..... @vv_ui7
+xvssrarni_b_h 0111 01110110 10000 1 .... ..... ..... @vv_ui4
+xvssrarni_h_w 0111 01110110 10001 ..... ..... ..... @vv_ui5
+xvssrarni_w_d 0111 01110110 1001 ...... ..... ..... @vv_ui6
+xvssrarni_d_q 0111 01110110 101 ....... ..... ..... @vv_ui7
+xvssrlrni_bu_h 0111 01110101 01000 1 .... ..... ..... @vv_ui4
+xvssrlrni_hu_w 0111 01110101 01001 ..... ..... ..... @vv_ui5
+xvssrlrni_wu_d 0111 01110101 0101 ...... ..... ..... @vv_ui6
+xvssrlrni_du_q 0111 01110101 011 ....... ..... ..... @vv_ui7
+xvssrarni_bu_h 0111 01110110 11000 1 .... ..... ..... @vv_ui4
+xvssrarni_hu_w 0111 01110110 11001 ..... ..... ..... @vv_ui5
+xvssrarni_wu_d 0111 01110110 1101 ...... ..... ..... @vv_ui6
+xvssrarni_du_q 0111 01110110 111 ....... ..... ..... @vv_ui7
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index c02f31019f..421eecbb71 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -2166,6 +2166,36 @@ INSN_LASX(xvssrani_hu_w, vv_i)
INSN_LASX(xvssrani_wu_d, vv_i)
INSN_LASX(xvssrani_du_q, vv_i)
+INSN_LASX(xvssrlrn_b_h, vvv)
+INSN_LASX(xvssrlrn_h_w, vvv)
+INSN_LASX(xvssrlrn_w_d, vvv)
+INSN_LASX(xvssrarn_b_h, vvv)
+INSN_LASX(xvssrarn_h_w, vvv)
+INSN_LASX(xvssrarn_w_d, vvv)
+INSN_LASX(xvssrlrn_bu_h, vvv)
+INSN_LASX(xvssrlrn_hu_w, vvv)
+INSN_LASX(xvssrlrn_wu_d, vvv)
+INSN_LASX(xvssrarn_bu_h, vvv)
+INSN_LASX(xvssrarn_hu_w, vvv)
+INSN_LASX(xvssrarn_wu_d, vvv)
+
+INSN_LASX(xvssrlrni_b_h, vv_i)
+INSN_LASX(xvssrlrni_h_w, vv_i)
+INSN_LASX(xvssrlrni_w_d, vv_i)
+INSN_LASX(xvssrlrni_d_q, vv_i)
+INSN_LASX(xvssrlrni_bu_h, vv_i)
+INSN_LASX(xvssrlrni_hu_w, vv_i)
+INSN_LASX(xvssrlrni_wu_d, vv_i)
+INSN_LASX(xvssrlrni_du_q, vv_i)
+INSN_LASX(xvssrarni_b_h, vv_i)
+INSN_LASX(xvssrarni_h_w, vv_i)
+INSN_LASX(xvssrarni_w_d, vv_i)
+INSN_LASX(xvssrarni_d_q, vv_i)
+INSN_LASX(xvssrarni_bu_h, vv_i)
+INSN_LASX(xvssrarni_hu_w, vv_i)
+INSN_LASX(xvssrarni_wu_d, vv_i)
+INSN_LASX(xvssrarni_du_q, vv_i)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index e8dd95eaed..53dc53cb09 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -1827,7 +1827,7 @@ static T1 do_ssrlrns_ ## E1(T2 e2, int sa, int sh) \
\
shft_res = do_vsrlr_ ## E2(e2, sa); \
T1 mask; \
- mask = (1ull << sh) -1; \
+ mask = (1ull << sh) - 1; \
if (shft_res > mask) { \
return mask; \
} else { \
@@ -1839,23 +1839,29 @@ SSRLRNS(B, H, uint16_t, int16_t, uint8_t)
SSRLRNS(H, W, uint32_t, int32_t, uint16_t)
SSRLRNS(W, D, uint64_t, int64_t, uint32_t)
-#define VSSRLRN(NAME, BIT, T, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E1(i) = do_ssrlrns_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \
- } \
- Vd->D(1) = 0; \
+#define VSSRLRN(NAME, BIT, E1, E2, E3) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ Vd->E1(j + ofs * 2 * i) = do_ssrlrns_ ## E1(Vj->E2(j + ofs * i), \
+ Vk->E3(j + ofs * i) % BIT, \
+ BIT / 2 - 1); \
+ } \
+ Vd->D(2 * i + 1) = 0; \
+ } \
}
-VSSRLRN(vssrlrn_b_h, 16, uint16_t, B, H)
-VSSRLRN(vssrlrn_h_w, 32, uint32_t, H, W)
-VSSRLRN(vssrlrn_w_d, 64, uint64_t, W, D)
+VSSRLRN(vssrlrn_b_h, 16, B, H, UH)
+VSSRLRN(vssrlrn_h_w, 32, H, W, UW)
+VSSRLRN(vssrlrn_w_d, 64, W, D, UD)
#define SSRARNS(E1, E2, T1, T2) \
static T1 do_ssrarns_ ## E1(T1 e2, int sa, int sh) \
@@ -1864,7 +1870,7 @@ static T1 do_ssrarns_ ## E1(T1 e2, int sa, int sh) \
\
shft_res = do_vsrar_ ## E2(e2, sa); \
T2 mask; \
- mask = (1ll << sh) -1; \
+ mask = (1ll << sh) - 1; \
if (shft_res > mask) { \
return mask; \
} else if (shft_res < -(mask +1)) { \
@@ -1878,23 +1884,29 @@ SSRARNS(B, H, int16_t, int8_t)
SSRARNS(H, W, int32_t, int16_t)
SSRARNS(W, D, int64_t, int32_t)
-#define VSSRARN(NAME, BIT, T, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E1(i) = do_ssrarns_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \
- } \
- Vd->D(1) = 0; \
+#define VSSRARN(NAME, BIT, E1, E2, E3) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ Vd->E1(j + ofs * 2 * i) = do_ssrarns_ ## E1(Vj->E2(j + ofs * i), \
+ Vk->E3(j + ofs * i) % BIT, \
+ BIT/ 2 - 1); \
+ } \
+ Vd->D(2 * i + 1) = 0; \
+ } \
}
-VSSRARN(vssrarn_b_h, 16, uint16_t, B, H)
-VSSRARN(vssrarn_h_w, 32, uint32_t, H, W)
-VSSRARN(vssrarn_w_d, 64, uint64_t, W, D)
+VSSRARN(vssrarn_b_h, 16, B, H, UH)
+VSSRARN(vssrarn_h_w, 32, H, W, UW)
+VSSRARN(vssrarn_w_d, 64, W, D, UD)
#define SSRLRNU(E1, E2, T1, T2, T3) \
static T1 do_ssrlrnu_ ## E1(T3 e2, int sa, int sh) \
@@ -1904,7 +1916,7 @@ static T1 do_ssrlrnu_ ## E1(T3 e2, int sa, int sh) \
shft_res = do_vsrlr_ ## E2(e2, sa); \
\
T2 mask; \
- mask = (1ull << sh) -1; \
+ mask = (1ull << sh) - 1; \
if (shft_res > mask) { \
return mask; \
} else { \
@@ -1916,23 +1928,29 @@ SSRLRNU(B, H, uint16_t, uint8_t, int16_t)
SSRLRNU(H, W, uint32_t, uint16_t, int32_t)
SSRLRNU(W, D, uint64_t, uint32_t, int64_t)
-#define VSSRLRNU(NAME, BIT, T, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E1(i) = do_ssrlrnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \
- } \
- Vd->D(1) = 0; \
+#define VSSRLRNU(NAME, BIT, E1, E2, E3) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ Vd->E1(j + ofs * 2 * i) = do_ssrlrnu_ ## E1(Vj->E2(j + ofs * i), \
+ Vk->E3(j + ofs * i) % BIT, \
+ BIT / 2); \
+ } \
+ Vd->D(2 * i + 1) = 0; \
+ } \
}
-VSSRLRNU(vssrlrn_bu_h, 16, uint16_t, B, H)
-VSSRLRNU(vssrlrn_hu_w, 32, uint32_t, H, W)
-VSSRLRNU(vssrlrn_wu_d, 64, uint64_t, W, D)
+VSSRLRNU(vssrlrn_bu_h, 16, B, H, UH)
+VSSRLRNU(vssrlrn_hu_w, 32, H, W, UW)
+VSSRLRNU(vssrlrn_wu_d, 64, W, D, UD)
#define SSRARNU(E1, E2, T1, T2, T3) \
static T1 do_ssrarnu_ ## E1(T3 e2, int sa, int sh) \
@@ -1945,7 +1963,7 @@ static T1 do_ssrarnu_ ## E1(T3 e2, int sa, int sh) \
shft_res = do_vsrar_ ## E2(e2, sa); \
} \
T2 mask; \
- mask = (1ull << sh) -1; \
+ mask = (1ull << sh) - 1; \
if (shft_res > mask) { \
return mask; \
} else { \
@@ -1957,126 +1975,162 @@ SSRARNU(B, H, uint16_t, uint8_t, int16_t)
SSRARNU(H, W, uint32_t, uint16_t, int32_t)
SSRARNU(W, D, uint64_t, uint32_t, int64_t)
-#define VSSRARNU(NAME, BIT, T, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E1(i) = do_ssrarnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \
- } \
- Vd->D(1) = 0; \
+#define VSSRARNU(NAME, BIT, E1, E2, E3) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ Vd->E1(j + ofs * 2 * i) = do_ssrarnu_ ## E1(Vj->E2(j + ofs * i), \
+ Vk->E3(j + ofs * i) % BIT, \
+ BIT / 2); \
+ } \
+ Vd->D(2 * i + 1) = 0; \
+ } \
}
-VSSRARNU(vssrarn_bu_h, 16, uint16_t, B, H)
-VSSRARNU(vssrarn_hu_w, 32, uint32_t, H, W)
-VSSRARNU(vssrarn_wu_d, 64, uint64_t, W, D)
+VSSRARNU(vssrarn_bu_h, 16, B, H, UH)
+VSSRARNU(vssrarn_hu_w, 32, H, W, UW)
+VSSRARNU(vssrarn_wu_d, 64, W, D, UD)
+
+#define VSSRLRNI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E1(j + ofs * 2 * i) = do_ssrlrns_ ## E1(Vj->E2(j + ofs * i), \
+ imm, BIT / 2 - 1); \
+ temp.E1(j + ofs * (2 * i + 1)) = do_ssrlrns_ ## E1(Vd->E2(j + ofs * i), \
+ imm, BIT / 2 - 1); \
+ } \
+ } \
+ *Vd = temp; \
+}
+
+static void do_vssrlrni_q(VReg *Vd, VReg * Vj,
+ uint64_t imm, int idx, Int128 mask)
+{
+ Int128 shft_res1, shft_res2, r1, r2;
+ if (imm == 0) {
+ shft_res1 = Vj->Q(idx);
+ shft_res2 = Vd->Q(idx);
+ } else {
+ r1 = int128_and(int128_urshift(Vj->Q(idx), (imm - 1)), int128_one());
+ r2 = int128_and(int128_urshift(Vd->Q(idx), (imm - 1)), int128_one());
+ shft_res1 = (int128_add(int128_urshift(Vj->Q(idx), imm), r1));
+ shft_res2 = (int128_add(int128_urshift(Vd->Q(idx), imm), r2));
+ }
-#define VSSRLRNI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- temp.E1(i) = do_ssrlrns_ ## E1(Vj->E2(i), imm, BIT/2 -1); \
- temp.E1(i + LSX_LEN/BIT) = do_ssrlrns_ ## E1(Vd->E2(i), imm, BIT/2 -1);\
- } \
- *Vd = temp; \
+ if (int128_ult(mask, shft_res1)) {
+ Vd->D(idx * 2) = int128_getlo(mask);
+ }else {
+ Vd->D(idx * 2) = int128_getlo(shft_res1);
+ }
+
+ if (int128_ult(mask, shft_res2)) {
+ Vd->D(idx * 2 + 1) = int128_getlo(mask);
+ }else {
+ Vd->D(idx * 2 + 1) = int128_getlo(shft_res2);
+ }
}
-#define VSSRLRNI_Q(NAME, sh) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- Int128 shft_res1, shft_res2, mask, r1, r2; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- \
- if (imm == 0) { \
- shft_res1 = Vj->Q(0); \
- shft_res2 = Vd->Q(0); \
- } else { \
- r1 = int128_and(int128_urshift(Vj->Q(0), (imm -1)), int128_one()); \
- r2 = int128_and(int128_urshift(Vd->Q(0), (imm -1)), int128_one()); \
- \
- shft_res1 = (int128_add(int128_urshift(Vj->Q(0), imm), r1)); \
- shft_res2 = (int128_add(int128_urshift(Vd->Q(0), imm), r2)); \
- } \
- \
- mask = int128_sub(int128_lshift(int128_one(), sh), int128_one()); \
- \
- if (int128_ult(mask, shft_res1)) { \
- Vd->D(0) = int128_getlo(mask); \
- }else { \
- Vd->D(0) = int128_getlo(shft_res1); \
- } \
- \
- if (int128_ult(mask, shft_res2)) { \
- Vd->D(1) = int128_getlo(mask); \
- }else { \
- Vd->D(1) = int128_getlo(shft_res2); \
- } \
+void HELPER(vssrlrni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+ int i;
+ Int128 mask;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+
+ mask = int128_sub(int128_lshift(int128_one(), 63), int128_one());
+
+ for (i = 0; i < oprsz / 16; i ++) {
+ do_vssrlrni_q(Vd, Vj, imm, i, mask);
+ }
}
VSSRLRNI(vssrlrni_b_h, 16, B, H)
VSSRLRNI(vssrlrni_h_w, 32, H, W)
VSSRLRNI(vssrlrni_w_d, 64, W, D)
-VSSRLRNI_Q(vssrlrni_d_q, 63)
-
-#define VSSRARNI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- temp.E1(i) = do_ssrarns_ ## E1(Vj->E2(i), imm, BIT/2 -1); \
- temp.E1(i + LSX_LEN/BIT) = do_ssrarns_ ## E1(Vd->E2(i), imm, BIT/2 -1); \
- } \
- *Vd = temp; \
-}
-void HELPER(vssrarni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
-{
- Int128 shft_res1, shft_res2, mask1, mask2, r1, r2;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
+#define VSSRARNI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E1(j + ofs * 2 * i) = do_ssrarns_ ## E1(Vj->E2(j + ofs * i), \
+ imm, BIT / 2 - 1); \
+ temp.E1(j + ofs * (2 * i + 1)) = do_ssrarns_ ## E1(Vd->E2(j + ofs * i), \
+ imm, BIT / 2 - 1); \
+ } \
+ } \
+ *Vd = temp; \
+}
+
+static void do_vssrarni_d_q(VReg *Vd, VReg *Vj,
+ uint64_t imm, int idx, Int128 mask1, Int128 mask2)
+{
+ Int128 shft_res1, shft_res2, r1, r2;
if (imm == 0) {
- shft_res1 = Vj->Q(0);
- shft_res2 = Vd->Q(0);
+ shft_res1 = Vj->Q(idx);
+ shft_res2 = Vd->Q(idx);
} else {
- r1 = int128_and(int128_rshift(Vj->Q(0), (imm -1)), int128_one());
- r2 = int128_and(int128_rshift(Vd->Q(0), (imm -1)), int128_one());
-
- shft_res1 = int128_add(int128_rshift(Vj->Q(0), imm), r1);
- shft_res2 = int128_add(int128_rshift(Vd->Q(0), imm), r2);
+ r1 = int128_and(int128_rshift(Vj->Q(idx), (imm - 1)), int128_one());
+ r2 = int128_and(int128_rshift(Vd->Q(idx), (imm - 1)), int128_one());
+ shft_res1 = int128_add(int128_rshift(Vj->Q(idx), imm), r1);
+ shft_res2 = int128_add(int128_rshift(Vd->Q(idx), imm), r2);
}
-
- mask1 = int128_sub(int128_lshift(int128_one(), 63), int128_one());
- mask2 = int128_lshift(int128_one(), 63);
-
- if (int128_gt(shft_res1, mask1)) {
- Vd->D(0) = int128_getlo(mask1);
+ if (int128_gt(shft_res1, mask1)) {
+ Vd->D(idx * 2) = int128_getlo(mask1);
} else if (int128_lt(shft_res1, int128_neg(mask2))) {
- Vd->D(0) = int128_getlo(mask2);
+ Vd->D(idx * 2) = int128_getlo(mask2);
} else {
- Vd->D(0) = int128_getlo(shft_res1);
+ Vd->D(idx * 2) = int128_getlo(shft_res1);
}
if (int128_gt(shft_res2, mask1)) {
- Vd->D(1) = int128_getlo(mask1);
+ Vd->D(idx * 2 + 1) = int128_getlo(mask1);
} else if (int128_lt(shft_res2, int128_neg(mask2))) {
- Vd->D(1) = int128_getlo(mask2);
+ Vd->D(idx * 2 + 1) = int128_getlo(mask2);
} else {
- Vd->D(1) = int128_getlo(shft_res2);
+ Vd->D(idx * 2 + 1) = int128_getlo(shft_res2);
+ }
+}
+
+void HELPER(vssrarni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+ int i;
+ Int128 mask1, mask2;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+
+ mask1 = int128_sub(int128_lshift(int128_one(), 63), int128_one());
+ mask2 = int128_lshift(int128_one(), 63);
+
+ for (i = 0; i < oprsz / 16; i++) {
+ do_vssrarni_d_q(Vd, Vj, imm, i, mask1, mask2);
}
}
@@ -2084,82 +2138,119 @@ VSSRARNI(vssrarni_b_h, 16, B, H)
VSSRARNI(vssrarni_h_w, 32, H, W)
VSSRARNI(vssrarni_w_d, 64, W, D)
-#define VSSRLRNUI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- temp.E1(i) = do_ssrlrnu_ ## E1(Vj->E2(i), imm, BIT/2); \
- temp.E1(i + LSX_LEN/BIT) = do_ssrlrnu_ ## E1(Vd->E2(i), imm, BIT/2); \
- } \
- *Vd = temp; \
+#define VSSRLRNUI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E1(j + ofs * 2 * i) = do_ssrlrnu_ ## E1(Vj->E2(j + ofs * i), \
+ imm, BIT / 2); \
+ temp.E1(j + ofs * (2 * i + 1)) = do_ssrlrnu_ ## E1(Vd->E2(j + ofs * i), \
+ imm, BIT / 2); \
+ } \
+ } \
+ *Vd = temp; \
+}
+
+void HELPER(vssrlrni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+ int i;
+ Int128 mask;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+
+ mask = int128_sub(int128_lshift(int128_one(), 64), int128_one());
+
+ for (i = 0; i < oprsz / 16; i ++) {
+ do_vssrlrni_q(Vd, Vj, imm, i, mask);
+ }
}
VSSRLRNUI(vssrlrni_bu_h, 16, B, H)
VSSRLRNUI(vssrlrni_hu_w, 32, H, W)
VSSRLRNUI(vssrlrni_wu_d, 64, W, D)
-VSSRLRNI_Q(vssrlrni_du_q, 64)
-#define VSSRARNUI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- temp.E1(i) = do_ssrarnu_ ## E1(Vj->E2(i), imm, BIT/2); \
- temp.E1(i + LSX_LEN/BIT) = do_ssrarnu_ ## E1(Vd->E2(i), imm, BIT/2); \
- } \
- *Vd = temp; \
-}
-
-void HELPER(vssrarni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
-{
- Int128 shft_res1, shft_res2, mask1, mask2, r1, r2;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
+#define VSSRARNUI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E1(j + ofs * 2 * i) = do_ssrarnu_ ## E1(Vj->E2(j + ofs * i), \
+ imm, BIT / 2); \
+ temp.E1(j + ofs * (2 * i + 1)) = do_ssrarnu_ ## E1(Vd->E2(j + ofs * i), \
+ imm, BIT / 2); \
+ } \
+ } \
+ *Vd = temp; \
+}
+
+static void do_vssrarni_du_q(VReg *Vd, VReg *Vj,
+ uint64_t imm, int idx, Int128 mask1, Int128 mask2)
+{
+ Int128 shft_res1, shft_res2, r1, r2; /* *1: from Vj->Q(idx), *2: from Vd->Q(idx) */
if (imm == 0) {
- shft_res1 = Vj->Q(0);
- shft_res2 = Vd->Q(0);
+ shft_res1 = Vj->Q(idx);
+ shft_res2 = Vd->Q(idx);
} else {
- r1 = int128_and(int128_rshift(Vj->Q(0), (imm -1)), int128_one());
- r2 = int128_and(int128_rshift(Vd->Q(0), (imm -1)), int128_one());
-
- shft_res1 = int128_add(int128_rshift(Vj->Q(0), imm), r1);
- shft_res2 = int128_add(int128_rshift(Vd->Q(0), imm), r2);
+ r1 = int128_and(int128_rshift(Vj->Q(idx), (imm - 1)), int128_one());
+ r2 = int128_and(int128_rshift(Vd->Q(idx), (imm - 1)), int128_one());
+ shft_res1 = int128_add(int128_rshift(Vj->Q(idx), imm), r1);
+ shft_res2 = int128_add(int128_rshift(Vd->Q(idx), imm), r2);
}
- if (int128_lt(Vj->Q(0), int128_zero())) {
+ if (int128_lt(Vj->Q(idx), int128_zero())) {
shft_res1 = int128_zero();
}
- if (int128_lt(Vd->Q(0), int128_zero())) {
+ if (int128_lt(Vd->Q(idx), int128_zero())) {
shft_res2 = int128_zero();
}
- mask1 = int128_sub(int128_lshift(int128_one(), 64), int128_one());
- mask2 = int128_lshift(int128_one(), 64);
-
if (int128_gt(shft_res1, mask1)) {
- Vd->D(0) = int128_getlo(mask1);
+ Vd->D(idx * 2) = int128_getlo(mask1);
} else if (int128_lt(shft_res1, int128_neg(mask2))) {
- Vd->D(0) = int128_getlo(mask2);
+ Vd->D(idx * 2) = int128_getlo(mask2);
} else {
- Vd->D(0) = int128_getlo(shft_res1);
+ Vd->D(idx * 2) = int128_getlo(shft_res1); /* low 64 bits of lane idx */
}
if (int128_gt(shft_res2, mask1)) {
- Vd->D(1) = int128_getlo(mask1);
+ Vd->D(idx * 2 + 1) = int128_getlo(mask1);
} else if (int128_lt(shft_res2, int128_neg(mask2))) {
- Vd->D(1) = int128_getlo(mask2);
+ Vd->D(idx * 2 + 1) = int128_getlo(mask2);
} else {
- Vd->D(1) = int128_getlo(shft_res2);
+ Vd->D(idx * 2 + 1) = int128_getlo(shft_res2); /* high 64 bits of lane idx */
+ }
+}
+
+void HELPER(vssrarni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+ int i;
+ Int128 mask1, mask2;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+
+ mask1 = int128_sub(int128_lshift(int128_one(), 64), int128_one());
+ mask2 = int128_lshift(int128_one(), 64);
+
+ for (i = 0; i < oprsz / 16; i++) {
+ do_vssrarni_du_q(Vd, Vj, imm, i, mask1, mask2);
}
}
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index 952f7fdc46..c9d0897acf 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -3959,6 +3959,18 @@ TRANS(vssrlrn_wu_d, LSX, gen_vvv, gen_helper_vssrlrn_wu_d)
TRANS(vssrarn_bu_h, LSX, gen_vvv, gen_helper_vssrarn_bu_h)
TRANS(vssrarn_hu_w, LSX, gen_vvv, gen_helper_vssrarn_hu_w)
TRANS(vssrarn_wu_d, LSX, gen_vvv, gen_helper_vssrarn_wu_d)
+TRANS(xvssrlrn_b_h, LASX, gen_xxx, gen_helper_vssrlrn_b_h)
+TRANS(xvssrlrn_h_w, LASX, gen_xxx, gen_helper_vssrlrn_h_w)
+TRANS(xvssrlrn_w_d, LASX, gen_xxx, gen_helper_vssrlrn_w_d)
+TRANS(xvssrarn_b_h, LASX, gen_xxx, gen_helper_vssrarn_b_h)
+TRANS(xvssrarn_h_w, LASX, gen_xxx, gen_helper_vssrarn_h_w)
+TRANS(xvssrarn_w_d, LASX, gen_xxx, gen_helper_vssrarn_w_d)
+TRANS(xvssrlrn_bu_h, LASX, gen_xxx, gen_helper_vssrlrn_bu_h)
+TRANS(xvssrlrn_hu_w, LASX, gen_xxx, gen_helper_vssrlrn_hu_w)
+TRANS(xvssrlrn_wu_d, LASX, gen_xxx, gen_helper_vssrlrn_wu_d)
+TRANS(xvssrarn_bu_h, LASX, gen_xxx, gen_helper_vssrarn_bu_h)
+TRANS(xvssrarn_hu_w, LASX, gen_xxx, gen_helper_vssrarn_hu_w)
+TRANS(xvssrarn_wu_d, LASX, gen_xxx, gen_helper_vssrarn_wu_d)
TRANS(vssrlrni_b_h, LSX, gen_vv_i, gen_helper_vssrlrni_b_h)
TRANS(vssrlrni_h_w, LSX, gen_vv_i, gen_helper_vssrlrni_h_w)
@@ -3976,6 +3988,22 @@ TRANS(vssrarni_bu_h, LSX, gen_vv_i, gen_helper_vssrarni_bu_h)
TRANS(vssrarni_hu_w, LSX, gen_vv_i, gen_helper_vssrarni_hu_w)
TRANS(vssrarni_wu_d, LSX, gen_vv_i, gen_helper_vssrarni_wu_d)
TRANS(vssrarni_du_q, LSX, gen_vv_i, gen_helper_vssrarni_du_q)
+TRANS(xvssrlrni_b_h, LASX, gen_xx_i, gen_helper_vssrlrni_b_h)
+TRANS(xvssrlrni_h_w, LASX, gen_xx_i, gen_helper_vssrlrni_h_w)
+TRANS(xvssrlrni_w_d, LASX, gen_xx_i, gen_helper_vssrlrni_w_d)
+TRANS(xvssrlrni_d_q, LASX, gen_xx_i, gen_helper_vssrlrni_d_q)
+TRANS(xvssrarni_b_h, LASX, gen_xx_i, gen_helper_vssrarni_b_h)
+TRANS(xvssrarni_h_w, LASX, gen_xx_i, gen_helper_vssrarni_h_w)
+TRANS(xvssrarni_w_d, LASX, gen_xx_i, gen_helper_vssrarni_w_d)
+TRANS(xvssrarni_d_q, LASX, gen_xx_i, gen_helper_vssrarni_d_q)
+TRANS(xvssrlrni_bu_h, LASX, gen_xx_i, gen_helper_vssrlrni_bu_h)
+TRANS(xvssrlrni_hu_w, LASX, gen_xx_i, gen_helper_vssrlrni_hu_w)
+TRANS(xvssrlrni_wu_d, LASX, gen_xx_i, gen_helper_vssrlrni_wu_d)
+TRANS(xvssrlrni_du_q, LASX, gen_xx_i, gen_helper_vssrlrni_du_q)
+TRANS(xvssrarni_bu_h, LASX, gen_xx_i, gen_helper_vssrarni_bu_h)
+TRANS(xvssrarni_hu_w, LASX, gen_xx_i, gen_helper_vssrarni_hu_w)
+TRANS(xvssrarni_wu_d, LASX, gen_xx_i, gen_helper_vssrarni_wu_d)
+TRANS(xvssrarni_du_q, LASX, gen_xx_i, gen_helper_vssrarni_du_q)
TRANS(vclo_b, LSX, gen_vv, gen_helper_vclo_b)
TRANS(vclo_h, LSX, gen_vv, gen_helper_vclo_h)
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* Re: [PATCH RESEND v5 41/57] target/loongarch: Implement xvssrlrn xvssrarn
2023-09-07 8:31 ` [PATCH RESEND v5 41/57] target/loongarch: Implement xvssrlrn xvssrarn Song Gao
@ 2023-09-11 22:13 ` Richard Henderson
0 siblings, 0 replies; 87+ messages in thread
From: Richard Henderson @ 2023-09-11 22:13 UTC (permalink / raw)
To: Song Gao, qemu-devel; +Cc: maobibo
On 9/7/23 01:31, Song Gao wrote:
> This patch includes:
> - XVSSRLRN.{B.H/H.W/W.D};
> - XVSSRARN.{B.H/H.W/W.D};
> - XVSSRLRN.{BU.H/HU.W/WU.D};
> - XVSSRARN.{BU.H/HU.W/WU.D};
> - XVSSRLRNI.{B.H/H.W/W.D/D.Q};
> - XVSSRARNI.{B.H/H.W/W.D/D.Q};
> - XVSSRLRNI.{BU.H/HU.W/WU.D/DU.Q};
> - XVSSRARNI.{BU.H/HU.W/WU.D/DU.Q}.
>
> Signed-off-by: Song Gao<gaosong@loongson.cn>
> ---
> target/loongarch/insns.decode | 30 ++
> target/loongarch/disas.c | 30 ++
> target/loongarch/vec_helper.c | 489 ++++++++++++--------
> target/loongarch/insn_trans/trans_vec.c.inc | 28 ++
> 4 files changed, 378 insertions(+), 199 deletions(-)
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
r~
^ permalink raw reply [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 42/57] target/loongarch: Implement xvclo xvclz
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (40 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 41/57] target/loongarch: Implement xvssrlrn xvssrarn Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-07 8:31 ` [PATCH RESEND v5 43/57] target/loongarch: Implement xvpcnt Song Gao
` (14 subsequent siblings)
56 siblings, 0 replies; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVCLO.{B/H/W/D};
- XVCLZ.{B/H/W/D}.
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/insns.decode | 9 +++++++++
target/loongarch/disas.c | 9 +++++++++
target/loongarch/vec_helper.c | 3 ++-
target/loongarch/insn_trans/trans_vec.c.inc | 8 ++++++++
4 files changed, 28 insertions(+), 1 deletion(-)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index dc74bae7a5..3175532045 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1770,6 +1770,15 @@ xvssrarni_hu_w 0111 01110110 11001 ..... ..... ..... @vv_ui5
xvssrarni_wu_d 0111 01110110 1101 ...... ..... ..... @vv_ui6
xvssrarni_du_q 0111 01110110 111 ....... ..... ..... @vv_ui7
+xvclo_b 0111 01101001 11000 00000 ..... ..... @vv
+xvclo_h 0111 01101001 11000 00001 ..... ..... @vv
+xvclo_w 0111 01101001 11000 00010 ..... ..... @vv
+xvclo_d 0111 01101001 11000 00011 ..... ..... @vv
+xvclz_b 0111 01101001 11000 00100 ..... ..... @vv
+xvclz_h 0111 01101001 11000 00101 ..... ..... @vv
+xvclz_w 0111 01101001 11000 00110 ..... ..... @vv
+xvclz_d 0111 01101001 11000 00111 ..... ..... @vv
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 421eecbb71..bbf530b349 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -2196,6 +2196,15 @@ INSN_LASX(xvssrarni_hu_w, vv_i)
INSN_LASX(xvssrarni_wu_d, vv_i)
INSN_LASX(xvssrarni_du_q, vv_i)
+INSN_LASX(xvclo_b, vv)
+INSN_LASX(xvclo_h, vv)
+INSN_LASX(xvclo_w, vv)
+INSN_LASX(xvclo_d, vv)
+INSN_LASX(xvclz_b, vv)
+INSN_LASX(xvclz_h, vv)
+INSN_LASX(xvclz_w, vv)
+INSN_LASX(xvclz_d, vv)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index 53dc53cb09..461aa12bf5 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -2264,8 +2264,9 @@ void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \
int i; \
VReg *Vd = (VReg *)vd; \
VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
\
- for (i = 0; i < LSX_LEN/BIT; i++) \
+ for (i = 0; i < oprsz / (BIT / 8); i++) \
{ \
Vd->E(i) = DO_OP(Vj->E(i)); \
} \
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index c9d0897acf..ea555e6ac1 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -4013,6 +4013,14 @@ TRANS(vclz_b, LSX, gen_vv, gen_helper_vclz_b)
TRANS(vclz_h, LSX, gen_vv, gen_helper_vclz_h)
TRANS(vclz_w, LSX, gen_vv, gen_helper_vclz_w)
TRANS(vclz_d, LSX, gen_vv, gen_helper_vclz_d)
+TRANS(xvclo_b, LASX, gen_xx, gen_helper_vclo_b)
+TRANS(xvclo_h, LASX, gen_xx, gen_helper_vclo_h)
+TRANS(xvclo_w, LASX, gen_xx, gen_helper_vclo_w)
+TRANS(xvclo_d, LASX, gen_xx, gen_helper_vclo_d)
+TRANS(xvclz_b, LASX, gen_xx, gen_helper_vclz_b)
+TRANS(xvclz_h, LASX, gen_xx, gen_helper_vclz_h)
+TRANS(xvclz_w, LASX, gen_xx, gen_helper_vclz_w)
+TRANS(xvclz_d, LASX, gen_xx, gen_helper_vclz_d)
TRANS(vpcnt_b, LSX, gen_vv, gen_helper_vpcnt_b)
TRANS(vpcnt_h, LSX, gen_vv, gen_helper_vpcnt_h)
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 43/57] target/loongarch: Implement xvpcnt
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (41 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 42/57] target/loongarch: Implement xvclo xvclz Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-07 8:31 ` [PATCH RESEND v5 44/57] target/loongarch: Implement xvbitclr xvbitset xvbitrev Song Gao
` (13 subsequent siblings)
56 siblings, 0 replies; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVPCNT.{B/H/W/D}.
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/insns.decode | 5 +++++
target/loongarch/disas.c | 5 +++++
target/loongarch/vec_helper.c | 3 ++-
target/loongarch/insn_trans/trans_vec.c.inc | 4 ++++
4 files changed, 16 insertions(+), 1 deletion(-)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 3175532045..d683c6a6ab 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1779,6 +1779,11 @@ xvclz_h 0111 01101001 11000 00101 ..... ..... @vv
xvclz_w 0111 01101001 11000 00110 ..... ..... @vv
xvclz_d 0111 01101001 11000 00111 ..... ..... @vv
+xvpcnt_b 0111 01101001 11000 01000 ..... ..... @vv
+xvpcnt_h 0111 01101001 11000 01001 ..... ..... @vv
+xvpcnt_w 0111 01101001 11000 01010 ..... ..... @vv
+xvpcnt_d 0111 01101001 11000 01011 ..... ..... @vv
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index bbf530b349..ff7f7a792a 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -2205,6 +2205,11 @@ INSN_LASX(xvclz_h, vv)
INSN_LASX(xvclz_w, vv)
INSN_LASX(xvclz_d, vv)
+INSN_LASX(xvpcnt_b, vv)
+INSN_LASX(xvpcnt_h, vv)
+INSN_LASX(xvpcnt_w, vv)
+INSN_LASX(xvpcnt_d, vv)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index 461aa12bf5..41181ce265 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -2296,8 +2296,9 @@ void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \
int i; \
VReg *Vd = (VReg *)vd; \
VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
\
- for (i = 0; i < LSX_LEN/BIT; i++) \
+ for (i = 0; i < oprsz / (BIT / 8); i++) \
{ \
Vd->E(i) = FN(Vj->E(i)); \
} \
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index ea555e6ac1..97acbe3676 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -4026,6 +4026,10 @@ TRANS(vpcnt_b, LSX, gen_vv, gen_helper_vpcnt_b)
TRANS(vpcnt_h, LSX, gen_vv, gen_helper_vpcnt_h)
TRANS(vpcnt_w, LSX, gen_vv, gen_helper_vpcnt_w)
TRANS(vpcnt_d, LSX, gen_vv, gen_helper_vpcnt_d)
+TRANS(xvpcnt_b, LASX, gen_xx, gen_helper_vpcnt_b)
+TRANS(xvpcnt_h, LASX, gen_xx, gen_helper_vpcnt_h)
+TRANS(xvpcnt_w, LASX, gen_xx, gen_helper_vpcnt_w)
+TRANS(xvpcnt_d, LASX, gen_xx, gen_helper_vpcnt_d)
static void do_vbit(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b,
void (*func)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec))
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 44/57] target/loongarch: Implement xvbitclr xvbitset xvbitrev
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (42 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 43/57] target/loongarch: Implement xvpcnt Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-07 8:31 ` [PATCH RESEND v5 45/57] target/loongarch: Implement xvfrstp Song Gao
` (12 subsequent siblings)
56 siblings, 0 replies; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVBITCLR[I].{B/H/W/D};
- XVBITSET[I].{B/H/W/D};
- XVBITREV[I].{B/H/W/D}.
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/insns.decode | 27 +++++++++++++
target/loongarch/disas.c | 25 ++++++++++++
target/loongarch/vec_helper.c | 44 +++++++++++----------
target/loongarch/insn_trans/trans_vec.c.inc | 24 +++++++++++
4 files changed, 99 insertions(+), 21 deletions(-)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index d683c6a6ab..cb6db8002a 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1784,6 +1784,33 @@ xvpcnt_h 0111 01101001 11000 01001 ..... ..... @vv
xvpcnt_w 0111 01101001 11000 01010 ..... ..... @vv
xvpcnt_d 0111 01101001 11000 01011 ..... ..... @vv
+xvbitclr_b 0111 01010000 11000 ..... ..... ..... @vvv
+xvbitclr_h 0111 01010000 11001 ..... ..... ..... @vvv
+xvbitclr_w 0111 01010000 11010 ..... ..... ..... @vvv
+xvbitclr_d 0111 01010000 11011 ..... ..... ..... @vvv
+xvbitclri_b 0111 01110001 00000 01 ... ..... ..... @vv_ui3
+xvbitclri_h 0111 01110001 00000 1 .... ..... ..... @vv_ui4
+xvbitclri_w 0111 01110001 00001 ..... ..... ..... @vv_ui5
+xvbitclri_d 0111 01110001 0001 ...... ..... ..... @vv_ui6
+
+xvbitset_b 0111 01010000 11100 ..... ..... ..... @vvv
+xvbitset_h 0111 01010000 11101 ..... ..... ..... @vvv
+xvbitset_w 0111 01010000 11110 ..... ..... ..... @vvv
+xvbitset_d 0111 01010000 11111 ..... ..... ..... @vvv
+xvbitseti_b 0111 01110001 01000 01 ... ..... ..... @vv_ui3
+xvbitseti_h 0111 01110001 01000 1 .... ..... ..... @vv_ui4
+xvbitseti_w 0111 01110001 01001 ..... ..... ..... @vv_ui5
+xvbitseti_d 0111 01110001 0101 ...... ..... ..... @vv_ui6
+
+xvbitrev_b 0111 01010001 00000 ..... ..... ..... @vvv
+xvbitrev_h 0111 01010001 00001 ..... ..... ..... @vvv
+xvbitrev_w 0111 01010001 00010 ..... ..... ..... @vvv
+xvbitrev_d 0111 01010001 00011 ..... ..... ..... @vvv
+xvbitrevi_b 0111 01110001 10000 01 ... ..... ..... @vv_ui3
+xvbitrevi_h 0111 01110001 10000 1 .... ..... ..... @vv_ui4
+xvbitrevi_w 0111 01110001 10001 ..... ..... ..... @vv_ui5
+xvbitrevi_d 0111 01110001 1001 ...... ..... ..... @vv_ui6
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index ff7f7a792a..7f04c912aa 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -2210,6 +2210,31 @@ INSN_LASX(xvpcnt_h, vv)
INSN_LASX(xvpcnt_w, vv)
INSN_LASX(xvpcnt_d, vv)
+INSN_LASX(xvbitclr_b, vvv)
+INSN_LASX(xvbitclr_h, vvv)
+INSN_LASX(xvbitclr_w, vvv)
+INSN_LASX(xvbitclr_d, vvv)
+INSN_LASX(xvbitclri_b, vv_i)
+INSN_LASX(xvbitclri_h, vv_i)
+INSN_LASX(xvbitclri_w, vv_i)
+INSN_LASX(xvbitclri_d, vv_i)
+INSN_LASX(xvbitset_b, vvv)
+INSN_LASX(xvbitset_h, vvv)
+INSN_LASX(xvbitset_w, vvv)
+INSN_LASX(xvbitset_d, vvv)
+INSN_LASX(xvbitseti_b, vv_i)
+INSN_LASX(xvbitseti_h, vv_i)
+INSN_LASX(xvbitseti_w, vv_i)
+INSN_LASX(xvbitseti_d, vv_i)
+INSN_LASX(xvbitrev_b, vvv)
+INSN_LASX(xvbitrev_h, vvv)
+INSN_LASX(xvbitrev_w, vvv)
+INSN_LASX(xvbitrev_d, vvv)
+INSN_LASX(xvbitrevi_b, vv_i)
+INSN_LASX(xvbitrevi_h, vv_i)
+INSN_LASX(xvbitrevi_w, vv_i)
+INSN_LASX(xvbitrevi_d, vv_i)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index 41181ce265..a5e92b592d 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -2313,17 +2313,18 @@ VPCNT(vpcnt_d, 64, UD, ctpop64)
#define DO_BITSET(a, bit) (a | 1ull << bit)
#define DO_BITREV(a, bit) (a ^ (1ull << bit))
-#define DO_BIT(NAME, BIT, E, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t v) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)%BIT); \
- } \
+#define DO_BIT(NAME, BIT, E, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)%BIT); \
+ } \
}
DO_BIT(vbitclr_b, 8, UB, DO_BITCLR)
@@ -2339,16 +2340,17 @@ DO_BIT(vbitrev_h, 16, UH, DO_BITREV)
DO_BIT(vbitrev_w, 32, UW, DO_BITREV)
DO_BIT(vbitrev_d, 64, UD, DO_BITREV)
-#define DO_BITI(NAME, BIT, E, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t v) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E(i) = DO_OP(Vj->E(i), imm); \
- } \
+#define DO_BITI(NAME, BIT, E, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->E(i) = DO_OP(Vj->E(i), imm); \
+ } \
}
DO_BITI(vbitclri_b, 8, UB, DO_BITCLR)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index 97acbe3676..692975e539 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -4101,6 +4101,10 @@ TRANS(vbitclr_b, LSX, gvec_vvv, MO_8, do_vbitclr)
TRANS(vbitclr_h, LSX, gvec_vvv, MO_16, do_vbitclr)
TRANS(vbitclr_w, LSX, gvec_vvv, MO_32, do_vbitclr)
TRANS(vbitclr_d, LSX, gvec_vvv, MO_64, do_vbitclr)
+TRANS(xvbitclr_b, LASX, gvec_xxx, MO_8, do_vbitclr)
+TRANS(xvbitclr_h, LASX, gvec_xxx, MO_16, do_vbitclr)
+TRANS(xvbitclr_w, LASX, gvec_xxx, MO_32, do_vbitclr)
+TRANS(xvbitclr_d, LASX, gvec_xxx, MO_64, do_vbitclr)
static void do_vbiti(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm,
void (*func)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec))
@@ -4171,6 +4175,10 @@ TRANS(vbitclri_b, LSX, gvec_vv_i, MO_8, do_vbitclri)
TRANS(vbitclri_h, LSX, gvec_vv_i, MO_16, do_vbitclri)
TRANS(vbitclri_w, LSX, gvec_vv_i, MO_32, do_vbitclri)
TRANS(vbitclri_d, LSX, gvec_vv_i, MO_64, do_vbitclri)
+TRANS(xvbitclri_b, LASX, gvec_xx_i, MO_8, do_vbitclri)
+TRANS(xvbitclri_h, LASX, gvec_xx_i, MO_16, do_vbitclri)
+TRANS(xvbitclri_w, LASX, gvec_xx_i, MO_32, do_vbitclri)
+TRANS(xvbitclri_d, LASX, gvec_xx_i, MO_64, do_vbitclri)
static void do_vbitset(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
@@ -4212,6 +4220,10 @@ TRANS(vbitset_b, LSX, gvec_vvv, MO_8, do_vbitset)
TRANS(vbitset_h, LSX, gvec_vvv, MO_16, do_vbitset)
TRANS(vbitset_w, LSX, gvec_vvv, MO_32, do_vbitset)
TRANS(vbitset_d, LSX, gvec_vvv, MO_64, do_vbitset)
+TRANS(xvbitset_b, LASX, gvec_xxx, MO_8, do_vbitset)
+TRANS(xvbitset_h, LASX, gvec_xxx, MO_16, do_vbitset)
+TRANS(xvbitset_w, LASX, gvec_xxx, MO_32, do_vbitset)
+TRANS(xvbitset_d, LASX, gvec_xxx, MO_64, do_vbitset)
static void do_vbitseti(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
int64_t imm, uint32_t oprsz, uint32_t maxsz)
@@ -4253,6 +4265,10 @@ TRANS(vbitseti_b, LSX, gvec_vv_i, MO_8, do_vbitseti)
TRANS(vbitseti_h, LSX, gvec_vv_i, MO_16, do_vbitseti)
TRANS(vbitseti_w, LSX, gvec_vv_i, MO_32, do_vbitseti)
TRANS(vbitseti_d, LSX, gvec_vv_i, MO_64, do_vbitseti)
+TRANS(xvbitseti_b, LASX, gvec_xx_i, MO_8, do_vbitseti)
+TRANS(xvbitseti_h, LASX, gvec_xx_i, MO_16, do_vbitseti)
+TRANS(xvbitseti_w, LASX, gvec_xx_i, MO_32, do_vbitseti)
+TRANS(xvbitseti_d, LASX, gvec_xx_i, MO_64, do_vbitseti)
static void do_vbitrev(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
@@ -4294,6 +4310,10 @@ TRANS(vbitrev_b, LSX, gvec_vvv, MO_8, do_vbitrev)
TRANS(vbitrev_h, LSX, gvec_vvv, MO_16, do_vbitrev)
TRANS(vbitrev_w, LSX, gvec_vvv, MO_32, do_vbitrev)
TRANS(vbitrev_d, LSX, gvec_vvv, MO_64, do_vbitrev)
+TRANS(xvbitrev_b, LASX, gvec_xxx, MO_8, do_vbitrev)
+TRANS(xvbitrev_h, LASX, gvec_xxx, MO_16, do_vbitrev)
+TRANS(xvbitrev_w, LASX, gvec_xxx, MO_32, do_vbitrev)
+TRANS(xvbitrev_d, LASX, gvec_xxx, MO_64, do_vbitrev)
static void do_vbitrevi(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
int64_t imm, uint32_t oprsz, uint32_t maxsz)
@@ -4335,6 +4355,10 @@ TRANS(vbitrevi_b, LSX, gvec_vv_i, MO_8, do_vbitrevi)
TRANS(vbitrevi_h, LSX, gvec_vv_i, MO_16, do_vbitrevi)
TRANS(vbitrevi_w, LSX, gvec_vv_i, MO_32, do_vbitrevi)
TRANS(vbitrevi_d, LSX, gvec_vv_i, MO_64, do_vbitrevi)
+TRANS(xvbitrevi_b, LASX, gvec_xx_i, MO_8, do_vbitrevi)
+TRANS(xvbitrevi_h, LASX, gvec_xx_i, MO_16, do_vbitrevi)
+TRANS(xvbitrevi_w, LASX, gvec_xx_i, MO_32, do_vbitrevi)
+TRANS(xvbitrevi_d, LASX, gvec_xx_i, MO_64, do_vbitrevi)
TRANS(vfrstp_b, LSX, gen_vvv, gen_helper_vfrstp_b)
TRANS(vfrstp_h, LSX, gen_vvv, gen_helper_vfrstp_h)
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 45/57] target/loongarch: Implement xvfrstp
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (43 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 44/57] target/loongarch: Implement xvbitclr xvbitset xvbitrev Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-07 8:31 ` [PATCH RESEND v5 46/57] target/loongarch: Implement LASX fpu arith instructions Song Gao
` (11 subsequent siblings)
56 siblings, 0 replies; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVFRSTP[I].{B/H}.
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/insns.decode | 5 ++++
target/loongarch/disas.c | 5 ++++
target/loongarch/vec_helper.c | 32 +++++++++++++--------
target/loongarch/insn_trans/trans_vec.c.inc | 4 +++
4 files changed, 34 insertions(+), 12 deletions(-)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index cb6db8002a..6035fe139c 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1811,6 +1811,11 @@ xvbitrevi_h 0111 01110001 10000 1 .... ..... ..... @vv_ui4
xvbitrevi_w 0111 01110001 10001 ..... ..... ..... @vv_ui5
xvbitrevi_d 0111 01110001 1001 ...... ..... ..... @vv_ui6
+xvfrstp_b 0111 01010010 10110 ..... ..... ..... @vvv
+xvfrstp_h 0111 01010010 10111 ..... ..... ..... @vvv
+xvfrstpi_b 0111 01101001 10100 ..... ..... ..... @vv_ui5
+xvfrstpi_h 0111 01101001 10101 ..... ..... ..... @vv_ui5
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 7f04c912aa..1c4aecaa93 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -2235,6 +2235,11 @@ INSN_LASX(xvbitrevi_h, vv_i)
INSN_LASX(xvbitrevi_w, vv_i)
INSN_LASX(xvbitrevi_d, vv_i)
+INSN_LASX(xvfrstp_b, vvv)
+INSN_LASX(xvfrstp_h, vvv)
+INSN_LASX(xvfrstpi_b, vv_i)
+INSN_LASX(xvfrstpi_h, vv_i)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index a5e92b592d..a6f5afaab7 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -2369,18 +2369,22 @@ DO_BITI(vbitrevi_d, 64, UD, DO_BITREV)
#define VFRSTP(NAME, BIT, MASK, E) \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
- int i, m; \
+ int i, j, m, ofs; \
VReg *Vd = (VReg *)vd; \
VReg *Vj = (VReg *)vj; \
VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
\
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- if (Vj->E(i) < 0) { \
- break; \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ m = Vk->E(i * ofs) & MASK; \
+ for (j = 0; j < ofs; j++) { \
+ if (Vj->E(j + ofs * i) < 0) { \
+ break; \
+ } \
} \
+ Vd->E(m + i * ofs) = j; \
} \
- m = Vk->E(0) & MASK; \
- Vd->E(m) = i; \
}
VFRSTP(vfrstp_b, 8, 0xf, B)
@@ -2389,17 +2393,21 @@ VFRSTP(vfrstp_h, 16, 0x7, H)
#define VFRSTPI(NAME, BIT, E) \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
{ \
- int i, m; \
+ int i, j, m, ofs; \
VReg *Vd = (VReg *)vd; \
VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
\
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- if (Vj->E(i) < 0) { \
- break; \
+ ofs = LSX_LEN / BIT; \
+ m = imm % ofs; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ if (Vj->E(j + ofs * i) < 0) { \
+ break; \
+ } \
} \
+ Vd->E(m + i * ofs) = j; \
} \
- m = imm % (LSX_LEN/BIT); \
- Vd->E(m) = i; \
}
VFRSTPI(vfrstpi_b, 8, B)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index 692975e539..5483672b35 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -4364,6 +4364,10 @@ TRANS(vfrstp_b, LSX, gen_vvv, gen_helper_vfrstp_b)
TRANS(vfrstp_h, LSX, gen_vvv, gen_helper_vfrstp_h)
TRANS(vfrstpi_b, LSX, gen_vv_i, gen_helper_vfrstpi_b)
TRANS(vfrstpi_h, LSX, gen_vv_i, gen_helper_vfrstpi_h)
+TRANS(xvfrstp_b, LASX, gen_xxx, gen_helper_vfrstp_b)
+TRANS(xvfrstp_h, LASX, gen_xxx, gen_helper_vfrstp_h)
+TRANS(xvfrstpi_b, LASX, gen_xx_i, gen_helper_vfrstpi_b)
+TRANS(xvfrstpi_h, LASX, gen_xx_i, gen_helper_vfrstpi_h)
TRANS(vfadd_s, LSX, gen_vvv_ptr, gen_helper_vfadd_s)
TRANS(vfadd_d, LSX, gen_vvv_ptr, gen_helper_vfadd_d)
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 46/57] target/loongarch: Implement LASX fpu arith instructions
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (44 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 45/57] target/loongarch: Implement xvfrstp Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-07 8:31 ` [PATCH RESEND v5 47/57] target/loongarch: Implement LASX fpu fcvt instructions Song Gao
` (10 subsequent siblings)
56 siblings, 0 replies; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVF{ADD/SUB/MUL/DIV}.{S/D};
- XVF{MADD/MSUB/NMADD/NMSUB}.{S/D};
- XVF{MAX/MIN}.{S/D};
- XVF{MAXA/MINA}.{S/D};
- XVFLOGB.{S/D};
- XVFCLASS.{S/D};
- XVF{SQRT/RECIP/RSQRT}.{S/D}.
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/insns.decode | 41 +++++++++++++
target/loongarch/disas.c | 46 +++++++++++++++
target/loongarch/vec_helper.c | 12 ++--
target/loongarch/insn_trans/trans_vec.c.inc | 64 +++++++++++++++++++++
4 files changed, 159 insertions(+), 4 deletions(-)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 6035fe139c..4224b0a4b1 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1816,6 +1816,47 @@ xvfrstp_h 0111 01010010 10111 ..... ..... ..... @vvv
xvfrstpi_b 0111 01101001 10100 ..... ..... ..... @vv_ui5
xvfrstpi_h 0111 01101001 10101 ..... ..... ..... @vv_ui5
+xvfadd_s 0111 01010011 00001 ..... ..... ..... @vvv
+xvfadd_d 0111 01010011 00010 ..... ..... ..... @vvv
+xvfsub_s 0111 01010011 00101 ..... ..... ..... @vvv
+xvfsub_d 0111 01010011 00110 ..... ..... ..... @vvv
+xvfmul_s 0111 01010011 10001 ..... ..... ..... @vvv
+xvfmul_d 0111 01010011 10010 ..... ..... ..... @vvv
+xvfdiv_s 0111 01010011 10101 ..... ..... ..... @vvv
+xvfdiv_d 0111 01010011 10110 ..... ..... ..... @vvv
+
+xvfmadd_s 0000 10100001 ..... ..... ..... ..... @vvvv
+xvfmadd_d 0000 10100010 ..... ..... ..... ..... @vvvv
+xvfmsub_s 0000 10100101 ..... ..... ..... ..... @vvvv
+xvfmsub_d 0000 10100110 ..... ..... ..... ..... @vvvv
+xvfnmadd_s 0000 10101001 ..... ..... ..... ..... @vvvv
+xvfnmadd_d 0000 10101010 ..... ..... ..... ..... @vvvv
+xvfnmsub_s 0000 10101101 ..... ..... ..... ..... @vvvv
+xvfnmsub_d 0000 10101110 ..... ..... ..... ..... @vvvv
+
+xvfmax_s 0111 01010011 11001 ..... ..... ..... @vvv
+xvfmax_d 0111 01010011 11010 ..... ..... ..... @vvv
+xvfmin_s 0111 01010011 11101 ..... ..... ..... @vvv
+xvfmin_d 0111 01010011 11110 ..... ..... ..... @vvv
+
+xvfmaxa_s 0111 01010100 00001 ..... ..... ..... @vvv
+xvfmaxa_d 0111 01010100 00010 ..... ..... ..... @vvv
+xvfmina_s 0111 01010100 00101 ..... ..... ..... @vvv
+xvfmina_d 0111 01010100 00110 ..... ..... ..... @vvv
+
+xvflogb_s 0111 01101001 11001 10001 ..... ..... @vv
+xvflogb_d 0111 01101001 11001 10010 ..... ..... @vv
+
+xvfclass_s 0111 01101001 11001 10101 ..... ..... @vv
+xvfclass_d 0111 01101001 11001 10110 ..... ..... @vv
+
+xvfsqrt_s 0111 01101001 11001 11001 ..... ..... @vv
+xvfsqrt_d 0111 01101001 11001 11010 ..... ..... @vv
+xvfrecip_s 0111 01101001 11001 11101 ..... ..... @vv
+xvfrecip_d 0111 01101001 11001 11110 ..... ..... @vv
+xvfrsqrt_s 0111 01101001 11010 00001 ..... ..... @vv
+xvfrsqrt_d 0111 01101001 11010 00010 ..... ..... @vv
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 1c4aecaa93..1fb9d7eac1 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1708,6 +1708,11 @@ static void output_v_i_x(DisasContext *ctx, arg_v_i *a, const char *mnemonic)
output(ctx, mnemonic, "x%d, 0x%x", a->vd, a->imm);
}
+static void output_vvvv_x(DisasContext *ctx, arg_vvvv *a, const char *mnemonic)
+{
+ output(ctx, mnemonic, "x%d, x%d, x%d, x%d", a->vd, a->vj, a->vk, a->va);
+}
+
static void output_vvv_x(DisasContext *ctx, arg_vvv * a, const char *mnemonic)
{
output(ctx, mnemonic, "x%d, x%d, x%d", a->vd, a->vj, a->vk);
@@ -2240,6 +2245,47 @@ INSN_LASX(xvfrstp_h, vvv)
INSN_LASX(xvfrstpi_b, vv_i)
INSN_LASX(xvfrstpi_h, vv_i)
+INSN_LASX(xvfadd_s, vvv)
+INSN_LASX(xvfadd_d, vvv)
+INSN_LASX(xvfsub_s, vvv)
+INSN_LASX(xvfsub_d, vvv)
+INSN_LASX(xvfmul_s, vvv)
+INSN_LASX(xvfmul_d, vvv)
+INSN_LASX(xvfdiv_s, vvv)
+INSN_LASX(xvfdiv_d, vvv)
+
+INSN_LASX(xvfmadd_s, vvvv)
+INSN_LASX(xvfmadd_d, vvvv)
+INSN_LASX(xvfmsub_s, vvvv)
+INSN_LASX(xvfmsub_d, vvvv)
+INSN_LASX(xvfnmadd_s, vvvv)
+INSN_LASX(xvfnmadd_d, vvvv)
+INSN_LASX(xvfnmsub_s, vvvv)
+INSN_LASX(xvfnmsub_d, vvvv)
+
+INSN_LASX(xvfmax_s, vvv)
+INSN_LASX(xvfmax_d, vvv)
+INSN_LASX(xvfmin_s, vvv)
+INSN_LASX(xvfmin_d, vvv)
+
+INSN_LASX(xvfmaxa_s, vvv)
+INSN_LASX(xvfmaxa_d, vvv)
+INSN_LASX(xvfmina_s, vvv)
+INSN_LASX(xvfmina_d, vvv)
+
+INSN_LASX(xvflogb_s, vv)
+INSN_LASX(xvflogb_d, vv)
+
+INSN_LASX(xvfclass_s, vv)
+INSN_LASX(xvfclass_d, vv)
+
+INSN_LASX(xvfsqrt_s, vv)
+INSN_LASX(xvfsqrt_d, vv)
+INSN_LASX(xvfrecip_s, vv)
+INSN_LASX(xvfrecip_d, vv)
+INSN_LASX(xvfrsqrt_s, vv)
+INSN_LASX(xvfrsqrt_d, vv)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index a6f5afaab7..0c8b0d1e54 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -2452,9 +2452,10 @@ void HELPER(NAME)(void *vd, void *vj, void *vk, \
VReg *Vd = (VReg *)vd; \
VReg *Vj = (VReg *)vj; \
VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
\
vec_clear_cause(env); \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
Vd->E(i) = FN(Vj->E(i), Vk->E(i), &env->fp_status); \
vec_update_fcsr0(env, GETPC()); \
} \
@@ -2486,9 +2487,10 @@ void HELPER(NAME)(void *vd, void *vj, void *vk, void *va, \
VReg *Vj = (VReg *)vj; \
VReg *Vk = (VReg *)vk; \
VReg *Va = (VReg *)va; \
+ int oprsz = simd_oprsz(desc); \
\
vec_clear_cause(env); \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
Vd->E(i) = FN(Vj->E(i), Vk->E(i), Va->E(i), flags, &env->fp_status); \
vec_update_fcsr0(env, GETPC()); \
} \
@@ -2512,9 +2514,10 @@ void HELPER(NAME)(void *vd, void *vj, \
int i; \
VReg *Vd = (VReg *)vd; \
VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
\
vec_clear_cause(env); \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
Vd->E(i) = FN(env, Vj->E(i)); \
} \
}
@@ -2544,8 +2547,9 @@ void HELPER(NAME)(void *vd, void *vj, \
int i; \
VReg *Vd = (VReg *)vd; \
VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
\
- for (i = 0; i < LSX_LEN/BIT; i++) { \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
Vd->E(i) = FN(env, Vj->E(i)); \
} \
}
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index 5483672b35..87466ef669 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -52,6 +52,16 @@ static bool gen_vvvv_ptr(DisasContext *ctx, arg_vvvv *a,
return gen_vvvv_ptr_vl(ctx, a, 16, fn);
}
+static bool gen_xxxx_ptr(DisasContext *ctx, arg_vvvv *a,
+ gen_helper_gvec_4_ptr *fn)
+{
+ if (!check_vec(ctx, 32)) {
+ return true;
+ }
+
+ return gen_vvvv_ptr_vl(ctx, a, 32, fn);
+}
+
static bool gen_vvvv_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz,
gen_helper_gvec_4 *fn)
{
@@ -94,6 +104,16 @@ static bool gen_vvv_ptr(DisasContext *ctx, arg_vvv *a,
return gen_vvv_ptr_vl(ctx, a, 16, fn);
}
+static bool gen_xxx_ptr(DisasContext *ctx, arg_vvv *a,
+ gen_helper_gvec_3_ptr *fn)
+{
+ if (!check_vec(ctx, 32)) {
+ return true;
+ }
+
+ return gen_vvv_ptr_vl(ctx, a, 32, fn);
+}
+
static bool gen_vvv_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
gen_helper_gvec_3 *fn)
{
@@ -142,6 +162,16 @@ static bool gen_vv_ptr(DisasContext *ctx, arg_vv *a,
return gen_vv_ptr_vl(ctx, a, 16, fn);
}
+static bool gen_xx_ptr(DisasContext *ctx, arg_vv *a,
+ gen_helper_gvec_2_ptr *fn)
+{
+ if (!check_vec(ctx, 32)) {
+ return true;
+ }
+
+ return gen_vv_ptr_vl(ctx, a, 32, fn);
+}
+
static bool gen_vv_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz,
gen_helper_gvec_2 *fn)
{
@@ -4377,6 +4407,14 @@ TRANS(vfmul_s, LSX, gen_vvv_ptr, gen_helper_vfmul_s)
TRANS(vfmul_d, LSX, gen_vvv_ptr, gen_helper_vfmul_d)
TRANS(vfdiv_s, LSX, gen_vvv_ptr, gen_helper_vfdiv_s)
TRANS(vfdiv_d, LSX, gen_vvv_ptr, gen_helper_vfdiv_d)
+TRANS(xvfadd_s, LASX, gen_xxx_ptr, gen_helper_vfadd_s)
+TRANS(xvfadd_d, LASX, gen_xxx_ptr, gen_helper_vfadd_d)
+TRANS(xvfsub_s, LASX, gen_xxx_ptr, gen_helper_vfsub_s)
+TRANS(xvfsub_d, LASX, gen_xxx_ptr, gen_helper_vfsub_d)
+TRANS(xvfmul_s, LASX, gen_xxx_ptr, gen_helper_vfmul_s)
+TRANS(xvfmul_d, LASX, gen_xxx_ptr, gen_helper_vfmul_d)
+TRANS(xvfdiv_s, LASX, gen_xxx_ptr, gen_helper_vfdiv_s)
+TRANS(xvfdiv_d, LASX, gen_xxx_ptr, gen_helper_vfdiv_d)
TRANS(vfmadd_s, LSX, gen_vvvv_ptr, gen_helper_vfmadd_s)
TRANS(vfmadd_d, LSX, gen_vvvv_ptr, gen_helper_vfmadd_d)
@@ -4386,22 +4424,42 @@ TRANS(vfnmadd_s, LSX, gen_vvvv_ptr, gen_helper_vfnmadd_s)
TRANS(vfnmadd_d, LSX, gen_vvvv_ptr, gen_helper_vfnmadd_d)
TRANS(vfnmsub_s, LSX, gen_vvvv_ptr, gen_helper_vfnmsub_s)
TRANS(vfnmsub_d, LSX, gen_vvvv_ptr, gen_helper_vfnmsub_d)
+TRANS(xvfmadd_s, LASX, gen_xxxx_ptr, gen_helper_vfmadd_s)
+TRANS(xvfmadd_d, LASX, gen_xxxx_ptr, gen_helper_vfmadd_d)
+TRANS(xvfmsub_s, LASX, gen_xxxx_ptr, gen_helper_vfmsub_s)
+TRANS(xvfmsub_d, LASX, gen_xxxx_ptr, gen_helper_vfmsub_d)
+TRANS(xvfnmadd_s, LASX, gen_xxxx_ptr, gen_helper_vfnmadd_s)
+TRANS(xvfnmadd_d, LASX, gen_xxxx_ptr, gen_helper_vfnmadd_d)
+TRANS(xvfnmsub_s, LASX, gen_xxxx_ptr, gen_helper_vfnmsub_s)
+TRANS(xvfnmsub_d, LASX, gen_xxxx_ptr, gen_helper_vfnmsub_d)
TRANS(vfmax_s, LSX, gen_vvv_ptr, gen_helper_vfmax_s)
TRANS(vfmax_d, LSX, gen_vvv_ptr, gen_helper_vfmax_d)
TRANS(vfmin_s, LSX, gen_vvv_ptr, gen_helper_vfmin_s)
TRANS(vfmin_d, LSX, gen_vvv_ptr, gen_helper_vfmin_d)
+TRANS(xvfmax_s, LASX, gen_xxx_ptr, gen_helper_vfmax_s)
+TRANS(xvfmax_d, LASX, gen_xxx_ptr, gen_helper_vfmax_d)
+TRANS(xvfmin_s, LASX, gen_xxx_ptr, gen_helper_vfmin_s)
+TRANS(xvfmin_d, LASX, gen_xxx_ptr, gen_helper_vfmin_d)
TRANS(vfmaxa_s, LSX, gen_vvv_ptr, gen_helper_vfmaxa_s)
TRANS(vfmaxa_d, LSX, gen_vvv_ptr, gen_helper_vfmaxa_d)
TRANS(vfmina_s, LSX, gen_vvv_ptr, gen_helper_vfmina_s)
TRANS(vfmina_d, LSX, gen_vvv_ptr, gen_helper_vfmina_d)
+TRANS(xvfmaxa_s, LASX, gen_xxx_ptr, gen_helper_vfmaxa_s)
+TRANS(xvfmaxa_d, LASX, gen_xxx_ptr, gen_helper_vfmaxa_d)
+TRANS(xvfmina_s, LASX, gen_xxx_ptr, gen_helper_vfmina_s)
+TRANS(xvfmina_d, LASX, gen_xxx_ptr, gen_helper_vfmina_d)
TRANS(vflogb_s, LSX, gen_vv_ptr, gen_helper_vflogb_s)
TRANS(vflogb_d, LSX, gen_vv_ptr, gen_helper_vflogb_d)
+TRANS(xvflogb_s, LASX, gen_xx_ptr, gen_helper_vflogb_s)
+TRANS(xvflogb_d, LASX, gen_xx_ptr, gen_helper_vflogb_d)
TRANS(vfclass_s, LSX, gen_vv_ptr, gen_helper_vfclass_s)
TRANS(vfclass_d, LSX, gen_vv_ptr, gen_helper_vfclass_d)
+TRANS(xvfclass_s, LASX, gen_xx_ptr, gen_helper_vfclass_s)
+TRANS(xvfclass_d, LASX, gen_xx_ptr, gen_helper_vfclass_d)
TRANS(vfsqrt_s, LSX, gen_vv_ptr, gen_helper_vfsqrt_s)
TRANS(vfsqrt_d, LSX, gen_vv_ptr, gen_helper_vfsqrt_d)
@@ -4409,6 +4467,12 @@ TRANS(vfrecip_s, LSX, gen_vv_ptr, gen_helper_vfrecip_s)
TRANS(vfrecip_d, LSX, gen_vv_ptr, gen_helper_vfrecip_d)
TRANS(vfrsqrt_s, LSX, gen_vv_ptr, gen_helper_vfrsqrt_s)
TRANS(vfrsqrt_d, LSX, gen_vv_ptr, gen_helper_vfrsqrt_d)
+TRANS(xvfsqrt_s, LASX, gen_xx_ptr, gen_helper_vfsqrt_s)
+TRANS(xvfsqrt_d, LASX, gen_xx_ptr, gen_helper_vfsqrt_d)
+TRANS(xvfrecip_s, LASX, gen_xx_ptr, gen_helper_vfrecip_s)
+TRANS(xvfrecip_d, LASX, gen_xx_ptr, gen_helper_vfrecip_d)
+TRANS(xvfrsqrt_s, LASX, gen_xx_ptr, gen_helper_vfrsqrt_s)
+TRANS(xvfrsqrt_d, LASX, gen_xx_ptr, gen_helper_vfrsqrt_d)
TRANS(vfcvtl_s_h, LSX, gen_vv_ptr, gen_helper_vfcvtl_s_h)
TRANS(vfcvth_s_h, LSX, gen_vv_ptr, gen_helper_vfcvth_s_h)
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 47/57] target/loongarch: Implement LASX fpu fcvt instructions
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (45 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 46/57] target/loongarch: Implement LASX fpu arith instructions Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-07 8:31 ` [PATCH RESEND v5 48/57] target/loongarch: Implement xvseq xvsle xvslt Song Gao
` (9 subsequent siblings)
56 siblings, 0 replies; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVFCVT{L/H}.{S.H/D.S};
- XVFCVT.{H.S/S.D};
- XVFRINT[{RNE/RZ/RP/RM}].{S/D};
- XVFTINT[{RNE/RZ/RP/RM}].{W.S/L.D};
- XVFTINT[RZ].{WU.S/LU.D};
- XVFTINT[{RNE/RZ/RP/RM}].W.D;
- XVFTINT[{RNE/RZ/RP/RM}]{L/H}.L.S;
- XVFFINT.{S.W/D.L}[U];
- XVFFINT.S.L, XVFFINT{L/H}.D.W.
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/insns.decode | 58 +++++
target/loongarch/disas.c | 56 +++++
target/loongarch/vec_helper.c | 235 +++++++++++++-------
target/loongarch/insn_trans/trans_vec.c.inc | 52 +++++
4 files changed, 315 insertions(+), 86 deletions(-)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 4224b0a4b1..ed4f82e7fe 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1857,6 +1857,64 @@ xvfrecip_d 0111 01101001 11001 11110 ..... ..... @vv
xvfrsqrt_s 0111 01101001 11010 00001 ..... ..... @vv
xvfrsqrt_d 0111 01101001 11010 00010 ..... ..... @vv
+xvfcvtl_s_h 0111 01101001 11011 11010 ..... ..... @vv
+xvfcvth_s_h 0111 01101001 11011 11011 ..... ..... @vv
+xvfcvtl_d_s 0111 01101001 11011 11100 ..... ..... @vv
+xvfcvth_d_s 0111 01101001 11011 11101 ..... ..... @vv
+xvfcvt_h_s 0111 01010100 01100 ..... ..... ..... @vvv
+xvfcvt_s_d 0111 01010100 01101 ..... ..... ..... @vvv
+
+xvfrintrne_s 0111 01101001 11010 11101 ..... ..... @vv
+xvfrintrne_d 0111 01101001 11010 11110 ..... ..... @vv
+xvfrintrz_s 0111 01101001 11010 11001 ..... ..... @vv
+xvfrintrz_d 0111 01101001 11010 11010 ..... ..... @vv
+xvfrintrp_s 0111 01101001 11010 10101 ..... ..... @vv
+xvfrintrp_d 0111 01101001 11010 10110 ..... ..... @vv
+xvfrintrm_s 0111 01101001 11010 10001 ..... ..... @vv
+xvfrintrm_d 0111 01101001 11010 10010 ..... ..... @vv
+xvfrint_s 0111 01101001 11010 01101 ..... ..... @vv
+xvfrint_d 0111 01101001 11010 01110 ..... ..... @vv
+
+xvftintrne_w_s 0111 01101001 11100 10100 ..... ..... @vv
+xvftintrne_l_d 0111 01101001 11100 10101 ..... ..... @vv
+xvftintrz_w_s 0111 01101001 11100 10010 ..... ..... @vv
+xvftintrz_l_d 0111 01101001 11100 10011 ..... ..... @vv
+xvftintrp_w_s 0111 01101001 11100 10000 ..... ..... @vv
+xvftintrp_l_d 0111 01101001 11100 10001 ..... ..... @vv
+xvftintrm_w_s 0111 01101001 11100 01110 ..... ..... @vv
+xvftintrm_l_d 0111 01101001 11100 01111 ..... ..... @vv
+xvftint_w_s 0111 01101001 11100 01100 ..... ..... @vv
+xvftint_l_d 0111 01101001 11100 01101 ..... ..... @vv
+xvftintrz_wu_s 0111 01101001 11100 11100 ..... ..... @vv
+xvftintrz_lu_d 0111 01101001 11100 11101 ..... ..... @vv
+xvftint_wu_s 0111 01101001 11100 10110 ..... ..... @vv
+xvftint_lu_d 0111 01101001 11100 10111 ..... ..... @vv
+
+xvftintrne_w_d 0111 01010100 10111 ..... ..... ..... @vvv
+xvftintrz_w_d 0111 01010100 10110 ..... ..... ..... @vvv
+xvftintrp_w_d 0111 01010100 10101 ..... ..... ..... @vvv
+xvftintrm_w_d 0111 01010100 10100 ..... ..... ..... @vvv
+xvftint_w_d 0111 01010100 10011 ..... ..... ..... @vvv
+
+xvftintrnel_l_s 0111 01101001 11101 01000 ..... ..... @vv
+xvftintrneh_l_s 0111 01101001 11101 01001 ..... ..... @vv
+xvftintrzl_l_s 0111 01101001 11101 00110 ..... ..... @vv
+xvftintrzh_l_s 0111 01101001 11101 00111 ..... ..... @vv
+xvftintrpl_l_s 0111 01101001 11101 00100 ..... ..... @vv
+xvftintrph_l_s 0111 01101001 11101 00101 ..... ..... @vv
+xvftintrml_l_s 0111 01101001 11101 00010 ..... ..... @vv
+xvftintrmh_l_s 0111 01101001 11101 00011 ..... ..... @vv
+xvftintl_l_s 0111 01101001 11101 00000 ..... ..... @vv
+xvftinth_l_s 0111 01101001 11101 00001 ..... ..... @vv
+
+xvffint_s_w 0111 01101001 11100 00000 ..... ..... @vv
+xvffint_d_l 0111 01101001 11100 00010 ..... ..... @vv
+xvffint_s_wu 0111 01101001 11100 00001 ..... ..... @vv
+xvffint_d_lu 0111 01101001 11100 00011 ..... ..... @vv
+xvffintl_d_w 0111 01101001 11100 00100 ..... ..... @vv
+xvffinth_d_w 0111 01101001 11100 00101 ..... ..... @vv
+xvffint_s_l 0111 01010100 10000 ..... ..... ..... @vvv
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 1fb9d7eac1..f1a1321d0d 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -2286,6 +2286,62 @@ INSN_LASX(xvfrecip_d, vv)
INSN_LASX(xvfrsqrt_s, vv)
INSN_LASX(xvfrsqrt_d, vv)
+INSN_LASX(xvfcvtl_s_h, vv)
+INSN_LASX(xvfcvth_s_h, vv)
+INSN_LASX(xvfcvtl_d_s, vv)
+INSN_LASX(xvfcvth_d_s, vv)
+INSN_LASX(xvfcvt_h_s, vvv)
+INSN_LASX(xvfcvt_s_d, vvv)
+
+INSN_LASX(xvfrint_s, vv)
+INSN_LASX(xvfrint_d, vv)
+INSN_LASX(xvfrintrm_s, vv)
+INSN_LASX(xvfrintrm_d, vv)
+INSN_LASX(xvfrintrp_s, vv)
+INSN_LASX(xvfrintrp_d, vv)
+INSN_LASX(xvfrintrz_s, vv)
+INSN_LASX(xvfrintrz_d, vv)
+INSN_LASX(xvfrintrne_s, vv)
+INSN_LASX(xvfrintrne_d, vv)
+
+INSN_LASX(xvftint_w_s, vv)
+INSN_LASX(xvftint_l_d, vv)
+INSN_LASX(xvftintrm_w_s, vv)
+INSN_LASX(xvftintrm_l_d, vv)
+INSN_LASX(xvftintrp_w_s, vv)
+INSN_LASX(xvftintrp_l_d, vv)
+INSN_LASX(xvftintrz_w_s, vv)
+INSN_LASX(xvftintrz_l_d, vv)
+INSN_LASX(xvftintrne_w_s, vv)
+INSN_LASX(xvftintrne_l_d, vv)
+INSN_LASX(xvftint_wu_s, vv)
+INSN_LASX(xvftint_lu_d, vv)
+INSN_LASX(xvftintrz_wu_s, vv)
+INSN_LASX(xvftintrz_lu_d, vv)
+INSN_LASX(xvftint_w_d, vvv)
+INSN_LASX(xvftintrm_w_d, vvv)
+INSN_LASX(xvftintrp_w_d, vvv)
+INSN_LASX(xvftintrz_w_d, vvv)
+INSN_LASX(xvftintrne_w_d, vvv)
+INSN_LASX(xvftintl_l_s, vv)
+INSN_LASX(xvftinth_l_s, vv)
+INSN_LASX(xvftintrml_l_s, vv)
+INSN_LASX(xvftintrmh_l_s, vv)
+INSN_LASX(xvftintrpl_l_s, vv)
+INSN_LASX(xvftintrph_l_s, vv)
+INSN_LASX(xvftintrzl_l_s, vv)
+INSN_LASX(xvftintrzh_l_s, vv)
+INSN_LASX(xvftintrnel_l_s, vv)
+INSN_LASX(xvftintrneh_l_s, vv)
+
+INSN_LASX(xvffint_s_w, vv)
+INSN_LASX(xvffint_s_wu, vv)
+INSN_LASX(xvffint_d_l, vv)
+INSN_LASX(xvffint_d_lu, vv)
+INSN_LASX(xvffintl_d_w, vv)
+INSN_LASX(xvffinth_d_w, vv)
+INSN_LASX(xvffint_s_l, vvv)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index 0c8b0d1e54..9dcec7ad40 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -2624,14 +2624,19 @@ static uint32_t float64_cvt_float32(uint64_t d, float_status *status)
void HELPER(vfcvtl_s_h)(void *vd, void *vj,
CPULoongArchState *env, uint32_t desc)
{
- int i;
- VReg temp;
+ int i, j, ofs;
+ VReg temp = {};
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+ ofs = LSX_LEN / 32;
vec_clear_cause(env);
- for (i = 0; i < LSX_LEN/32; i++) {
- temp.UW(i) = float16_cvt_float32(Vj->UH(i), &env->fp_status);
+ for (i = 0; i < oprsz / 16; i++) {
+ for (j = 0; j < ofs; j++) {
+ temp.UW(j + ofs * i) =float16_cvt_float32(Vj->UH(j + ofs * 2 * i),
+ &env->fp_status);
+ }
vec_update_fcsr0(env, GETPC());
}
*Vd = temp;
@@ -2640,14 +2645,19 @@ void HELPER(vfcvtl_s_h)(void *vd, void *vj,
void HELPER(vfcvtl_d_s)(void *vd, void *vj,
CPULoongArchState *env, uint32_t desc)
{
- int i;
- VReg temp;
+ int i, j, ofs;
+ VReg temp = {};
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+ ofs = LSX_LEN / 64;
vec_clear_cause(env);
- for (i = 0; i < LSX_LEN/64; i++) {
- temp.UD(i) = float32_cvt_float64(Vj->UW(i), &env->fp_status);
+ for (i = 0; i < oprsz / 16; i++) {
+ for (j = 0; j < ofs; j++) {
+ temp.UD(j + ofs * i) = float32_cvt_float64(Vj->UW(j + ofs * 2 * i),
+ &env->fp_status);
+ }
vec_update_fcsr0(env, GETPC());
}
*Vd = temp;
@@ -2656,14 +2666,19 @@ void HELPER(vfcvtl_d_s)(void *vd, void *vj,
void HELPER(vfcvth_s_h)(void *vd, void *vj,
CPULoongArchState *env, uint32_t desc)
{
- int i;
- VReg temp;
+ int i, j, ofs;
+ VReg temp = {};
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+ ofs = LSX_LEN / 32;
vec_clear_cause(env);
- for (i = 0; i < LSX_LEN/32; i++) {
- temp.UW(i) = float16_cvt_float32(Vj->UH(i + 4), &env->fp_status);
+ for (i = 0; i < oprsz / 16; i++) {
+ for (j = 0; j < ofs; j++) {
+ temp.UW(j + ofs * i) = float16_cvt_float32(Vj->UH(j + ofs * (2 * i + 1)),
+ &env->fp_status);
+ }
vec_update_fcsr0(env, GETPC());
}
*Vd = temp;
@@ -2672,14 +2687,19 @@ void HELPER(vfcvth_s_h)(void *vd, void *vj,
void HELPER(vfcvth_d_s)(void *vd, void *vj,
CPULoongArchState *env, uint32_t desc)
{
- int i;
- VReg temp;
+ int i, j, ofs;
+ VReg temp = {};
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+ ofs = LSX_LEN / 64;
vec_clear_cause(env);
- for (i = 0; i < LSX_LEN/64; i++) {
- temp.UD(i) = float32_cvt_float64(Vj->UW(i + 2), &env->fp_status);
+ for (i = 0; i < oprsz / 16; i++) {
+ for (j = 0; j < ofs; j++) {
+ temp.UD(j + ofs * i) = float32_cvt_float64(Vj->UW(j + ofs * (2 * i + 1)),
+ &env->fp_status);
+ }
vec_update_fcsr0(env, GETPC());
}
*Vd = temp;
@@ -2688,16 +2708,22 @@ void HELPER(vfcvth_d_s)(void *vd, void *vj,
void HELPER(vfcvt_h_s)(void *vd, void *vj, void *vk,
CPULoongArchState *env, uint32_t desc)
{
- int i;
- VReg temp;
+ int i, j, ofs;
+ VReg temp = {};
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);
+ ofs = LSX_LEN / 32;
vec_clear_cause(env);
- for(i = 0; i < LSX_LEN/32; i++) {
- temp.UH(i + 4) = float32_cvt_float16(Vj->UW(i), &env->fp_status);
- temp.UH(i) = float32_cvt_float16(Vk->UW(i), &env->fp_status);
+ for(i = 0; i < oprsz / 16; i++) {
+ for (j = 0; j < ofs; j++) {
+ temp.UH(j + ofs * (2 * i + 1)) = float32_cvt_float16(Vj->UW(j + ofs * i),
+ &env->fp_status);
+ temp.UH(j + ofs * 2 * i) = float32_cvt_float16(Vk->UW(j + ofs * i),
+ &env->fp_status);
+ }
vec_update_fcsr0(env, GETPC());
}
*Vd = temp;
@@ -2706,16 +2732,22 @@ void HELPER(vfcvt_h_s)(void *vd, void *vj, void *vk,
void HELPER(vfcvt_s_d)(void *vd, void *vj, void *vk,
CPULoongArchState *env, uint32_t desc)
{
- int i;
- VReg temp;
+ int i, j, ofs;
+ VReg temp = {};
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);
+ ofs = LSX_LEN / 64;
vec_clear_cause(env);
- for(i = 0; i < LSX_LEN/64; i++) {
- temp.UW(i + 2) = float64_cvt_float32(Vj->UD(i), &env->fp_status);
- temp.UW(i) = float64_cvt_float32(Vk->UD(i), &env->fp_status);
+ for(i = 0; i < oprsz / 16; i++) {
+ for (j = 0; j < ofs; j++) {
+ temp.UW(j + ofs * (2 * i + 1)) = float64_cvt_float32(Vj->UD(j + ofs * i),
+ &env->fp_status);
+ temp.UW(j + ofs * 2 * i) = float64_cvt_float32(Vk->UD(j + ofs * i),
+ &env->fp_status);
+ }
vec_update_fcsr0(env, GETPC());
}
*Vd = temp;
@@ -2727,9 +2759,10 @@ void HELPER(vfrint_s)(void *vd, void *vj,
int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
vec_clear_cause(env);
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < oprsz / 4; i++) {
Vd->W(i) = float32_round_to_int(Vj->UW(i), &env->fp_status);
vec_update_fcsr0(env, GETPC());
}
@@ -2741,9 +2774,10 @@ void HELPER(vfrint_d)(void *vd, void *vj,
int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
vec_clear_cause(env);
- for (i = 0; i < 2; i++) {
+ for (i = 0; i < oprsz / 8; i++) {
Vd->D(i) = float64_round_to_int(Vj->UD(i), &env->fp_status);
vec_update_fcsr0(env, GETPC());
}
@@ -2756,9 +2790,10 @@ void HELPER(NAME)(void *vd, void *vj, \
int i; \
VReg *Vd = (VReg *)vd; \
VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
\
vec_clear_cause(env); \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); \
set_float_rounding_mode(MODE, &env->fp_status); \
Vd->E(i) = float## BIT ## _round_to_int(Vj->E(i), &env->fp_status); \
@@ -2843,22 +2878,26 @@ FTINT(rp_w_d, float64, int32, uint64_t, uint32_t, float_round_up)
FTINT(rz_w_d, float64, int32, uint64_t, uint32_t, float_round_to_zero)
FTINT(rne_w_d, float64, int32, uint64_t, uint32_t, float_round_nearest_even)
-#define FTINT_W_D(NAME, FN) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, \
- CPULoongArchState *env, uint32_t desc) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- \
- vec_clear_cause(env); \
- for (i = 0; i < 2; i++) { \
- temp.W(i + 2) = FN(env, Vj->UD(i)); \
- temp.W(i) = FN(env, Vk->UD(i)); \
- } \
- *Vd = temp; \
+#define FTINT_W_D(NAME, FN) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, \
+ CPULoongArchState *env, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / 64; \
+ vec_clear_cause(env); \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.W(j + ofs * (2 * i + 1)) = FN(env, Vj->UD(j + ofs * i)); \
+ temp.W(j + ofs * 2 * i) = FN(env, Vk->UD(j + ofs * i)); \
+ } \
+ } \
+ *Vd = temp; \
}
FTINT_W_D(vftint_w_d, do_float64_to_int32)
@@ -2876,20 +2915,24 @@ FTINT(rph_l_s, float32, int64, uint32_t, uint64_t, float_round_up)
FTINT(rzh_l_s, float32, int64, uint32_t, uint64_t, float_round_to_zero)
FTINT(rneh_l_s, float32, int64, uint32_t, uint64_t, float_round_nearest_even)
-#define FTINTL_L_S(NAME, FN) \
-void HELPER(NAME)(void *vd, void *vj, \
- CPULoongArchState *env, uint32_t desc) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- \
- vec_clear_cause(env); \
- for (i = 0; i < 2; i++) { \
- temp.D(i) = FN(env, Vj->UW(i)); \
- } \
- *Vd = temp; \
+#define FTINTL_L_S(NAME, FN) \
+void HELPER(NAME)(void *vd, void *vj, \
+ CPULoongArchState *env, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / 64; \
+ vec_clear_cause(env); \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.D(j + ofs * i) = FN(env, Vj->UW(j + ofs * 2 * i)); \
+ } \
+ } \
+ *Vd = temp; \
}
FTINTL_L_S(vftintl_l_s, do_float32_to_int64)
@@ -2898,20 +2941,24 @@ FTINTL_L_S(vftintrpl_l_s, do_ftintrpl_l_s)
FTINTL_L_S(vftintrzl_l_s, do_ftintrzl_l_s)
FTINTL_L_S(vftintrnel_l_s, do_ftintrnel_l_s)
-#define FTINTH_L_S(NAME, FN) \
-void HELPER(NAME)(void *vd, void *vj, \
- CPULoongArchState *env, uint32_t desc) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- \
- vec_clear_cause(env); \
- for (i = 0; i < 2; i++) { \
- temp.D(i) = FN(env, Vj->UW(i + 2)); \
- } \
- *Vd = temp; \
+#define FTINTH_L_S(NAME, FN) \
+void HELPER(NAME)(void *vd, void *vj, \
+ CPULoongArchState *env, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / 64; \
+ vec_clear_cause(env); \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.D(j + ofs * i) = FN(env, Vj->UW(j + ofs * (2 * i + 1))); \
+ } \
+ } \
+ *Vd = temp; \
}
FTINTH_L_S(vftinth_l_s, do_float32_to_int64)
@@ -2943,14 +2990,19 @@ DO_2OP_F(vffint_d_lu, 64, UD, do_ffint_d_lu)
void HELPER(vffintl_d_w)(void *vd, void *vj,
CPULoongArchState *env, uint32_t desc)
{
- int i;
- VReg temp;
+ int i, j, ofs;
+ VReg temp = {};
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+ ofs = LSX_LEN / 64;
vec_clear_cause(env);
- for (i = 0; i < 2; i++) {
- temp.D(i) = int32_to_float64(Vj->W(i), &env->fp_status);
+ for (i = 0; i < oprsz / 16; i++) {
+ for (j = 0; j < ofs; j++) {
+ temp.D(j + ofs * i) = int32_to_float64(Vj->W(j + ofs * 2 * i),
+ &env->fp_status);
+ }
vec_update_fcsr0(env, GETPC());
}
*Vd = temp;
@@ -2959,14 +3011,19 @@ void HELPER(vffintl_d_w)(void *vd, void *vj,
void HELPER(vffinth_d_w)(void *vd, void *vj,
CPULoongArchState *env, uint32_t desc)
{
- int i;
- VReg temp;
+ int i, j, ofs;
+ VReg temp = {};
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+ ofs = LSX_LEN / 64;
vec_clear_cause(env);
- for (i = 0; i < 2; i++) {
- temp.D(i) = int32_to_float64(Vj->W(i + 2), &env->fp_status);
+ for (i = 0; i < oprsz /16; i++) {
+ for (j = 0; j < ofs; j++) {
+ temp.D(j + ofs * i) = int32_to_float64(Vj->W(j + ofs * (2 * i + 1)),
+ &env->fp_status);
+ }
vec_update_fcsr0(env, GETPC());
}
*Vd = temp;
@@ -2975,16 +3032,22 @@ void HELPER(vffinth_d_w)(void *vd, void *vj,
void HELPER(vffint_s_l)(void *vd, void *vj, void *vk,
CPULoongArchState *env, uint32_t desc)
{
- int i;
- VReg temp;
+ int i, j, ofs;
+ VReg temp = {};
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);
+ ofs = LSX_LEN / 64;
vec_clear_cause(env);
- for (i = 0; i < 2; i++) {
- temp.W(i + 2) = int64_to_float32(Vj->D(i), &env->fp_status);
- temp.W(i) = int64_to_float32(Vk->D(i), &env->fp_status);
+ for (i = 0; i < oprsz / 16; i++) {
+ for (j = 0; j < ofs; j++) {
+ temp.W(j + ofs * (2 * i + 1)) = int64_to_float32(Vj->D(j + ofs * i),
+ &env->fp_status);
+ temp.W(j + ofs * 2 * i) = int64_to_float32(Vk->D(j + ofs * i),
+ &env->fp_status);
+ }
vec_update_fcsr0(env, GETPC());
}
*Vd = temp;
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index 87466ef669..3dc4a8b654 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -4480,6 +4480,12 @@ TRANS(vfcvtl_d_s, LSX, gen_vv_ptr, gen_helper_vfcvtl_d_s)
TRANS(vfcvth_d_s, LSX, gen_vv_ptr, gen_helper_vfcvth_d_s)
TRANS(vfcvt_h_s, LSX, gen_vvv_ptr, gen_helper_vfcvt_h_s)
TRANS(vfcvt_s_d, LSX, gen_vvv_ptr, gen_helper_vfcvt_s_d)
+TRANS(xvfcvtl_s_h, LASX, gen_xx_ptr, gen_helper_vfcvtl_s_h)
+TRANS(xvfcvth_s_h, LASX, gen_xx_ptr, gen_helper_vfcvth_s_h)
+TRANS(xvfcvtl_d_s, LASX, gen_xx_ptr, gen_helper_vfcvtl_d_s)
+TRANS(xvfcvth_d_s, LASX, gen_xx_ptr, gen_helper_vfcvth_d_s)
+TRANS(xvfcvt_h_s, LASX, gen_xxx_ptr, gen_helper_vfcvt_h_s)
+TRANS(xvfcvt_s_d, LASX, gen_xxx_ptr, gen_helper_vfcvt_s_d)
TRANS(vfrintrne_s, LSX, gen_vv_ptr, gen_helper_vfrintrne_s)
TRANS(vfrintrne_d, LSX, gen_vv_ptr, gen_helper_vfrintrne_d)
@@ -4491,6 +4497,16 @@ TRANS(vfrintrm_s, LSX, gen_vv_ptr, gen_helper_vfrintrm_s)
TRANS(vfrintrm_d, LSX, gen_vv_ptr, gen_helper_vfrintrm_d)
TRANS(vfrint_s, LSX, gen_vv_ptr, gen_helper_vfrint_s)
TRANS(vfrint_d, LSX, gen_vv_ptr, gen_helper_vfrint_d)
+TRANS(xvfrintrne_s, LASX, gen_xx_ptr, gen_helper_vfrintrne_s)
+TRANS(xvfrintrne_d, LASX, gen_xx_ptr, gen_helper_vfrintrne_d)
+TRANS(xvfrintrz_s, LASX, gen_xx_ptr, gen_helper_vfrintrz_s)
+TRANS(xvfrintrz_d, LASX, gen_xx_ptr, gen_helper_vfrintrz_d)
+TRANS(xvfrintrp_s, LASX, gen_xx_ptr, gen_helper_vfrintrp_s)
+TRANS(xvfrintrp_d, LASX, gen_xx_ptr, gen_helper_vfrintrp_d)
+TRANS(xvfrintrm_s, LASX, gen_xx_ptr, gen_helper_vfrintrm_s)
+TRANS(xvfrintrm_d, LASX, gen_xx_ptr, gen_helper_vfrintrm_d)
+TRANS(xvfrint_s, LASX, gen_xx_ptr, gen_helper_vfrint_s)
+TRANS(xvfrint_d, LASX, gen_xx_ptr, gen_helper_vfrint_d)
TRANS(vftintrne_w_s, LSX, gen_vv_ptr, gen_helper_vftintrne_w_s)
TRANS(vftintrne_l_d, LSX, gen_vv_ptr, gen_helper_vftintrne_l_d)
@@ -4521,6 +4537,35 @@ TRANS(vftintrml_l_s, LSX, gen_vv_ptr, gen_helper_vftintrml_l_s)
TRANS(vftintrmh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrmh_l_s)
TRANS(vftintl_l_s, LSX, gen_vv_ptr, gen_helper_vftintl_l_s)
TRANS(vftinth_l_s, LSX, gen_vv_ptr, gen_helper_vftinth_l_s)
+TRANS(xvftintrne_w_s, LASX, gen_xx_ptr, gen_helper_vftintrne_w_s)
+TRANS(xvftintrne_l_d, LASX, gen_xx_ptr, gen_helper_vftintrne_l_d)
+TRANS(xvftintrz_w_s, LASX, gen_xx_ptr, gen_helper_vftintrz_w_s)
+TRANS(xvftintrz_l_d, LASX, gen_xx_ptr, gen_helper_vftintrz_l_d)
+TRANS(xvftintrp_w_s, LASX, gen_xx_ptr, gen_helper_vftintrp_w_s)
+TRANS(xvftintrp_l_d, LASX, gen_xx_ptr, gen_helper_vftintrp_l_d)
+TRANS(xvftintrm_w_s, LASX, gen_xx_ptr, gen_helper_vftintrm_w_s)
+TRANS(xvftintrm_l_d, LASX, gen_xx_ptr, gen_helper_vftintrm_l_d)
+TRANS(xvftint_w_s, LASX, gen_xx_ptr, gen_helper_vftint_w_s)
+TRANS(xvftint_l_d, LASX, gen_xx_ptr, gen_helper_vftint_l_d)
+TRANS(xvftintrz_wu_s, LASX, gen_xx_ptr, gen_helper_vftintrz_wu_s)
+TRANS(xvftintrz_lu_d, LASX, gen_xx_ptr, gen_helper_vftintrz_lu_d)
+TRANS(xvftint_wu_s, LASX, gen_xx_ptr, gen_helper_vftint_wu_s)
+TRANS(xvftint_lu_d, LASX, gen_xx_ptr, gen_helper_vftint_lu_d)
+TRANS(xvftintrne_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrne_w_d)
+TRANS(xvftintrz_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrz_w_d)
+TRANS(xvftintrp_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrp_w_d)
+TRANS(xvftintrm_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrm_w_d)
+TRANS(xvftint_w_d, LASX, gen_xxx_ptr, gen_helper_vftint_w_d)
+TRANS(xvftintrnel_l_s, LASX, gen_xx_ptr, gen_helper_vftintrnel_l_s)
+TRANS(xvftintrneh_l_s, LASX, gen_xx_ptr, gen_helper_vftintrneh_l_s)
+TRANS(xvftintrzl_l_s, LASX, gen_xx_ptr, gen_helper_vftintrzl_l_s)
+TRANS(xvftintrzh_l_s, LASX, gen_xx_ptr, gen_helper_vftintrzh_l_s)
+TRANS(xvftintrpl_l_s, LASX, gen_xx_ptr, gen_helper_vftintrpl_l_s)
+TRANS(xvftintrph_l_s, LASX, gen_xx_ptr, gen_helper_vftintrph_l_s)
+TRANS(xvftintrml_l_s, LASX, gen_xx_ptr, gen_helper_vftintrml_l_s)
+TRANS(xvftintrmh_l_s, LASX, gen_xx_ptr, gen_helper_vftintrmh_l_s)
+TRANS(xvftintl_l_s, LASX, gen_xx_ptr, gen_helper_vftintl_l_s)
+TRANS(xvftinth_l_s, LASX, gen_xx_ptr, gen_helper_vftinth_l_s)
TRANS(vffint_s_w, LSX, gen_vv_ptr, gen_helper_vffint_s_w)
TRANS(vffint_d_l, LSX, gen_vv_ptr, gen_helper_vffint_d_l)
@@ -4529,6 +4574,13 @@ TRANS(vffint_d_lu, LSX, gen_vv_ptr, gen_helper_vffint_d_lu)
TRANS(vffintl_d_w, LSX, gen_vv_ptr, gen_helper_vffintl_d_w)
TRANS(vffinth_d_w, LSX, gen_vv_ptr, gen_helper_vffinth_d_w)
TRANS(vffint_s_l, LSX, gen_vvv_ptr, gen_helper_vffint_s_l)
+TRANS(xvffint_s_w, LASX, gen_xx_ptr, gen_helper_vffint_s_w)
+TRANS(xvffint_d_l, LASX, gen_xx_ptr, gen_helper_vffint_d_l)
+TRANS(xvffint_s_wu, LASX, gen_xx_ptr, gen_helper_vffint_s_wu)
+TRANS(xvffint_d_lu, LASX, gen_xx_ptr, gen_helper_vffint_d_lu)
+TRANS(xvffintl_d_w, LASX, gen_xx_ptr, gen_helper_vffintl_d_w)
+TRANS(xvffinth_d_w, LASX, gen_xx_ptr, gen_helper_vffinth_d_w)
+TRANS(xvffint_s_l, LASX, gen_xxx_ptr, gen_helper_vffint_s_l)
static bool do_cmp(DisasContext *ctx, arg_vvv *a, MemOp mop, TCGCond cond)
{
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 48/57] target/loongarch: Implement xvseq xvsle xvslt
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (46 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 47/57] target/loongarch: Implement LASX fpu fcvt instructions Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-07 8:31 ` [PATCH RESEND v5 49/57] target/loongarch: Implement xvfcmp Song Gao
` (8 subsequent siblings)
56 siblings, 0 replies; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVSEQ[I].{B/H/W/D};
- XVSLE[I].{B/H/W/D}[U];
- XVSLT[I].{B/H/W/D}[U].
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/insns.decode | 43 ++++
target/loongarch/disas.c | 43 ++++
target/loongarch/vec_helper.c | 23 +-
target/loongarch/insn_trans/trans_vec.c.inc | 271 ++++++++++++++------
4 files changed, 285 insertions(+), 95 deletions(-)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index ed4f82e7fe..82c26a318b 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1915,6 +1915,49 @@ xvffintl_d_w 0111 01101001 11100 00100 ..... ..... @vv
xvffinth_d_w 0111 01101001 11100 00101 ..... ..... @vv
xvffint_s_l 0111 01010100 10000 ..... ..... ..... @vvv
+xvseq_b 0111 01000000 00000 ..... ..... ..... @vvv
+xvseq_h 0111 01000000 00001 ..... ..... ..... @vvv
+xvseq_w 0111 01000000 00010 ..... ..... ..... @vvv
+xvseq_d 0111 01000000 00011 ..... ..... ..... @vvv
+xvseqi_b 0111 01101000 00000 ..... ..... ..... @vv_i5
+xvseqi_h 0111 01101000 00001 ..... ..... ..... @vv_i5
+xvseqi_w 0111 01101000 00010 ..... ..... ..... @vv_i5
+xvseqi_d 0111 01101000 00011 ..... ..... ..... @vv_i5
+
+xvsle_b 0111 01000000 00100 ..... ..... ..... @vvv
+xvsle_h 0111 01000000 00101 ..... ..... ..... @vvv
+xvsle_w 0111 01000000 00110 ..... ..... ..... @vvv
+xvsle_d 0111 01000000 00111 ..... ..... ..... @vvv
+xvslei_b 0111 01101000 00100 ..... ..... ..... @vv_i5
+xvslei_h 0111 01101000 00101 ..... ..... ..... @vv_i5
+xvslei_w 0111 01101000 00110 ..... ..... ..... @vv_i5
+xvslei_d 0111 01101000 00111 ..... ..... ..... @vv_i5
+xvsle_bu 0111 01000000 01000 ..... ..... ..... @vvv
+xvsle_hu 0111 01000000 01001 ..... ..... ..... @vvv
+xvsle_wu 0111 01000000 01010 ..... ..... ..... @vvv
+xvsle_du 0111 01000000 01011 ..... ..... ..... @vvv
+xvslei_bu 0111 01101000 01000 ..... ..... ..... @vv_ui5
+xvslei_hu 0111 01101000 01001 ..... ..... ..... @vv_ui5
+xvslei_wu 0111 01101000 01010 ..... ..... ..... @vv_ui5
+xvslei_du 0111 01101000 01011 ..... ..... ..... @vv_ui5
+
+xvslt_b 0111 01000000 01100 ..... ..... ..... @vvv
+xvslt_h 0111 01000000 01101 ..... ..... ..... @vvv
+xvslt_w 0111 01000000 01110 ..... ..... ..... @vvv
+xvslt_d 0111 01000000 01111 ..... ..... ..... @vvv
+xvslti_b 0111 01101000 01100 ..... ..... ..... @vv_i5
+xvslti_h 0111 01101000 01101 ..... ..... ..... @vv_i5
+xvslti_w 0111 01101000 01110 ..... ..... ..... @vv_i5
+xvslti_d 0111 01101000 01111 ..... ..... ..... @vv_i5
+xvslt_bu 0111 01000000 10000 ..... ..... ..... @vvv
+xvslt_hu 0111 01000000 10001 ..... ..... ..... @vvv
+xvslt_wu 0111 01000000 10010 ..... ..... ..... @vvv
+xvslt_du 0111 01000000 10011 ..... ..... ..... @vvv
+xvslti_bu 0111 01101000 10000 ..... ..... ..... @vv_ui5
+xvslti_hu 0111 01101000 10001 ..... ..... ..... @vv_ui5
+xvslti_wu 0111 01101000 10010 ..... ..... ..... @vv_ui5
+xvslti_du 0111 01101000 10011 ..... ..... ..... @vv_ui5
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index f1a1321d0d..48e0b559f2 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -2342,6 +2342,49 @@ INSN_LASX(xvffintl_d_w, vv)
INSN_LASX(xvffinth_d_w, vv)
INSN_LASX(xvffint_s_l, vvv)
+INSN_LASX(xvseq_b, vvv)
+INSN_LASX(xvseq_h, vvv)
+INSN_LASX(xvseq_w, vvv)
+INSN_LASX(xvseq_d, vvv)
+INSN_LASX(xvseqi_b, vv_i)
+INSN_LASX(xvseqi_h, vv_i)
+INSN_LASX(xvseqi_w, vv_i)
+INSN_LASX(xvseqi_d, vv_i)
+
+INSN_LASX(xvsle_b, vvv)
+INSN_LASX(xvsle_h, vvv)
+INSN_LASX(xvsle_w, vvv)
+INSN_LASX(xvsle_d, vvv)
+INSN_LASX(xvslei_b, vv_i)
+INSN_LASX(xvslei_h, vv_i)
+INSN_LASX(xvslei_w, vv_i)
+INSN_LASX(xvslei_d, vv_i)
+INSN_LASX(xvsle_bu, vvv)
+INSN_LASX(xvsle_hu, vvv)
+INSN_LASX(xvsle_wu, vvv)
+INSN_LASX(xvsle_du, vvv)
+INSN_LASX(xvslei_bu, vv_i)
+INSN_LASX(xvslei_hu, vv_i)
+INSN_LASX(xvslei_wu, vv_i)
+INSN_LASX(xvslei_du, vv_i)
+
+INSN_LASX(xvslt_b, vvv)
+INSN_LASX(xvslt_h, vvv)
+INSN_LASX(xvslt_w, vvv)
+INSN_LASX(xvslt_d, vvv)
+INSN_LASX(xvslti_b, vv_i)
+INSN_LASX(xvslti_h, vv_i)
+INSN_LASX(xvslti_w, vv_i)
+INSN_LASX(xvslti_d, vv_i)
+INSN_LASX(xvslt_bu, vvv)
+INSN_LASX(xvslt_hu, vvv)
+INSN_LASX(xvslt_wu, vvv)
+INSN_LASX(xvslt_du, vvv)
+INSN_LASX(xvslti_bu, vv_i)
+INSN_LASX(xvslti_hu, vv_i)
+INSN_LASX(xvslti_wu, vv_i)
+INSN_LASX(xvslti_du, vv_i)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index 9dcec7ad40..2030fbf29b 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -3057,17 +3057,18 @@ void HELPER(vffint_s_l)(void *vd, void *vj, void *vk,
#define VSLE(a, b) (a <= b ? -1 : 0)
#define VSLT(a, b) (a < b ? -1 : 0)
-#define VCMPI(NAME, BIT, E, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t v) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- typedef __typeof(Vd->E(0)) TD; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- Vd->E(i) = DO_OP(Vj->E(i), (TD)imm); \
- } \
+#define VCMPI(NAME, BIT, E, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ typedef __typeof(Vd->E(0)) TD; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->E(i) = DO_OP(Vj->E(i), (TD)imm); \
+ } \
}
VCMPI(vseqi_b, 8, B, VSEQ)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index 3dc4a8b654..9b1ddd7620 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -4582,22 +4582,39 @@ TRANS(xvffintl_d_w, LASX, gen_xx_ptr, gen_helper_vffintl_d_w)
TRANS(xvffinth_d_w, LASX, gen_xx_ptr, gen_helper_vffinth_d_w)
TRANS(xvffint_s_l, LASX, gen_xxx_ptr, gen_helper_vffint_s_l)
-static bool do_cmp(DisasContext *ctx, arg_vvv *a, MemOp mop, TCGCond cond)
+static bool do_cmp_vl(DisasContext *ctx, arg_vvv *a,
+ uint32_t oprsz, MemOp mop, TCGCond cond)
{
uint32_t vd_ofs, vj_ofs, vk_ofs;
- if (!check_vec(ctx, 16)) {
- return true;
- }
-
vd_ofs = vec_full_offset(a->vd);
vj_ofs = vec_full_offset(a->vj);
vk_ofs = vec_full_offset(a->vk);
- tcg_gen_gvec_cmp(cond, mop, vd_ofs, vj_ofs, vk_ofs, 16, ctx->vl/8);
+ tcg_gen_gvec_cmp(cond, mop, vd_ofs, vj_ofs, vk_ofs, oprsz, ctx->vl / 8);
return true;
}
+static bool do_cmp(DisasContext *ctx, arg_vvv *a,
+ MemOp mop, TCGCond cond)
+{
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
+ return do_cmp_vl(ctx, a, 16, mop, cond);
+}
+
+static bool do_xcmp(DisasContext *ctx, arg_vvv *a,
+ MemOp mop, TCGCond cond)
+{
+ if (!check_vec(ctx, 32)) {
+ return true;
+ }
+
+ return do_cmp_vl(ctx, a, 32, mop, cond);
+}
+
static void do_cmpi_vec(TCGCond cond,
unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
{
@@ -4629,107 +4646,153 @@ static void gen_vslti_u_vec(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
do_cmpi_vec(TCG_COND_LTU, vece, t, a, imm);
}
+#define DO_CMPI_S_VL(NAME) \
+static bool do_## NAME ##_s_vl(DisasContext *ctx, arg_vv_i *a, \
+ uint32_t oprsz, MemOp mop) \
+{ \
+ uint32_t vd_ofs, vj_ofs; \
+ \
+ static const TCGOpcode vecop_list[] = { \
+ INDEX_op_cmp_vec, 0 \
+ }; \
+ static const GVecGen2i op[4] = { \
+ { \
+ .fniv = gen_## NAME ##_s_vec, \
+ .fnoi = gen_helper_## NAME ##_b, \
+ .opt_opc = vecop_list, \
+ .vece = MO_8 \
+ }, \
+ { \
+ .fniv = gen_## NAME ##_s_vec, \
+ .fnoi = gen_helper_## NAME ##_h, \
+ .opt_opc = vecop_list, \
+ .vece = MO_16 \
+ }, \
+ { \
+ .fniv = gen_## NAME ##_s_vec, \
+ .fnoi = gen_helper_## NAME ##_w, \
+ .opt_opc = vecop_list, \
+ .vece = MO_32 \
+ }, \
+ { \
+ .fniv = gen_## NAME ##_s_vec, \
+ .fnoi = gen_helper_## NAME ##_d, \
+ .opt_opc = vecop_list, \
+ .vece = MO_64 \
+ } \
+ }; \
+ \
+ vd_ofs = vec_full_offset(a->vd); \
+ vj_ofs = vec_full_offset(a->vj); \
+ \
+ tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, ctx->vl / 8, a->imm, &op[mop]); \
+ \
+ return true; \
+}
+
+DO_CMPI_S_VL(vseqi)
+DO_CMPI_S_VL(vslei)
+DO_CMPI_S_VL(vslti)
+
#define DO_CMPI_S(NAME) \
static bool do_## NAME ##_s(DisasContext *ctx, arg_vv_i *a, MemOp mop) \
{ \
- uint32_t vd_ofs, vj_ofs; \
- \
if (!check_vec(ctx, 16)) { \
return true; \
} \
- \
- static const TCGOpcode vecop_list[] = { \
- INDEX_op_cmp_vec, 0 \
- }; \
- static const GVecGen2i op[4] = { \
- { \
- .fniv = gen_## NAME ##_s_vec, \
- .fnoi = gen_helper_## NAME ##_b, \
- .opt_opc = vecop_list, \
- .vece = MO_8 \
- }, \
- { \
- .fniv = gen_## NAME ##_s_vec, \
- .fnoi = gen_helper_## NAME ##_h, \
- .opt_opc = vecop_list, \
- .vece = MO_16 \
- }, \
- { \
- .fniv = gen_## NAME ##_s_vec, \
- .fnoi = gen_helper_## NAME ##_w, \
- .opt_opc = vecop_list, \
- .vece = MO_32 \
- }, \
- { \
- .fniv = gen_## NAME ##_s_vec, \
- .fnoi = gen_helper_## NAME ##_d, \
- .opt_opc = vecop_list, \
- .vece = MO_64 \
- } \
- }; \
- \
- vd_ofs = vec_full_offset(a->vd); \
- vj_ofs = vec_full_offset(a->vj); \
- \
- tcg_gen_gvec_2i(vd_ofs, vj_ofs, 16, ctx->vl/8, a->imm, &op[mop]); \
- \
- return true; \
+ return do_## NAME ##_s_vl(ctx, a, 16, mop); \
}
DO_CMPI_S(vseqi)
DO_CMPI_S(vslei)
DO_CMPI_S(vslti)
+#define DO_XCMPI_S(NAME) \
+static bool do_x## NAME ##_s(DisasContext *ctx, arg_vv_i *a, MemOp mop) \
+{ \
+ if (!check_vec(ctx, 32)) { \
+ return true; \
+ } \
+ \
+ return do_## NAME ##_s_vl(ctx, a, 32, mop); \
+}
+
+DO_XCMPI_S(vseqi)
+DO_XCMPI_S(vslei)
+DO_XCMPI_S(vslti)
+
+#define DO_CMPI_U_VL(NAME) \
+static bool do_## NAME ##_u_vl(DisasContext *ctx, arg_vv_i *a, \
+ uint32_t oprsz, MemOp mop) \
+{ \
+ uint32_t vd_ofs, vj_ofs; \
+ \
+ static const TCGOpcode vecop_list[] = { \
+ INDEX_op_cmp_vec, 0 \
+ }; \
+ static const GVecGen2i op[4] = { \
+ { \
+ .fniv = gen_## NAME ##_u_vec, \
+ .fnoi = gen_helper_## NAME ##_bu, \
+ .opt_opc = vecop_list, \
+ .vece = MO_8 \
+ }, \
+ { \
+ .fniv = gen_## NAME ##_u_vec, \
+ .fnoi = gen_helper_## NAME ##_hu, \
+ .opt_opc = vecop_list, \
+ .vece = MO_16 \
+ }, \
+ { \
+ .fniv = gen_## NAME ##_u_vec, \
+ .fnoi = gen_helper_## NAME ##_wu, \
+ .opt_opc = vecop_list, \
+ .vece = MO_32 \
+ }, \
+ { \
+ .fniv = gen_## NAME ##_u_vec, \
+ .fnoi = gen_helper_## NAME ##_du, \
+ .opt_opc = vecop_list, \
+ .vece = MO_64 \
+ } \
+ }; \
+ \
+ vd_ofs = vec_full_offset(a->vd); \
+ vj_ofs = vec_full_offset(a->vj); \
+ \
+ tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, ctx->vl / 8, a->imm, &op[mop]); \
+ \
+ return true; \
+}
+
+DO_CMPI_U_VL(vslei)
+DO_CMPI_U_VL(vslti)
+
#define DO_CMPI_U(NAME) \
static bool do_## NAME ##_u(DisasContext *ctx, arg_vv_i *a, MemOp mop) \
{ \
- uint32_t vd_ofs, vj_ofs; \
- \
if (!check_vec(ctx, 16)) { \
return true; \
} \
- \
- static const TCGOpcode vecop_list[] = { \
- INDEX_op_cmp_vec, 0 \
- }; \
- static const GVecGen2i op[4] = { \
- { \
- .fniv = gen_## NAME ##_u_vec, \
- .fnoi = gen_helper_## NAME ##_bu, \
- .opt_opc = vecop_list, \
- .vece = MO_8 \
- }, \
- { \
- .fniv = gen_## NAME ##_u_vec, \
- .fnoi = gen_helper_## NAME ##_hu, \
- .opt_opc = vecop_list, \
- .vece = MO_16 \
- }, \
- { \
- .fniv = gen_## NAME ##_u_vec, \
- .fnoi = gen_helper_## NAME ##_wu, \
- .opt_opc = vecop_list, \
- .vece = MO_32 \
- }, \
- { \
- .fniv = gen_## NAME ##_u_vec, \
- .fnoi = gen_helper_## NAME ##_du, \
- .opt_opc = vecop_list, \
- .vece = MO_64 \
- } \
- }; \
- \
- vd_ofs = vec_full_offset(a->vd); \
- vj_ofs = vec_full_offset(a->vj); \
- \
- tcg_gen_gvec_2i(vd_ofs, vj_ofs, 16, ctx->vl/8, a->imm, &op[mop]); \
- \
- return true; \
+ return do_## NAME ##_u_vl(ctx, a, 16, mop); \
}
DO_CMPI_U(vslei)
DO_CMPI_U(vslti)
+#define DO_XCMPI_U(NAME) \
+static bool do_x## NAME ##_u(DisasContext *ctx, arg_vv_i *a, MemOp mop) \
+{ \
+ if (!check_vec(ctx, 32)) { \
+ return true; \
+ } \
+ \
+ return do_## NAME ##_u_vl(ctx, a, 32, mop); \
+}
+
+DO_XCMPI_U(vslei)
+DO_XCMPI_U(vslti)
+
TRANS(vseq_b, LSX, do_cmp, MO_8, TCG_COND_EQ)
TRANS(vseq_h, LSX, do_cmp, MO_16, TCG_COND_EQ)
TRANS(vseq_w, LSX, do_cmp, MO_32, TCG_COND_EQ)
@@ -4738,6 +4801,14 @@ TRANS(vseqi_b, LSX, do_vseqi_s, MO_8)
TRANS(vseqi_h, LSX, do_vseqi_s, MO_16)
TRANS(vseqi_w, LSX, do_vseqi_s, MO_32)
TRANS(vseqi_d, LSX, do_vseqi_s, MO_64)
+TRANS(xvseq_b, LASX, do_xcmp, MO_8, TCG_COND_EQ)
+TRANS(xvseq_h, LASX, do_xcmp, MO_16, TCG_COND_EQ)
+TRANS(xvseq_w, LASX, do_xcmp, MO_32, TCG_COND_EQ)
+TRANS(xvseq_d, LASX, do_xcmp, MO_64, TCG_COND_EQ)
+TRANS(xvseqi_b, LASX, do_xvseqi_s, MO_8)
+TRANS(xvseqi_h, LASX, do_xvseqi_s, MO_16)
+TRANS(xvseqi_w, LASX, do_xvseqi_s, MO_32)
+TRANS(xvseqi_d, LASX, do_xvseqi_s, MO_64)
TRANS(vsle_b, LSX, do_cmp, MO_8, TCG_COND_LE)
TRANS(vsle_h, LSX, do_cmp, MO_16, TCG_COND_LE)
@@ -4755,6 +4826,22 @@ TRANS(vslei_bu, LSX, do_vslei_u, MO_8)
TRANS(vslei_hu, LSX, do_vslei_u, MO_16)
TRANS(vslei_wu, LSX, do_vslei_u, MO_32)
TRANS(vslei_du, LSX, do_vslei_u, MO_64)
+TRANS(xvsle_b, LASX, do_xcmp, MO_8, TCG_COND_LE)
+TRANS(xvsle_h, LASX, do_xcmp, MO_16, TCG_COND_LE)
+TRANS(xvsle_w, LASX, do_xcmp, MO_32, TCG_COND_LE)
+TRANS(xvsle_d, LASX, do_xcmp, MO_64, TCG_COND_LE)
+TRANS(xvslei_b, LASX, do_xvslei_s, MO_8)
+TRANS(xvslei_h, LASX, do_xvslei_s, MO_16)
+TRANS(xvslei_w, LASX, do_xvslei_s, MO_32)
+TRANS(xvslei_d, LASX, do_xvslei_s, MO_64)
+TRANS(xvsle_bu, LASX, do_xcmp, MO_8, TCG_COND_LEU)
+TRANS(xvsle_hu, LASX, do_xcmp, MO_16, TCG_COND_LEU)
+TRANS(xvsle_wu, LASX, do_xcmp, MO_32, TCG_COND_LEU)
+TRANS(xvsle_du, LASX, do_xcmp, MO_64, TCG_COND_LEU)
+TRANS(xvslei_bu, LASX, do_xvslei_u, MO_8)
+TRANS(xvslei_hu, LASX, do_xvslei_u, MO_16)
+TRANS(xvslei_wu, LASX, do_xvslei_u, MO_32)
+TRANS(xvslei_du, LASX, do_xvslei_u, MO_64)
TRANS(vslt_b, LSX, do_cmp, MO_8, TCG_COND_LT)
TRANS(vslt_h, LSX, do_cmp, MO_16, TCG_COND_LT)
@@ -4772,6 +4859,22 @@ TRANS(vslti_bu, LSX, do_vslti_u, MO_8)
TRANS(vslti_hu, LSX, do_vslti_u, MO_16)
TRANS(vslti_wu, LSX, do_vslti_u, MO_32)
TRANS(vslti_du, LSX, do_vslti_u, MO_64)
+TRANS(xvslt_b, LASX, do_xcmp, MO_8, TCG_COND_LT)
+TRANS(xvslt_h, LASX, do_xcmp, MO_16, TCG_COND_LT)
+TRANS(xvslt_w, LASX, do_xcmp, MO_32, TCG_COND_LT)
+TRANS(xvslt_d, LASX, do_xcmp, MO_64, TCG_COND_LT)
+TRANS(xvslti_b, LASX, do_xvslti_s, MO_8)
+TRANS(xvslti_h, LASX, do_xvslti_s, MO_16)
+TRANS(xvslti_w, LASX, do_xvslti_s, MO_32)
+TRANS(xvslti_d, LASX, do_xvslti_s, MO_64)
+TRANS(xvslt_bu, LASX, do_xcmp, MO_8, TCG_COND_LTU)
+TRANS(xvslt_hu, LASX, do_xcmp, MO_16, TCG_COND_LTU)
+TRANS(xvslt_wu, LASX, do_xcmp, MO_32, TCG_COND_LTU)
+TRANS(xvslt_du, LASX, do_xcmp, MO_64, TCG_COND_LTU)
+TRANS(xvslti_bu, LASX, do_xvslti_u, MO_8)
+TRANS(xvslti_hu, LASX, do_xvslti_u, MO_16)
+TRANS(xvslti_wu, LASX, do_xvslti_u, MO_32)
+TRANS(xvslti_du, LASX, do_xvslti_u, MO_64)
static bool trans_vfcmp_cond_s(DisasContext *ctx, arg_vvv_fcond *a)
{
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 49/57] target/loongarch: Implement xvfcmp
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (47 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 48/57] target/loongarch: Implement xvseq xvsle xvslt Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-07 8:31 ` [PATCH RESEND v5 50/57] target/loongarch: Implement xvbitsel xvset Song Gao
` (7 subsequent siblings)
56 siblings, 0 replies; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVFCMP.cond.{S/D}.
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/helper.h | 8 +-
target/loongarch/insns.decode | 3 +
target/loongarch/disas.c | 93 +++++++++++++++++++++
target/loongarch/vec_helper.c | 4 +-
target/loongarch/insn_trans/trans_vec.c.inc | 31 ++++---
5 files changed, 117 insertions(+), 22 deletions(-)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index e9c5412267..b54ce68077 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -652,10 +652,10 @@ DEF_HELPER_FLAGS_4(vslti_hu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(vslti_wu, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(vslti_du, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
-DEF_HELPER_5(vfcmp_c_s, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(vfcmp_s_s, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(vfcmp_c_d, void, env, i32, i32, i32, i32)
-DEF_HELPER_5(vfcmp_s_d, void, env, i32, i32, i32, i32)
+DEF_HELPER_6(vfcmp_c_s, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vfcmp_s_s, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vfcmp_c_d, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(vfcmp_s_d, void, env, i32, i32, i32, i32, i32)
DEF_HELPER_FLAGS_4(vbitseli_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 82c26a318b..0d46bd5e5e 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1958,6 +1958,9 @@ xvslti_hu 0111 01101000 10001 ..... ..... ..... @vv_ui5
xvslti_wu 0111 01101000 10010 ..... ..... ..... @vv_ui5
xvslti_du 0111 01101000 10011 ..... ..... ..... @vv_ui5
+xvfcmp_cond_s 0000 11001001 ..... ..... ..... ..... @vvv_fcond
+xvfcmp_cond_d 0000 11001010 ..... ..... ..... ..... @vvv_fcond
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 48e0b559f2..4ab51b712e 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -2385,6 +2385,99 @@ INSN_LASX(xvslti_hu, vv_i)
INSN_LASX(xvslti_wu, vv_i)
INSN_LASX(xvslti_du, vv_i)
+#define output_xvfcmp(C, PREFIX, SUFFIX) \
+{ \
+ (C)->info->fprintf_func((C)->info->stream, "%08x %s%s\tx%d, x%d, x%d", \
+ (C)->insn, PREFIX, SUFFIX, a->vd, \
+ a->vj, a->vk); \
+}
+static bool output_xxx_fcond(DisasContext *ctx, arg_vvv_fcond * a,
+ const char *suffix)
+{
+ bool ret = true;
+ switch (a->fcond) {
+ case 0x0:
+ output_xvfcmp(ctx, "xvfcmp_caf_", suffix);
+ break;
+ case 0x1:
+ output_xvfcmp(ctx, "xvfcmp_saf_", suffix);
+ break;
+ case 0x2:
+ output_xvfcmp(ctx, "xvfcmp_clt_", suffix);
+ break;
+ case 0x3:
+ output_xvfcmp(ctx, "xvfcmp_slt_", suffix);
+ break;
+ case 0x4:
+ output_xvfcmp(ctx, "xvfcmp_ceq_", suffix);
+ break;
+ case 0x5:
+ output_xvfcmp(ctx, "xvfcmp_seq_", suffix);
+ break;
+ case 0x6:
+ output_xvfcmp(ctx, "xvfcmp_cle_", suffix);
+ break;
+ case 0x7:
+ output_xvfcmp(ctx, "xvfcmp_sle_", suffix);
+ break;
+ case 0x8:
+ output_xvfcmp(ctx, "xvfcmp_cun_", suffix);
+ break;
+ case 0x9:
+ output_xvfcmp(ctx, "xvfcmp_sun_", suffix);
+ break;
+ case 0xA:
+ output_xvfcmp(ctx, "xvfcmp_cult_", suffix);
+ break;
+ case 0xB:
+ output_xvfcmp(ctx, "xvfcmp_sult_", suffix);
+ break;
+ case 0xC:
+ output_xvfcmp(ctx, "xvfcmp_cueq_", suffix);
+ break;
+ case 0xD:
+ output_xvfcmp(ctx, "xvfcmp_sueq_", suffix);
+ break;
+ case 0xE:
+ output_xvfcmp(ctx, "xvfcmp_cule_", suffix);
+ break;
+ case 0xF:
+ output_xvfcmp(ctx, "xvfcmp_sule_", suffix);
+ break;
+ case 0x10:
+ output_xvfcmp(ctx, "xvfcmp_cne_", suffix);
+ break;
+ case 0x11:
+ output_xvfcmp(ctx, "xvfcmp_sne_", suffix);
+ break;
+ case 0x14:
+ output_xvfcmp(ctx, "xvfcmp_cor_", suffix);
+ break;
+ case 0x15:
+ output_xvfcmp(ctx, "xvfcmp_sor_", suffix);
+ break;
+ case 0x18:
+ output_xvfcmp(ctx, "xvfcmp_cune_", suffix);
+ break;
+ case 0x19:
+ output_xvfcmp(ctx, "xvfcmp_sune_", suffix);
+ break;
+ default:
+ ret = false;
+ }
+ return ret;
+}
+
+#define LASX_FCMP_INSN(suffix) \
+static bool trans_xvfcmp_cond_##suffix(DisasContext *ctx, \
+ arg_vvv_fcond * a) \
+{ \
+ return output_xxx_fcond(ctx, a, #suffix); \
+}
+
+LASX_FCMP_INSN(s)
+LASX_FCMP_INSN(d)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index 2030fbf29b..675d0167f8 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -3122,7 +3122,7 @@ static uint64_t vfcmp_common(CPULoongArchState *env,
}
#define VFCMP(NAME, BIT, E, FN) \
-void HELPER(NAME)(CPULoongArchState *env, \
+void HELPER(NAME)(CPULoongArchState *env, uint32_t oprsz, \
uint32_t vd, uint32_t vj, uint32_t vk, uint32_t flags) \
{ \
int i; \
@@ -3132,7 +3132,7 @@ void HELPER(NAME)(CPULoongArchState *env, \
VReg *Vk = &(env->fpr[vk].vreg); \
\
vec_clear_cause(env); \
- for (i = 0; i < LSX_LEN/BIT ; i++) { \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
FloatRelation cmp; \
cmp = FN(Vj->E(i), Vk->E(i), &env->fp_status); \
t.E(i) = vfcmp_common(env, cmp, flags); \
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index 9b1ddd7620..dbcd6a4127 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -4876,52 +4876,51 @@ TRANS(xvslti_hu, LASX, do_xvslti_u, MO_16)
TRANS(xvslti_wu, LASX, do_xvslti_u, MO_32)
TRANS(xvslti_du, LASX, do_xvslti_u, MO_64)
-static bool trans_vfcmp_cond_s(DisasContext *ctx, arg_vvv_fcond *a)
+static bool do_vfcmp_cond_s(DisasContext *ctx, arg_vvv_fcond *a, uint32_t sz)
{
uint32_t flags;
- void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32);
+ void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32);
TCGv_i32 vd = tcg_constant_i32(a->vd);
TCGv_i32 vj = tcg_constant_i32(a->vj);
TCGv_i32 vk = tcg_constant_i32(a->vk);
+ TCGv_i32 oprsz = tcg_constant_i32(sz);
- if (!avail_LSX(ctx)) {
- return false;
- }
-
- if (!check_vec(ctx, 16)) {
+ if (!check_vec(ctx, sz)) {
return true;
}
fn = (a->fcond & 1 ? gen_helper_vfcmp_s_s : gen_helper_vfcmp_c_s);
flags = get_fcmp_flags(a->fcond >> 1);
- fn(cpu_env, vd, vj, vk, tcg_constant_i32(flags));
+ fn(cpu_env, oprsz, vd, vj, vk, tcg_constant_i32(flags));
return true;
}
-static bool trans_vfcmp_cond_d(DisasContext *ctx, arg_vvv_fcond *a)
+static bool do_vfcmp_cond_d(DisasContext *ctx, arg_vvv_fcond *a, uint32_t sz)
{
uint32_t flags;
- void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32);
+ void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32);
TCGv_i32 vd = tcg_constant_i32(a->vd);
TCGv_i32 vj = tcg_constant_i32(a->vj);
TCGv_i32 vk = tcg_constant_i32(a->vk);
+ TCGv_i32 oprsz = tcg_constant_i32(sz);
- if (!avail_LSX(ctx)) {
- return false;
- }
-
- if (!check_vec(ctx, 16)) {
+ if (!check_vec(ctx, sz)) {
return true;
}
fn = (a->fcond & 1 ? gen_helper_vfcmp_s_d : gen_helper_vfcmp_c_d);
flags = get_fcmp_flags(a->fcond >> 1);
- fn(cpu_env, vd, vj, vk, tcg_constant_i32(flags));
+ fn(cpu_env, oprsz, vd, vj, vk, tcg_constant_i32(flags));
return true;
}
+TRANS(vfcmp_cond_s, LSX, do_vfcmp_cond_s, 16)
+TRANS(vfcmp_cond_d, LSX, do_vfcmp_cond_d, 16)
+TRANS(xvfcmp_cond_s, LASX, do_vfcmp_cond_s, 32)
+TRANS(xvfcmp_cond_d, LASX, do_vfcmp_cond_d, 32)
+
static bool trans_vbitsel_v(DisasContext *ctx, arg_vvvv *a)
{
if (!avail_LSX(ctx)) {
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 50/57] target/loongarch: Implement xvbitsel xvset
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (48 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 49/57] target/loongarch: Implement xvfcmp Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-07 8:31 ` [PATCH RESEND v5 51/57] target/loongarch: Implement xvinsgr2vr xvpickve2gr Song Gao
` (6 subsequent siblings)
56 siblings, 0 replies; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVBITSEL.V;
- XVBITSELI.B;
- XVSET{EQZ/NEZ}.V;
- XVSETANYEQZ.{B/H/W/D};
- XVSETALLNEZ.{B/H/W/D}.
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/helper.h | 16 ++--
target/loongarch/insns.decode | 15 ++++
target/loongarch/disas.c | 19 ++++
target/loongarch/vec_helper.c | 42 +++++----
target/loongarch/insn_trans/trans_vec.c.inc | 97 +++++++++++++++++----
5 files changed, 148 insertions(+), 41 deletions(-)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index b54ce68077..85233586e3 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -659,14 +659,14 @@ DEF_HELPER_6(vfcmp_s_d, void, env, i32, i32, i32, i32, i32)
DEF_HELPER_FLAGS_4(vbitseli_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
-DEF_HELPER_3(vsetanyeqz_b, void, env, i32, i32)
-DEF_HELPER_3(vsetanyeqz_h, void, env, i32, i32)
-DEF_HELPER_3(vsetanyeqz_w, void, env, i32, i32)
-DEF_HELPER_3(vsetanyeqz_d, void, env, i32, i32)
-DEF_HELPER_3(vsetallnez_b, void, env, i32, i32)
-DEF_HELPER_3(vsetallnez_h, void, env, i32, i32)
-DEF_HELPER_3(vsetallnez_w, void, env, i32, i32)
-DEF_HELPER_3(vsetallnez_d, void, env, i32, i32)
+DEF_HELPER_4(vsetanyeqz_b, void, env, i32, i32, i32)
+DEF_HELPER_4(vsetanyeqz_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vsetanyeqz_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vsetanyeqz_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vsetallnez_b, void, env, i32, i32, i32)
+DEF_HELPER_4(vsetallnez_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vsetallnez_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vsetallnez_d, void, env, i32, i32, i32)
DEF_HELPER_FLAGS_4(vpackev_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vpackev_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 0d46bd5e5e..ad6751fdfb 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1961,6 +1961,21 @@ xvslti_du 0111 01101000 10011 ..... ..... ..... @vv_ui5
xvfcmp_cond_s 0000 11001001 ..... ..... ..... ..... @vvv_fcond
xvfcmp_cond_d 0000 11001010 ..... ..... ..... ..... @vvv_fcond
+xvbitsel_v 0000 11010010 ..... ..... ..... ..... @vvvv
+
+xvbitseli_b 0111 01111100 01 ........ ..... ..... @vv_ui8
+
+xvseteqz_v 0111 01101001 11001 00110 ..... 00 ... @cv
+xvsetnez_v 0111 01101001 11001 00111 ..... 00 ... @cv
+xvsetanyeqz_b 0111 01101001 11001 01000 ..... 00 ... @cv
+xvsetanyeqz_h 0111 01101001 11001 01001 ..... 00 ... @cv
+xvsetanyeqz_w 0111 01101001 11001 01010 ..... 00 ... @cv
+xvsetanyeqz_d 0111 01101001 11001 01011 ..... 00 ... @cv
+xvsetallnez_b 0111 01101001 11001 01100 ..... 00 ... @cv
+xvsetallnez_h 0111 01101001 11001 01101 ..... 00 ... @cv
+xvsetallnez_w 0111 01101001 11001 01110 ..... 00 ... @cv
+xvsetallnez_d 0111 01101001 11001 01111 ..... 00 ... @cv
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 4ab51b712e..abe113b150 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1703,6 +1703,11 @@ static bool trans_##insn(DisasContext *ctx, arg_##type * a) \
return true; \
}
+static void output_cv_x(DisasContext *ctx, arg_cv *a, const char *mnemonic)
+{
+ output(ctx, mnemonic, "fcc%d, x%d", a->cd, a->vj);
+}
+
static void output_v_i_x(DisasContext *ctx, arg_v_i *a, const char *mnemonic)
{
output(ctx, mnemonic, "x%d, 0x%x", a->vd, a->imm);
@@ -2478,6 +2483,20 @@ static bool trans_xvfcmp_cond_##suffix(DisasContext *ctx, \
LASX_FCMP_INSN(s)
LASX_FCMP_INSN(d)
+INSN_LASX(xvbitsel_v, vvvv)
+INSN_LASX(xvbitseli_b, vv_i)
+
+INSN_LASX(xvseteqz_v, cv)
+INSN_LASX(xvsetnez_v, cv)
+INSN_LASX(xvsetanyeqz_b, cv)
+INSN_LASX(xvsetanyeqz_h, cv)
+INSN_LASX(xvsetanyeqz_w, cv)
+INSN_LASX(xvsetanyeqz_d, cv)
+INSN_LASX(xvsetallnez_b, cv)
+INSN_LASX(xvsetallnez_h, cv)
+INSN_LASX(xvsetallnez_w, cv)
+INSN_LASX(xvsetallnez_d, cv)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index 675d0167f8..2f9acd4364 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -3146,13 +3146,13 @@ VFCMP(vfcmp_s_s, 32, UW, float32_compare)
VFCMP(vfcmp_c_d, 64, UD, float64_compare_quiet)
VFCMP(vfcmp_s_d, 64, UD, float64_compare)
-void HELPER(vbitseli_b)(void *vd, void *vj, uint64_t imm, uint32_t v)
+void HELPER(vbitseli_b)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
int i;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
- for (i = 0; i < 16; i++) {
+ for (i = 0; i < simd_oprsz(desc); i++) {
Vd->B(i) = (~Vd->B(i) & Vj->B(i)) | (Vd->B(i) & imm);
}
}
@@ -3160,7 +3160,7 @@ void HELPER(vbitseli_b)(void *vd, void *vj, uint64_t imm, uint32_t v)
/* Copy from target/arm/tcg/sve_helper.c */
static inline bool do_match2(uint64_t n, uint64_t m0, uint64_t m1, int esz)
{
- uint64_t bits = 8 << esz;
+ int bits = 8 << esz;
uint64_t ones = dup_const(esz, 1);
uint64_t signs = ones << (bits - 1);
uint64_t cmp0, cmp1;
@@ -3173,25 +3173,37 @@ static inline bool do_match2(uint64_t n, uint64_t m0, uint64_t m1, int esz)
return (cmp0 | cmp1) & signs;
}
-#define SETANYEQZ(NAME, MO) \
-void HELPER(NAME)(CPULoongArchState *env, uint32_t cd, uint32_t vj) \
-{ \
- VReg *Vj = &(env->fpr[vj].vreg); \
- \
- env->cf[cd & 0x7] = do_match2(0, Vj->D(0), Vj->D(1), MO); \
+#define SETANYEQZ(NAME, MO) \
+void HELPER(NAME)(CPULoongArchState *env, \
+ uint32_t oprsz, uint32_t cd, uint32_t vj) \
+{ \
+ VReg *Vj = &(env->fpr[vj].vreg); \
+ \
+ env->cf[cd & 0x7] = do_match2(0, Vj->D(0), Vj->D(1), MO); \
+ if (oprsz == 32) { \
+ env->cf[cd & 0x7] = env->cf[cd & 0x7] || \
+ do_match2(0, Vj->D(2), Vj->D(3), MO); \
+ } \
}
+
SETANYEQZ(vsetanyeqz_b, MO_8)
SETANYEQZ(vsetanyeqz_h, MO_16)
SETANYEQZ(vsetanyeqz_w, MO_32)
SETANYEQZ(vsetanyeqz_d, MO_64)
-#define SETALLNEZ(NAME, MO) \
-void HELPER(NAME)(CPULoongArchState *env, uint32_t cd, uint32_t vj) \
-{ \
- VReg *Vj = &(env->fpr[vj].vreg); \
- \
- env->cf[cd & 0x7]= !do_match2(0, Vj->D(0), Vj->D(1), MO); \
+#define SETALLNEZ(NAME, MO) \
+void HELPER(NAME)(CPULoongArchState *env, \
+ uint32_t oprsz, uint32_t cd, uint32_t vj) \
+{ \
+ VReg *Vj = &(env->fpr[vj].vreg); \
+ \
+ env->cf[cd & 0x7]= !do_match2(0, Vj->D(0), Vj->D(1), MO); \
+ if (oprsz == 32) { \
+ env->cf[cd & 0x7] = env->cf[cd & 0x7] && \
+ !do_match2(0, Vj->D(2), Vj->D(3), MO); \
+ } \
}
+
SETALLNEZ(vsetallnez_b, MO_8)
SETALLNEZ(vsetallnez_h, MO_16)
SETALLNEZ(vsetallnez_w, MO_32)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index dbcd6a4127..b68daa53ae 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -227,18 +227,35 @@ static bool gen_xx_i(DisasContext *ctx, arg_vv_i *a, gen_helper_gvec_2i *fn)
return gen_vv_i_vl(ctx, a, 32, fn);
}
-static bool gen_cv(DisasContext *ctx, arg_cv *a,
- void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32))
+static bool gen_cv_vl(DisasContext *ctx, arg_cv *a, uint32_t sz,
+ void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
{
TCGv_i32 vj = tcg_constant_i32(a->vj);
TCGv_i32 cd = tcg_constant_i32(a->cd);
+ TCGv_i32 oprsz = tcg_constant_i32(sz);
+ func(cpu_env, oprsz, cd, vj);
+ return true;
+}
+
+static bool gen_cv(DisasContext *ctx, arg_cv *a,
+ void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
+{
if (!check_vec(ctx, 16)) {
return true;
}
- func(cpu_env, cd, vj);
- return true;
+ return gen_cv_vl(ctx, a, 16, func);
+}
+
+static bool gen_cx(DisasContext *ctx, arg_cv *a,
+ void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
+{
+ if (!check_vec(ctx, 32)) {
+ return true;
+ }
+
+ return gen_cv_vl(ctx, a, 32, func);
}
static bool gvec_vvv_vl(DisasContext *ctx, arg_vvv *a,
@@ -4921,28 +4938,27 @@ TRANS(vfcmp_cond_d, LSX, do_vfcmp_cond_d, 16)
TRANS(xvfcmp_cond_s, LASX, do_vfcmp_cond_s, 32)
TRANS(xvfcmp_cond_d, LASX, do_vfcmp_cond_d, 32)
-static bool trans_vbitsel_v(DisasContext *ctx, arg_vvvv *a)
+static bool do_vbitsel_v(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz)
{
- if (!avail_LSX(ctx)) {
- return false;
- }
-
- if (!check_vec(ctx, 16)) {
+ if (!check_vec(ctx, oprsz)) {
return true;
}
tcg_gen_gvec_bitsel(MO_64, vec_full_offset(a->vd), vec_full_offset(a->va),
vec_full_offset(a->vk), vec_full_offset(a->vj),
- 16, ctx->vl/8);
+ oprsz, ctx->vl / 8);
return true;
}
+TRANS(vbitsel_v, LSX, do_vbitsel_v, 16)
+TRANS(xvbitsel_v, LASX, do_vbitsel_v, 32)
+
static void gen_vbitseli(unsigned vece, TCGv_vec a, TCGv_vec b, int64_t imm)
{
tcg_gen_bitsel_vec(vece, a, a, tcg_constant_vec_matching(a, vece, imm), b);
}
-static bool trans_vbitseli_b(DisasContext *ctx, arg_vv_i *a)
+static bool do_vbitseli_b(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz)
{
static const GVecGen2i op = {
.fniv = gen_vbitseli,
@@ -4951,19 +4967,18 @@ static bool trans_vbitseli_b(DisasContext *ctx, arg_vv_i *a)
.load_dest = true
};
- if (!avail_LSX(ctx)) {
- return false;
- }
-
- if (!check_vec(ctx, 16)) {
+ if (!check_vec(ctx, oprsz)) {
return true;
}
tcg_gen_gvec_2i(vec_full_offset(a->vd), vec_full_offset(a->vj),
- 16, ctx->vl/8, a->imm, &op);
+ oprsz, ctx->vl / 8, a->imm , &op);
return true;
}
+TRANS(vbitseli_b, LSX, do_vbitseli_b, 16)
+TRANS(xvbitseli_b, LASX, do_vbitseli_b, 32)
+
#define VSET(NAME, COND) \
static bool trans_## NAME (DisasContext *ctx, arg_cv *a) \
{ \
@@ -5003,6 +5018,52 @@ TRANS(vsetallnez_h, LSX, gen_cv, gen_helper_vsetallnez_h)
TRANS(vsetallnez_w, LSX, gen_cv, gen_helper_vsetallnez_w)
TRANS(vsetallnez_d, LSX, gen_cv, gen_helper_vsetallnez_d)
+/*
+ * XVSET(NAME, COND): define trans_NAME, which ORs the four 64-bit lanes of
+ * register vj together, compares the result against 0 using COND and stores
+ * the outcome as one byte into cf[cd & 7].
+ *
+ * Returns false when LASX is not available; returns true otherwise (also
+ * when check_vec() rejects the access).
+ */
+#define XVSET(NAME, COND)                                                      \
+static bool trans_## NAME(DisasContext *ctx, arg_cv * a)                       \
+{                                                                              \
+    TCGv_i64 t1, t2, d[4];                                                     \
+                                                                               \
+    /* Gate first so no temps or loads are emitted for a rejected insn. */    \
+    if (!avail_LASX(ctx)) {                                                    \
+        return false;                                                          \
+    }                                                                          \
+                                                                               \
+    if (!check_vec(ctx, 32)) {                                                 \
+        return true;                                                           \
+    }                                                                          \
+                                                                               \
+    d[0] = tcg_temp_new_i64();                                                 \
+    d[1] = tcg_temp_new_i64();                                                 \
+    d[2] = tcg_temp_new_i64();                                                 \
+    d[3] = tcg_temp_new_i64();                                                 \
+    t1 = tcg_temp_new_i64();                                                   \
+    t2 = tcg_temp_new_i64();                                                   \
+                                                                               \
+    get_vreg64(d[0], a->vj, 0);                                                \
+    get_vreg64(d[1], a->vj, 1);                                                \
+    get_vreg64(d[2], a->vj, 2);                                                \
+    get_vreg64(d[3], a->vj, 3);                                                \
+                                                                               \
+    /* Reduce all 256 bits into t1 before the single comparison. */           \
+    tcg_gen_or_i64(t1, d[0], d[1]);                                            \
+    tcg_gen_or_i64(t2, d[2], d[3]);                                            \
+    tcg_gen_or_i64(t1, t2, t1);                                                \
+    tcg_gen_setcondi_i64(COND, t1, t1, 0);                                     \
+    tcg_gen_st8_tl(t1, cpu_env, offsetof(CPULoongArchState, cf[a->cd & 0x7])); \
+                                                                               \
+    return true;                                                               \
+}
+
+XVSET(xvseteqz_v, TCG_COND_EQ)
+XVSET(xvsetnez_v, TCG_COND_NE)
+
+TRANS(xvsetanyeqz_b, LASX, gen_cx, gen_helper_vsetanyeqz_b)
+TRANS(xvsetanyeqz_h, LASX, gen_cx, gen_helper_vsetanyeqz_h)
+TRANS(xvsetanyeqz_w, LASX, gen_cx, gen_helper_vsetanyeqz_w)
+TRANS(xvsetanyeqz_d, LASX, gen_cx, gen_helper_vsetanyeqz_d)
+TRANS(xvsetallnez_b, LASX, gen_cx, gen_helper_vsetallnez_b)
+TRANS(xvsetallnez_h, LASX, gen_cx, gen_helper_vsetallnez_h)
+TRANS(xvsetallnez_w, LASX, gen_cx, gen_helper_vsetallnez_w)
+TRANS(xvsetallnez_d, LASX, gen_cx, gen_helper_vsetallnez_d)
+
static bool trans_vinsgr2vr_b(DisasContext *ctx, arg_vr_i *a)
{
TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 51/57] target/loongarch: Implement xvinsgr2vr xvpickve2gr
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (49 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 50/57] target/loongarch: Implement xvbitsel xvset Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-11 22:27 ` Richard Henderson
2023-09-07 8:31 ` [PATCH RESEND v5 52/57] target/loongarch: Implement xvreplve xvinsve0 xvpickve Song Gao
` (5 subsequent siblings)
56 siblings, 1 reply; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVINSGR2VR.{W/D};
- XVPICKVE2GR.{W/D}[U].
Signed-off-by: Song Gao <gaosong@loongson.cn>
---
target/loongarch/insns.decode | 7 +++
target/loongarch/disas.c | 17 ++++++++
target/loongarch/insn_trans/trans_vec.c.inc | 48 +++++++++++++++++++++
3 files changed, 72 insertions(+)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index ad6751fdfb..bb3bb447ae 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1976,6 +1976,13 @@ xvsetallnez_h 0111 01101001 11001 01101 ..... 00 ... @cv
xvsetallnez_w 0111 01101001 11001 01110 ..... 00 ... @cv
xvsetallnez_d 0111 01101001 11001 01111 ..... 00 ... @cv
+xvinsgr2vr_w 0111 01101110 10111 10 ... ..... ..... @vr_ui3
+xvinsgr2vr_d 0111 01101110 10111 110 .. ..... ..... @vr_ui2
+xvpickve2gr_w 0111 01101110 11111 10 ... ..... ..... @rv_ui3
+xvpickve2gr_d 0111 01101110 11111 110 .. ..... ..... @rv_ui2
+xvpickve2gr_wu 0111 01101111 00111 10 ... ..... ..... @rv_ui3
+xvpickve2gr_du 0111 01101111 00111 110 .. ..... ..... @rv_ui2
+
xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index abe113b150..04f9f9fa4b 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1738,6 +1738,16 @@ static void output_vv_x(DisasContext *ctx, arg_vv *a, const char *mnemonic)
output(ctx, mnemonic, "x%d, x%d", a->vd, a->vj);
}
+/*
+ * Print an instruction whose operands are vector register vd (shown with
+ * the "x" prefix), general register rj and a hexadecimal immediate.
+ */
+static void output_vr_i_x(DisasContext *ctx, arg_vr_i *a, const char *mnemonic)
+{
+ output(ctx, mnemonic, "x%d, r%d, 0x%x", a->vd, a->rj, a->imm);
+}
+
+/*
+ * Print an instruction whose operands are general register rd, vector
+ * register vj (shown with the "x" prefix) and a hexadecimal immediate.
+ */
+static void output_rv_i_x(DisasContext *ctx, arg_rv_i *a, const char *mnemonic)
+{
+ output(ctx, mnemonic, "r%d, x%d, 0x%x", a->rd, a->vj, a->imm);
+}
+
INSN_LASX(xvadd_b, vvv)
INSN_LASX(xvadd_h, vvv)
INSN_LASX(xvadd_w, vvv)
@@ -2497,6 +2507,13 @@ INSN_LASX(xvsetallnez_h, cv)
INSN_LASX(xvsetallnez_w, cv)
INSN_LASX(xvsetallnez_d, cv)
+INSN_LASX(xvinsgr2vr_w, vr_i)
+INSN_LASX(xvinsgr2vr_d, vr_i)
+INSN_LASX(xvpickve2gr_w, rv_i)
+INSN_LASX(xvpickve2gr_d, rv_i)
+INSN_LASX(xvpickve2gr_wu, rv_i)
+INSN_LASX(xvpickve2gr_du, rv_i)
+
INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index b68daa53ae..bf44a4d1fc 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -5268,6 +5268,54 @@ static bool trans_vpickve2gr_du(DisasContext *ctx, arg_rv_i *a)
return true;
}
+/*
+ * xvinsgr2vr.w: store the low 32 bits of general register rj into 32-bit
+ * element imm of vector register vd.
+ *
+ * Returns false when LASX is not available; returns true otherwise (also
+ * when check_vec() rejects the access).
+ */
+static bool trans_xvinsgr2vr_w(DisasContext *ctx, arg_vr_i *a)
+{
+    TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
+
+    if (!avail_LASX(ctx)) {
+        return false;
+    }
+
+    /*
+     * Gate on the 256-bit vector check directly: delegating to the LSX
+     * translator would only perform the 128-bit checks.
+     */
+    if (!check_vec(ctx, 32)) {
+        return true;
+    }
+
+    tcg_gen_st32_i64(src, cpu_env,
+                     offsetof(CPULoongArchState, fpr[a->vd].vreg.W(a->imm)));
+    return true;
+}
+
+/*
+ * xvinsgr2vr.d: store general register rj into 64-bit element imm of
+ * vector register vd.
+ *
+ * Returns false when LASX is not available; returns true otherwise (also
+ * when check_vec() rejects the access).
+ */
+static bool trans_xvinsgr2vr_d(DisasContext *ctx, arg_vr_i *a)
+{
+    TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
+
+    if (!avail_LASX(ctx)) {
+        return false;
+    }
+
+    /*
+     * Gate on the 256-bit vector check directly: delegating to the LSX
+     * translator would only perform the 128-bit checks.
+     */
+    if (!check_vec(ctx, 32)) {
+        return true;
+    }
+
+    tcg_gen_st_i64(src, cpu_env,
+                   offsetof(CPULoongArchState, fpr[a->vd].vreg.D(a->imm)));
+    return true;
+}
+
+/*
+ * xvpickve2gr.w: reject when LASX is unavailable, otherwise reuse the
+ * vpickve2gr.w translator.
+ * NOTE(review): the callee is the 128-bit (LSX) translator; confirm that the
+ * 256-bit vector check (check_vec(ctx, 32)) is performed somewhere -- TODO.
+ */
+static bool trans_xvpickve2gr_w(DisasContext *ctx, arg_rv_i *a)
+{
+ if (!avail_LASX(ctx)) {
+ return false;
+ }
+ return trans_vpickve2gr_w(ctx, a);
+}
+
+/*
+ * xvpickve2gr.d: reject when LASX is unavailable, otherwise reuse the
+ * vpickve2gr.d translator.
+ * NOTE(review): the callee is the 128-bit (LSX) translator; confirm that the
+ * 256-bit vector check (check_vec(ctx, 32)) is performed somewhere -- TODO.
+ */
+static bool trans_xvpickve2gr_d(DisasContext *ctx, arg_rv_i *a)
+{
+ if (!avail_LASX(ctx)) {
+ return false;
+ }
+ return trans_vpickve2gr_d(ctx, a);
+}
+
+/*
+ * xvpickve2gr.wu: reject when LASX is unavailable, otherwise reuse the
+ * vpickve2gr.wu translator.
+ * NOTE(review): the callee is the 128-bit (LSX) translator; confirm that the
+ * 256-bit vector check (check_vec(ctx, 32)) is performed somewhere -- TODO.
+ */
+static bool trans_xvpickve2gr_wu(DisasContext *ctx, arg_rv_i *a)
+{
+ if (!avail_LASX(ctx)) {
+ return false;
+ }
+ return trans_vpickve2gr_wu(ctx, a);
+}
+
+/*
+ * xvpickve2gr.du: reject when LASX is unavailable, otherwise reuse the
+ * vpickve2gr.du translator.
+ * NOTE(review): the callee is the 128-bit (LSX) translator; confirm that the
+ * 256-bit vector check (check_vec(ctx, 32)) is performed somewhere -- TODO.
+ */
+static bool trans_xvpickve2gr_du(DisasContext *ctx, arg_rv_i *a)
+{
+ if (!avail_LASX(ctx)) {
+ return false;
+ }
+ return trans_vpickve2gr_du(ctx, a);
+}
+
static bool gvec_dup_vl(DisasContext *ctx, arg_vr *a,
uint32_t oprsz, MemOp mop)
{
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* Re: [PATCH RESEND v5 51/57] target/loongarch: Implement xvinsgr2vr xvpickve2gr
2023-09-07 8:31 ` [PATCH RESEND v5 51/57] target/loongarch: Implement xvinsgr2vr xvpickve2gr Song Gao
@ 2023-09-11 22:27 ` Richard Henderson
2023-09-12 9:09 ` gaosong
0 siblings, 1 reply; 87+ messages in thread
From: Richard Henderson @ 2023-09-11 22:27 UTC (permalink / raw)
To: Song Gao, qemu-devel; +Cc: maobibo
On 9/7/23 01:31, Song Gao wrote:
> +static bool trans_xvinsgr2vr_w(DisasContext *ctx, arg_vr_i *a)
> +{
> + if (!avail_LASX(ctx)) {
> + return false;
> + }
> + return trans_vinsgr2vr_w(ctx, a);
> +}
Using the other translator doesn't help.
> static bool trans_vinsgr2vr_w(DisasContext *ctx, arg_vr_i *a)
> {
> TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
>
> if (!avail_LSX(ctx)) {
> return false;
> }
>
> CHECK_SXE;
This portion doesn't apply, and you miss the check_vec for the larger LASX.
> tcg_gen_st32_i64(src, cpu_env,
> offsetof(CPULoongArchState, fpr[a->vd].vreg.W(a->imm)));
> return true;
> }
The only thing that is left is this one line, so I'm not sure it's worth splitting out a
common helper function.
r~
^ permalink raw reply [flat|nested] 87+ messages in thread
* Re: [PATCH RESEND v5 51/57] target/loongarch: Implement xvinsgr2vr xvpickve2gr
2023-09-11 22:27 ` Richard Henderson
@ 2023-09-12 9:09 ` gaosong
2023-09-12 16:20 ` Richard Henderson
0 siblings, 1 reply; 87+ messages in thread
From: gaosong @ 2023-09-12 9:09 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: maobibo
在 2023/9/12 上午6:27, Richard Henderson 写道:
> On 9/7/23 01:31, Song Gao wrote:
>> +static bool trans_xvinsgr2vr_w(DisasContext *ctx, arg_vr_i *a)
>> +{
>> + if (!avail_LASX(ctx)) {
>> + return false;
>> + }
>> + return trans_vinsgr2vr_w(ctx, a);
>> +}
>
> Using the other translator doesn't help.
>
>> static bool trans_vinsgr2vr_w(DisasContext *ctx, arg_vr_i *a)
>> {
>> TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
>>
>> if (!avail_LSX(ctx)) {
>> return false;
>> }
>>
>> CHECK_SXE;
>
> This portion doesn't apply, and you miss the check_vec for the larger LASX.
>
>> tcg_gen_st32_i64(src, cpu_env,
>> offsetof(CPULoongArchState,
>> fpr[a->vd].vreg.W(a->imm)));
>> return true;
>> }
>
> The only thing that is left is this one line, so I'm not sure it's worth
> splitting out a common helper function.
>
>
I think we do need a common helper, like this:
static bool gen_g2v_vl(DisasContext *ctx, arg_vr_i *a, uint32_t oprsz,
MemOp mop,
void (*func)(TCGv, TCGv_ptr, tcg_target_long))
{
TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
if (!check_vec(ctx, oprsz)) {
return true;
}
func(src, cpu_env, vec_reg_offset(a->vd, a->imm, mop));
return true;
}
static bool gen_g2v(DisasContext *ctx, arg_vr_i *a, MemOp mop,
void (*func)(TCGv, TCGv_ptr, tcg_target_long))
{
return gen_g2v_vl(ctx, a, 16, mop, func);
}
static bool gen_g2x(DisasContext *ctx, arg_vr_i *a, MemOp mop,
void (*func)(TCGv, TCGv_ptr, tcg_target_long))
{
return gen_g2v_vl(ctx, a, 32, mop, func);
}
TRANS(vinsgr2vr_b, LSX, gen_g2v, MO_8, tcg_gen_st8_i64)
TRANS(vinsgr2vr_h, LSX, gen_g2v, MO_16, tcg_gen_st16_i64)
TRANS(vinsgr2vr_w, LSX, gen_g2v, MO_32, tcg_gen_st32_i64)
TRANS(vinsgr2vr_d, LSX, gen_g2v, MO_64, tcg_gen_st_i64)
TRANS(xvinsgr2vr_w, LASX, gen_g2x, MO_32, tcg_gen_st32_i64)
TRANS(xvinsgr2vr_d, LASX, gen_g2x, MO_64, tcg_gen_st_i64)
Thanks.
Song Gao
^ permalink raw reply [flat|nested] 87+ messages in thread
* Re: [PATCH RESEND v5 51/57] target/loongarch: Implement xvinsgr2vr xvpickve2gr
2023-09-12 9:09 ` gaosong
@ 2023-09-12 16:20 ` Richard Henderson
0 siblings, 0 replies; 87+ messages in thread
From: Richard Henderson @ 2023-09-12 16:20 UTC (permalink / raw)
To: gaosong, qemu-devel; +Cc: maobibo
On 9/12/23 02:09, gaosong wrote:
> static bool gen_g2v_vl(DisasContext *ctx, arg_vr_i *a, uint32_t oprsz, MemOp mop,
> void (*func)(TCGv, TCGv_ptr, tcg_target_long))
> {
> TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
>
> if (!check_vec(ctx, oprsz)) {
> return true;
> }
>
> func(src, cpu_env, vec_reg_offset(a->vd, a->imm, mop));
>
> return true;
> }
>
> static bool gen_g2v(DisasContext *ctx, arg_vr_i *a, MemOp mop,
> void (*func)(TCGv, TCGv_ptr, tcg_target_long))
> {
> return gen_g2v_vl(ctx, a, 16, mop, func);
> }
>
> static bool gen_g2x(DisasContext *ctx, arg_vr_i *a, MemOp mop,
> void (*func)(TCGv, TCGv_ptr, tcg_target_long))
> {
> return gen_g2v_vl(ctx, a, 32, mop, func);
> }
>
> TRANS(vinsgr2vr_b, LSX, gen_g2v, MO_8, tcg_gen_st8_i64)
> TRANS(vinsgr2vr_h, LSX, gen_g2v, MO_16, tcg_gen_st16_i64)
> TRANS(vinsgr2vr_w, LSX, gen_g2v, MO_32, tcg_gen_st32_i64)
> TRANS(vinsgr2vr_d, LSX, gen_g2v, MO_64, tcg_gen_st_i64)
> TRANS(xvinsgr2vr_w, LASX, gen_g2x, MO_32, tcg_gen_st32_i64)
> TRANS(xvinsgr2vr_d, LASX, gen_g2x, MO_64, tcg_gen_st_i64)
Looks perfect, thanks.
r~
^ permalink raw reply [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 52/57] target/loongarch: Implement xvreplve xvinsve0 xvpickve
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (50 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 51/57] target/loongarch: Implement xvinsgr2vr xvpickve2gr Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-11 23:21 ` Richard Henderson
2023-09-07 8:31 ` [PATCH RESEND v5 53/57] target/loongarch: Implement xvpack xvpick xvilv{l/h} Song Gao
` (4 subsequent siblings)
56 siblings, 1 reply; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVREPLVE.{B/H/W/D};
- XVREPL128VEI.{B/H/W/D};
- XVREPLVE0.{B/H/W/D/Q};
- XVINSVE0.{W/D};
- XVPICKVE.{W/D};
- XVBSLL.V, XVBSRL.V.
Signed-off-by: Song Gao <gaosong@loongson.cn>
---
target/loongarch/helper.h | 5 +
target/loongarch/insns.decode | 25 ++
target/loongarch/disas.c | 29 +++
target/loongarch/vec_helper.c | 28 +++
target/loongarch/insn_trans/trans_vec.c.inc | 255 +++++++++++++++-----
5 files changed, 287 insertions(+), 55 deletions(-)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index 85233586e3..fb489dda2d 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -668,6 +668,11 @@ DEF_HELPER_4(vsetallnez_h, void, env, i32, i32, i32)
DEF_HELPER_4(vsetallnez_w, void, env, i32, i32, i32)
DEF_HELPER_4(vsetallnez_d, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(xvinsve0_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(xvinsve0_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(xvpickve_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(xvpickve_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
DEF_HELPER_FLAGS_4(vpackev_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vpackev_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vpackev_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index bb3bb447ae..74383ba3bc 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1987,3 +1987,28 @@ xvreplgr2vr_b 0111 01101001 11110 00000 ..... ..... @vr
xvreplgr2vr_h 0111 01101001 11110 00001 ..... ..... @vr
xvreplgr2vr_w 0111 01101001 11110 00010 ..... ..... @vr
xvreplgr2vr_d 0111 01101001 11110 00011 ..... ..... @vr
+
+xvreplve_b 0111 01010010 00100 ..... ..... ..... @vvr
+xvreplve_h 0111 01010010 00101 ..... ..... ..... @vvr
+xvreplve_w 0111 01010010 00110 ..... ..... ..... @vvr
+xvreplve_d 0111 01010010 00111 ..... ..... ..... @vvr
+
+xvrepl128vei_b 0111 01101111 01111 0 .... ..... ..... @vv_ui4
+xvrepl128vei_h 0111 01101111 01111 10 ... ..... ..... @vv_ui3
+xvrepl128vei_w 0111 01101111 01111 110 .. ..... ..... @vv_ui2
+xvrepl128vei_d 0111 01101111 01111 1110 . ..... ..... @vv_ui1
+
+xvreplve0_b 0111 01110000 01110 00000 ..... ..... @vv
+xvreplve0_h 0111 01110000 01111 00000 ..... ..... @vv
+xvreplve0_w 0111 01110000 01111 10000 ..... ..... @vv
+xvreplve0_d 0111 01110000 01111 11000 ..... ..... @vv
+xvreplve0_q 0111 01110000 01111 11100 ..... ..... @vv
+
+xvinsve0_w 0111 01101111 11111 10 ... ..... ..... @vv_ui3
+xvinsve0_d 0111 01101111 11111 110 .. ..... ..... @vv_ui2
+
+xvpickve_w 0111 01110000 00111 10 ... ..... ..... @vv_ui3
+xvpickve_d 0111 01110000 00111 110 .. ..... ..... @vv_ui2
+
+xvbsll_v 0111 01101000 11100 ..... ..... ..... @vv_ui5
+xvbsrl_v 0111 01101000 11101 ..... ..... ..... @vv_ui5
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 04f9f9fa4b..d091402db6 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1748,6 +1748,11 @@ static void output_rv_i_x(DisasContext *ctx, arg_rv_i *a, const char *mnemonic)
output(ctx, mnemonic, "r%d, x%d, 0x%x", a->rd, a->vj, a->imm);
}
+/*
+ * Print an instruction whose operands are vector registers vd and vj (shown
+ * with the "x" prefix) followed by general register rk.
+ */
+static void output_vvr_x(DisasContext *ctx, arg_vvr *a, const char *mnemonic)
+{
+ output(ctx, mnemonic, "x%d, x%d, r%d", a->vd, a->vj, a->rk);
+}
+
INSN_LASX(xvadd_b, vvv)
INSN_LASX(xvadd_h, vvv)
INSN_LASX(xvadd_w, vvv)
@@ -2518,3 +2523,27 @@ INSN_LASX(xvreplgr2vr_b, vr)
INSN_LASX(xvreplgr2vr_h, vr)
INSN_LASX(xvreplgr2vr_w, vr)
INSN_LASX(xvreplgr2vr_d, vr)
+
+INSN_LASX(xvreplve_b, vvr)
+INSN_LASX(xvreplve_h, vvr)
+INSN_LASX(xvreplve_w, vvr)
+INSN_LASX(xvreplve_d, vvr)
+INSN_LASX(xvrepl128vei_b, vv_i)
+INSN_LASX(xvrepl128vei_h, vv_i)
+INSN_LASX(xvrepl128vei_w, vv_i)
+INSN_LASX(xvrepl128vei_d, vv_i)
+
+INSN_LASX(xvreplve0_b, vv)
+INSN_LASX(xvreplve0_h, vv)
+INSN_LASX(xvreplve0_w, vv)
+INSN_LASX(xvreplve0_d, vv)
+INSN_LASX(xvreplve0_q, vv)
+
+INSN_LASX(xvinsve0_w, vv_i)
+INSN_LASX(xvinsve0_d, vv_i)
+
+INSN_LASX(xvpickve_w, vv_i)
+INSN_LASX(xvpickve_d, vv_i)
+
+INSN_LASX(xvbsll_v, vv_i)
+INSN_LASX(xvbsrl_v, vv_i)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index 2f9acd4364..6832189151 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -3209,6 +3209,34 @@ SETALLNEZ(vsetallnez_h, MO_16)
SETALLNEZ(vsetallnez_w, MO_32)
SETALLNEZ(vsetallnez_d, MO_64)
+/*
+ * XVINSVE0(NAME, E, MASK): define HELPER(NAME), which copies element 0 of
+ * vj (accessed through E) into element (imm & MASK) of vd.  No other
+ * element of vd is written, and desc is not used.
+ */
+#define XVINSVE0(NAME, E, MASK)                                    \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{                                                                  \
+    VReg *dst = vd;                                                \
+    VReg *src = vj;                                                \
+    uint64_t idx = imm & MASK;                                     \
+                                                                   \
+    dst->E(idx) = src->E(0);                                       \
+}
+
+XVINSVE0(xvinsve0_w, W, 0x7)
+XVINSVE0(xvinsve0_d, D, 0x3)
+
+/*
+ * XVPICKVE(NAME, E, BIT, MASK): define HELPER(NAME), which copies element
+ * (imm & MASK) of vj into element 0 of vd and zeroes every remaining
+ * BIT-wide element of vd within the operation size taken from desc.
+ */
+#define XVPICKVE(NAME, E, BIT, MASK)                               \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{                                                                  \
+    VReg *dst = vd;                                                \
+    VReg *src = vj;                                                \
+    int oprsz = simd_oprsz(desc);                                  \
+    int nelem = oprsz / (BIT / 8);                                 \
+    int n = 1;                                                     \
+                                                                   \
+    /* Read the source element before any other lane is cleared. */ \
+    dst->E(0) = src->E(imm & MASK);                                \
+    while (n < nelem) {                                            \
+        dst->E(n) = 0;                                             \
+        n++;                                                       \
+    }                                                              \
+}
+
+XVPICKVE(xvpickve_w, W, 32, 0x7)
+XVPICKVE(xvpickve_d, D, 64, 0x3)
+
#define VPACKEV(NAME, BIT, E) \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index bf44a4d1fc..70babae2c2 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -5419,112 +5419,257 @@ static bool trans_vreplvei_d(DisasContext *ctx, arg_vv_i *a)
return true;
}
-static bool gen_vreplve(DisasContext *ctx, arg_vvr *a, int vece, int bit,
- void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long))
+static bool gen_vreplve_vl(DisasContext *ctx, arg_vvr *a,
+ uint32_t oprsz, int vece, int bit,
+ void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long))
{
TCGv_i64 t0 = tcg_temp_new_i64();
TCGv_ptr t1 = tcg_temp_new_ptr();
TCGv_i64 t2 = tcg_temp_new_i64();
- if (!avail_LSX(ctx)) {
- return false;
- }
-
- if (!check_vec(ctx, 16)) {
- return true;
- }
-
- tcg_gen_andi_i64(t0, gpr_src(ctx, a->rk, EXT_NONE), (LSX_LEN/bit) -1);
+ tcg_gen_andi_i64(t0, gpr_src(ctx, a->rk, EXT_NONE), (LSX_LEN / bit) - 1);
tcg_gen_shli_i64(t0, t0, vece);
if (HOST_BIG_ENDIAN) {
- tcg_gen_xori_i64(t0, t0, vece << ((LSX_LEN/bit) -1));
+ tcg_gen_xori_i64(t0, t0, vece << ((LSX_LEN / bit) -1));
}
tcg_gen_trunc_i64_ptr(t1, t0);
tcg_gen_add_ptr(t1, t1, cpu_env);
func(t2, t1, vec_full_offset(a->vj));
- tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd), 16, ctx->vl/8, t2);
+ tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd), 16, 16, t2);
+ if (oprsz == 32) {
+ func(t2, t1, offsetof(CPULoongArchState, fpr[a->vj].vreg.Q(1)));
+ tcg_gen_gvec_dup_i64(vece,
+ offsetof(CPULoongArchState, fpr[a->vd].vreg.Q(1)),
+ 16, 16, t2);
+ }
return true;
}
+static bool gen_vreplve(DisasContext *ctx, arg_vvr *a, int vece, int bit,
+ void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long))
+{
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
+ return gen_vreplve_vl(ctx, a, 16, vece, bit, func);
+}
+
+static bool gen_xvreplve(DisasContext *ctx, arg_vvr *a, int vece, int bit,
+ void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long))
+{
+ if (!check_vec(ctx, 32)) {
+ return true;
+ }
+
+ return gen_vreplve_vl(ctx, a, 32, vece, bit, func);
+}
+
TRANS(vreplve_b, LSX, gen_vreplve, MO_8, 8, tcg_gen_ld8u_i64)
TRANS(vreplve_h, LSX, gen_vreplve, MO_16, 16, tcg_gen_ld16u_i64)
TRANS(vreplve_w, LSX, gen_vreplve, MO_32, 32, tcg_gen_ld32u_i64)
TRANS(vreplve_d, LSX, gen_vreplve, MO_64, 64, tcg_gen_ld_i64)
+TRANS(xvreplve_b, LASX, gen_xvreplve, MO_8, 8, tcg_gen_ld8u_i64)
+TRANS(xvreplve_h, LASX, gen_xvreplve, MO_16, 16, tcg_gen_ld16u_i64)
+TRANS(xvreplve_w, LASX, gen_xvreplve, MO_32, 32, tcg_gen_ld32u_i64)
+TRANS(xvreplve_d, LASX, gen_xvreplve, MO_64, 64, tcg_gen_ld_i64)
-static bool trans_vbsll_v(DisasContext *ctx, arg_vv_i *a)
+static bool trans_xvrepl128vei_b(DisasContext *ctx, arg_vv_i * a)
{
- int ofs;
- TCGv_i64 desthigh, destlow, high, low;
-
- if (!avail_LSX(ctx)) {
+ if (!avail_LASX(ctx)) {
return false;
}
- if (!check_vec(ctx, 16)) {
+ if (!check_vec(ctx, 32)) {
return true;
}
- desthigh = tcg_temp_new_i64();
- destlow = tcg_temp_new_i64();
- high = tcg_temp_new_i64();
- low = tcg_temp_new_i64();
-
- get_vreg64(low, a->vj, 0);
+ tcg_gen_gvec_dup_mem(MO_8,
+ offsetof(CPULoongArchState, fpr[a->vd].vreg.B(0)),
+ offsetof(CPULoongArchState,
+ fpr[a->vj].vreg.B((a->imm))),
+ 16, 16);
+ tcg_gen_gvec_dup_mem(MO_8,
+ offsetof(CPULoongArchState, fpr[a->vd].vreg.B(16)),
+ offsetof(CPULoongArchState,
+ fpr[a->vj].vreg.B((a->imm + 16))),
+ 16, 16);
+ return true;
+}
- ofs = ((a->imm) & 0xf) * 8;
- if (ofs < 64) {
- get_vreg64(high, a->vj, 1);
- tcg_gen_extract2_i64(desthigh, low, high, 64 - ofs);
- tcg_gen_shli_i64(destlow, low, ofs);
- } else {
- tcg_gen_shli_i64(desthigh, low, ofs - 64);
- destlow = tcg_constant_i64(0);
+static bool trans_xvrepl128vei_h(DisasContext *ctx, arg_vv_i *a)
+{
+ if (!avail_LASX(ctx)) {
+ return false;
}
- set_vreg64(desthigh, a->vd, 1);
- set_vreg64(destlow, a->vd, 0);
+ if (!check_vec(ctx, 32)) {
+ return true;
+ }
+ tcg_gen_gvec_dup_mem(MO_16,
+ offsetof(CPULoongArchState, fpr[a->vd].vreg.H(0)),
+ offsetof(CPULoongArchState,
+ fpr[a->vj].vreg.H((a->imm))),
+ 16, 16);
+ tcg_gen_gvec_dup_mem(MO_16,
+ offsetof(CPULoongArchState, fpr[a->vd].vreg.H(8)),
+ offsetof(CPULoongArchState,
+ fpr[a->vj].vreg.H((a->imm + 8))),
+ 16, 16);
return true;
}
-static bool trans_vbsrl_v(DisasContext *ctx, arg_vv_i *a)
+static bool trans_xvrepl128vei_w(DisasContext *ctx, arg_vv_i *a)
{
- TCGv_i64 desthigh, destlow, high, low;
- int ofs;
+ if (!avail_LASX(ctx)) {
+ return false;
+ }
- if (!avail_LSX(ctx)) {
+ if (!check_vec(ctx, 32)) {
+ return true;
+ }
+
+ tcg_gen_gvec_dup_mem(MO_32,
+ offsetof(CPULoongArchState, fpr[a->vd].vreg.W(0)),
+ offsetof(CPULoongArchState,
+ fpr[a->vj].vreg.W((a->imm))),
+ 16, 16);
+ tcg_gen_gvec_dup_mem(MO_32,
+ offsetof(CPULoongArchState, fpr[a->vd].vreg.W(4)),
+ offsetof(CPULoongArchState,
+ fpr[a->vj].vreg.W((a->imm + 4))),
+ 16, 16);
+ return true;
+}
+
+static bool trans_xvrepl128vei_d(DisasContext *ctx, arg_vv_i *a)
+{
+ if (!avail_LASX(ctx)) {
return false;
}
- if (!check_vec(ctx, 16)) {
+ if (!check_vec(ctx, 32)) {
return true;
}
- desthigh = tcg_temp_new_i64();
- destlow = tcg_temp_new_i64();
- high = tcg_temp_new_i64();
- low = tcg_temp_new_i64();
+ tcg_gen_gvec_dup_mem(MO_64,
+ offsetof(CPULoongArchState, fpr[a->vd].vreg.D(0)),
+ offsetof(CPULoongArchState,
+ fpr[a->vj].vreg.D((a->imm))),
+ 16, 16);
+ tcg_gen_gvec_dup_mem(MO_64,
+ offsetof(CPULoongArchState, fpr[a->vd].vreg.D(2)),
+ offsetof(CPULoongArchState,
+ fpr[a->vj].vreg.D((a->imm + 2))),
+ 16, 16);
+ return true;
+}
+
+/*
+ * XVREPLVE0(NAME, MOP): define trans_NAME.  After the LASX availability
+ * check and the 32-byte check_vec() gate it calls tcg_gen_gvec_dup_mem()
+ * with element size MOP, the start of register vj as source, register vd as
+ * destination, and 32 for both size arguments.
+ * Returns false only when LASX is not available.
+ */
+#define XVREPLVE0(NAME, MOP) \
+static bool trans_## NAME(DisasContext *ctx, arg_vv * a) \
+{ \
+ if (!avail_LASX(ctx)) { \
+ return false; \
+ } \
+ \
+ if (!check_vec(ctx, 32)) { \
+ return true; \
+ } \
+ \
+ tcg_gen_gvec_dup_mem(MOP, vec_full_offset(a->vd), vec_full_offset(a->vj), \
+ 32, 32); \
+ return true; \
+}
- get_vreg64(high, a->vj, 1);
+XVREPLVE0(xvreplve0_b, MO_8)
+XVREPLVE0(xvreplve0_h, MO_16)
+XVREPLVE0(xvreplve0_w, MO_32)
+XVREPLVE0(xvreplve0_d, MO_64)
+XVREPLVE0(xvreplve0_q, MO_128)
- ofs = ((a->imm) & 0xf) * 8;
- if (ofs < 64) {
- get_vreg64(low, a->vj, 0);
- tcg_gen_extract2_i64(destlow, low, high, ofs);
- tcg_gen_shri_i64(desthigh, high, ofs);
- } else {
- tcg_gen_shri_i64(destlow, high, ofs - 64);
- desthigh = tcg_constant_i64(0);
+TRANS(xvinsve0_w, LASX, gen_xx_i, gen_helper_xvinsve0_w)
+TRANS(xvinsve0_d, LASX, gen_xx_i, gen_helper_xvinsve0_d)
+
+TRANS(xvpickve_w, LASX, gen_xx_i, gen_helper_xvpickve_w)
+TRANS(xvpickve_d, LASX, gen_xx_i, gen_helper_xvpickve_d)
+
+static bool do_vbsll_v(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz)
+{
+ int i, max, ofs;
+ TCGv_i64 desthigh[2], destlow[2], high[2], low[2];
+
+ if (!check_vec(ctx, oprsz)) {
+ return true;
+ }
+
+ max = (oprsz == 16) ? 1 : 2;
+
+ for (i = 0; i < max; i++) {
+ desthigh[i] = tcg_temp_new_i64();
+ destlow[i] = tcg_temp_new_i64();
+ high[i] = tcg_temp_new_i64();
+ low[i] = tcg_temp_new_i64();
+
+ get_vreg64(low[i], a->vj, 2 * i);
+
+ ofs = ((a->imm) & 0xf) * 8;
+ if (ofs < 64) {
+ get_vreg64(high[i], a->vj, 2 * i + 1);
+ tcg_gen_extract2_i64(desthigh[i], low[i], high[i], 64 - ofs);
+ tcg_gen_shli_i64(destlow[i], low[i], ofs);
+ } else {
+ tcg_gen_shli_i64(desthigh[i], low[i], ofs - 64);
+ destlow[i] = tcg_constant_i64(0);
+ }
+ set_vreg64(desthigh[i], a->vd, 2 * i + 1);
+ set_vreg64(destlow[i], a->vd, 2 * i);
}
- set_vreg64(desthigh, a->vd, 1);
- set_vreg64(destlow, a->vd, 0);
+ return true;
+}
+
+/*
+ * Shared translator for vbsrl.v (oprsz == 16) and xvbsrl.v (oprsz == 32).
+ *
+ * Each 128-bit lane of vj is shifted right by (imm & 0xf) * 8 bits, with
+ * zeroes shifted in at the top, and written to the same lane of vd.
+ *
+ * Always returns true; when check_vec() rejects the access nothing else is
+ * emitted.
+ */
+static bool do_vbsrl_v(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz)
+{
+    int i, ofs, max;
+
+    /* Gate on the actual operation size, not unconditionally on 32. */
+    if (!check_vec(ctx, oprsz)) {
+        return true;
+    }
+
+    max = oprsz / 16;
+    ofs = ((a->imm) & 0xf) * 8;
+
+    for (i = 0; i < max; i++) {
+        TCGv_i64 desthigh = tcg_temp_new_i64();
+        TCGv_i64 destlow = tcg_temp_new_i64();
+        TCGv_i64 high = tcg_temp_new_i64();
+        TCGv_i64 low = tcg_temp_new_i64();
+
+        get_vreg64(high, a->vj, 2 * i + 1);
+
+        if (ofs < 64) {
+            /* Low half takes bits from both source halves. */
+            get_vreg64(low, a->vj, 2 * i);
+            tcg_gen_extract2_i64(destlow, low, high, ofs);
+            tcg_gen_shri_i64(desthigh, high, ofs);
+        } else {
+            /* Shift of 64 bits or more: only the high half survives. */
+            tcg_gen_shri_i64(destlow, high, ofs - 64);
+            desthigh = tcg_constant_i64(0);
+        }
+        set_vreg64(desthigh, a->vd, 2 * i + 1);
+        set_vreg64(destlow, a->vd, 2 * i);
+    }
+
     return true;
 }
+TRANS(vbsll_v, LSX, do_vbsll_v, 16)
+TRANS(vbsrl_v, LSX, do_vbsrl_v, 16)
+TRANS(xvbsll_v, LASX, do_vbsll_v, 32)
+TRANS(xvbsrl_v, LASX, do_vbsrl_v, 32)
+
TRANS(vpackev_b, LSX, gen_vvv, gen_helper_vpackev_b)
TRANS(vpackev_h, LSX, gen_vvv, gen_helper_vpackev_h)
TRANS(vpackev_w, LSX, gen_vvv, gen_helper_vpackev_w)
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* Re: [PATCH RESEND v5 52/57] target/loongarch: Implement xvreplve xvinsve0 xvpickve
2023-09-07 8:31 ` [PATCH RESEND v5 52/57] target/loongarch: Implement xvreplve xvinsve0 xvpickve Song Gao
@ 2023-09-11 23:21 ` Richard Henderson
0 siblings, 0 replies; 87+ messages in thread
From: Richard Henderson @ 2023-09-11 23:21 UTC (permalink / raw)
To: Song Gao, qemu-devel; +Cc: maobibo
On 9/7/23 01:31, Song Gao wrote:
> +static bool gen_vreplve_vl(DisasContext *ctx, arg_vvr *a,
> + uint32_t oprsz, int vece, int bit,
> + void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long))
> {
> TCGv_i64 t0 = tcg_temp_new_i64();
> TCGv_ptr t1 = tcg_temp_new_ptr();
> TCGv_i64 t2 = tcg_temp_new_i64();
>
> + tcg_gen_andi_i64(t0, gpr_src(ctx, a->rk, EXT_NONE), (LSX_LEN / bit) - 1);
> tcg_gen_shli_i64(t0, t0, vece);
> if (HOST_BIG_ENDIAN) {
> + tcg_gen_xori_i64(t0, t0, vece << ((LSX_LEN / bit) -1));
> }
>
> tcg_gen_trunc_i64_ptr(t1, t0);
> tcg_gen_add_ptr(t1, t1, cpu_env);
> func(t2, t1, vec_full_offset(a->vj));
> + tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd), 16, 16, t2);
> + if (oprsz == 32) {
> + func(t2, t1, offsetof(CPULoongArchState, fpr[a->vj].vreg.Q(1)));
> + tcg_gen_gvec_dup_i64(vece,
> + offsetof(CPULoongArchState, fpr[a->vd].vreg.Q(1)),
> + 16, 16, t2);
> + }
This would be clearer as a loop:
for (i = 0; i < oprsz; i += 16) {
func(t2, t1, i);
tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd) + i, 16, 16, t2);
}
> +static bool trans_xvrepl128vei_b(DisasContext *ctx, arg_vv_i * a)
> {
> + if (!avail_LASX(ctx)) {
> return false;
> }
>
> + if (!check_vec(ctx, 32)) {
> return true;
> }
>
> + tcg_gen_gvec_dup_mem(MO_8,
> + offsetof(CPULoongArchState, fpr[a->vd].vreg.B(0)),
> + offsetof(CPULoongArchState,
> + fpr[a->vj].vreg.B((a->imm))),
> + 16, 16);
> + tcg_gen_gvec_dup_mem(MO_8,
> + offsetof(CPULoongArchState, fpr[a->vd].vreg.B(16)),
> + offsetof(CPULoongArchState,
> + fpr[a->vj].vreg.B((a->imm + 16))),
> + 16, 16);
> + return true;
> +}
Again, a loop. Also, I think you can easily merge all 4 of these functions using VECE.
> +#define XVREPLVE0(NAME, MOP) \
> +static bool trans_## NAME(DisasContext *ctx, arg_vv * a) \
> +{ \
> + if (!avail_LASX(ctx)) { \
> + return false; \
> + } \
> + \
> + if (!check_vec(ctx, 32)) { \
> + return true; \
> + } \
> + \
> + tcg_gen_gvec_dup_mem(MOP, vec_full_offset(a->vd), vec_full_offset(a->vj), \
> + 32, 32); \
> + return true; \
> +}
>
> +XVREPLVE0(xvreplve0_b, MO_8)
> +XVREPLVE0(xvreplve0_h, MO_16)
> +XVREPLVE0(xvreplve0_w, MO_32)
> +XVREPLVE0(xvreplve0_d, MO_64)
> +XVREPLVE0(xvreplve0_q, MO_128)
Should use a helper function and TRANS().
> +static bool do_vbsrl_v(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz)
> +{
> + int ofs, i, max;
> + TCGv_i64 desthigh[2], destlow[2], high[2], low[2];
> +
> + if (!check_vec(ctx, 32)) {
> + return true;
> + }
> +
> + max = (oprsz == 16) ? 1 : 2;
> +
> + for (i = 0; i < max; i++) {
> + desthigh[i] = tcg_temp_new_i64();
> + destlow[i] = tcg_temp_new_i64();
> + high[i] = tcg_temp_new_i64();
> + low[i] = tcg_temp_new_i64();
> + get_vreg64(high[i], a->vj, 2 * i + 1);
> +
> + ofs = ((a->imm) & 0xf) * 8;
> + if (ofs < 64) {
> + get_vreg64(low[i], a->vj, 2 * i);
> + tcg_gen_extract2_i64(destlow[i], low[i], high[i], ofs);
> + tcg_gen_shri_i64(desthigh[i], high[i], ofs);
> + } else {
> + tcg_gen_shri_i64(destlow[i], high[i], ofs - 64);
> + desthigh[i] = tcg_constant_i64(0);
> + }
> + set_vreg64(desthigh[i], a->vd, 2 * i + 1);
> + set_vreg64(destlow[i], a->vd, 2 * i);
> + }
>
> return true;
> }
Why are you using arrays? They don't seem required.
This would seem clearer as
for (i = 0; i < oprsz / 16; i++) {
TCGv desthi = tcg_temp_new_i64();
...
}
r~
^ permalink raw reply [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 53/57] target/loongarch: Implement xvpack xvpick xvilv{l/h}
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (51 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 52/57] target/loongarch: Implement xvreplve xvinsve0 xvpickve Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-07 8:31 ` [PATCH RESEND v5 54/57] target/loongarch: Implement xvshuf xvperm{i} xvshuf4i Song Gao
` (3 subsequent siblings)
56 siblings, 0 replies; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVPACK{EV/OD}.{B/H/W/D};
- XVPICK{EV/OD}.{B/H/W/D};
- XVILV{L/H}.{B/H/W/D}.
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/insns.decode | 27 ++++
target/loongarch/disas.c | 27 ++++
target/loongarch/vec_helper.c | 138 +++++++++++---------
target/loongarch/insn_trans/trans_vec.c.inc | 24 ++++
4 files changed, 156 insertions(+), 60 deletions(-)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 74383ba3bc..a325b861c1 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -2012,3 +2012,30 @@ xvpickve_d 0111 01110000 00111 110 .. ..... ..... @vv_ui2
xvbsll_v 0111 01101000 11100 ..... ..... ..... @vv_ui5
xvbsrl_v 0111 01101000 11101 ..... ..... ..... @vv_ui5
+
+xvpackev_b 0111 01010001 01100 ..... ..... ..... @vvv
+xvpackev_h 0111 01010001 01101 ..... ..... ..... @vvv
+xvpackev_w 0111 01010001 01110 ..... ..... ..... @vvv
+xvpackev_d 0111 01010001 01111 ..... ..... ..... @vvv
+xvpackod_b 0111 01010001 10000 ..... ..... ..... @vvv
+xvpackod_h 0111 01010001 10001 ..... ..... ..... @vvv
+xvpackod_w 0111 01010001 10010 ..... ..... ..... @vvv
+xvpackod_d 0111 01010001 10011 ..... ..... ..... @vvv
+
+xvpickev_b 0111 01010001 11100 ..... ..... ..... @vvv
+xvpickev_h 0111 01010001 11101 ..... ..... ..... @vvv
+xvpickev_w 0111 01010001 11110 ..... ..... ..... @vvv
+xvpickev_d 0111 01010001 11111 ..... ..... ..... @vvv
+xvpickod_b 0111 01010010 00000 ..... ..... ..... @vvv
+xvpickod_h 0111 01010010 00001 ..... ..... ..... @vvv
+xvpickod_w 0111 01010010 00010 ..... ..... ..... @vvv
+xvpickod_d 0111 01010010 00011 ..... ..... ..... @vvv
+
+xvilvl_b 0111 01010001 10100 ..... ..... ..... @vvv
+xvilvl_h 0111 01010001 10101 ..... ..... ..... @vvv
+xvilvl_w 0111 01010001 10110 ..... ..... ..... @vvv
+xvilvl_d 0111 01010001 10111 ..... ..... ..... @vvv
+xvilvh_b 0111 01010001 11000 ..... ..... ..... @vvv
+xvilvh_h 0111 01010001 11001 ..... ..... ..... @vvv
+xvilvh_w 0111 01010001 11010 ..... ..... ..... @vvv
+xvilvh_d 0111 01010001 11011 ..... ..... ..... @vvv
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index d091402db6..74ae916a10 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -2547,3 +2547,30 @@ INSN_LASX(xvpickve_d, vv_i)
INSN_LASX(xvbsll_v, vv_i)
INSN_LASX(xvbsrl_v, vv_i)
+
+INSN_LASX(xvpackev_b, vvv)
+INSN_LASX(xvpackev_h, vvv)
+INSN_LASX(xvpackev_w, vvv)
+INSN_LASX(xvpackev_d, vvv)
+INSN_LASX(xvpackod_b, vvv)
+INSN_LASX(xvpackod_h, vvv)
+INSN_LASX(xvpackod_w, vvv)
+INSN_LASX(xvpackod_d, vvv)
+
+INSN_LASX(xvpickev_b, vvv)
+INSN_LASX(xvpickev_h, vvv)
+INSN_LASX(xvpickev_w, vvv)
+INSN_LASX(xvpickev_d, vvv)
+INSN_LASX(xvpickod_b, vvv)
+INSN_LASX(xvpickod_h, vvv)
+INSN_LASX(xvpickod_w, vvv)
+INSN_LASX(xvpickod_d, vvv)
+
+INSN_LASX(xvilvl_b, vvv)
+INSN_LASX(xvilvl_h, vvv)
+INSN_LASX(xvilvl_w, vvv)
+INSN_LASX(xvilvl_d, vvv)
+INSN_LASX(xvilvh_b, vvv)
+INSN_LASX(xvilvh_h, vvv)
+INSN_LASX(xvilvh_w, vvv)
+INSN_LASX(xvilvh_d, vvv)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index 6832189151..157e075742 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -3241,12 +3241,13 @@ XVPICKVE(xvpickve_d, D, 64, 0x3)
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
int i; \
- VReg temp; \
+ VReg temp = {}; \
VReg *Vd = (VReg *)vd; \
VReg *Vj = (VReg *)vj; \
VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
\
- for (i = 0; i < LSX_LEN/BIT; i++) { \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
temp.E(2 * i + 1) = Vj->E(2 * i); \
temp.E(2 *i) = Vk->E(2 * i); \
} \
@@ -3262,12 +3263,13 @@ VPACKEV(vpackev_d, 128, D)
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
int i; \
- VReg temp; \
+ VReg temp = {}; \
VReg *Vd = (VReg *)vd; \
VReg *Vj = (VReg *)vj; \
VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
\
- for (i = 0; i < LSX_LEN/BIT; i++) { \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
temp.E(2 * i + 1) = Vj->E(2 * i + 1); \
temp.E(2 * i) = Vk->E(2 * i + 1); \
} \
@@ -3279,20 +3281,24 @@ VPACKOD(vpackod_h, 32, H)
VPACKOD(vpackod_w, 64, W)
VPACKOD(vpackod_d, 128, D)
-#define VPICKEV(NAME, BIT, E) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- temp.E(i + LSX_LEN/BIT) = Vj->E(2 * i); \
- temp.E(i) = Vk->E(2 * i); \
- } \
- *Vd = temp; \
+#define VPICKEV(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E(j + ofs * (2 * i + 1)) = Vj->E(2 * (j + ofs * i)); \
+ temp.E(j + ofs * 2 * i) = Vk->E(2 * (j + ofs * i)); \
+ } \
+ } \
+ *Vd = temp; \
}
VPICKEV(vpickev_b, 16, B)
@@ -3300,20 +3306,24 @@ VPICKEV(vpickev_h, 32, H)
VPICKEV(vpickev_w, 64, W)
VPICKEV(vpickev_d, 128, D)
-#define VPICKOD(NAME, BIT, E) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- temp.E(i + LSX_LEN/BIT) = Vj->E(2 * i + 1); \
- temp.E(i) = Vk->E(2 * i + 1); \
- } \
- *Vd = temp; \
+#define VPICKOD(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E(j + ofs * (2 * i + 1)) = Vj->E(2 * (j + ofs * i) + 1); \
+ temp.E(j + ofs * 2 * i) = Vk->E(2 * (j + ofs * i) + 1); \
+ } \
+ } \
+ *Vd = temp; \
}
VPICKOD(vpickod_b, 16, B)
@@ -3321,20 +3331,24 @@ VPICKOD(vpickod_h, 32, H)
VPICKOD(vpickod_w, 64, W)
VPICKOD(vpickod_d, 128, D)
-#define VILVL(NAME, BIT, E) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- temp.E(2 * i + 1) = Vj->E(i); \
- temp.E(2 * i) = Vk->E(i); \
- } \
- *Vd = temp; \
+#define VILVL(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E(2 * (j + ofs * i) + 1) = Vj->E(j + ofs * 2 * i); \
+ temp.E(2 * (j + ofs * i)) = Vk->E(j + ofs * 2 * i); \
+ } \
+ } \
+ *Vd = temp; \
}
VILVL(vilvl_b, 16, B)
@@ -3342,20 +3356,24 @@ VILVL(vilvl_h, 32, H)
VILVL(vilvl_w, 64, W)
VILVL(vilvl_d, 128, D)
-#define VILVH(NAME, BIT, E) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg temp; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- \
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- temp.E(2 * i + 1) = Vj->E(i + LSX_LEN/BIT); \
- temp.E(2 * i) = Vk->E(i + LSX_LEN/BIT); \
- } \
- *Vd = temp; \
+#define VILVH(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E(2 * (j + ofs * i) + 1) = Vj->E(j + ofs * (2 * i + 1)); \
+ temp.E(2 * (j + ofs * i)) = Vk->E(j + ofs * (2 * i + 1)); \
+ } \
+ } \
+ *Vd = temp; \
}
VILVH(vilvh_b, 16, B)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index 70babae2c2..495591c114 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -5678,6 +5678,14 @@ TRANS(vpackod_b, LSX, gen_vvv, gen_helper_vpackod_b)
TRANS(vpackod_h, LSX, gen_vvv, gen_helper_vpackod_h)
TRANS(vpackod_w, LSX, gen_vvv, gen_helper_vpackod_w)
TRANS(vpackod_d, LSX, gen_vvv, gen_helper_vpackod_d)
+TRANS(xvpackev_b, LASX, gen_xxx, gen_helper_vpackev_b)
+TRANS(xvpackev_h, LASX, gen_xxx, gen_helper_vpackev_h)
+TRANS(xvpackev_w, LASX, gen_xxx, gen_helper_vpackev_w)
+TRANS(xvpackev_d, LASX, gen_xxx, gen_helper_vpackev_d)
+TRANS(xvpackod_b, LASX, gen_xxx, gen_helper_vpackod_b)
+TRANS(xvpackod_h, LASX, gen_xxx, gen_helper_vpackod_h)
+TRANS(xvpackod_w, LASX, gen_xxx, gen_helper_vpackod_w)
+TRANS(xvpackod_d, LASX, gen_xxx, gen_helper_vpackod_d)
TRANS(vpickev_b, LSX, gen_vvv, gen_helper_vpickev_b)
TRANS(vpickev_h, LSX, gen_vvv, gen_helper_vpickev_h)
@@ -5687,6 +5695,14 @@ TRANS(vpickod_b, LSX, gen_vvv, gen_helper_vpickod_b)
TRANS(vpickod_h, LSX, gen_vvv, gen_helper_vpickod_h)
TRANS(vpickod_w, LSX, gen_vvv, gen_helper_vpickod_w)
TRANS(vpickod_d, LSX, gen_vvv, gen_helper_vpickod_d)
+TRANS(xvpickev_b, LASX, gen_xxx, gen_helper_vpickev_b)
+TRANS(xvpickev_h, LASX, gen_xxx, gen_helper_vpickev_h)
+TRANS(xvpickev_w, LASX, gen_xxx, gen_helper_vpickev_w)
+TRANS(xvpickev_d, LASX, gen_xxx, gen_helper_vpickev_d)
+TRANS(xvpickod_b, LASX, gen_xxx, gen_helper_vpickod_b)
+TRANS(xvpickod_h, LASX, gen_xxx, gen_helper_vpickod_h)
+TRANS(xvpickod_w, LASX, gen_xxx, gen_helper_vpickod_w)
+TRANS(xvpickod_d, LASX, gen_xxx, gen_helper_vpickod_d)
TRANS(vilvl_b, LSX, gen_vvv, gen_helper_vilvl_b)
TRANS(vilvl_h, LSX, gen_vvv, gen_helper_vilvl_h)
@@ -5696,6 +5712,14 @@ TRANS(vilvh_b, LSX, gen_vvv, gen_helper_vilvh_b)
TRANS(vilvh_h, LSX, gen_vvv, gen_helper_vilvh_h)
TRANS(vilvh_w, LSX, gen_vvv, gen_helper_vilvh_w)
TRANS(vilvh_d, LSX, gen_vvv, gen_helper_vilvh_d)
+TRANS(xvilvl_b, LASX, gen_xxx, gen_helper_vilvl_b)
+TRANS(xvilvl_h, LASX, gen_xxx, gen_helper_vilvl_h)
+TRANS(xvilvl_w, LASX, gen_xxx, gen_helper_vilvl_w)
+TRANS(xvilvl_d, LASX, gen_xxx, gen_helper_vilvl_d)
+TRANS(xvilvh_b, LASX, gen_xxx, gen_helper_vilvh_b)
+TRANS(xvilvh_h, LASX, gen_xxx, gen_helper_vilvh_h)
+TRANS(xvilvh_w, LASX, gen_xxx, gen_helper_vilvh_w)
+TRANS(xvilvh_d, LASX, gen_xxx, gen_helper_vilvh_d)
TRANS(vshuf_b, LSX, gen_vvvv, gen_helper_vshuf_b)
TRANS(vshuf_h, LSX, gen_vvv, gen_helper_vshuf_h)
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 54/57] target/loongarch: Implement xvshuf xvperm{i} xvshuf4i
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (52 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 53/57] target/loongarch: Implement xvpack xvpick xvilv{l/h} Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-11 23:45 ` Richard Henderson
2023-09-07 8:31 ` [PATCH RESEND v5 55/57] target/loongarch: Implement xvld xvst Song Gao
` (2 subsequent siblings)
56 siblings, 1 reply; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVSHUF.{B/H/W/D};
- XVPERM.W;
- XVSHUF4i.{B/H/W/D};
- XVPERMI.{W/D/Q};
- XVEXTRINS.{B/H/W/D}.
Signed-off-by: Song Gao <gaosong@loongson.cn>
---
target/loongarch/helper.h | 3 +
target/loongarch/insns.decode | 21 ++++
target/loongarch/disas.c | 21 ++++
target/loongarch/vec_helper.c | 114 ++++++++++++++++----
target/loongarch/insn_trans/trans_vec.c.inc | 26 +++++
5 files changed, 166 insertions(+), 19 deletions(-)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index fb489dda2d..b3b64a0215 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -709,7 +709,10 @@ DEF_HELPER_FLAGS_4(vshuf4i_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(vshuf4i_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(vshuf4i_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vperm_w, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(vpermi_w, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vpermi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(vpermi_q, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(vextrins_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(vextrins_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index a325b861c1..64b67ee9ac 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -2039,3 +2039,24 @@ xvilvh_b 0111 01010001 11000 ..... ..... ..... @vvv
xvilvh_h 0111 01010001 11001 ..... ..... ..... @vvv
xvilvh_w 0111 01010001 11010 ..... ..... ..... @vvv
xvilvh_d 0111 01010001 11011 ..... ..... ..... @vvv
+
+xvshuf_b 0000 11010110 ..... ..... ..... ..... @vvvv
+xvshuf_h 0111 01010111 10101 ..... ..... ..... @vvv
+xvshuf_w 0111 01010111 10110 ..... ..... ..... @vvv
+xvshuf_d 0111 01010111 10111 ..... ..... ..... @vvv
+
+xvperm_w 0111 01010111 11010 ..... ..... ..... @vvv
+
+xvshuf4i_b 0111 01111001 00 ........ ..... ..... @vv_ui8
+xvshuf4i_h 0111 01111001 01 ........ ..... ..... @vv_ui8
+xvshuf4i_w 0111 01111001 10 ........ ..... ..... @vv_ui8
+xvshuf4i_d 0111 01111001 11 ........ ..... ..... @vv_ui8
+
+xvpermi_w 0111 01111110 01 ........ ..... ..... @vv_ui8
+xvpermi_d 0111 01111110 10 ........ ..... ..... @vv_ui8
+xvpermi_q 0111 01111110 11 ........ ..... ..... @vv_ui8
+
+xvextrins_d 0111 01111000 00 ........ ..... ..... @vv_ui8
+xvextrins_w 0111 01111000 01 ........ ..... ..... @vv_ui8
+xvextrins_h 0111 01111000 10 ........ ..... ..... @vv_ui8
+xvextrins_b 0111 01111000 11 ........ ..... ..... @vv_ui8
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 74ae916a10..1ec8e21e01 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -2574,3 +2574,24 @@ INSN_LASX(xvilvh_b, vvv)
INSN_LASX(xvilvh_h, vvv)
INSN_LASX(xvilvh_w, vvv)
INSN_LASX(xvilvh_d, vvv)
+
+INSN_LASX(xvshuf_b, vvvv)
+INSN_LASX(xvshuf_h, vvv)
+INSN_LASX(xvshuf_w, vvv)
+INSN_LASX(xvshuf_d, vvv)
+
+INSN_LASX(xvperm_w, vvv)
+
+INSN_LASX(xvshuf4i_b, vv_i)
+INSN_LASX(xvshuf4i_h, vv_i)
+INSN_LASX(xvshuf4i_w, vv_i)
+INSN_LASX(xvshuf4i_d, vv_i)
+
+INSN_LASX(xvpermi_w, vv_i)
+INSN_LASX(xvpermi_d, vv_i)
+INSN_LASX(xvpermi_q, vv_i)
+
+INSN_LASX(xvextrins_d, vv_i)
+INSN_LASX(xvextrins_w, vv_i)
+INSN_LASX(xvextrins_h, vv_i)
+INSN_LASX(xvextrins_b, vv_i)
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index 157e075742..97b186a3ba 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -3381,20 +3381,29 @@ VILVH(vilvh_h, 32, H)
VILVH(vilvh_w, 64, W)
VILVH(vilvh_d, 128, D)
+#define SHF_POS(i, imm) (((i) & 0xfc) + (((imm) >> (2 * ((i) & 0x03))) & 0x03))
+
void HELPER(vshuf_b)(void *vd, void *vj, void *vk, void *va, uint32_t desc)
{
int i, m;
- VReg temp;
+ VReg temp = {};
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
VReg *Vk = (VReg *)vk;
VReg *Va = (VReg *)va;
+ int oprsz = simd_oprsz(desc);
- m = LSX_LEN/8;
- for (i = 0; i < m ; i++) {
+ m = LSX_LEN / 8;
+ for (i = 0; i < m; i++) {
uint64_t k = (uint8_t)Va->B(i) % (2 * m);
temp.B(i) = k < m ? Vk->B(k) : Vj->B(k - m);
}
+ if (oprsz == 32) {
+ for(i = m; i < 2 * m; i++) {
+ uint64_t j = (uint8_t)Va->B(i) % (2 * m);
+ temp.B(i) = j < m ? Vk->B(j + m) : Vj->B(j);
+ }
+ }
*Vd = temp;
}
@@ -3402,16 +3411,23 @@ void HELPER(vshuf_b)(void *vd, void *vj, void *vk, void *va, uint32_t desc)
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
int i, m; \
- VReg temp; \
+ VReg temp = {}; \
VReg *Vd = (VReg *)vd; \
VReg *Vj = (VReg *)vj; \
VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
\
- m = LSX_LEN/BIT; \
+ m = LSX_LEN / BIT; \
for (i = 0; i < m; i++) { \
- uint64_t k = ((uint8_t) Vd->E(i)) % (2 * m); \
+ uint64_t k = (uint8_t)Vd->E(i) % (2 * m); \
temp.E(i) = k < m ? Vk->E(k) : Vj->E(k - m); \
} \
+ if (oprsz == 32) { \
+ for (i = m; i < 2 * m; i++) { \
+ uint64_t j = (uint8_t)Vd->E(i) % (2 * m); \
+ temp.E(i) = j < m ? Vk->E(j + m): Vj->E(j); \
+ } \
+ } \
*Vd = temp; \
}
@@ -3422,14 +3438,20 @@ VSHUF(vshuf_d, 64, D)
#define VSHUF4I(NAME, BIT, E) \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
{ \
- int i; \
- VReg temp; \
+ int i, max; \
+ VReg temp = {}; \
VReg *Vd = (VReg *)vd; \
VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
\
- for (i = 0; i < LSX_LEN/BIT; i++) { \
- temp.E(i) = Vj->E(((i) & 0xfc) + (((imm) >> \
- (2 * ((i) & 0x03))) & 0x03)); \
+ max = LSX_LEN / BIT; \
+ for (i = 0; i < max; i++) { \
+ temp.E(i) = Vj->E(SHF_POS(i, imm)); \
+ } \
+ if (oprsz == 32) { \
+ for (i = max; i < 2 * max; i++) { \
+ temp.E(i) = Vj->E(SHF_POS(i - max, imm) + max); \
+ } \
} \
*Vd = temp; \
}
@@ -3440,38 +3462,92 @@ VSHUF4I(vshuf4i_w, 32, W)
void HELPER(vshuf4i_d)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
+ int i;
+ VReg temp = {};
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
- VReg temp;
- temp.D(0) = (imm & 2 ? Vj : Vd)->D(imm & 1);
- temp.D(1) = (imm & 8 ? Vj : Vd)->D((imm >> 2) & 1);
+ for (i = 0; i < oprsz / 16; i++) {
+ temp.D(2 * i) = (imm & 2 ? Vj : Vd)->D((imm & 1) + 2 * i);
+ temp.D(2 * i + 1) = (imm & 8 ? Vj : Vd)->D(((imm >> 2) & 1) + 2 * i);
+ }
+ *Vd = temp;
+}
+
+void HELPER(vperm_w)(void *vd, void *vj, void *vk, uint32_t desc)
+{
+ int i, m;
+ VReg temp = {};
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ VReg *Vk = (VReg *)vk;
+
+ m = LASX_LEN / 32;
+ for (i = 0; i < m ; i++) {
+ uint64_t k = (uint8_t)Vk->W(i) % 8;
+ temp.W(i) = Vj->W(k);
+ }
*Vd = temp;
}
void HELPER(vpermi_w)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+ int i;
+ VReg temp = {};
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz / 16; i++) {
+ temp.W(4 * i) = Vj->W((imm & 0x3) + 4 * i);
+ temp.W(4 * i + 1) = Vj->W(((imm >> 2) & 0x3) + 4 * i);
+ temp.W(4 * i + 2) = Vd->W(((imm >> 4) & 0x3) + 4 * i);
+ temp.W(4 * i + 3) = Vd->W(((imm >> 6) & 0x3) + 4 * i);
+ }
+ *Vd = temp;
+}
+
+void HELPER(vpermi_d)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+ VReg temp = {};
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+
+ temp.D(0) = Vj->D(imm & 0x3);
+ temp.D(1) = Vj->D((imm >> 2) & 0x3);
+ temp.D(2) = Vj->D((imm >> 4) & 0x3);
+ temp.D(3) = Vj->D((imm >> 6) & 0x3);
+ *Vd = temp;
+}
+
+void HELPER(vpermi_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
VReg temp;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
- temp.W(0) = Vj->W(imm & 0x3);
- temp.W(1) = Vj->W((imm >> 2) & 0x3);
- temp.W(2) = Vd->W((imm >> 4) & 0x3);
- temp.W(3) = Vd->W((imm >> 6) & 0x3);
+ temp.Q(0) = (imm & 0x3) > 1 ? Vd->Q((imm & 0x3) - 2) : Vj->Q(imm & 0x3);
+ temp.Q(1) = ((imm >> 4) & 0x3) > 1 ? Vd->Q(((imm >> 4) & 0x3) - 2) :
+ Vj->Q((imm >> 4) & 0x3);
*Vd = temp;
}
#define VEXTRINS(NAME, BIT, E, MASK) \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
{ \
- int ins, extr; \
+ int ins, extr, max; \
VReg *Vd = (VReg *)vd; \
VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
\
+ max = LSX_LEN / BIT; \
ins = (imm >> 4) & MASK; \
extr = imm & MASK; \
Vd->E(ins) = Vj->E(extr); \
+ if (oprsz == 32) { \
+ Vd->E(ins + max) = Vj->E(extr + max); \
+ } \
}
VEXTRINS(vextrins_b, 8, B, 0xf)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index 495591c114..767fc06f47 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -83,6 +83,16 @@ static bool gen_vvvv(DisasContext *ctx, arg_vvvv *a,
return gen_vvvv_vl(ctx, a, 16, fn);
}
+static bool gen_xxxx(DisasContext *ctx, arg_vvvv *a,
+ gen_helper_gvec_4 *fn)
+{
+ if (!check_vec(ctx, 32)) {
+ return true;
+ }
+
+ return gen_vvvv_vl(ctx, a, 32, fn);
+}
+
static bool gen_vvv_ptr_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
gen_helper_gvec_3_ptr *fn)
{
@@ -5725,17 +5735,33 @@ TRANS(vshuf_b, LSX, gen_vvvv, gen_helper_vshuf_b)
TRANS(vshuf_h, LSX, gen_vvv, gen_helper_vshuf_h)
TRANS(vshuf_w, LSX, gen_vvv, gen_helper_vshuf_w)
TRANS(vshuf_d, LSX, gen_vvv, gen_helper_vshuf_d)
+TRANS(xvshuf_b, LASX, gen_xxxx, gen_helper_vshuf_b)
+TRANS(xvshuf_h, LASX, gen_xxx, gen_helper_vshuf_h)
+TRANS(xvshuf_w, LASX, gen_xxx, gen_helper_vshuf_w)
+TRANS(xvshuf_d, LASX, gen_xxx, gen_helper_vshuf_d)
TRANS(vshuf4i_b, LSX, gen_vv_i, gen_helper_vshuf4i_b)
TRANS(vshuf4i_h, LSX, gen_vv_i, gen_helper_vshuf4i_h)
TRANS(vshuf4i_w, LSX, gen_vv_i, gen_helper_vshuf4i_w)
TRANS(vshuf4i_d, LSX, gen_vv_i, gen_helper_vshuf4i_d)
+TRANS(xvshuf4i_b, LASX, gen_xx_i, gen_helper_vshuf4i_b)
+TRANS(xvshuf4i_h, LASX, gen_xx_i, gen_helper_vshuf4i_h)
+TRANS(xvshuf4i_w, LASX, gen_xx_i, gen_helper_vshuf4i_w)
+TRANS(xvshuf4i_d, LASX, gen_xx_i, gen_helper_vshuf4i_d)
+TRANS(xvperm_w, LASX, gen_xxx, gen_helper_vperm_w)
TRANS(vpermi_w, LSX, gen_vv_i, gen_helper_vpermi_w)
+TRANS(xvpermi_w, LASX, gen_xx_i, gen_helper_vpermi_w)
+TRANS(xvpermi_d, LASX, gen_xx_i, gen_helper_vpermi_d)
+TRANS(xvpermi_q, LASX, gen_xx_i, gen_helper_vpermi_q)
TRANS(vextrins_b, LSX, gen_vv_i, gen_helper_vextrins_b)
TRANS(vextrins_h, LSX, gen_vv_i, gen_helper_vextrins_h)
TRANS(vextrins_w, LSX, gen_vv_i, gen_helper_vextrins_w)
TRANS(vextrins_d, LSX, gen_vv_i, gen_helper_vextrins_d)
+TRANS(xvextrins_b, LASX, gen_xx_i, gen_helper_vextrins_b)
+TRANS(xvextrins_h, LASX, gen_xx_i, gen_helper_vextrins_h)
+TRANS(xvextrins_w, LASX, gen_xx_i, gen_helper_vextrins_w)
+TRANS(xvextrins_d, LASX, gen_xx_i, gen_helper_vextrins_d)
static bool trans_vld(DisasContext *ctx, arg_vr_i *a)
{
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* Re: [PATCH RESEND v5 54/57] target/loongarch: Implement xvshuf xvperm{i} xvshuf4i
2023-09-07 8:31 ` [PATCH RESEND v5 54/57] target/loongarch: Implement xvshuf xvperm{i} xvshuf4i Song Gao
@ 2023-09-11 23:45 ` Richard Henderson
0 siblings, 0 replies; 87+ messages in thread
From: Richard Henderson @ 2023-09-11 23:45 UTC (permalink / raw)
To: Song Gao, qemu-devel; +Cc: maobibo
On 9/7/23 01:31, Song Gao wrote:
> void HELPER(vshuf_b)(void *vd, void *vj, void *vk, void *va, uint32_t desc)
> {
> int i, m;
> - VReg temp;
> + VReg temp = {};
> VReg *Vd = (VReg *)vd;
> VReg *Vj = (VReg *)vj;
> VReg *Vk = (VReg *)vk;
> VReg *Va = (VReg *)va;
> + int oprsz = simd_oprsz(desc);
>
> - m = LSX_LEN/8;
> - for (i = 0; i < m ; i++) {
> + m = LSX_LEN / 8;
> + for (i = 0; i < m; i++) {
> uint64_t k = (uint8_t)Va->B(i) % (2 * m);
> temp.B(i) = k < m ? Vk->B(k) : Vj->B(k - m);
> }
> + if (oprsz == 32) {
> + for(i = m; i < 2 * m; i++) {
> + uint64_t j = (uint8_t)Va->B(i) % (2 * m);
> + temp.B(i) = j < m ? Vk->B(j + m) : Vj->B(j);
> + }
> + }
Use a loop here, not a compare against oprsz. There are several instances of this in the patch.
> +void HELPER(vpermi_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
> {
> VReg temp;
> VReg *Vd = (VReg *)vd;
> VReg *Vj = (VReg *)vj;
>
> + temp.Q(0) = (imm & 0x3) > 1 ? Vd->Q((imm & 0x3) - 2) : Vj->Q(imm & 0x3);
> + temp.Q(1) = ((imm >> 4) & 0x3) > 1 ? Vd->Q(((imm >> 4) & 0x3) - 2) :
> + Vj->Q((imm >> 4) & 0x3);
for (i = 0; i < 2; i++, imm >>= 4) {
temp.Q(i) = (imm & 2 ? Vd : Vj)->Q(imm & 1);
}
r~
^ permalink raw reply [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 55/57] target/loongarch: Implement xvld xvst
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (53 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 54/57] target/loongarch: Implement xvshuf xvperm{i} xvshuf4i Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-11 23:47 ` Richard Henderson
2023-09-07 8:31 ` [PATCH RESEND v5 56/57] target/loongarch: Move simple DO_XX macros together Song Gao
2023-09-07 8:31 ` [PATCH RESEND v5 57/57] target/loongarch: CPUCFG support LASX Song Gao
56 siblings, 1 reply; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
This patch includes:
- XVLD[X], XVST[X];
- XVLDREPL.{B/H/W/D};
- XVSTELM.{B/H/W/D}.
Signed-off-by: Song Gao <gaosong@loongson.cn>
---
target/loongarch/insns.decode | 18 +++
target/loongarch/disas.c | 24 ++++
target/loongarch/insn_trans/trans_vec.c.inc | 143 ++++++++++++++++++--
3 files changed, 175 insertions(+), 10 deletions(-)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 64b67ee9ac..64b308f9fb 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -550,6 +550,10 @@ dbcl 0000 00000010 10101 ............... @i15
@vr_i8i2 .... ........ imm2:2 ........ rj:5 vd:5 &vr_ii imm=%i8s2
@vr_i8i3 .... ....... imm2:3 ........ rj:5 vd:5 &vr_ii imm=%i8s1
@vr_i8i4 .... ...... imm2:4 imm:s8 rj:5 vd:5 &vr_ii
+@vr_i8i2x .... ........ imm2:2 ........ rj:5 vd:5 &vr_ii imm=%i8s3
+@vr_i8i3x .... ....... imm2:3 ........ rj:5 vd:5 &vr_ii imm=%i8s2
+@vr_i8i4x .... ...... imm2:4 ........ rj:5 vd:5 &vr_ii imm=%i8s1
+@vr_i8i5x .... ..... imm2:5 imm:s8 rj:5 vd:5 &vr_ii
@vrr .... ........ ..... rk:5 rj:5 vd:5 &vrr
@v_i13 .... ........ .. imm:13 vd:5 &v_i
@@ -2060,3 +2064,17 @@ xvextrins_d 0111 01111000 00 ........ ..... ..... @vv_ui8
xvextrins_w 0111 01111000 01 ........ ..... ..... @vv_ui8
xvextrins_h 0111 01111000 10 ........ ..... ..... @vv_ui8
xvextrins_b 0111 01111000 11 ........ ..... ..... @vv_ui8
+
+xvld 0010 110010 ............ ..... ..... @vr_i12
+xvst 0010 110011 ............ ..... ..... @vr_i12
+xvldx 0011 10000100 10000 ..... ..... ..... @vrr
+xvstx 0011 10000100 11000 ..... ..... ..... @vrr
+
+xvldrepl_d 0011 00100001 0 ......... ..... ..... @vr_i9
+xvldrepl_w 0011 00100010 .......... ..... ..... @vr_i10
+xvldrepl_h 0011 0010010 ........... ..... ..... @vr_i11
+xvldrepl_b 0011 001010 ............ ..... ..... @vr_i12
+xvstelm_d 0011 00110001 .. ........ ..... ..... @vr_i8i2x
+xvstelm_w 0011 0011001 ... ........ ..... ..... @vr_i8i3x
+xvstelm_h 0011 001101 .... ........ ..... ..... @vr_i8i4x
+xvstelm_b 0011 00111 ..... ........ ..... ..... @vr_i8i5x
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 1ec8e21e01..c8a29eac2b 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1753,6 +1753,16 @@ static void output_vvr_x(DisasContext *ctx, arg_vvr *a, const char *mnemonic)
output(ctx, mnemonic, "x%d, x%d, r%d", a->vd, a->vj, a->rk);
}
+static void output_vrr_x(DisasContext *ctx, arg_vrr *a, const char *mnemonic)
+{
+ output(ctx, mnemonic, "x%d, r%d, r%d", a->vd, a->rj, a->rk);
+}
+
+static void output_vr_ii_x(DisasContext *ctx, arg_vr_ii *a, const char *mnemonic)
+{
+ output(ctx, mnemonic, "x%d, r%d, 0x%x, 0x%x", a->vd, a->rj, a->imm, a->imm2);
+}
+
INSN_LASX(xvadd_b, vvv)
INSN_LASX(xvadd_h, vvv)
INSN_LASX(xvadd_w, vvv)
@@ -2595,3 +2605,17 @@ INSN_LASX(xvextrins_d, vv_i)
INSN_LASX(xvextrins_w, vv_i)
INSN_LASX(xvextrins_h, vv_i)
INSN_LASX(xvextrins_b, vv_i)
+
+INSN_LASX(xvld, vr_i)
+INSN_LASX(xvst, vr_i)
+INSN_LASX(xvldx, vrr)
+INSN_LASX(xvstx, vrr)
+
+INSN_LASX(xvldrepl_d, vr_i)
+INSN_LASX(xvldrepl_w, vr_i)
+INSN_LASX(xvldrepl_h, vr_i)
+INSN_LASX(xvldrepl_b, vr_i)
+INSN_LASX(xvstelm_d, vr_ii)
+INSN_LASX(xvstelm_w, vr_ii)
+INSN_LASX(xvstelm_h, vr_ii)
+INSN_LASX(xvstelm_b, vr_ii)
diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc
index 767fc06f47..f27c8b3508 100644
--- a/target/loongarch/insn_trans/trans_vec.c.inc
+++ b/target/loongarch/insn_trans/trans_vec.c.inc
@@ -5904,22 +5904,57 @@ static bool trans_## NAME (DisasContext *ctx, arg_vr_i *a) \
return true; \
}
-VLDREPL(vldrepl_b, MO_8)
-VLDREPL(vldrepl_h, MO_16)
-VLDREPL(vldrepl_w, MO_32)
-VLDREPL(vldrepl_d, MO_64)
+static bool do_vldrepl_vl(DisasContext *ctx, arg_vr_i *a,
+ uint32_t oprsz, MemOp mop)
+{
+ TCGv addr;
+ TCGv_i64 val;
+
+ addr = gpr_src(ctx, a->rj, EXT_NONE);
+ val = tcg_temp_new_i64();
+
+ addr = make_address_i(ctx, addr, a->imm);
+
+ tcg_gen_qemu_ld_i64(val, addr, ctx->mem_idx, mop);
+ tcg_gen_gvec_dup_i64(mop, vec_full_offset(a->vd), oprsz, ctx->vl / 8, val);
+
+ return true;
+}
+
+static bool do_vldrepl(DisasContext *ctx, arg_vr_i *a, MemOp mop)
+{
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
+ return do_vldrepl_vl(ctx, a, 16, mop);
+}
+
+static bool do_xvldrepl(DisasContext *ctx, arg_vr_i *a, MemOp mop)
+{
+ if (!check_vec(ctx, 32)) {
+ return true;
+ }
+
+ return do_vldrepl_vl(ctx, a, 32, mop);
+}
+
+TRANS(vldrepl_b, LSX, do_vldrepl, MO_8)
+TRANS(vldrepl_h, LSX, do_vldrepl, MO_16)
+TRANS(vldrepl_w, LSX, do_vldrepl, MO_32)
+TRANS(vldrepl_d, LSX, do_vldrepl, MO_64)
+TRANS(xvldrepl_b, LASX, do_xvldrepl, MO_8)
+TRANS(xvldrepl_h, LASX, do_xvldrepl, MO_16)
+TRANS(xvldrepl_w, LASX, do_xvldrepl, MO_32)
+TRANS(xvldrepl_d, LASX, do_xvldrepl, MO_64)
#define VSTELM(NAME, MO, E) \
-static bool trans_## NAME (DisasContext *ctx, arg_vr_ii *a) \
+static bool do_## NAME (DisasContext *ctx, arg_vr_ii *a, uint32_t oprsz) \
{ \
TCGv addr; \
TCGv_i64 val; \
\
- if (!avail_LSX(ctx)) { \
- return false; \
- } \
- \
- if (!check_vec(ctx, 16)) { \
+ if (!check_vec(ctx, oprsz)) { \
return true; \
} \
\
@@ -5939,3 +5974,91 @@ VSTELM(vstelm_b, MO_8, B)
VSTELM(vstelm_h, MO_16, H)
VSTELM(vstelm_w, MO_32, W)
VSTELM(vstelm_d, MO_64, D)
+VSTELM(xvstelm_b, MO_8, B)
+VSTELM(xvstelm_h, MO_16, H)
+VSTELM(xvstelm_w, MO_32, W)
+VSTELM(xvstelm_d, MO_64, D)
+
+TRANS(vstelm_b, LSX, do_vstelm_b, 16)
+TRANS(vstelm_h, LSX, do_vstelm_h, 16)
+TRANS(vstelm_w, LSX, do_vstelm_w, 16)
+TRANS(vstelm_d, LSX, do_vstelm_d, 16)
+TRANS(xvstelm_b, LASX, do_xvstelm_b, 32)
+TRANS(xvstelm_h, LASX, do_xvstelm_h, 32)
+TRANS(xvstelm_w, LASX, do_xvstelm_w, 32)
+TRANS(xvstelm_d, LASX, do_xvstelm_d, 32)
+
+static bool gen_lasx_memory(DisasContext *ctx, arg_vr_i *a,
+ void (*func)(DisasContext *, int, TCGv))
+{
+ TCGv addr = gpr_src(ctx, a->rj, EXT_NONE);
+ TCGv temp = NULL;
+
+ if (!check_vec(ctx, 32)) {
+ return true;
+ }
+
+ if (a->imm) {
+ temp = tcg_temp_new();
+ tcg_gen_addi_tl(temp, addr, a->imm);
+ addr = temp;
+ }
+
+ func(ctx, a->vd, addr);
+ return true;
+}
+
+static void gen_xvld(DisasContext *ctx, int vreg, TCGv addr)
+{
+ int i;
+ TCGv temp = tcg_temp_new();
+ TCGv dest = tcg_temp_new();
+
+ tcg_gen_qemu_ld_i64(dest, addr, ctx->mem_idx, MO_TEUQ);
+ set_vreg64(dest, vreg, 0);
+
+ for (i = 1; i < 4; i++) {
+ tcg_gen_addi_tl(temp, addr, 8 * i);
+ tcg_gen_qemu_ld_i64(dest, temp, ctx->mem_idx, MO_TEUQ);
+ set_vreg64(dest, vreg, i);
+ }
+}
+
+static void gen_xvst(DisasContext * ctx, int vreg, TCGv addr)
+{
+ int i;
+ TCGv temp = tcg_temp_new();
+ TCGv dest = tcg_temp_new();
+
+ get_vreg64(dest, vreg, 0);
+ tcg_gen_qemu_st_i64(dest, addr, ctx->mem_idx, MO_TEUQ);
+
+ for (i = 1; i < 4; i++) {
+ tcg_gen_addi_tl(temp, addr, 8 * i);
+ get_vreg64(dest, vreg, i);
+ tcg_gen_qemu_st_i64(dest, temp, ctx->mem_idx, MO_TEUQ);
+ }
+}
+
+TRANS(xvld, LASX, gen_lasx_memory, gen_xvld)
+TRANS(xvst, LASX, gen_lasx_memory, gen_xvst)
+
+static bool gen_lasx_memoryx(DisasContext *ctx, arg_vrr *a,
+ void (*func)(DisasContext*, int, TCGv))
+{
+ TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
+ TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
+ TCGv addr = tcg_temp_new();
+
+ if (!check_vec(ctx, 32)) {
+ return true;
+ }
+
+ tcg_gen_add_tl(addr, src1, src2);
+ func(ctx, a->vd, addr);
+
+ return true;
+}
+
+TRANS(xvldx, LASX, gen_lasx_memoryx, gen_xvld)
+TRANS(xvstx, LASX, gen_lasx_memoryx, gen_xvst)
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 56/57] target/loongarch: Move simple DO_XX macros together
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (54 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 55/57] target/loongarch: Implement xvld xvst Song Gao
@ 2023-09-07 8:31 ` Song Gao
2023-09-11 23:48 ` Richard Henderson
2023-09-07 8:31 ` [PATCH RESEND v5 57/57] target/loongarch: CPUCFG support LASX Song Gao
56 siblings, 1 reply; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
Signed-off-by: Song Gao <gaosong@loongson.cn>
---
target/loongarch/vec.h | 42 ++++++++++++++++++++++++++++++
target/loongarch/vec_helper.c | 48 -----------------------------------
2 files changed, 42 insertions(+), 48 deletions(-)
diff --git a/target/loongarch/vec.h b/target/loongarch/vec.h
index 2f23cae7d7..3c9adf8427 100644
--- a/target/loongarch/vec.h
+++ b/target/loongarch/vec.h
@@ -30,4 +30,46 @@
#define Q(x) Q[x]
#endif /* HOST_BIG_ENDIAN */
+#define DO_ADD(a, b) (a + b)
+#define DO_SUB(a, b) (a - b)
+#define DO_VAVG(a, b) ((a >> 1) + (b >> 1) + (a & b & 1))
+#define DO_VAVGR(a, b) ((a >> 1) + (b >> 1) + ((a | b) & 1))
+#define DO_VABSD(a, b) ((a > b) ? (a -b) : (b-a))
+#define DO_VABS(a) ((a < 0) ? (-a) : (a))
+#define DO_MIN(a, b) (a < b ? a : b)
+#define DO_MAX(a, b) (a > b ? a : b)
+#define DO_MUL(a, b) (a * b)
+#define DO_MADD(a, b, c) (a + b * c)
+#define DO_MSUB(a, b, c) (a - b * c)
+
+#define DO_DIVU(N, M) (unlikely(M == 0) ? 0 : N / M)
+#define DO_REMU(N, M) (unlikely(M == 0) ? 0 : N % M)
+#define DO_DIV(N, M) (unlikely(M == 0) ? 0 :\
+ unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
+#define DO_REM(N, M) (unlikely(M == 0) ? 0 :\
+ unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
+
+#define DO_SIGNCOV(a, b) (a == 0 ? 0 : a < 0 ? -b : b)
+
+#define R_SHIFT(a, b) (a >> b)
+
+#define DO_CLO_B(N) (clz32(~N & 0xff) - 24)
+#define DO_CLO_H(N) (clz32(~N & 0xffff) - 16)
+#define DO_CLO_W(N) (clz32(~N))
+#define DO_CLO_D(N) (clz64(~N))
+#define DO_CLZ_B(N) (clz32(N) - 24)
+#define DO_CLZ_H(N) (clz32(N) - 16)
+#define DO_CLZ_W(N) (clz32(N))
+#define DO_CLZ_D(N) (clz64(N))
+
+#define DO_BITCLR(a, bit) (a & ~(1ull << bit))
+#define DO_BITSET(a, bit) (a | 1ull << bit)
+#define DO_BITREV(a, bit) (a ^ (1ull << bit))
+
+#define VSEQ(a, b) (a == b ? -1 : 0)
+#define VSLE(a, b) (a <= b ? -1 : 0)
+#define VSLT(a, b) (a < b ? -1 : 0)
+
+#define SHF_POS(i, imm) (((i) & 0xfc) + (((imm) >> (2 * ((i) & 0x03))) & 0x03))
+
#endif /* LOONGARCH_VEC_H */
diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c
index 97b186a3ba..675bd02f7d 100644
--- a/target/loongarch/vec_helper.c
+++ b/target/loongarch/vec_helper.c
@@ -15,9 +15,6 @@
#include "vec.h"
#include "tcg/tcg-gvec-desc.h"
-#define DO_ADD(a, b) (a + b)
-#define DO_SUB(a, b) (a - b)
-
#define DO_ODD_EVEN(NAME, BIT, E1, E2, DO_OP) \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
@@ -347,9 +344,6 @@ DO_ODD_U_S(vaddwod_h_bu_b, 16, H, UH, B, UB, DO_ADD)
DO_ODD_U_S(vaddwod_w_hu_h, 32, W, UW, H, UH, DO_ADD)
DO_ODD_U_S(vaddwod_d_wu_w, 64, D, UD, W, UW, DO_ADD)
-#define DO_VAVG(a, b) ((a >> 1) + (b >> 1) + (a & b & 1))
-#define DO_VAVGR(a, b) ((a >> 1) + (b >> 1) + ((a | b) & 1))
-
#define DO_3OP(NAME, BIT, E, DO_OP) \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
@@ -381,8 +375,6 @@ DO_3OP(vavgr_hu, 16, UH, DO_VAVGR)
DO_3OP(vavgr_wu, 32, UW, DO_VAVGR)
DO_3OP(vavgr_du, 64, UD, DO_VAVGR)
-#define DO_VABSD(a, b) ((a > b) ? (a -b) : (b-a))
-
DO_3OP(vabsd_b, 8, B, DO_VABSD)
DO_3OP(vabsd_h, 16, H, DO_VABSD)
DO_3OP(vabsd_w, 32, W, DO_VABSD)
@@ -392,8 +384,6 @@ DO_3OP(vabsd_hu, 16, UH, DO_VABSD)
DO_3OP(vabsd_wu, 32, UW, DO_VABSD)
DO_3OP(vabsd_du, 64, UD, DO_VABSD)
-#define DO_VABS(a) ((a < 0) ? (-a) : (a))
-
#define DO_VADDA(NAME, BIT, E) \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
@@ -413,9 +403,6 @@ DO_VADDA(vadda_h, 16, H)
DO_VADDA(vadda_w, 32, W)
DO_VADDA(vadda_d, 64, D)
-#define DO_MIN(a, b) (a < b ? a : b)
-#define DO_MAX(a, b) (a > b ? a : b)
-
#define VMINMAXI(NAME, BIT, E, DO_OP) \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
{ \
@@ -500,8 +487,6 @@ DO_VMUH(vmuh_bu, 8, UH, UB, DO_MUH)
DO_VMUH(vmuh_hu, 16, UW, UH, DO_MUH)
DO_VMUH(vmuh_wu, 32, UD, UW, DO_MUH)
-#define DO_MUL(a, b) (a * b)
-
DO_EVEN(vmulwev_h_b, 16, H, B, DO_MUL)
DO_EVEN(vmulwev_w_h, 32, W, H, DO_MUL)
DO_EVEN(vmulwev_d_w, 64, D, W, DO_MUL)
@@ -526,9 +511,6 @@ DO_ODD_U_S(vmulwod_h_bu_b, 16, H, UH, B, UB, DO_MUL)
DO_ODD_U_S(vmulwod_w_hu_h, 32, W, UW, H, UH, DO_MUL)
DO_ODD_U_S(vmulwod_d_wu_w, 64, D, UD, W, UW, DO_MUL)
-#define DO_MADD(a, b, c) (a + b * c)
-#define DO_MSUB(a, b, c) (a - b * c)
-
#define VMADDSUB(NAME, BIT, E, DO_OP) \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
@@ -639,13 +621,6 @@ VMADDWOD_U_S(vmaddwod_h_bu_b, 16, H, UH, B, UB, DO_MUL)
VMADDWOD_U_S(vmaddwod_w_hu_h, 32, W, UW, H, UH, DO_MUL)
VMADDWOD_U_S(vmaddwod_d_wu_w, 64, D, UD, W, UW, DO_MUL)
-#define DO_DIVU(N, M) (unlikely(M == 0) ? 0 : N / M)
-#define DO_REMU(N, M) (unlikely(M == 0) ? 0 : N % M)
-#define DO_DIV(N, M) (unlikely(M == 0) ? 0 :\
- unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
-#define DO_REM(N, M) (unlikely(M == 0) ? 0 :\
- unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
-
#define VDIV(NAME, BIT, E, DO_OP) \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
@@ -791,8 +766,6 @@ VEXT2XV(vext2xv_wu_hu, 32, UW, UH)
VEXT2XV(vext2xv_du_hu, 64, UD, UH)
VEXT2XV(vext2xv_du_wu, 64, UD, UW)
-#define DO_SIGNCOV(a, b) (a == 0 ? 0 : a < 0 ? -b : b)
-
DO_3OP(vsigncov_b, 8, B, DO_SIGNCOV)
DO_3OP(vsigncov_h, 16, H, DO_SIGNCOV)
DO_3OP(vsigncov_w, 32, W, DO_SIGNCOV)
@@ -1107,8 +1080,6 @@ VSRARI(vsrari_h, 16, H)
VSRARI(vsrari_w, 32, W)
VSRARI(vsrari_d, 64, D)
-#define R_SHIFT(a, b) (a >> b)
-
#define VSRLN(NAME, BIT, E1, E2) \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
@@ -2272,15 +2243,6 @@ void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \
} \
}
-#define DO_CLO_B(N) (clz32(~N & 0xff) - 24)
-#define DO_CLO_H(N) (clz32(~N & 0xffff) - 16)
-#define DO_CLO_W(N) (clz32(~N))
-#define DO_CLO_D(N) (clz64(~N))
-#define DO_CLZ_B(N) (clz32(N) - 24)
-#define DO_CLZ_H(N) (clz32(N) - 16)
-#define DO_CLZ_W(N) (clz32(N))
-#define DO_CLZ_D(N) (clz64(N))
-
DO_2OP(vclo_b, 8, UB, DO_CLO_B)
DO_2OP(vclo_h, 16, UH, DO_CLO_H)
DO_2OP(vclo_w, 32, UW, DO_CLO_W)
@@ -2309,10 +2271,6 @@ VPCNT(vpcnt_h, 16, UH, ctpop16)
VPCNT(vpcnt_w, 32, UW, ctpop32)
VPCNT(vpcnt_d, 64, UD, ctpop64)
-#define DO_BITCLR(a, bit) (a & ~(1ull << bit))
-#define DO_BITSET(a, bit) (a | 1ull << bit)
-#define DO_BITREV(a, bit) (a ^ (1ull << bit))
-
#define DO_BIT(NAME, BIT, E, DO_OP) \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
{ \
@@ -3053,10 +3011,6 @@ void HELPER(vffint_s_l)(void *vd, void *vj, void *vk,
*Vd = temp;
}
-#define VSEQ(a, b) (a == b ? -1 : 0)
-#define VSLE(a, b) (a <= b ? -1 : 0)
-#define VSLT(a, b) (a < b ? -1 : 0)
-
#define VCMPI(NAME, BIT, E, DO_OP) \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
{ \
@@ -3381,8 +3335,6 @@ VILVH(vilvh_h, 32, H)
VILVH(vilvh_w, 64, W)
VILVH(vilvh_d, 128, D)
-#define SHF_POS(i, imm) (((i) & 0xfc) + (((imm) >> (2 * ((i) & 0x03))) & 0x03))
-
void HELPER(vshuf_b)(void *vd, void *vj, void *vk, void *va, uint32_t desc)
{
int i, m;
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread
* [PATCH RESEND v5 57/57] target/loongarch: CPUCFG support LASX
2023-09-07 8:31 [PATCH RESEND v5 00/57] Add LoongArch LASX instructions Song Gao
` (55 preceding siblings ...)
2023-09-07 8:31 ` [PATCH RESEND v5 56/57] target/loongarch: Move simple DO_XX macros together Song Gao
@ 2023-09-07 8:31 ` Song Gao
56 siblings, 0 replies; 87+ messages in thread
From: Song Gao @ 2023-09-07 8:31 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, maobibo
Signed-off-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/cpu.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index a1d3f680d8..fc7f70fbe5 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -393,6 +393,7 @@ static void loongarch_la464_initfn(Object *obj)
data = FIELD_DP32(data, CPUCFG2, FP_DP, 1);
data = FIELD_DP32(data, CPUCFG2, FP_VER, 1);
data = FIELD_DP32(data, CPUCFG2, LSX, 1),
+ data = FIELD_DP32(data, CPUCFG2, LASX, 1),
data = FIELD_DP32(data, CPUCFG2, LLFTP, 1);
data = FIELD_DP32(data, CPUCFG2, LLFTP_VER, 1);
data = FIELD_DP32(data, CPUCFG2, LSPW, 1);
--
2.39.1
^ permalink raw reply related [flat|nested] 87+ messages in thread