* [PATCH] tcg/loongarch64: Fill out tcg_out_{ld,st} for vector regs
@ 2024-05-10 9:12 Richard Henderson
2024-05-10 10:08 ` [PATCH] tcg/loongarch64: Fill out tcg_out_{ld, st} " gaosong
0 siblings, 1 reply; 2+ messages in thread
From: Richard Henderson @ 2024-05-10 9:12 UTC (permalink / raw)
To: qemu-devel; +Cc: gaosong, git, qemu-stable
TCG register spill/fill uses tcg_out_ld/st with all types,
not necessarily going through INDEX_op_{ld,st}_vec.
Cc: qemu-stable@nongnu.org
Fixes: 16288ded944 ("tcg/loongarch64: Lower basic tcg vec ops to LSX")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2336
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/loongarch64/tcg-target.c.inc | 103 ++++++++++++++++++++++++-------
1 file changed, 80 insertions(+), 23 deletions(-)
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 69c5b8ac4f..06ca1ab11c 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -808,18 +808,88 @@ static void tcg_out_ldst(TCGContext *s, LoongArchInsn opc, TCGReg data,
}
}
-static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
- TCGReg arg1, intptr_t arg2)
+static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg dest,
+ TCGReg base, intptr_t offset)
{
- bool is_32bit = type == TCG_TYPE_I32;
- tcg_out_ldst(s, is_32bit ? OPC_LD_W : OPC_LD_D, arg, arg1, arg2);
+ switch (type) {
+ case TCG_TYPE_I32:
+ if (dest < TCG_REG_V0) {
+ tcg_out_ldst(s, OPC_LD_W, dest, base, offset);
+ } else {
+ tcg_out_dupm_vec(s, TCG_TYPE_I128, MO_32, dest, base, offset);
+ }
+ break;
+ case TCG_TYPE_I64:
+ if (dest < TCG_REG_V0) {
+ tcg_out_ldst(s, OPC_LD_D, dest, base, offset);
+ } else {
+ tcg_out_dupm_vec(s, TCG_TYPE_I128, MO_64, dest, base, offset);
+ }
+ break;
+ case TCG_TYPE_V128:
+ if (-0x800 <= offset && offset <= 0x7ff) {
+ tcg_out_opc_vld(s, dest, base, offset);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
+ tcg_out_opc_vldx(s, dest, base, TCG_REG_TMP0);
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
}
-static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
- TCGReg arg1, intptr_t arg2)
+static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
+ TCGReg base, intptr_t offset)
{
- bool is_32bit = type == TCG_TYPE_I32;
- tcg_out_ldst(s, is_32bit ? OPC_ST_W : OPC_ST_D, arg, arg1, arg2);
+ switch (type) {
+ case TCG_TYPE_I32:
+ if (src < TCG_REG_V0) {
+ tcg_out_ldst(s, OPC_ST_W, src, base, offset);
+ } else {
+ /* TODO: Could use fst_s, fstx_s */
+ if (offset < -0x100 || offset > 0xff || (offset & 3)) {
+ if (-0x800 <= offset && offset <= 0x7ff) {
+ tcg_out_opc_addi_d(s, TCG_REG_TMP0, base, offset);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
+ tcg_out_opc_add_d(s, TCG_REG_TMP0, TCG_REG_TMP0, base);
+ }
+ base = TCG_REG_TMP0;
+ offset = 0;
+ }
+ tcg_out_opc_vstelm_w(s, src, base, offset, 0);
+ }
+ break;
+ case TCG_TYPE_I64:
+ if (src < TCG_REG_V0) {
+ tcg_out_ldst(s, OPC_ST_D, src, base, offset);
+ } else {
+ /* TODO: Could use fst_d, fstx_d */
+ if (offset < -0x100 || offset > 0xff || (offset & 7)) {
+ if (-0x800 <= offset && offset <= 0x7ff) {
+ tcg_out_opc_addi_d(s, TCG_REG_TMP0, base, offset);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
+ tcg_out_opc_add_d(s, TCG_REG_TMP0, TCG_REG_TMP0, base);
+ }
+ base = TCG_REG_TMP0;
+ offset = 0;
+ }
+ tcg_out_opc_vstelm_d(s, src, base, offset, 0);
+ }
+ break;
+ case TCG_TYPE_V128:
+ if (-0x800 <= offset && offset <= 0x7ff) {
+ tcg_out_opc_vst(s, src, base, offset);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
+ tcg_out_opc_vstx(s, src, base, TCG_REG_TMP0);
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
}
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
@@ -1740,7 +1810,6 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
{
TCGType type = vecl + TCG_TYPE_V64;
TCGArg a0, a1, a2, a3;
- TCGReg temp = TCG_REG_TMP0;
TCGReg temp_vec = TCG_VEC_TMP0;
static const LoongArchInsn cmp_vec_insn[16][4] = {
@@ -1820,22 +1889,10 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
switch (opc) {
case INDEX_op_st_vec:
- /* Try to fit vst imm */
- if (-0x800 <= a2 && a2 <= 0x7ff) {
- tcg_out_opc_vst(s, a0, a1, a2);
- } else {
- tcg_out_movi(s, TCG_TYPE_I64, temp, a2);
- tcg_out_opc_vstx(s, a0, a1, temp);
- }
+ tcg_out_st(s, type, a0, a1, a2);
break;
case INDEX_op_ld_vec:
- /* Try to fit vld imm */
- if (-0x800 <= a2 && a2 <= 0x7ff) {
- tcg_out_opc_vld(s, a0, a1, a2);
- } else {
- tcg_out_movi(s, TCG_TYPE_I64, temp, a2);
- tcg_out_opc_vldx(s, a0, a1, temp);
- }
+ tcg_out_ld(s, type, a0, a1, a2);
break;
case INDEX_op_and_vec:
tcg_out_opc_vand_v(s, a0, a1, a2);
--
2.34.1
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH] tcg/loongarch64: Fill out tcg_out_{ld, st} for vector regs
2024-05-10 9:12 [PATCH] tcg/loongarch64: Fill out tcg_out_{ld,st} for vector regs Richard Henderson
@ 2024-05-10 10:08 ` gaosong
0 siblings, 0 replies; 2+ messages in thread
From: gaosong @ 2024-05-10 10:08 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: git, qemu-stable
在 2024/5/10 下午5:12, Richard Henderson 写道:
> TCG register spill/fill uses tcg_out_ld/st with all types,
> not necessarily going through INDEX_op_{ld,st}_vec.
>
> Cc: qemu-stable@nongnu.org
> Fixes: 16288ded944 ("tcg/loongarch64: Lower basic tcg vec ops to LSX")
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2336
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> tcg/loongarch64/tcg-target.c.inc | 103 ++++++++++++++++++++++++-------
> 1 file changed, 80 insertions(+), 23 deletions(-)
Tested-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Song Gao <gaosong@loongson.cn>
Thanks.
Song Gao
> diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
> index 69c5b8ac4f..06ca1ab11c 100644
> --- a/tcg/loongarch64/tcg-target.c.inc
> +++ b/tcg/loongarch64/tcg-target.c.inc
> @@ -808,18 +808,88 @@ static void tcg_out_ldst(TCGContext *s, LoongArchInsn opc, TCGReg data,
> }
> }
>
> -static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
> - TCGReg arg1, intptr_t arg2)
> +static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg dest,
> + TCGReg base, intptr_t offset)
> {
> - bool is_32bit = type == TCG_TYPE_I32;
> - tcg_out_ldst(s, is_32bit ? OPC_LD_W : OPC_LD_D, arg, arg1, arg2);
> + switch (type) {
> + case TCG_TYPE_I32:
> + if (dest < TCG_REG_V0) {
> + tcg_out_ldst(s, OPC_LD_W, dest, base, offset);
> + } else {
> + tcg_out_dupm_vec(s, TCG_TYPE_I128, MO_32, dest, base, offset);
> + }
> + break;
> + case TCG_TYPE_I64:
> + if (dest < TCG_REG_V0) {
> + tcg_out_ldst(s, OPC_LD_D, dest, base, offset);
> + } else {
> + tcg_out_dupm_vec(s, TCG_TYPE_I128, MO_64, dest, base, offset);
> + }
> + break;
> + case TCG_TYPE_V128:
> + if (-0x800 <= offset && offset <= 0x7ff) {
> + tcg_out_opc_vld(s, dest, base, offset);
> + } else {
> + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
> + tcg_out_opc_vldx(s, dest, base, TCG_REG_TMP0);
> + }
> + break;
> + default:
> + g_assert_not_reached();
> + }
> }
>
> -static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
> - TCGReg arg1, intptr_t arg2)
> +static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
> + TCGReg base, intptr_t offset)
> {
> - bool is_32bit = type == TCG_TYPE_I32;
> - tcg_out_ldst(s, is_32bit ? OPC_ST_W : OPC_ST_D, arg, arg1, arg2);
> + switch (type) {
> + case TCG_TYPE_I32:
> + if (src < TCG_REG_V0) {
> + tcg_out_ldst(s, OPC_ST_W, src, base, offset);
> + } else {
> + /* TODO: Could use fst_s, fstx_s */
> + if (offset < -0x100 || offset > 0xff || (offset & 3)) {
> + if (-0x800 <= offset && offset <= 0x7ff) {
> + tcg_out_opc_addi_d(s, TCG_REG_TMP0, base, offset);
> + } else {
> + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
> + tcg_out_opc_add_d(s, TCG_REG_TMP0, TCG_REG_TMP0, base);
> + }
> + base = TCG_REG_TMP0;
> + offset = 0;
> + }
> + tcg_out_opc_vstelm_w(s, src, base, offset, 0);
> + }
> + break;
> + case TCG_TYPE_I64:
> + if (src < TCG_REG_V0) {
> + tcg_out_ldst(s, OPC_ST_D, src, base, offset);
> + } else {
> + /* TODO: Could use fst_d, fstx_d */
> + if (offset < -0x100 || offset > 0xff || (offset & 7)) {
> + if (-0x800 <= offset && offset <= 0x7ff) {
> + tcg_out_opc_addi_d(s, TCG_REG_TMP0, base, offset);
> + } else {
> + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
> + tcg_out_opc_add_d(s, TCG_REG_TMP0, TCG_REG_TMP0, base);
> + }
> + base = TCG_REG_TMP0;
> + offset = 0;
> + }
> + tcg_out_opc_vstelm_d(s, src, base, offset, 0);
> + }
> + break;
> + case TCG_TYPE_V128:
> + if (-0x800 <= offset && offset <= 0x7ff) {
> + tcg_out_opc_vst(s, src, base, offset);
> + } else {
> + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
> + tcg_out_opc_vstx(s, src, base, TCG_REG_TMP0);
> + }
> + break;
> + default:
> + g_assert_not_reached();
> + }
> }
>
> static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
> @@ -1740,7 +1810,6 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
> {
> TCGType type = vecl + TCG_TYPE_V64;
> TCGArg a0, a1, a2, a3;
> - TCGReg temp = TCG_REG_TMP0;
> TCGReg temp_vec = TCG_VEC_TMP0;
>
> static const LoongArchInsn cmp_vec_insn[16][4] = {
> @@ -1820,22 +1889,10 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
>
> switch (opc) {
> case INDEX_op_st_vec:
> - /* Try to fit vst imm */
> - if (-0x800 <= a2 && a2 <= 0x7ff) {
> - tcg_out_opc_vst(s, a0, a1, a2);
> - } else {
> - tcg_out_movi(s, TCG_TYPE_I64, temp, a2);
> - tcg_out_opc_vstx(s, a0, a1, temp);
> - }
> + tcg_out_st(s, type, a0, a1, a2);
> break;
> case INDEX_op_ld_vec:
> - /* Try to fit vld imm */
> - if (-0x800 <= a2 && a2 <= 0x7ff) {
> - tcg_out_opc_vld(s, a0, a1, a2);
> - } else {
> - tcg_out_movi(s, TCG_TYPE_I64, temp, a2);
> - tcg_out_opc_vldx(s, a0, a1, temp);
> - }
> + tcg_out_ld(s, type, a0, a1, a2);
> break;
> case INDEX_op_and_vec:
> tcg_out_opc_vand_v(s, a0, a1, a2);
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2024-05-10 10:09 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-05-10 9:12 [PATCH] tcg/loongarch64: Fill out tcg_out_{ld,st} for vector regs Richard Henderson
2024-05-10 10:08 ` [PATCH] tcg/loongarch64: Fill out tcg_out_{ld, st} " gaosong
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).