* [PATCH 0/2] tcg/tci: Two regression fixes
@ 2023-06-07 5:46 Richard Henderson
2023-06-07 5:46 ` [PATCH 1/2] tcg/tci: Adjust passing of MemOpIdx Richard Henderson
` (2 more replies)
0 siblings, 3 replies; 5+ messages in thread
From: Richard Henderson @ 2023-06-07 5:46 UTC (permalink / raw)
To: qemu-devel; +Cc: sw
Two recent regressions, both related to recent tcg changes.
Our CI does not test TCI with --enable-debug-tcg, which given timeout
constraints is probably correct, but in this case resulted in an
infinite loop on aarch64 multiarch/memory.c with FEAT_LSE2 enabled.
r~
Richard Henderson (2):
tcg/tci: Adjust passing of MemOpIdx
tcg/tci: Adjust call-clobbered regs for int128_t
tcg/tci.c | 30 +++++++++++++-----------------
tcg/tci/tcg-target.c.inc | 30 +++++++++---------------------
2 files changed, 22 insertions(+), 38 deletions(-)
--
2.34.1
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 1/2] tcg/tci: Adjust passing of MemOpIdx
2023-06-07 5:46 [PATCH 0/2] tcg/tci: Two regression fixes Richard Henderson
@ 2023-06-07 5:46 ` Richard Henderson
2023-06-07 5:46 ` [PATCH 2/2] tcg/tci: Adjust call-clobbered regs for int128_t Richard Henderson
2023-06-07 5:53 ` [PATCH 0/2] tcg/tci: Two regression fixes Richard Henderson
2 siblings, 0 replies; 5+ messages in thread
From: Richard Henderson @ 2023-06-07 5:46 UTC (permalink / raw)
To: qemu-devel; +Cc: sw
Since adding MO_ATOM_MASK, the maximum MemOpIdx requires 15 bits,
which overflows the 12 bit field allocated for TCI memory ops.
Expand the field to 16 bits for 2-operand memory ops, and place
the value in TCG_REG_TMP for 3-operand memory ops (same as we
already do for 4-operand memory ops).
Cures a debug assert for aarch64, with FEAT_LSE2 enabled.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/tci.c | 30 +++++++++++++-----------------
tcg/tci/tcg-target.c.inc | 21 ++++-----------------
2 files changed, 17 insertions(+), 34 deletions(-)
diff --git a/tcg/tci.c b/tcg/tci.c
index 813572ff39..4640902c88 100644
--- a/tcg/tci.c
+++ b/tcg/tci.c
@@ -106,7 +106,7 @@ static void tci_args_rrm(uint32_t insn, TCGReg *r0,
{
*r0 = extract32(insn, 8, 4);
*r1 = extract32(insn, 12, 4);
- *m2 = extract32(insn, 20, 12);
+ *m2 = extract32(insn, 16, 16);
}
static void tci_args_rrr(uint32_t insn, TCGReg *r0, TCGReg *r1, TCGReg *r2)
@@ -141,15 +141,6 @@ static void tci_args_rrrc(uint32_t insn,
*c3 = extract32(insn, 20, 4);
}
-static void tci_args_rrrm(uint32_t insn,
- TCGReg *r0, TCGReg *r1, TCGReg *r2, MemOpIdx *m3)
-{
- *r0 = extract32(insn, 8, 4);
- *r1 = extract32(insn, 12, 4);
- *r2 = extract32(insn, 16, 4);
- *m3 = extract32(insn, 20, 12);
-}
-
static void tci_args_rrrbb(uint32_t insn, TCGReg *r0, TCGReg *r1,
TCGReg *r2, uint8_t *i3, uint8_t *i4)
{
@@ -929,8 +920,9 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
tci_args_rrm(insn, &r0, &r1, &oi);
taddr = regs[r1];
} else {
- tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
+ tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
taddr = tci_uint64(regs[r2], regs[r1]);
+ oi = regs[r3];
}
do_ld_i32:
regs[r0] = tci_qemu_ld(env, taddr, oi, tb_ptr);
@@ -941,8 +933,9 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
tci_args_rrm(insn, &r0, &r1, &oi);
taddr = (uint32_t)regs[r1];
} else {
- tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
+ tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
taddr = (uint32_t)regs[r2];
+ oi = regs[r3];
}
goto do_ld_i64;
case INDEX_op_qemu_ld_a64_i64:
@@ -972,8 +965,9 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
tci_args_rrm(insn, &r0, &r1, &oi);
taddr = regs[r1];
} else {
- tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
+ tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
taddr = tci_uint64(regs[r2], regs[r1]);
+ oi = regs[r3];
}
do_st_i32:
tci_qemu_st(env, taddr, regs[r0], oi, tb_ptr);
@@ -985,9 +979,10 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
tmp64 = regs[r0];
taddr = (uint32_t)regs[r1];
} else {
- tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
+ tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
tmp64 = tci_uint64(regs[r1], regs[r0]);
taddr = (uint32_t)regs[r2];
+ oi = regs[r3];
}
goto do_st_i64;
case INDEX_op_qemu_st_a64_i64:
@@ -1293,9 +1288,10 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
op_name, str_r(r0), str_r(r1), oi);
break;
case 3:
- tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
- info->fprintf_func(info->stream, "%-12s %s, %s, %s, %x",
- op_name, str_r(r0), str_r(r1), str_r(r2), oi);
+ tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
+ info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s",
+ op_name, str_r(r0), str_r(r1),
+ str_r(r2), str_r(r3));
break;
case 4:
tci_args_rrrrr(insn, &r0, &r1, &r2, &r3, &r4);
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
index c9516a5e8b..5b456e1277 100644
--- a/tcg/tci/tcg-target.c.inc
+++ b/tcg/tci/tcg-target.c.inc
@@ -331,11 +331,11 @@ static void tcg_out_op_rrm(TCGContext *s, TCGOpcode op,
{
tcg_insn_unit insn = 0;
- tcg_debug_assert(m2 == extract32(m2, 0, 12));
+ tcg_debug_assert(m2 == extract32(m2, 0, 16));
insn = deposit32(insn, 0, 8, op);
insn = deposit32(insn, 8, 4, r0);
insn = deposit32(insn, 12, 4, r1);
- insn = deposit32(insn, 20, 12, m2);
+ insn = deposit32(insn, 16, 16, m2);
tcg_out32(s, insn);
}
@@ -392,20 +392,6 @@ static void tcg_out_op_rrrc(TCGContext *s, TCGOpcode op,
tcg_out32(s, insn);
}
-static void tcg_out_op_rrrm(TCGContext *s, TCGOpcode op,
- TCGReg r0, TCGReg r1, TCGReg r2, TCGArg m3)
-{
- tcg_insn_unit insn = 0;
-
- tcg_debug_assert(m3 == extract32(m3, 0, 12));
- insn = deposit32(insn, 0, 8, op);
- insn = deposit32(insn, 8, 4, r0);
- insn = deposit32(insn, 12, 4, r1);
- insn = deposit32(insn, 16, 4, r2);
- insn = deposit32(insn, 20, 12, m3);
- tcg_out32(s, insn);
-}
-
static void tcg_out_op_rrrbb(TCGContext *s, TCGOpcode op, TCGReg r0,
TCGReg r1, TCGReg r2, uint8_t b3, uint8_t b4)
{
@@ -860,7 +846,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
if (TCG_TARGET_REG_BITS == 64) {
tcg_out_op_rrm(s, opc, args[0], args[1], args[2]);
} else {
- tcg_out_op_rrrm(s, opc, args[0], args[1], args[2], args[3]);
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP, args[3]);
+ tcg_out_op_rrrr(s, opc, args[0], args[1], args[2], TCG_REG_TMP);
}
break;
case INDEX_op_qemu_ld_a64_i64:
--
2.34.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 2/2] tcg/tci: Adjust call-clobbered regs for int128_t
2023-06-07 5:46 [PATCH 0/2] tcg/tci: Two regression fixes Richard Henderson
2023-06-07 5:46 ` [PATCH 1/2] tcg/tci: Adjust passing of MemOpIdx Richard Henderson
@ 2023-06-07 5:46 ` Richard Henderson
2023-06-07 7:44 ` Philippe Mathieu-Daudé
2023-06-07 5:53 ` [PATCH 0/2] tcg/tci: Two regression fixes Richard Henderson
2 siblings, 1 reply; 5+ messages in thread
From: Richard Henderson @ 2023-06-07 5:46 UTC (permalink / raw)
To: qemu-devel; +Cc: sw
We require either 2 or 4 registers to hold int128_t.
Failure to do so results in a register allocation assert.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/tci/tcg-target.c.inc | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
index 5b456e1277..0037f904f1 100644
--- a/tcg/tci/tcg-target.c.inc
+++ b/tcg/tci/tcg-target.c.inc
@@ -179,8 +179,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
}
static const int tcg_target_reg_alloc_order[] = {
- TCG_REG_R2,
- TCG_REG_R3,
TCG_REG_R4,
TCG_REG_R5,
TCG_REG_R6,
@@ -193,6 +191,9 @@ static const int tcg_target_reg_alloc_order[] = {
TCG_REG_R13,
TCG_REG_R14,
TCG_REG_R15,
+ /* Either 2 or 4 of these are call clobbered, so use them last. */
+ TCG_REG_R3,
+ TCG_REG_R2,
TCG_REG_R1,
TCG_REG_R0,
};
@@ -934,11 +935,11 @@ static void tcg_target_init(TCGContext *s)
/*
* The interpreter "registers" are in the local stack frame and
* cannot be clobbered by the called helper functions. However,
- * the interpreter assumes a 64-bit return value and assigns to
+ * the interpreter assumes a 128-bit return value and assigns to
* the return value registers.
*/
tcg_target_call_clobber_regs =
- MAKE_64BIT_MASK(TCG_REG_R0, 64 / TCG_TARGET_REG_BITS);
+ MAKE_64BIT_MASK(TCG_REG_R0, 128 / TCG_TARGET_REG_BITS);
s->reserved_regs = 0;
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
--
2.34.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH 0/2] tcg/tci: Two regression fixes
2023-06-07 5:46 [PATCH 0/2] tcg/tci: Two regression fixes Richard Henderson
2023-06-07 5:46 ` [PATCH 1/2] tcg/tci: Adjust passing of MemOpIdx Richard Henderson
2023-06-07 5:46 ` [PATCH 2/2] tcg/tci: Adjust call-clobbered regs for int128_t Richard Henderson
@ 2023-06-07 5:53 ` Richard Henderson
2 siblings, 0 replies; 5+ messages in thread
From: Richard Henderson @ 2023-06-07 5:53 UTC (permalink / raw)
To: qemu-devel; +Cc: sw
On 6/6/23 22:46, Richard Henderson wrote:
> Two recent regressions, both related to recent tcg changes.
>
> Our CI does not test TCI with --enable-debug-tcg, which given timeout
> constraints is probably correct, but in this case resulted in an
> infinite loop on aarch64 multiarch/memory.c with FEAT_LSE2 enabled.
To expand on that: with --enable-debug-tcg, assertions fire and catch the bug; without the
assertions, the generated bytecode is incorrect, which leads to the loop.
r~
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 2/2] tcg/tci: Adjust call-clobbered regs for int128_t
2023-06-07 5:46 ` [PATCH 2/2] tcg/tci: Adjust call-clobbered regs for int128_t Richard Henderson
@ 2023-06-07 7:44 ` Philippe Mathieu-Daudé
0 siblings, 0 replies; 5+ messages in thread
From: Philippe Mathieu-Daudé @ 2023-06-07 7:44 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: sw
On 7/6/23 07:46, Richard Henderson wrote:
> We require either 2 or 4 registers to hold int128_t.
> Failure to do so results in a register allocation assert.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> tcg/tci/tcg-target.c.inc | 9 +++++----
> 1 file changed, 5 insertions(+), 4 deletions(-)
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2023-06-07 7:44 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-06-07 5:46 [PATCH 0/2] tcg/tci: Two regression fixes Richard Henderson
2023-06-07 5:46 ` [PATCH 1/2] tcg/tci: Adjust passing of MemOpIdx Richard Henderson
2023-06-07 5:46 ` [PATCH 2/2] tcg/tci: Adjust call-clobbered regs for int128_t Richard Henderson
2023-06-07 7:44 ` Philippe Mathieu-Daudé
2023-06-07 5:53 ` [PATCH 0/2] tcg/tci: Two regression fixes Richard Henderson
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).