qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/2] tcg/tci: Two regression fixes
@ 2023-06-07  5:46 Richard Henderson
  2023-06-07  5:46 ` [PATCH 1/2] tcg/tci: Adjust passing of MemOpIdx Richard Henderson
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Richard Henderson @ 2023-06-07  5:46 UTC (permalink / raw)
  To: qemu-devel; +Cc: sw

Two recent regressions, both related to recent tcg changes.

Our CI does not test TCI with --enable-debug-tcg, which given timeout
constraints is probably correct, but in this case resulted in an
infinite loop on aarch64 multiarch/memory.c with FEAT_LSE2 enabled.

r~

Richard Henderson (2):
  tcg/tci: Adjust passing of MemOpIdx
  tcg/tci: Adjust call-clobbered regs for int128_t

 tcg/tci.c                | 30 +++++++++++++-----------------
 tcg/tci/tcg-target.c.inc | 30 +++++++++---------------------
 2 files changed, 22 insertions(+), 38 deletions(-)

-- 
2.34.1



^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 1/2] tcg/tci: Adjust passing of MemOpIdx
  2023-06-07  5:46 [PATCH 0/2] tcg/tci: Two regression fixes Richard Henderson
@ 2023-06-07  5:46 ` Richard Henderson
  2023-06-07  5:46 ` [PATCH 2/2] tcg/tci: Adjust call-clobbered regs for int128_t Richard Henderson
  2023-06-07  5:53 ` [PATCH 0/2] tcg/tci: Two regression fixes Richard Henderson
  2 siblings, 0 replies; 5+ messages in thread
From: Richard Henderson @ 2023-06-07  5:46 UTC (permalink / raw)
  To: qemu-devel; +Cc: sw

Since adding MO_ATOM_MASK, the maximum MemOpIdx requires 15 bits,
which overflows the 12 bit field allocated for TCI memory ops.
Expand the field to 16 bits for 2-operand memory ops, and place
the value in TCG_REG_TMP for 3-operand memory ops (same as we
already do for 4-operand memory ops).

Cures a debug assert for aarch64, with FEAT_LSE enabled.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tci.c                | 30 +++++++++++++-----------------
 tcg/tci/tcg-target.c.inc | 21 ++++-----------------
 2 files changed, 17 insertions(+), 34 deletions(-)

diff --git a/tcg/tci.c b/tcg/tci.c
index 813572ff39..4640902c88 100644
--- a/tcg/tci.c
+++ b/tcg/tci.c
@@ -106,7 +106,7 @@ static void tci_args_rrm(uint32_t insn, TCGReg *r0,
 {
     *r0 = extract32(insn, 8, 4);
     *r1 = extract32(insn, 12, 4);
-    *m2 = extract32(insn, 20, 12);
+    *m2 = extract32(insn, 16, 16);
 }
 
 static void tci_args_rrr(uint32_t insn, TCGReg *r0, TCGReg *r1, TCGReg *r2)
@@ -141,15 +141,6 @@ static void tci_args_rrrc(uint32_t insn,
     *c3 = extract32(insn, 20, 4);
 }
 
-static void tci_args_rrrm(uint32_t insn,
-                          TCGReg *r0, TCGReg *r1, TCGReg *r2, MemOpIdx *m3)
-{
-    *r0 = extract32(insn, 8, 4);
-    *r1 = extract32(insn, 12, 4);
-    *r2 = extract32(insn, 16, 4);
-    *m3 = extract32(insn, 20, 12);
-}
-
 static void tci_args_rrrbb(uint32_t insn, TCGReg *r0, TCGReg *r1,
                            TCGReg *r2, uint8_t *i3, uint8_t *i4)
 {
@@ -929,8 +920,9 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
                 tci_args_rrm(insn, &r0, &r1, &oi);
                 taddr = regs[r1];
             } else {
-                tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
+                tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
                 taddr = tci_uint64(regs[r2], regs[r1]);
+                oi = regs[r3];
             }
         do_ld_i32:
             regs[r0] = tci_qemu_ld(env, taddr, oi, tb_ptr);
@@ -941,8 +933,9 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
                 tci_args_rrm(insn, &r0, &r1, &oi);
                 taddr = (uint32_t)regs[r1];
             } else {
-                tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
+                tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
                 taddr = (uint32_t)regs[r2];
+                oi = regs[r3];
             }
             goto do_ld_i64;
         case INDEX_op_qemu_ld_a64_i64:
@@ -972,8 +965,9 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
                 tci_args_rrm(insn, &r0, &r1, &oi);
                 taddr = regs[r1];
             } else {
-                tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
+                tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
                 taddr = tci_uint64(regs[r2], regs[r1]);
+                oi = regs[r3];
             }
         do_st_i32:
             tci_qemu_st(env, taddr, regs[r0], oi, tb_ptr);
@@ -985,9 +979,10 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
                 tmp64 = regs[r0];
                 taddr = (uint32_t)regs[r1];
             } else {
-                tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
+                tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
                 tmp64 = tci_uint64(regs[r1], regs[r0]);
                 taddr = (uint32_t)regs[r2];
+                oi = regs[r3];
             }
             goto do_st_i64;
         case INDEX_op_qemu_st_a64_i64:
@@ -1293,9 +1288,10 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
                                op_name, str_r(r0), str_r(r1), oi);
             break;
         case 3:
-            tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
-            info->fprintf_func(info->stream, "%-12s  %s, %s, %s, %x",
-                               op_name, str_r(r0), str_r(r1), str_r(r2), oi);
+            tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
+            info->fprintf_func(info->stream, "%-12s  %s, %s, %s, %s",
+                               op_name, str_r(r0), str_r(r1),
+                               str_r(r2), str_r(r3));
             break;
         case 4:
             tci_args_rrrrr(insn, &r0, &r1, &r2, &r3, &r4);
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
index c9516a5e8b..5b456e1277 100644
--- a/tcg/tci/tcg-target.c.inc
+++ b/tcg/tci/tcg-target.c.inc
@@ -331,11 +331,11 @@ static void tcg_out_op_rrm(TCGContext *s, TCGOpcode op,
 {
     tcg_insn_unit insn = 0;
 
-    tcg_debug_assert(m2 == extract32(m2, 0, 12));
+    tcg_debug_assert(m2 == extract32(m2, 0, 16));
     insn = deposit32(insn, 0, 8, op);
     insn = deposit32(insn, 8, 4, r0);
     insn = deposit32(insn, 12, 4, r1);
-    insn = deposit32(insn, 20, 12, m2);
+    insn = deposit32(insn, 16, 16, m2);
     tcg_out32(s, insn);
 }
 
@@ -392,20 +392,6 @@ static void tcg_out_op_rrrc(TCGContext *s, TCGOpcode op,
     tcg_out32(s, insn);
 }
 
-static void tcg_out_op_rrrm(TCGContext *s, TCGOpcode op,
-                            TCGReg r0, TCGReg r1, TCGReg r2, TCGArg m3)
-{
-    tcg_insn_unit insn = 0;
-
-    tcg_debug_assert(m3 == extract32(m3, 0, 12));
-    insn = deposit32(insn, 0, 8, op);
-    insn = deposit32(insn, 8, 4, r0);
-    insn = deposit32(insn, 12, 4, r1);
-    insn = deposit32(insn, 16, 4, r2);
-    insn = deposit32(insn, 20, 12, m3);
-    tcg_out32(s, insn);
-}
-
 static void tcg_out_op_rrrbb(TCGContext *s, TCGOpcode op, TCGReg r0,
                              TCGReg r1, TCGReg r2, uint8_t b3, uint8_t b4)
 {
@@ -860,7 +846,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
         if (TCG_TARGET_REG_BITS == 64) {
             tcg_out_op_rrm(s, opc, args[0], args[1], args[2]);
         } else {
-            tcg_out_op_rrrm(s, opc, args[0], args[1], args[2], args[3]);
+            tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP, args[4]);
+            tcg_out_op_rrrr(s, opc, args[0], args[1], args[2], TCG_REG_TMP);
         }
         break;
     case INDEX_op_qemu_ld_a64_i64:
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/2] tcg/tci: Adjust call-clobbered regs for int128_t
  2023-06-07  5:46 [PATCH 0/2] tcg/tci: Two regression fixes Richard Henderson
  2023-06-07  5:46 ` [PATCH 1/2] tcg/tci: Adjust passing of MemOpIdx Richard Henderson
@ 2023-06-07  5:46 ` Richard Henderson
  2023-06-07  7:44   ` Philippe Mathieu-Daudé
  2023-06-07  5:53 ` [PATCH 0/2] tcg/tci: Two regression fixes Richard Henderson
  2 siblings, 1 reply; 5+ messages in thread
From: Richard Henderson @ 2023-06-07  5:46 UTC (permalink / raw)
  To: qemu-devel; +Cc: sw

We require either 2 or 4 registers to hold int128_t.
Failure to do so results in a register allocation assert.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tci/tcg-target.c.inc | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
index 5b456e1277..0037f904f1 100644
--- a/tcg/tci/tcg-target.c.inc
+++ b/tcg/tci/tcg-target.c.inc
@@ -179,8 +179,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 }
 
 static const int tcg_target_reg_alloc_order[] = {
-    TCG_REG_R2,
-    TCG_REG_R3,
     TCG_REG_R4,
     TCG_REG_R5,
     TCG_REG_R6,
@@ -193,6 +191,9 @@ static const int tcg_target_reg_alloc_order[] = {
     TCG_REG_R13,
     TCG_REG_R14,
     TCG_REG_R15,
+    /* Either 2 or 4 of these are call clobbered, so use them last. */
+    TCG_REG_R3,
+    TCG_REG_R2,
     TCG_REG_R1,
     TCG_REG_R0,
 };
@@ -934,11 +935,11 @@ static void tcg_target_init(TCGContext *s)
     /*
      * The interpreter "registers" are in the local stack frame and
      * cannot be clobbered by the called helper functions.  However,
-     * the interpreter assumes a 64-bit return value and assigns to
+     * the interpreter assumes a 128-bit return value and assigns to
      * the return value registers.
      */
     tcg_target_call_clobber_regs =
-        MAKE_64BIT_MASK(TCG_REG_R0, 64 / TCG_TARGET_REG_BITS);
+        MAKE_64BIT_MASK(TCG_REG_R0, 128 / TCG_TARGET_REG_BITS);
 
     s->reserved_regs = 0;
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH 0/2] tcg/tci: Two regression fixes
  2023-06-07  5:46 [PATCH 0/2] tcg/tci: Two regression fixes Richard Henderson
  2023-06-07  5:46 ` [PATCH 1/2] tcg/tci: Adjust passing of MemOpIdx Richard Henderson
  2023-06-07  5:46 ` [PATCH 2/2] tcg/tci: Adjust call-clobbered regs for int128_t Richard Henderson
@ 2023-06-07  5:53 ` Richard Henderson
  2 siblings, 0 replies; 5+ messages in thread
From: Richard Henderson @ 2023-06-07  5:53 UTC (permalink / raw)
  To: qemu-devel; +Cc: sw

On 6/6/23 22:46, Richard Henderson wrote:
> Two recent regressions, both related to recent tcg changes.
> 
> Our CI does not test TCI with --enable-debug-tcg, which given timeout
> constraints is probably correct, but in this case resulted in an
> infinite loop on aarch64 multiarch/memory.c with FEAT_LSE2 enabled.

To expand on that: with --enable-debug-tcg, assertions fire and catch the bug; without the 
assertions, the generated bytecode is incorrect, which leads to the loop.


r~


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/2] tcg/tci: Adjust call-clobbered regs for int128_t
  2023-06-07  5:46 ` [PATCH 2/2] tcg/tci: Adjust call-clobbered regs for int128_t Richard Henderson
@ 2023-06-07  7:44   ` Philippe Mathieu-Daudé
  0 siblings, 0 replies; 5+ messages in thread
From: Philippe Mathieu-Daudé @ 2023-06-07  7:44 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel; +Cc: sw

On 7/6/23 07:46, Richard Henderson wrote:
> We require either 2 or 4 registers to hold int128_t.
> Failure to do so results in a register allocation assert.
> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>   tcg/tci/tcg-target.c.inc | 9 +++++----
>   1 file changed, 5 insertions(+), 4 deletions(-)

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>



^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2023-06-07  7:44 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-06-07  5:46 [PATCH 0/2] tcg/tci: Two regression fixes Richard Henderson
2023-06-07  5:46 ` [PATCH 1/2] tcg/tci: Adjust passing of MemOpIdx Richard Henderson
2023-06-07  5:46 ` [PATCH 2/2] tcg/tci: Adjust call-clobbered regs for int128_t Richard Henderson
2023-06-07  7:44   ` Philippe Mathieu-Daudé
2023-06-07  5:53 ` [PATCH 0/2] tcg/tci: Two regression fixes Richard Henderson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).