qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [PULL 0/3] tcg patch queue
@ 2020-10-27 16:51 Richard Henderson
  2020-10-27 16:51 ` [PULL 1/3] tcg: Do not kill globals at conditional branches Richard Henderson
                   ` (4 more replies)
  0 siblings, 5 replies; 7+ messages in thread
From: Richard Henderson @ 2020-10-27 16:51 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell

The following changes since commit 4a74626970ab4ea475263d155b10fb75c9af0b33:

  Merge remote-tracking branch 'remotes/stefanha-gitlab/tags/tracing-pull-request' into staging (2020-10-27 11:28:46 +0000)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20201027

for you to fetch changes up to 1d705e8a5bbfe36294081baa45ab68a9ad987f33:

  accel/tcg: Add CPU_LOG_EXEC tracing for cpu_io_recompile() (2020-10-27 09:48:07 -0700)

----------------------------------------------------------------
Optimize across branches.
Add logging for cpu_io_recompile.

----------------------------------------------------------------
Peter Maydell (1):
      accel/tcg: Add CPU_LOG_EXEC tracing for cpu_io_recompile()

Richard Henderson (2):
      tcg: Do not kill globals at conditional branches
      tcg/optimize: Flush data at labels not TCG_OPF_BB_END

 include/tcg/tcg-opc.h     |  7 +++---
 include/tcg/tcg.h         |  4 +++-
 accel/tcg/translate-all.c |  4 ++++
 tcg/optimize.c            | 35 +++++++++++++++---------------
 tcg/tcg.c                 | 55 +++++++++++++++++++++++++++++++++++++++++++++--
 5 files changed, 82 insertions(+), 23 deletions(-)


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PULL 1/3] tcg: Do not kill globals at conditional branches
  2020-10-27 16:51 [PULL 0/3] tcg patch queue Richard Henderson
@ 2020-10-27 16:51 ` Richard Henderson
  2020-10-27 16:51 ` [PULL 2/3] tcg/optimize: Flush data at labels not TCG_OPF_BB_END Richard Henderson
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 7+ messages in thread
From: Richard Henderson @ 2020-10-27 16:51 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, Alex Bennée

We can easily register allocate the entire extended basic block
(in this case, the set of blocks connected by fallthru), simply
by not discarding the register state at the branch.

This does not help blocks starting with a label, as they are
reached via a taken branch, and that would require saving the
complete register state at the branch.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg-opc.h |  7 +++---
 include/tcg/tcg.h     |  4 +++-
 tcg/tcg.c             | 55 +++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 60 insertions(+), 6 deletions(-)

diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
index e3929b80d2..67092e82c6 100644
--- a/include/tcg/tcg-opc.h
+++ b/include/tcg/tcg-opc.h
@@ -81,7 +81,7 @@ DEF(extract_i32, 1, 1, 2, IMPL(TCG_TARGET_HAS_extract_i32))
 DEF(sextract_i32, 1, 1, 2, IMPL(TCG_TARGET_HAS_sextract_i32))
 DEF(extract2_i32, 1, 2, 1, IMPL(TCG_TARGET_HAS_extract2_i32))
 
-DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END)
+DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH)
 
 DEF(add2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_add2_i32))
 DEF(sub2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_sub2_i32))
@@ -89,7 +89,8 @@ DEF(mulu2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_mulu2_i32))
 DEF(muls2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_muls2_i32))
 DEF(muluh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_muluh_i32))
 DEF(mulsh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_mulsh_i32))
-DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | IMPL(TCG_TARGET_REG_BITS == 32))
+DEF(brcond2_i32, 0, 4, 2,
+    TCG_OPF_BB_END | TCG_OPF_COND_BRANCH | IMPL(TCG_TARGET_REG_BITS == 32))
 DEF(setcond2_i32, 1, 4, 1, IMPL(TCG_TARGET_REG_BITS == 32))
 
 DEF(ext8s_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext8s_i32))
@@ -159,7 +160,7 @@ DEF(extrh_i64_i32, 1, 1, 0,
     IMPL(TCG_TARGET_HAS_extrh_i64_i32)
     | (TCG_TARGET_REG_BITS == 32 ? TCG_OPF_NOT_PRESENT : 0))
 
-DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | IMPL64)
+DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH | IMPL64)
 DEF(ext8s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext8s_i64))
 DEF(ext16s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext16s_i64))
 DEF(ext32s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext32s_i64))
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index 8804a8c4a2..8ff9dad4ef 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -990,7 +990,7 @@ typedef struct TCGArgConstraint {
 
 #define TCG_MAX_OP_ARGS 16
 
-/* Bits for TCGOpDef->flags, 8 bits available.  */
+/* Bits for TCGOpDef->flags, 8 bits available, all used.  */
 enum {
     /* Instruction exits the translation block.  */
     TCG_OPF_BB_EXIT      = 0x01,
@@ -1008,6 +1008,8 @@ enum {
     TCG_OPF_NOT_PRESENT  = 0x20,
     /* Instruction operands are vectors.  */
     TCG_OPF_VECTOR       = 0x40,
+    /* Instruction is a conditional branch. */
+    TCG_OPF_COND_BRANCH  = 0x80
 };
 
 typedef struct TCGOpDef {
diff --git a/tcg/tcg.c b/tcg/tcg.c
index a8c28440e2..f49f1a7f35 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -2519,6 +2519,28 @@ static void la_global_sync(TCGContext *s, int ng)
     }
 }
 
+/*
+ * liveness analysis: conditional branch: all temps are dead,
+ * globals and local temps should be synced.
+ */
+static void la_bb_sync(TCGContext *s, int ng, int nt)
+{
+    la_global_sync(s, ng);
+
+    for (int i = ng; i < nt; ++i) {
+        if (s->temps[i].temp_local) {
+            int state = s->temps[i].state;
+            s->temps[i].state = state | TS_MEM;
+            if (state != TS_DEAD) {
+                continue;
+            }
+        } else {
+            s->temps[i].state = TS_DEAD;
+        }
+        la_reset_pref(&s->temps[i]);
+    }
+}
+
 /* liveness analysis: sync globals back to memory and kill.  */
 static void la_global_kill(TCGContext *s, int ng)
 {
@@ -2795,6 +2817,8 @@ static void liveness_pass_1(TCGContext *s)
             /* If end of basic block, update.  */
             if (def->flags & TCG_OPF_BB_EXIT) {
                 la_func_end(s, nb_globals, nb_temps);
+            } else if (def->flags & TCG_OPF_COND_BRANCH) {
+                la_bb_sync(s, nb_globals, nb_temps);
             } else if (def->flags & TCG_OPF_BB_END) {
                 la_bb_end(s, nb_globals, nb_temps);
             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
@@ -2907,7 +2931,10 @@ static bool liveness_pass_2(TCGContext *s)
             nb_oargs = def->nb_oargs;
 
             /* Set flags similar to how calls require.  */
-            if (def->flags & TCG_OPF_BB_END) {
+            if (def->flags & TCG_OPF_COND_BRANCH) {
+                /* Like reading globals: sync_globals */
+                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
+            } else if (def->flags & TCG_OPF_BB_END) {
                 /* Like writing globals: save_globals */
                 call_flags = 0;
             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
@@ -3379,6 +3406,28 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
     save_globals(s, allocated_regs);
 }
 
+/*
+ * At a conditional branch, we assume all temporaries are dead and
+ * all globals and local temps are synced to their location.
+ */
+static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
+{
+    sync_globals(s, allocated_regs);
+
+    for (int i = s->nb_globals; i < s->nb_temps; i++) {
+        TCGTemp *ts = &s->temps[i];
+        /*
+         * The liveness analysis already ensures that temps are dead.
+         * Keep tcg_debug_asserts for safety.
+         */
+        if (ts->temp_local) {
+            tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
+        } else {
+            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
+        }
+    }
+}
+
 /*
  * Specialized code generation for INDEX_op_movi_*.
  */
@@ -3730,7 +3779,9 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
         }
     }
 
-    if (def->flags & TCG_OPF_BB_END) {
+    if (def->flags & TCG_OPF_COND_BRANCH) {
+        tcg_reg_alloc_cbranch(s, i_allocated_regs);
+    } else if (def->flags & TCG_OPF_BB_END) {
         tcg_reg_alloc_bb_end(s, i_allocated_regs);
     } else {
         if (def->flags & TCG_OPF_CALL_CLOBBER) {
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PULL 2/3] tcg/optimize: Flush data at labels not TCG_OPF_BB_END
  2020-10-27 16:51 [PULL 0/3] tcg patch queue Richard Henderson
  2020-10-27 16:51 ` [PULL 1/3] tcg: Do not kill globals at conditional branches Richard Henderson
@ 2020-10-27 16:51 ` Richard Henderson
  2020-10-27 16:51 ` [PULL 3/3] accel/tcg: Add CPU_LOG_EXEC tracing for cpu_io_recompile() Richard Henderson
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 7+ messages in thread
From: Richard Henderson @ 2020-10-27 16:51 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, Alex Bennée

We can easily propagate temp values through the entire extended
basic block (in this case, the set of blocks connected by fallthru),
simply by not discarding the register state at the branch.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 35 ++++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 220f4601d5..9952c28bdc 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -1484,29 +1484,30 @@ void tcg_optimize(TCGContext *s)
                     }
                 }
             }
-            goto do_reset_output;
+            /* fall through */
 
         default:
         do_default:
-            /* Default case: we know nothing about operation (or were unable
-               to compute the operation result) so no propagation is done.
-               We trash everything if the operation is the end of a basic
-               block, otherwise we only trash the output args.  "mask" is
-               the non-zero bits mask for the first output arg.  */
-            if (def->flags & TCG_OPF_BB_END) {
-                bitmap_zero(temps_used.l, nb_temps);
-            } else {
-        do_reset_output:
-                for (i = 0; i < nb_oargs; i++) {
-                    reset_temp(op->args[i]);
-                    /* Save the corresponding known-zero bits mask for the
-                       first output argument (only one supported so far). */
-                    if (i == 0) {
-                        arg_info(op->args[i])->mask = mask;
-                    }
+            /*
+             * Default case: we know nothing about operation (or were unable
+             * to compute the operation result) so no propagation is done.
+             */
+            for (i = 0; i < nb_oargs; i++) {
+                reset_temp(op->args[i]);
+                /*
+                 * Save the corresponding known-zero bits mask for the
+                 * first output argument (only one supported so far).
+                 */
+                if (i == 0) {
+                    arg_info(op->args[i])->mask = mask;
                 }
             }
             break;
+
+        case INDEX_op_set_label:
+            /* Trash everything at the start of a new extended bb. */
+            bitmap_zero(temps_used.l, nb_temps);
+            break;
         }
 
         /* Eliminate duplicate and redundant fence instructions.  */
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PULL 3/3] accel/tcg: Add CPU_LOG_EXEC tracing for cpu_io_recompile()
  2020-10-27 16:51 [PULL 0/3] tcg patch queue Richard Henderson
  2020-10-27 16:51 ` [PULL 1/3] tcg: Do not kill globals at conditional branches Richard Henderson
  2020-10-27 16:51 ` [PULL 2/3] tcg/optimize: Flush data at labels not TCG_OPF_BB_END Richard Henderson
@ 2020-10-27 16:51 ` Richard Henderson
  2020-10-31  9:48 ` [PULL 0/3] tcg patch queue Peter Maydell
  2020-11-02 13:57 ` Peter Maydell
  4 siblings, 0 replies; 7+ messages in thread
From: Richard Henderson @ 2020-10-27 16:51 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell

From: Peter Maydell <peter.maydell@linaro.org>

When using -icount, it's useful for the CPU_LOG_EXEC logging
to include information about when cpu_io_recompile() was
called, because it alerts the reader of the log that the
tracing of a previous TB execution may not actually
correspond to an actually executed instruction. For instance
if you're using -icount and also -singlestep then a guest
instruction that makes an IO access appears in two
"Trace" lines, once in a TB that triggers the cpu_io_recompile()
and then again in the TB that actually executes.

(This is a similar reason to why the "Stopped execution of
TB chain before..." logging in cpu_tb_exec() is helpful
when trying to track execution flow in the logs.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-Id: <20201013122658.4620-1-peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/translate-all.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index d76097296d..4572b4901f 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -2267,6 +2267,10 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
         tb_destroy(tb);
     }
 
+    qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
+                           "cpu_io_recompile: rewound execution of TB to "
+                           TARGET_FMT_lx "\n", tb->pc);
+
     /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
      * the first in the TB) then we end up generating a whole new TB and
      *  repeating the fault, which is horribly inefficient.
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PULL 0/3] tcg patch queue
  2020-10-27 16:51 [PULL 0/3] tcg patch queue Richard Henderson
                   ` (2 preceding siblings ...)
  2020-10-27 16:51 ` [PULL 3/3] accel/tcg: Add CPU_LOG_EXEC tracing for cpu_io_recompile() Richard Henderson
@ 2020-10-31  9:48 ` Peter Maydell
  2020-11-02 13:57 ` Peter Maydell
  4 siblings, 0 replies; 7+ messages in thread
From: Peter Maydell @ 2020-10-31  9:48 UTC (permalink / raw)
  To: Richard Henderson; +Cc: QEMU Developers

On Tue, 27 Oct 2020 at 16:51, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> The following changes since commit 4a74626970ab4ea475263d155b10fb75c9af0b33:
>
>   Merge remote-tracking branch 'remotes/stefanha-gitlab/tags/tracing-pull-request' into staging (2020-10-27 11:28:46 +0000)
>
> are available in the Git repository at:
>
>   https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20201027
>
> for you to fetch changes up to 1d705e8a5bbfe36294081baa45ab68a9ad987f33:
>
>   accel/tcg: Add CPU_LOG_EXEC tracing for cpu_io_recompile() (2020-10-27 09:48:07 -0700)
>
> ----------------------------------------------------------------
> Optimize across branches.
> Add logging for cpu_io_recompile.
>
> ----------------------------------------------------------------
> Peter Maydell (1):
>       accel/tcg: Add CPU_LOG_EXEC tracing for cpu_io_recompile()
>
> Richard Henderson (2):
>       tcg: Do not kill globals at conditional branches
>       tcg/optimize: Flush data at labels not TCG_OPF_BB_END


Applied, thanks.

Please update the changelog at https://wiki.qemu.org/ChangeLog/5.2
for any user-visible changes.

-- PMM


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PULL 0/3] tcg patch queue
  2020-10-27 16:51 [PULL 0/3] tcg patch queue Richard Henderson
                   ` (3 preceding siblings ...)
  2020-10-31  9:48 ` [PULL 0/3] tcg patch queue Peter Maydell
@ 2020-11-02 13:57 ` Peter Maydell
  2020-11-02 16:14   ` Richard Henderson
  4 siblings, 1 reply; 7+ messages in thread
From: Peter Maydell @ 2020-11-02 13:57 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu, QEMU Developers

On Tue, 27 Oct 2020 at 16:51, Richard Henderson
<richard.henderson@linaro.org> wrote:
> ----------------------------------------------------------------
> Optimize across branches.
> Add logging for cpu_io_recompile.

Igor2 reported on IRC that this seems to cause a crash when
using an hppa guest. This is apparently happening on a proprietary
disk image, so no reproducible test case, but the logging of
the tail end of -d in_asm,op is at:
 http://igor2.repo.hu/tmp/in_asm_op.log

QEMU asserts with
../tcg/tcg.c:3346: tcg fatal error

The TB in question involves several conditional branches; the
generated TCG ops look OK to me, and reverting the two commits
b4cb76e6208cf6b5b and cd0372c515c4732d8b fixes the crash.
(We didn't test reverting only one of the two commits separately.)

thanks
-- PMM


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PULL 0/3] tcg patch queue
  2020-11-02 13:57 ` Peter Maydell
@ 2020-11-02 16:14   ` Richard Henderson
  0 siblings, 0 replies; 7+ messages in thread
From: Richard Henderson @ 2020-11-02 16:14 UTC (permalink / raw)
  To: Peter Maydell; +Cc: qemu, QEMU Developers

On 11/2/20 5:57 AM, Peter Maydell wrote:
> On Tue, 27 Oct 2020 at 16:51, Richard Henderson
> <richard.henderson@linaro.org> wrote:
>> ----------------------------------------------------------------
>> Optimize across branches.
>> Add logging for cpu_io_recompile.
> 
> Igor2 reported on IRC that this seems to cause a crash when
> using an hppa guest. This is apparently happening on a proprietary
> disk image, so no reproducible test case, but the logging of
> the tail end of -d in_asm,op is at:
>  http://igor2.repo.hu/tmp/in_asm_op.log
> 
> QEMU asserts with
> ../tcg/tcg.c:3346: tcg fatal error
> 
> The TB in question involves several conditional branches; the
> generated TCG ops look OK to me, and reverting the two commits
> b4cb76e6208cf6b5b and cd0372c515c4732d8b fixes the crash.
> (We didn't test reverting only one of the two commits separately.)

Ok, thanks, I'll look into it.


r~



^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2020-11-02 16:15 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2020-10-27 16:51 [PULL 0/3] tcg patch queue Richard Henderson
2020-10-27 16:51 ` [PULL 1/3] tcg: Do not kill globals at conditional branches Richard Henderson
2020-10-27 16:51 ` [PULL 2/3] tcg/optimize: Flush data at labels not TCG_OPF_BB_END Richard Henderson
2020-10-27 16:51 ` [PULL 3/3] accel/tcg: Add CPU_LOG_EXEC tracing for cpu_io_recompile() Richard Henderson
2020-10-31  9:48 ` [PULL 0/3] tcg patch queue Peter Maydell
2020-11-02 13:57 ` Peter Maydell
2020-11-02 16:14   ` Richard Henderson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).