From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: peter.maydell@linaro.org, "Alex Bennée" <alex.bennee@linaro.org>
Subject: [PULL 1/3] tcg: Do not kill globals at conditional branches
Date: Tue, 27 Oct 2020 09:51:12 -0700 [thread overview]
Message-ID: <20201027165114.71508-2-richard.henderson@linaro.org> (raw)
In-Reply-To: <20201027165114.71508-1-richard.henderson@linaro.org>
We can easily register allocate the entire extended basic block
(in this case, the set of blocks connected by fallthru), simply
by not discarding the register state at the branch.
This does not help blocks starting with a label, as they are
reached via a taken branch, and that would require saving the
complete register state at the branch.
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/tcg/tcg-opc.h | 7 +++---
include/tcg/tcg.h | 4 +++-
tcg/tcg.c | 55 +++++++++++++++++++++++++++++++++++++++++--
3 files changed, 60 insertions(+), 6 deletions(-)
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
index e3929b80d2..67092e82c6 100644
--- a/include/tcg/tcg-opc.h
+++ b/include/tcg/tcg-opc.h
@@ -81,7 +81,7 @@ DEF(extract_i32, 1, 1, 2, IMPL(TCG_TARGET_HAS_extract_i32))
DEF(sextract_i32, 1, 1, 2, IMPL(TCG_TARGET_HAS_sextract_i32))
DEF(extract2_i32, 1, 2, 1, IMPL(TCG_TARGET_HAS_extract2_i32))
-DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END)
+DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH)
DEF(add2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_add2_i32))
DEF(sub2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_sub2_i32))
@@ -89,7 +89,8 @@ DEF(mulu2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_mulu2_i32))
DEF(muls2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_muls2_i32))
DEF(muluh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_muluh_i32))
DEF(mulsh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_mulsh_i32))
-DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | IMPL(TCG_TARGET_REG_BITS == 32))
+DEF(brcond2_i32, 0, 4, 2,
+ TCG_OPF_BB_END | TCG_OPF_COND_BRANCH | IMPL(TCG_TARGET_REG_BITS == 32))
DEF(setcond2_i32, 1, 4, 1, IMPL(TCG_TARGET_REG_BITS == 32))
DEF(ext8s_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext8s_i32))
@@ -159,7 +160,7 @@ DEF(extrh_i64_i32, 1, 1, 0,
IMPL(TCG_TARGET_HAS_extrh_i64_i32)
| (TCG_TARGET_REG_BITS == 32 ? TCG_OPF_NOT_PRESENT : 0))
-DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | IMPL64)
+DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH | IMPL64)
DEF(ext8s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext8s_i64))
DEF(ext16s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext16s_i64))
DEF(ext32s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext32s_i64))
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index 8804a8c4a2..8ff9dad4ef 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -990,7 +990,7 @@ typedef struct TCGArgConstraint {
#define TCG_MAX_OP_ARGS 16
-/* Bits for TCGOpDef->flags, 8 bits available. */
+/* Bits for TCGOpDef->flags, 8 bits available, all used. */
enum {
/* Instruction exits the translation block. */
TCG_OPF_BB_EXIT = 0x01,
@@ -1008,6 +1008,8 @@ enum {
TCG_OPF_NOT_PRESENT = 0x20,
/* Instruction operands are vectors. */
TCG_OPF_VECTOR = 0x40,
+ /* Instruction is a conditional branch. */
+ TCG_OPF_COND_BRANCH = 0x80
};
typedef struct TCGOpDef {
diff --git a/tcg/tcg.c b/tcg/tcg.c
index a8c28440e2..f49f1a7f35 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -2519,6 +2519,28 @@ static void la_global_sync(TCGContext *s, int ng)
}
}
+/*
+ * liveness analysis: conditional branch: all temps are dead,
+ * globals and local temps should be synced.
+ */
+static void la_bb_sync(TCGContext *s, int ng, int nt)
+{
+ la_global_sync(s, ng);
+
+ for (int i = ng; i < nt; ++i) {
+ if (s->temps[i].temp_local) {
+ int state = s->temps[i].state;
+ s->temps[i].state = state | TS_MEM;
+ if (state != TS_DEAD) {
+ continue;
+ }
+ } else {
+ s->temps[i].state = TS_DEAD;
+ }
+ la_reset_pref(&s->temps[i]);
+ }
+}
+
/* liveness analysis: sync globals back to memory and kill. */
static void la_global_kill(TCGContext *s, int ng)
{
@@ -2795,6 +2817,8 @@ static void liveness_pass_1(TCGContext *s)
/* If end of basic block, update. */
if (def->flags & TCG_OPF_BB_EXIT) {
la_func_end(s, nb_globals, nb_temps);
+ } else if (def->flags & TCG_OPF_COND_BRANCH) {
+ la_bb_sync(s, nb_globals, nb_temps);
} else if (def->flags & TCG_OPF_BB_END) {
la_bb_end(s, nb_globals, nb_temps);
} else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
@@ -2907,7 +2931,10 @@ static bool liveness_pass_2(TCGContext *s)
nb_oargs = def->nb_oargs;
/* Set flags similar to how calls require. */
- if (def->flags & TCG_OPF_BB_END) {
+ if (def->flags & TCG_OPF_COND_BRANCH) {
+ /* Like reading globals: sync_globals */
+ call_flags = TCG_CALL_NO_WRITE_GLOBALS;
+ } else if (def->flags & TCG_OPF_BB_END) {
/* Like writing globals: save_globals */
call_flags = 0;
} else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
@@ -3379,6 +3406,28 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
save_globals(s, allocated_regs);
}
+/*
+ * At a conditional branch, we assume all temporaries are dead and
+ * all globals and local temps are synced to their location.
+ */
+static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
+{
+ sync_globals(s, allocated_regs);
+
+ for (int i = s->nb_globals; i < s->nb_temps; i++) {
+ TCGTemp *ts = &s->temps[i];
+ /*
+ * The liveness analysis already ensures that temps are dead.
+ * Keep tcg_debug_asserts for safety.
+ */
+ if (ts->temp_local) {
+ tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
+ } else {
+ tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
+ }
+ }
+}
+
/*
* Specialized code generation for INDEX_op_movi_*.
*/
@@ -3730,7 +3779,9 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
}
}
- if (def->flags & TCG_OPF_BB_END) {
+ if (def->flags & TCG_OPF_COND_BRANCH) {
+ tcg_reg_alloc_cbranch(s, i_allocated_regs);
+ } else if (def->flags & TCG_OPF_BB_END) {
tcg_reg_alloc_bb_end(s, i_allocated_regs);
} else {
if (def->flags & TCG_OPF_CALL_CLOBBER) {
--
2.25.1
next prev parent reply other threads:[~2020-10-27 16:54 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-10-27 16:51 [PULL 0/3] tcg patch queue Richard Henderson
2020-10-27 16:51 ` Richard Henderson [this message]
2020-10-27 16:51 ` [PULL 2/3] tcg/optimize: Flush data at labels not TCG_OPF_BB_END Richard Henderson
2020-10-27 16:51 ` [PULL 3/3] accel/tcg: Add CPU_LOG_EXEC tracing for cpu_io_recompile() Richard Henderson
2020-10-31 9:48 ` [PULL 0/3] tcg patch queue Peter Maydell
2020-11-02 13:57 ` Peter Maydell
2020-11-02 16:14 ` Richard Henderson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20201027165114.71508-2-richard.henderson@linaro.org \
--to=richard.henderson@linaro.org \
--cc=alex.bennee@linaro.org \
--cc=peter.maydell@linaro.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).