From: Taylor Simpson <tsimpson@quicinc.com>
To: qemu-devel@nongnu.org
Cc: tsimpson@quicinc.com, richard.henderson@linaro.org,
philmd@linaro.org, ale@rev.ng, anjo@rev.ng, bcain@quicinc.com,
quic_mathbern@quicinc.com
Subject: [PATCH v4 11/11] Hexagon (target/hexagon) Use direct block chaining for tight loops
Date: Mon, 7 Nov 2022 20:05:52 -0800 [thread overview]
Message-ID: <20221108040552.22175-12-tsimpson@quicinc.com> (raw)
In-Reply-To: <20221108040552.22175-1-tsimpson@quicinc.com>
Direct block chaining is documented here
https://qemu.readthedocs.io/en/latest/devel/tcg.html#direct-block-chaining
Hexagon inner loops end with the endloop0 instruction
To go back to the beginning of the loop, this instructions writes to PC
from register SA0 (start address 0). To use direct block chaining, we
have to assign PC with a constant value. So, we specialize the code
generation when the start of the translation block is equal to SA0.
When this is the case, we defer the compare/branch from endloop0 to
gen_end_tb. When this is done, we can assign the start address of the TB
to PC.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
---
target/hexagon/cpu.h | 13 +++---
target/hexagon/gen_tcg.h | 3 ++
target/hexagon/translate.h | 1 +
target/hexagon/genptr.c | 84 ++++++++++++++++++++++++++++++++++++++
target/hexagon/translate.c | 34 +++++++++++++++
5 files changed, 130 insertions(+), 5 deletions(-)
diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h
index ff8c26272d..1d89e11a1a 100644
--- a/target/hexagon/cpu.h
+++ b/target/hexagon/cpu.h
@@ -25,6 +25,7 @@
#include "mmvec/mmvec.h"
#include "qom/object.h"
#include "hw/core/cpu.h"
+#include "hw/registerfields.h"
#define NUM_PREGS 4
#define TOTAL_PER_THREAD_REGS 64
@@ -152,16 +153,18 @@ struct ArchCPU {
#include "cpu_bits.h"
+FIELD(TB_FLAGS, IS_TIGHT_LOOP, 0, 1)
+
static inline void cpu_get_tb_cpu_state(CPUHexagonState *env, target_ulong *pc,
target_ulong *cs_base, uint32_t *flags)
{
+ uint32_t hex_flags = 0;
*pc = env->gpr[HEX_REG_PC];
*cs_base = 0;
-#ifdef CONFIG_USER_ONLY
- *flags = 0;
-#else
-#error System mode not supported on Hexagon yet
-#endif
+ if (*pc == env->gpr[HEX_REG_SA0]) {
+ hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP, 1);
+ }
+ *flags = hex_flags;
}
static inline int cpu_mmu_index(CPUHexagonState *env, bool ifetch)
diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h
index 17431403a9..3a12c12ef1 100644
--- a/target/hexagon/gen_tcg.h
+++ b/target/hexagon/gen_tcg.h
@@ -620,6 +620,9 @@
#define fGEN_TCG_J2_callf(SHORTCODE) \
gen_cond_call(ctx, PuV, TCG_COND_NE, riV)
+#define fGEN_TCG_J2_endloop0(SHORTCODE) \
+ gen_endloop0(ctx)
+
/*
* Compound compare and jump instructions
* Here is a primer to understand the tag names
diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
index aacf0b0921..d971f4f095 100644
--- a/target/hexagon/translate.h
+++ b/target/hexagon/translate.h
@@ -59,6 +59,7 @@ typedef struct DisasContext {
bool pre_commit;
TCGCond branch_cond;
target_ulong branch_dest;
+ bool is_tight_loop;
} DisasContext;
static inline void ctx_log_reg_write(DisasContext *ctx, int rnum)
diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c
index 584e6415c0..34b2a52bb0 100644
--- a/target/hexagon/genptr.c
+++ b/target/hexagon/genptr.c
@@ -497,6 +497,33 @@ static void gen_write_new_pc_pcrel(DisasContext *ctx, int pc_off,
}
}
+static void gen_set_usr_field(int field, TCGv val)
+{
+ tcg_gen_deposit_tl(hex_new_value[HEX_REG_USR], hex_new_value[HEX_REG_USR],
+ val,
+ reg_field_info[field].offset,
+ reg_field_info[field].width);
+}
+
+static void gen_set_usr_fieldi(int field, int x)
+{
+ if (reg_field_info[field].width == 1) {
+ target_ulong bit = 1 << reg_field_info[field].offset;
+ if ((x & 1) == 1) {
+ tcg_gen_ori_tl(hex_new_value[HEX_REG_USR],
+ hex_new_value[HEX_REG_USR],
+ bit);
+ } else {
+ tcg_gen_andi_tl(hex_new_value[HEX_REG_USR],
+ hex_new_value[HEX_REG_USR],
+ ~bit);
+ }
+ } else {
+ TCGv val = tcg_constant_tl(x);
+ gen_set_usr_field(field, val);
+ }
+}
+
static void gen_compare(TCGCond cond, TCGv res, TCGv arg1, TCGv arg2)
{
TCGv one = tcg_constant_tl(0xff);
@@ -636,6 +663,63 @@ static void gen_cond_call(DisasContext *ctx, TCGv pred,
gen_set_label(skip);
}
+static void gen_endloop0(DisasContext *ctx)
+{
+ TCGv lpcfg = tcg_temp_local_new();
+
+ GET_USR_FIELD(USR_LPCFG, lpcfg);
+
+ /*
+ * if (lpcfg == 1) {
+ * hex_new_pred_value[3] = 0xff;
+ * hex_pred_written |= 1 << 3;
+ * }
+ */
+ TCGLabel *label1 = gen_new_label();
+ tcg_gen_brcondi_tl(TCG_COND_NE, lpcfg, 1, label1);
+ {
+ tcg_gen_movi_tl(hex_new_pred_value[3], 0xff);
+ tcg_gen_ori_tl(hex_pred_written, hex_pred_written, 1 << 3);
+ }
+ gen_set_label(label1);
+
+ /*
+ * if (lpcfg) {
+ * SET_USR_FIELD(USR_LPCFG, lpcfg - 1);
+ * }
+ */
+ TCGLabel *label2 = gen_new_label();
+ tcg_gen_brcondi_tl(TCG_COND_EQ, lpcfg, 0, label2);
+ {
+ tcg_gen_subi_tl(lpcfg, lpcfg, 1);
+ SET_USR_FIELD(USR_LPCFG, lpcfg);
+ }
+ gen_set_label(label2);
+
+ /*
+ * If we're in a tight loop, we'll do this at the end of the TB to take
+ * advantage of direct block chaining.
+ */
+ if (!ctx->is_tight_loop) {
+ /*
+ * if (hex_gpr[HEX_REG_LC0] > 1) {
+ * PC = hex_gpr[HEX_REG_SA0];
+ * hex_new_value[HEX_REG_LC0] = hex_gpr[HEX_REG_LC0] - 1;
+ * }
+ */
+ TCGLabel *label3 = gen_new_label();
+ tcg_gen_brcondi_tl(TCG_COND_LEU, hex_gpr[HEX_REG_LC0], 1, label3);
+ {
+ gen_jumpr(ctx, hex_gpr[HEX_REG_SA0]);
+ tcg_gen_subi_tl(hex_new_value[HEX_REG_LC0],
+ hex_gpr[HEX_REG_LC0], 1);
+ }
+ gen_set_label(label3);
+ }
+
+ tcg_temp_free(lpcfg);
+}
+
static void gen_cmp_jumpnv(DisasContext *ctx,
TCGCond cond, TCGv val, TCGv src, int pc_off)
{
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index 8e5814a3ea..c6ba96af43 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -135,6 +135,8 @@ static void gen_goto_tb(DisasContext *ctx, int idx, target_ulong dest)
static void gen_end_tb(DisasContext *ctx)
{
+ Packet *pkt = ctx->pkt;
+
gen_exec_counters(ctx);
if (ctx->branch_cond != TCG_COND_NEVER) {
@@ -147,6 +149,18 @@ static void gen_end_tb(DisasContext *ctx)
} else {
gen_goto_tb(ctx, 0, ctx->branch_dest);
}
+ } else if (ctx->is_tight_loop &&
+ pkt->insn[pkt->num_insns - 1].opcode == J2_endloop0) {
+ /*
+ * When we're in a tight loop, we defer the endloop0 processing
+ * to take advantage of direct block chaining
+ */
+ TCGLabel *skip = gen_new_label();
+ tcg_gen_brcondi_tl(TCG_COND_LEU, hex_gpr[HEX_REG_LC0], 1, skip);
+ tcg_gen_subi_tl(hex_gpr[HEX_REG_LC0], hex_gpr[HEX_REG_LC0], 1);
+ gen_goto_tb(ctx, 0, ctx->base.tb->pc);
+ gen_set_label(skip);
+ gen_goto_tb(ctx, 1, ctx->next_PC);
} else {
tcg_gen_lookup_and_goto_ptr();
}
@@ -337,6 +351,15 @@ static void mark_implicit_reg_write(DisasContext *ctx, int attrib, int rnum)
*/
bool is_predicated = GET_ATTRIB(opcode, A_CONDEXEC) ||
rnum == HEX_REG_USR;
+
+ /* LC0/LC1 is conditionally written by endloop instructions */
+ if ((rnum == HEX_REG_LC0 || rnum == HEX_REG_LC1) &&
+ (opcode == J2_endloop0 ||
+ opcode == J2_endloop1 ||
+ opcode == J2_endloop01)) {
+ is_predicated = true;
+ }
+
if (is_predicated && !is_preloaded(ctx, rnum)) {
tcg_gen_mov_tl(hex_new_value[rnum], hex_gpr[rnum]);
}
@@ -420,6 +443,14 @@ static void gen_reg_writes(DisasContext *ctx)
int reg_num = ctx->reg_log[i];
tcg_gen_mov_tl(hex_gpr[reg_num], hex_new_value[reg_num]);
+
+ /*
+ * ctx->is_tight_loop is set when SA0 points to the beginning of the TB.
+ * If we write to SA0, we have to turn off tight loop handling.
+ */
+ if (reg_num == HEX_REG_SA0) {
+ ctx->is_tight_loop = false;
+ }
}
}
@@ -843,7 +874,10 @@ static void hexagon_tr_init_disas_context(DisasContextBase *dcbase,
static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu)
{
DisasContext *ctx = container_of(db, DisasContext, base);
+ uint32_t hex_flags = db->tb->flags;
+
ctx->branch_cond = TCG_COND_NEVER;
+ ctx->is_tight_loop = FIELD_EX32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP);
}
static void hexagon_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
--
2.17.1
prev parent reply other threads:[~2022-11-08 4:08 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-11-08 4:05 [PATCH v4 00/11] Hexagon (target/hexagon) performance and bug fixes Taylor Simpson
2022-11-08 4:05 ` [PATCH v4 01/11] Hexagon (target/hexagon) Add pkt and insn to DisasContext Taylor Simpson
2022-11-08 4:05 ` [PATCH v4 02/11] Hexagon (target/hexagon) Fix predicated assignment to .tmp and .cur Taylor Simpson
2022-11-08 4:05 ` [PATCH v4 03/11] Hexagon (target/hexagon) Add overrides for S2_asr_r_r_sat/S2_asl_r_r_sat Taylor Simpson
2022-11-08 7:19 ` Richard Henderson
2022-11-08 15:41 ` Taylor Simpson
2022-11-08 4:05 ` [PATCH v4 04/11] Hexagon (target/hexagon) Only use branch_taken when packet has multi cof Taylor Simpson
2022-11-08 4:05 ` [PATCH v4 05/11] Hexagon (target/hexagon) Remove PC from the runtime state Taylor Simpson
2022-11-08 4:05 ` [PATCH v4 06/11] Hexagon (target/hexagon) Remove next_PC from " Taylor Simpson
2022-11-08 4:05 ` [PATCH v4 07/11] Hexagon (target/hexagon) Add overrides for direct call instructions Taylor Simpson
2022-11-08 7:20 ` Richard Henderson
2022-11-08 4:05 ` [PATCH v4 08/11] Hexagon (target/hexagon) Add overrides for compound compare and jump Taylor Simpson
2022-11-08 4:05 ` [PATCH v4 09/11] Hexagon (target/hexagon) Add overrides for various forms of jump Taylor Simpson
2022-11-08 4:05 ` [PATCH v4 10/11] Hexagon (target/hexagon) Use direct block chaining for direct jump/branch Taylor Simpson
2022-11-08 7:24 ` Richard Henderson
2022-11-08 15:41 ` Taylor Simpson
2022-11-09 1:41 ` Richard Henderson
2022-11-09 15:35 ` Taylor Simpson
2022-11-08 4:05 ` Taylor Simpson [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20221108040552.22175-12-tsimpson@quicinc.com \
--to=tsimpson@quicinc.com \
--cc=ale@rev.ng \
--cc=anjo@rev.ng \
--cc=bcain@quicinc.com \
--cc=philmd@linaro.org \
--cc=qemu-devel@nongnu.org \
--cc=quic_mathbern@quicinc.com \
--cc=richard.henderson@linaro.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).