* [PATCH 1/3] Hexagon (target/hexagon) Analyze reads before writes
2023-11-02 20:10 [PATCH 0/3] Hexagon (target/hexagon) Enable more short-circuit packets Taylor Simpson
@ 2023-11-02 20:10 ` Taylor Simpson
2023-11-02 20:10 ` [PATCH 2/3] Hexagon (target/hexagon) Enable more short-circuit packets (scalar core) Taylor Simpson
` (2 subsequent siblings)
3 siblings, 0 replies; 7+ messages in thread
From: Taylor Simpson @ 2023-11-02 20:10 UTC (permalink / raw)
To: qemu-devel; +Cc: Taylor Simpson
We divide gen_analyze_funcs.py into 3 phases
Declare the operands
Analyze the register reads
Analyze the register writes
We also create special versions of ctx_log_*_read for new operands
Check that the operand is written before the read
This is a precursor to improving the analysis for short-circuiting
the packet semantics in a subsequent commit
Signed-off-by: Taylor Simpson <ltaylorsimpson@gmail.com>
---
target/hexagon/translate.h | 24 ++-
target/hexagon/README | 7 +-
target/hexagon/gen_analyze_funcs.py | 221 +++++++++++-----------------
3 files changed, 111 insertions(+), 141 deletions(-)
diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
index 4dd59c6726..b2fe3a048d 100644
--- a/target/hexagon/translate.h
+++ b/target/hexagon/translate.h
@@ -75,6 +75,8 @@ typedef struct DisasContext {
TCGv dczero_addr;
} DisasContext;
+bool is_gather_store_insn(DisasContext *ctx);
+
static inline void ctx_log_pred_write(DisasContext *ctx, int pnum)
{
if (!test_bit(pnum, ctx->pregs_written)) {
@@ -89,6 +91,12 @@ static inline void ctx_log_pred_read(DisasContext *ctx, int pnum)
set_bit(pnum, ctx->pregs_read);
}
+static inline void ctx_log_pred_read_new(DisasContext *ctx, int pnum)
+{
+ g_assert(test_bit(pnum, ctx->pregs_written));
+ set_bit(pnum, ctx->pregs_read);
+}
+
static inline void ctx_log_reg_write(DisasContext *ctx, int rnum,
bool is_predicated)
{
@@ -120,6 +128,12 @@ static inline void ctx_log_reg_read(DisasContext *ctx, int rnum)
set_bit(rnum, ctx->regs_read);
}
+static inline void ctx_log_reg_read_new(DisasContext *ctx, int rnum)
+{
+ g_assert(test_bit(rnum, ctx->regs_written));
+ set_bit(rnum, ctx->regs_read);
+}
+
static inline void ctx_log_reg_read_pair(DisasContext *ctx, int rnum)
{
ctx_log_reg_read(ctx, rnum);
@@ -171,6 +185,15 @@ static inline void ctx_log_vreg_read(DisasContext *ctx, int rnum)
set_bit(rnum, ctx->vregs_read);
}
+static inline void ctx_log_vreg_read_new(DisasContext *ctx, int rnum)
+{
+ g_assert(is_gather_store_insn(ctx) ||
+ test_bit(rnum, ctx->vregs_updated) ||
+ test_bit(rnum, ctx->vregs_select) ||
+ test_bit(rnum, ctx->vregs_updated_tmp));
+ set_bit(rnum, ctx->vregs_read);
+}
+
static inline void ctx_log_vreg_read_pair(DisasContext *ctx, int rnum)
{
ctx_log_vreg_read(ctx, rnum ^ 0);
@@ -205,7 +228,6 @@ extern TCGv hex_vstore_addr[VSTORES_MAX];
extern TCGv hex_vstore_size[VSTORES_MAX];
extern TCGv hex_vstore_pending[VSTORES_MAX];
-bool is_gather_store_insn(DisasContext *ctx);
void process_store(DisasContext *ctx, int slot_num);
FIELD(PROBE_PKT_SCALAR_STORE_S0, MMU_IDX, 0, 2)
diff --git a/target/hexagon/README b/target/hexagon/README
index 69b2ffe9bb..7dd74629eb 100644
--- a/target/hexagon/README
+++ b/target/hexagon/README
@@ -183,10 +183,11 @@ when the override is present.
}
We also generate an analyze_<tag> function for each instruction. Currently,
-these functions record the writes to registers by calling ctx_log_*. During
+these functions record the reads and writes to registers by calling ctx_log_*. During
gen_start_packet, we invoke the analyze_<tag> function for each instruction in
-the packet, and we mark the implicit writes. After the analysis is performed,
-we initialize the result register for each of the predicated assignments.
+the packet, and we mark the implicit writes. The analysis determines if the packet
+semantics can be short-circuited. If not, we initialize the result register for each
+of the predicated assignments.
In addition to instruction semantics, we use a generator to create the decode
tree. This generation is also a two step process. The first step is to run
diff --git a/target/hexagon/gen_analyze_funcs.py b/target/hexagon/gen_analyze_funcs.py
index c3b521abef..40b9473c44 100755
--- a/target/hexagon/gen_analyze_funcs.py
+++ b/target/hexagon/gen_analyze_funcs.py
@@ -22,157 +22,90 @@
import string
import hex_common
-
##
## Helpers for gen_analyze_func
##
def is_predicated(tag):
return "A_CONDEXEC" in hex_common.attribdict[tag]
+def vreg_write_type(tag):
+ newv = "EXT_DFL"
+ if hex_common.is_new_result(tag):
+ newv = "EXT_NEW"
+ elif hex_common.is_tmp_result(tag):
+ newv = "EXT_TMP"
+ return newv
-def analyze_opn_old(f, tag, regtype, regid, regno):
+def declare_regn(f, tag, regtype, regid, regno):
regN = f"{regtype}{regid}N"
- predicated = "true" if is_predicated(tag) else "false"
- if regtype == "R":
- if regid in {"ss", "tt"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_reg_read_pair(ctx, {regN});\n")
- elif regid in {"dd", "ee", "xx", "yy"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_reg_write_pair(ctx, {regN}, {predicated});\n")
- elif regid in {"s", "t", "u", "v"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_reg_read(ctx, {regN});\n")
- elif regid in {"d", "e", "x", "y"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_reg_write(ctx, {regN}, {predicated});\n")
- else:
- hex_common.bad_register(regtype, regid)
- elif regtype == "P":
- if regid in {"s", "t", "u", "v"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_pred_read(ctx, {regN});\n")
- elif regid in {"d", "e", "x"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_pred_write(ctx, {regN});\n")
- else:
- hex_common.bad_register(regtype, regid)
- elif regtype == "C":
- if regid == "ss":
- f.write(
- f" const int {regN} = insn->regno[{regno}] "
- "+ HEX_REG_SA0;\n"
- )
+ if regtype == "C":
+ f.write(
+ f" const int {regN} = insn->regno[{regno}] "
+ "+ HEX_REG_SA0;\n"
+ )
+ else:
+ f.write(f" const int {regN} = insn->regno[{regno}];\n")
+
+def analyze_read(f, tag, regtype, regid, regno):
+ regN = f"{regtype}{regid}N"
+ if hex_common.is_pair(regid):
+ if regtype in {"R", "C"}:
f.write(f" ctx_log_reg_read_pair(ctx, {regN});\n")
- elif regid == "dd":
- f.write(f" const int {regN} = insn->regno[{regno}] " "+ HEX_REG_SA0;\n")
- f.write(f" ctx_log_reg_write_pair(ctx, {regN}, {predicated});\n")
- elif regid == "s":
- f.write(
- f" const int {regN} = insn->regno[{regno}] "
- "+ HEX_REG_SA0;\n"
- )
- f.write(f" ctx_log_reg_read(ctx, {regN});\n")
- elif regid == "d":
- f.write(f" const int {regN} = insn->regno[{regno}] " "+ HEX_REG_SA0;\n")
- f.write(f" ctx_log_reg_write(ctx, {regN}, {predicated});\n")
- else:
- hex_common.bad_register(regtype, regid)
- elif regtype == "M":
- if regid == "u":
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_reg_read(ctx, {regN});\n")
- else:
- hex_common.bad_register(regtype, regid)
- elif regtype == "V":
- newv = "EXT_DFL"
- if hex_common.is_new_result(tag):
- newv = "EXT_NEW"
- elif hex_common.is_tmp_result(tag):
- newv = "EXT_TMP"
- if regid in {"dd", "xx"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(
- f" ctx_log_vreg_write_pair(ctx, {regN}, {newv}, " f"{predicated});\n"
- )
- elif regid in {"uu", "vv"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
+ elif regtype == "V":
f.write(f" ctx_log_vreg_read_pair(ctx, {regN});\n")
- elif regid in {"s", "u", "v", "w"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_vreg_read(ctx, {regN});\n")
- elif regid in {"d", "x", "y"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_vreg_write(ctx, {regN}, {newv}, " f"{predicated});\n")
else:
hex_common.bad_register(regtype, regid)
- elif regtype == "Q":
- if regid in {"d", "e", "x"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_qreg_write(ctx, {regN});\n")
- elif regid in {"s", "t", "u", "v"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_qreg_read(ctx, {regN});\n")
- else:
- hex_common.bad_register(regtype, regid)
- elif regtype == "G":
- if regid in {"dd"}:
- f.write(f"// const int {regN} = insn->regno[{regno}];\n")
- elif regid in {"d"}:
- f.write(f"// const int {regN} = insn->regno[{regno}];\n")
- elif regid in {"ss"}:
- f.write(f"// const int {regN} = insn->regno[{regno}];\n")
- elif regid in {"s"}:
- f.write(f"// const int {regN} = insn->regno[{regno}];\n")
- else:
- hex_common.bad_register(regtype, regid)
- elif regtype == "S":
- if regid in {"dd"}:
- f.write(f"// const int {regN} = insn->regno[{regno}];\n")
- elif regid in {"d"}:
- f.write(f"// const int {regN} = insn->regno[{regno}];\n")
- elif regid in {"ss"}:
- f.write(f"// const int {regN} = insn->regno[{regno}];\n")
- elif regid in {"s"}:
- f.write(f"// const int {regN} = insn->regno[{regno}];\n")
+ elif hex_common.is_single(regid):
+ if hex_common.is_old_val(regtype, regid, tag):
+ if regtype in {"R", "C", "M"}:
+ f.write(f" ctx_log_reg_read(ctx, {regN});\n")
+ elif regtype == "P":
+ f.write(f" ctx_log_pred_read(ctx, {regN});\n")
+ elif regtype in {"V", "O"}:
+ f.write(f" ctx_log_vreg_read(ctx, {regN});\n")
+ elif regtype == "Q":
+ f.write(f" ctx_log_qreg_read(ctx, {regN});\n")
+ else:
+ hex_common.bad_register(regtype, regid)
+ elif hex_common.is_new_val(regtype, regid, tag):
+ if regtype == "N":
+ f.write(f" ctx_log_reg_read_new(ctx, {regN});\n")
+ elif regtype == "P":
+ f.write(f" ctx_log_pred_read_new(ctx, {regN});\n")
+ elif regtype == "O":
+ f.write(f" ctx_log_vreg_read_new(ctx, {regN});\n")
+ else:
+ hex_common.bad_register(regtype, regid)
else:
hex_common.bad_register(regtype, regid)
else:
hex_common.bad_register(regtype, regid)
-
-def analyze_opn_new(f, tag, regtype, regid, regno):
+def analyze_write(f, tag, regtype, regid, regno):
regN = f"{regtype}{regid}N"
- if regtype == "N":
- if regid in {"s", "t"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_reg_read(ctx, {regN});\n")
- else:
- hex_common.bad_register(regtype, regid)
- elif regtype == "P":
- if regid in {"t", "u", "v"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_pred_read(ctx, {regN});\n")
- else:
- hex_common.bad_register(regtype, regid)
- elif regtype == "O":
- if regid == "s":
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_vreg_read(ctx, {regN});\n")
+ predicated = "true" if is_predicated(tag) else "false"
+ if hex_common.is_pair(regid):
+ if regtype in {"R", "C"}:
+ f.write(f" ctx_log_reg_write_pair(ctx, {regN}, {predicated});\n")
+ elif regtype == "V":
+ f.write(
+ f" ctx_log_vreg_write_pair(ctx, {regN}, "
+ f"{vreg_write_type(tag)}, {predicated});\n"
+ )
else:
hex_common.bad_register(regtype, regid)
- else:
- hex_common.bad_register(regtype, regid)
-
-
-def analyze_opn(f, tag, regtype, regid, i):
- if hex_common.is_pair(regid):
- analyze_opn_old(f, tag, regtype, regid, i)
elif hex_common.is_single(regid):
- if hex_common.is_old_val(regtype, regid, tag):
- analyze_opn_old(f, tag, regtype, regid, i)
- elif hex_common.is_new_val(regtype, regid, tag):
- analyze_opn_new(f, tag, regtype, regid, i)
+ if regtype in {"R", "C"}:
+ f.write(f" ctx_log_reg_write(ctx, {regN}, {predicated});\n")
+ elif regtype == "P":
+ f.write(f" ctx_log_pred_write(ctx, {regN});\n")
+ elif regtype == "V":
+ f.write(
+ f" ctx_log_vreg_write(ctx, {regN}, "
+ f"{vreg_write_type(tag)}, {predicated});\n"
+ )
+ elif regtype == "Q":
+ f.write(f" ctx_log_qreg_write(ctx, {regN});\n")
else:
hex_common.bad_register(regtype, regid)
else:
@@ -187,11 +120,11 @@ def analyze_opn(f, tag, regtype, regid, i):
## {
## Insn *insn G_GNUC_UNUSED = ctx->insn;
## const int RdN = insn->regno[0];
-## ctx_log_reg_write(ctx, RdN, false);
## const int RsN = insn->regno[1];
-## ctx_log_reg_read(ctx, RsN);
## const int RtN = insn->regno[2];
+## ctx_log_reg_read(ctx, RsN);
## ctx_log_reg_read(ctx, RtN);
+## ctx_log_reg_write(ctx, RdN, false);
## }
##
def gen_analyze_func(f, tag, regs, imms):
@@ -200,10 +133,24 @@ def gen_analyze_func(f, tag, regs, imms):
f.write(" Insn *insn G_GNUC_UNUSED = ctx->insn;\n")
+ ## Declare the operands
+ i = 0
+ for regtype, regid in regs:
+ declare_regn(f, tag, regtype, regid, i)
+ i += 1
+
+ ## Analyze the register reads
+ i = 0
+ for regtype, regid in regs:
+ if hex_common.is_read(regid):
+ analyze_read(f, tag, regtype, regid, i)
+ i += 1
+
+ ## Analyze the register writes
i = 0
- ## Analyze all the registers
for regtype, regid in regs:
- analyze_opn(f, tag, regtype, regid, i)
+ if hex_common.is_written(regid):
+ analyze_write(f, tag, regtype, regid, i)
i += 1
has_generated_helper = not hex_common.skip_qemu_helper(
@@ -240,13 +187,13 @@ def main():
tagimms = hex_common.get_tagimms()
with open(sys.argv[-1], "w") as f:
- f.write("#ifndef HEXAGON_TCG_FUNCS_H\n")
- f.write("#define HEXAGON_TCG_FUNCS_H\n\n")
+ f.write("#ifndef HEXAGON_ANALYZE_FUNCS_H\n")
+ f.write("#define HEXAGON_ANALYZE_FUNCS_H\n\n")
for tag in hex_common.tags:
gen_analyze_func(f, tag, tagregs[tag], tagimms[tag])
- f.write("#endif /* HEXAGON_TCG_FUNCS_H */\n")
+ f.write("#endif /* HEXAGON_ANALYZE_FUNCS_H */\n")
if __name__ == "__main__":
--
2.34.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 2/3] Hexagon (target/hexagon) Enable more short-circuit packets (scalar core)
2023-11-02 20:10 [PATCH 0/3] Hexagon (target/hexagon) Enable more short-circuit packets Taylor Simpson
2023-11-02 20:10 ` [PATCH 1/3] Hexagon (target/hexagon) Analyze reads before writes Taylor Simpson
@ 2023-11-02 20:10 ` Taylor Simpson
2023-11-02 20:10 ` [PATCH 3/3] Hexagon (target/hexagon) Enable more short-circuit packets (HVX) Taylor Simpson
2023-11-02 23:44 ` [PATCH 0/3] Hexagon (target/hexagon) Enable more short-circuit packets Anton Johansson via
3 siblings, 0 replies; 7+ messages in thread
From: Taylor Simpson @ 2023-11-02 20:10 UTC (permalink / raw)
To: qemu-devel; +Cc: Taylor Simpson
Look for read-after-write instead of overlap of reads and writes
Here is an example with overalp but no read-after-write:
0x000200fc: 0x38103876 { R0 = add(R0,R1); R6 = add(R6,R7) }
BEFORE:
---- 00000000000200fc
mov_i32 loc2,$0x0
mov_i32 loc2,r0
add_i32 loc3,loc2,r1
mov_i32 loc2,loc3
mov_i32 loc4,$0x0
mov_i32 loc4,r6
add_i32 loc5,loc4,r7
mov_i32 loc4,loc5
mov_i32 r0,loc2
mov_i32 r6,loc4
AFTER:
---- 00000000000200fc
add_i32 loc2,r0,r1
mov_i32 r0,loc2
add_i32 loc3,r6,r7
mov_i32 r6,loc3
We can also short-circuit packets with .new values by reading from the
real destination instead of the temporary.
0x00020100: 0x78005ff3 { R19 = #0xff
0x00020104: 0x2002e204 if (cmp.eq(N19.new,R2)) jump:t PC+8 }
BEFORE:
---- 0000000000020100
mov_i32 pc,$0x20108
mov_i32 loc8,$0x0
mov_i32 loc8,$0xff
setcond_i32 loc10,loc8,r2,eq
mov_i32 loc6,loc10
mov_i32 r19,loc8
add_i32 pkt_cnt,pkt_cnt,$0x2
add_i32 insn_cnt,insn_cnt,$0x4
brcond_i32 loc6,$0x0,eq,$L1
goto_tb $0x0
mov_i32 pc,$0x20108
exit_tb $0x7fbb54000040
set_label $L1
goto_tb $0x1
exit_tb $0x7fbb54000041
set_label $L0
exit_tb $0x7fbb54000043
AFTER:
---- 0000000000020100
mov_i32 pc,$0x20108
mov_i32 r19,$0xff
setcond_i32 loc7,r19,r2,eq
mov_i32 loc4,loc7
add_i32 pkt_cnt,pkt_cnt,$0x2
add_i32 insn_cnt,insn_cnt,$0x4
brcond_i32 loc4,$0x0,eq,$L1
goto_tb $0x0
mov_i32 pc,$0x20108
exit_tb $0x7f9764000040
set_label $L1
goto_tb $0x1
exit_tb $0x7f9764000041
set_label $L0
exit_tb $0x7f9764000043
Signed-off-by: Taylor Simpson <ltaylorsimpson@gmail.com>
---
target/hexagon/translate.h | 13 +++++++------
target/hexagon/translate.c | 19 +++----------------
target/hexagon/gen_tcg_funcs.py | 2 +-
3 files changed, 11 insertions(+), 23 deletions(-)
diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
index b2fe3a048d..7bb19ee672 100644
--- a/target/hexagon/translate.h
+++ b/target/hexagon/translate.h
@@ -38,12 +38,10 @@ typedef struct DisasContext {
int reg_log[REG_WRITES_MAX];
int reg_log_idx;
DECLARE_BITMAP(regs_written, TOTAL_PER_THREAD_REGS);
- DECLARE_BITMAP(regs_read, TOTAL_PER_THREAD_REGS);
DECLARE_BITMAP(predicated_regs, TOTAL_PER_THREAD_REGS);
int preg_log[PRED_WRITES_MAX];
int preg_log_idx;
DECLARE_BITMAP(pregs_written, NUM_PREGS);
- DECLARE_BITMAP(pregs_read, NUM_PREGS);
uint8_t store_width[STORES_MAX];
bool s1_store_processed;
int future_vregs_idx;
@@ -68,6 +66,7 @@ typedef struct DisasContext {
bool is_tight_loop;
bool short_circuit;
bool has_hvx_helper;
+ bool read_after_write;
TCGv new_value[TOTAL_PER_THREAD_REGS];
TCGv new_pred_value[NUM_PREGS];
TCGv pred_written;
@@ -88,13 +87,14 @@ static inline void ctx_log_pred_write(DisasContext *ctx, int pnum)
static inline void ctx_log_pred_read(DisasContext *ctx, int pnum)
{
- set_bit(pnum, ctx->pregs_read);
+ if (test_bit(pnum, ctx->pregs_written)) {
+ ctx->read_after_write = true;
+ }
}
static inline void ctx_log_pred_read_new(DisasContext *ctx, int pnum)
{
g_assert(test_bit(pnum, ctx->pregs_written));
- set_bit(pnum, ctx->pregs_read);
}
static inline void ctx_log_reg_write(DisasContext *ctx, int rnum,
@@ -125,13 +125,14 @@ static inline void ctx_log_reg_write_pair(DisasContext *ctx, int rnum,
static inline void ctx_log_reg_read(DisasContext *ctx, int rnum)
{
- set_bit(rnum, ctx->regs_read);
+ if (test_bit(rnum, ctx->regs_written)) {
+ ctx->read_after_write = true;
+ }
}
static inline void ctx_log_reg_read_new(DisasContext *ctx, int rnum)
{
g_assert(test_bit(rnum, ctx->regs_written));
- set_bit(rnum, ctx->regs_read);
}
static inline void ctx_log_reg_read_pair(DisasContext *ctx, int rnum)
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index 666c061180..9dab26ee17 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -394,20 +394,8 @@ static bool need_commit(DisasContext *ctx)
}
}
- /* Check for overlap between register reads and writes */
- for (int i = 0; i < ctx->reg_log_idx; i++) {
- int rnum = ctx->reg_log[i];
- if (test_bit(rnum, ctx->regs_read)) {
- return true;
- }
- }
-
- /* Check for overlap between predicate reads and writes */
- for (int i = 0; i < ctx->preg_log_idx; i++) {
- int pnum = ctx->preg_log[i];
- if (test_bit(pnum, ctx->pregs_read)) {
- return true;
- }
+ if (ctx->read_after_write) {
+ return true;
}
/* Check for overlap between HVX reads and writes */
@@ -466,6 +454,7 @@ static void analyze_packet(DisasContext *ctx)
{
Packet *pkt = ctx->pkt;
ctx->has_hvx_helper = false;
+ ctx->read_after_write = false;
for (int i = 0; i < pkt->num_insns; i++) {
Insn *insn = &pkt->insn[i];
ctx->insn = insn;
@@ -490,11 +479,9 @@ static void gen_start_packet(DisasContext *ctx)
ctx->next_PC = next_PC;
ctx->reg_log_idx = 0;
bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS);
- bitmap_zero(ctx->regs_read, TOTAL_PER_THREAD_REGS);
bitmap_zero(ctx->predicated_regs, TOTAL_PER_THREAD_REGS);
ctx->preg_log_idx = 0;
bitmap_zero(ctx->pregs_written, NUM_PREGS);
- bitmap_zero(ctx->pregs_read, NUM_PREGS);
ctx->future_vregs_idx = 0;
ctx->tmp_vregs_idx = 0;
ctx->vreg_log_idx = 0;
diff --git a/target/hexagon/gen_tcg_funcs.py b/target/hexagon/gen_tcg_funcs.py
index f5246cee6d..3d24ae7960 100755
--- a/target/hexagon/gen_tcg_funcs.py
+++ b/target/hexagon/gen_tcg_funcs.py
@@ -198,7 +198,7 @@ def genptr_decl_new(f, tag, regtype, regid, regno):
if regid in {"t", "u", "v"}:
f.write(
f" TCGv {regtype}{regid}N = "
- f"ctx->new_pred_value[insn->regno[{regno}]];\n"
+ f"get_result_pred(ctx, insn->regno[{regno}]);\n"
)
else:
hex_common.bad_register(regtype, regid)
--
2.34.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 3/3] Hexagon (target/hexagon) Enable more short-circuit packets (HVX)
2023-11-02 20:10 [PATCH 0/3] Hexagon (target/hexagon) Enable more short-circuit packets Taylor Simpson
2023-11-02 20:10 ` [PATCH 1/3] Hexagon (target/hexagon) Analyze reads before writes Taylor Simpson
2023-11-02 20:10 ` [PATCH 2/3] Hexagon (target/hexagon) Enable more short-circuit packets (scalar core) Taylor Simpson
@ 2023-11-02 20:10 ` Taylor Simpson
2023-11-02 23:44 ` [PATCH 0/3] Hexagon (target/hexagon) Enable more short-circuit packets Anton Johansson via
3 siblings, 0 replies; 7+ messages in thread
From: Taylor Simpson @ 2023-11-02 20:10 UTC (permalink / raw)
To: qemu-devel; +Cc: Taylor Simpson
Look for read-after-write instead of overlap of reads and writes
HVX instructions with helpers have pass-by-reference semantics, so
we check for overlaps of reads and writes within the same instruction.
Signed-off-by: Taylor Simpson <ltaylorsimpson@gmail.com>
---
target/hexagon/translate.h | 88 +++++++++++++++++++++++------
target/hexagon/translate.c | 58 ++-----------------
target/hexagon/gen_analyze_funcs.py | 51 +++++++++++------
target/hexagon/hex_common.py | 10 ++++
4 files changed, 120 insertions(+), 87 deletions(-)
diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
index 7bb19ee672..7f47db71e8 100644
--- a/target/hexagon/translate.h
+++ b/target/hexagon/translate.h
@@ -50,23 +50,27 @@ typedef struct DisasContext {
int tmp_vregs_num[VECTOR_TEMPS_MAX];
int vreg_log[NUM_VREGS];
int vreg_log_idx;
+ DECLARE_BITMAP(vregs_written, NUM_VREGS);
+ DECLARE_BITMAP(insn_vregs_written, NUM_VREGS);
DECLARE_BITMAP(vregs_updated_tmp, NUM_VREGS);
DECLARE_BITMAP(vregs_updated, NUM_VREGS);
DECLARE_BITMAP(vregs_select, NUM_VREGS);
DECLARE_BITMAP(predicated_future_vregs, NUM_VREGS);
DECLARE_BITMAP(predicated_tmp_vregs, NUM_VREGS);
- DECLARE_BITMAP(vregs_read, NUM_VREGS);
+ DECLARE_BITMAP(insn_vregs_read, NUM_VREGS);
int qreg_log[NUM_QREGS];
int qreg_log_idx;
- DECLARE_BITMAP(qregs_read, NUM_QREGS);
+ DECLARE_BITMAP(qregs_written, NUM_QREGS);
+ DECLARE_BITMAP(insn_qregs_written, NUM_QREGS);
+ DECLARE_BITMAP(insn_qregs_read, NUM_QREGS);
bool pre_commit;
bool need_commit;
TCGCond branch_cond;
target_ulong branch_dest;
bool is_tight_loop;
bool short_circuit;
- bool has_hvx_helper;
bool read_after_write;
+ bool has_hvx_overlap;
TCGv new_value[TOTAL_PER_THREAD_REGS];
TCGv new_pred_value[NUM_PREGS];
TCGv pred_written;
@@ -146,10 +150,25 @@ intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum,
intptr_t ctx_tmp_vreg_off(DisasContext *ctx, int regnum,
int num, bool alloc_ok);
+static inline void ctx_start_hvx_insn(DisasContext *ctx)
+{
+ bitmap_zero(ctx->insn_vregs_written, NUM_VREGS);
+ bitmap_zero(ctx->insn_vregs_read, NUM_VREGS);
+ bitmap_zero(ctx->insn_qregs_written, NUM_QREGS);
+ bitmap_zero(ctx->insn_qregs_read, NUM_QREGS);
+}
+
static inline void ctx_log_vreg_write(DisasContext *ctx,
int rnum, VRegWriteType type,
- bool is_predicated)
+ bool is_predicated, bool has_helper)
{
+ if (has_helper) {
+ set_bit(rnum, ctx->insn_vregs_written);
+ if (test_bit(rnum, ctx->insn_vregs_read)) {
+ ctx->has_hvx_overlap = true;
+ }
+ }
+ set_bit(rnum, ctx->vregs_written);
if (type != EXT_TMP) {
if (!test_bit(rnum, ctx->vregs_updated)) {
ctx->vreg_log[ctx->vreg_log_idx] = rnum;
@@ -175,42 +194,77 @@ static inline void ctx_log_vreg_write(DisasContext *ctx,
static inline void ctx_log_vreg_write_pair(DisasContext *ctx,
int rnum, VRegWriteType type,
- bool is_predicated)
+ bool is_predicated, bool has_helper)
{
- ctx_log_vreg_write(ctx, rnum ^ 0, type, is_predicated);
- ctx_log_vreg_write(ctx, rnum ^ 1, type, is_predicated);
+ ctx_log_vreg_write(ctx, rnum ^ 0, type, is_predicated, has_helper);
+ ctx_log_vreg_write(ctx, rnum ^ 1, type, is_predicated, has_helper);
}
-static inline void ctx_log_vreg_read(DisasContext *ctx, int rnum)
+static inline void ctx_log_vreg_read(DisasContext *ctx, int rnum,
+ bool has_helper)
{
- set_bit(rnum, ctx->vregs_read);
+ if (has_helper) {
+ set_bit(rnum, ctx->insn_vregs_read);
+ if (test_bit(rnum, ctx->insn_vregs_written)) {
+ ctx->has_hvx_overlap = true;
+ }
+ }
+ if (test_bit(rnum, ctx->vregs_written)) {
+ ctx->read_after_write = true;
+ }
}
-static inline void ctx_log_vreg_read_new(DisasContext *ctx, int rnum)
+static inline void ctx_log_vreg_read_new(DisasContext *ctx, int rnum,
+ bool has_helper)
{
g_assert(is_gather_store_insn(ctx) ||
test_bit(rnum, ctx->vregs_updated) ||
test_bit(rnum, ctx->vregs_select) ||
test_bit(rnum, ctx->vregs_updated_tmp));
- set_bit(rnum, ctx->vregs_read);
+ if (has_helper) {
+ set_bit(rnum, ctx->insn_vregs_read);
+ if (test_bit(rnum, ctx->insn_vregs_written)) {
+ ctx->has_hvx_overlap = true;
+ }
+ }
+ if (is_gather_store_insn(ctx)) {
+ ctx->read_after_write = true;
+ }
}
-static inline void ctx_log_vreg_read_pair(DisasContext *ctx, int rnum)
+static inline void ctx_log_vreg_read_pair(DisasContext *ctx, int rnum,
+ bool has_helper)
{
- ctx_log_vreg_read(ctx, rnum ^ 0);
- ctx_log_vreg_read(ctx, rnum ^ 1);
+ ctx_log_vreg_read(ctx, rnum ^ 0, has_helper);
+ ctx_log_vreg_read(ctx, rnum ^ 1, has_helper);
}
static inline void ctx_log_qreg_write(DisasContext *ctx,
- int rnum)
+ int rnum, bool has_helper)
{
+ if (has_helper) {
+ set_bit(rnum, ctx->insn_qregs_written);
+ if (test_bit(rnum, ctx->insn_qregs_read)) {
+ ctx->has_hvx_overlap = true;
+ }
+ }
+ set_bit(rnum, ctx->qregs_written);
ctx->qreg_log[ctx->qreg_log_idx] = rnum;
ctx->qreg_log_idx++;
}
-static inline void ctx_log_qreg_read(DisasContext *ctx, int qnum)
+static inline void ctx_log_qreg_read(DisasContext *ctx,
+ int qnum, bool has_helper)
{
- set_bit(qnum, ctx->qregs_read);
+ if (has_helper) {
+ set_bit(qnum, ctx->insn_qregs_read);
+ if (test_bit(qnum, ctx->insn_qregs_written)) {
+ ctx->has_hvx_overlap = true;
+ }
+ }
+ if (test_bit(qnum, ctx->qregs_written)) {
+ ctx->read_after_write = true;
+ }
}
extern TCGv hex_gpr[TOTAL_PER_THREAD_REGS];
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index 9dab26ee17..3545480080 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -378,60 +378,10 @@ static bool need_commit(DisasContext *ctx)
return true;
}
- if (pkt->num_insns == 1) {
- if (pkt->pkt_has_hvx) {
- /*
- * The HVX instructions with generated helpers use
- * pass-by-reference, so they need the read/write overlap
- * check below.
- * The HVX instructions with overrides are OK.
- */
- if (!ctx->has_hvx_helper) {
- return false;
- }
- } else {
- return false;
- }
- }
-
- if (ctx->read_after_write) {
+ if (ctx->read_after_write || ctx->has_hvx_overlap) {
return true;
}
- /* Check for overlap between HVX reads and writes */
- for (int i = 0; i < ctx->vreg_log_idx; i++) {
- int vnum = ctx->vreg_log[i];
- if (test_bit(vnum, ctx->vregs_read)) {
- return true;
- }
- }
- if (!bitmap_empty(ctx->vregs_updated_tmp, NUM_VREGS)) {
- int i = find_first_bit(ctx->vregs_updated_tmp, NUM_VREGS);
- while (i < NUM_VREGS) {
- if (test_bit(i, ctx->vregs_read)) {
- return true;
- }
- i = find_next_bit(ctx->vregs_updated_tmp, NUM_VREGS, i + 1);
- }
- }
- if (!bitmap_empty(ctx->vregs_select, NUM_VREGS)) {
- int i = find_first_bit(ctx->vregs_select, NUM_VREGS);
- while (i < NUM_VREGS) {
- if (test_bit(i, ctx->vregs_read)) {
- return true;
- }
- i = find_next_bit(ctx->vregs_select, NUM_VREGS, i + 1);
- }
- }
-
- /* Check for overlap between HVX predicate reads and writes */
- for (int i = 0; i < ctx->qreg_log_idx; i++) {
- int qnum = ctx->qreg_log[i];
- if (test_bit(qnum, ctx->qregs_read)) {
- return true;
- }
- }
-
return false;
}
@@ -453,8 +403,8 @@ static void mark_implicit_pred_reads(DisasContext *ctx)
static void analyze_packet(DisasContext *ctx)
{
Packet *pkt = ctx->pkt;
- ctx->has_hvx_helper = false;
ctx->read_after_write = false;
+ ctx->has_hvx_overlap = false;
for (int i = 0; i < pkt->num_insns; i++) {
Insn *insn = &pkt->insn[i];
ctx->insn = insn;
@@ -485,13 +435,13 @@ static void gen_start_packet(DisasContext *ctx)
ctx->future_vregs_idx = 0;
ctx->tmp_vregs_idx = 0;
ctx->vreg_log_idx = 0;
+ bitmap_zero(ctx->vregs_written, NUM_VREGS);
bitmap_zero(ctx->vregs_updated_tmp, NUM_VREGS);
bitmap_zero(ctx->vregs_updated, NUM_VREGS);
bitmap_zero(ctx->vregs_select, NUM_VREGS);
bitmap_zero(ctx->predicated_future_vregs, NUM_VREGS);
bitmap_zero(ctx->predicated_tmp_vregs, NUM_VREGS);
- bitmap_zero(ctx->vregs_read, NUM_VREGS);
- bitmap_zero(ctx->qregs_read, NUM_QREGS);
+ bitmap_zero(ctx->qregs_written, NUM_QREGS);
ctx->qreg_log_idx = 0;
for (i = 0; i < STORES_MAX; i++) {
ctx->store_width[i] = 0;
diff --git a/target/hexagon/gen_analyze_funcs.py b/target/hexagon/gen_analyze_funcs.py
index 40b9473c44..4541174590 100755
--- a/target/hexagon/gen_analyze_funcs.py
+++ b/target/hexagon/gen_analyze_funcs.py
@@ -52,7 +52,10 @@ def analyze_read(f, tag, regtype, regid, regno):
if regtype in {"R", "C"}:
f.write(f" ctx_log_reg_read_pair(ctx, {regN});\n")
elif regtype == "V":
- f.write(f" ctx_log_vreg_read_pair(ctx, {regN});\n")
+ f.write(
+ f" ctx_log_vreg_read_pair(ctx, {regN}, "
+ "insn_has_hvx_helper);\n"
+ )
else:
hex_common.bad_register(regtype, regid)
elif hex_common.is_single(regid):
@@ -62,9 +65,15 @@ def analyze_read(f, tag, regtype, regid, regno):
elif regtype == "P":
f.write(f" ctx_log_pred_read(ctx, {regN});\n")
elif regtype in {"V", "O"}:
- f.write(f" ctx_log_vreg_read(ctx, {regN});\n")
+ f.write(
+ f" ctx_log_vreg_read(ctx, {regN}, "
+ "insn_has_hvx_helper);\n"
+ )
elif regtype == "Q":
- f.write(f" ctx_log_qreg_read(ctx, {regN});\n")
+ f.write(
+ f" ctx_log_qreg_read(ctx, {regN}, "
+ "insn_has_hvx_helper);\n"
+ )
else:
hex_common.bad_register(regtype, regid)
elif hex_common.is_new_val(regtype, regid, tag):
@@ -73,7 +82,10 @@ def analyze_read(f, tag, regtype, regid, regno):
elif regtype == "P":
f.write(f" ctx_log_pred_read_new(ctx, {regN});\n")
elif regtype == "O":
- f.write(f" ctx_log_vreg_read_new(ctx, {regN});\n")
+ f.write(
+ f" ctx_log_vreg_read_new(ctx, {regN}, "
+ "insn_has_hvx_helper);\n"
+ )
else:
hex_common.bad_register(regtype, regid)
else:
@@ -90,7 +102,8 @@ def analyze_write(f, tag, regtype, regid, regno):
elif regtype == "V":
f.write(
f" ctx_log_vreg_write_pair(ctx, {regN}, "
- f"{vreg_write_type(tag)}, {predicated});\n"
+ f"{vreg_write_type(tag)}, {predicated}, "
+ "insn_has_hvx_helper);\n"
)
else:
hex_common.bad_register(regtype, regid)
@@ -102,10 +115,14 @@ def analyze_write(f, tag, regtype, regid, regno):
elif regtype == "V":
f.write(
f" ctx_log_vreg_write(ctx, {regN}, "
- f"{vreg_write_type(tag)}, {predicated});\n"
+ f"{vreg_write_type(tag)}, {predicated}, "
+ "insn_has_hvx_helper);\n"
)
elif regtype == "Q":
- f.write(f" ctx_log_qreg_write(ctx, {regN});\n")
+ f.write(
+ f" ctx_log_qreg_write(ctx, {regN}, "
+ "insn_has_hvx_helper);\n"
+ )
else:
hex_common.bad_register(regtype, regid)
else:
@@ -132,6 +149,17 @@ def gen_analyze_func(f, tag, regs, imms):
f.write("{\n")
f.write(" Insn *insn G_GNUC_UNUSED = ctx->insn;\n")
+ if (hex_common.is_hvx_insn(tag)):
+ if hex_common.has_hvx_helper(tag):
+ f.write(
+ " const bool G_GNUC_UNUSED insn_has_hvx_helper = true;\n"
+ )
+ f.write(" ctx_start_hvx_insn(ctx);\n")
+ else:
+ f.write(
+ " const bool G_GNUC_UNUSED insn_has_hvx_helper = false;\n"
+ )
+
## Declare the operands
i = 0
@@ -153,15 +181,6 @@ def gen_analyze_func(f, tag, regs, imms):
analyze_write(f, tag, regtype, regid, i)
i += 1
- has_generated_helper = not hex_common.skip_qemu_helper(
- tag
- ) and not hex_common.is_idef_parser_enabled(tag)
-
- ## Mark HVX instructions with generated helpers
- if (has_generated_helper and
- "A_CVI" in hex_common.attribdict[tag]):
- f.write(" ctx->has_hvx_helper = true;\n")
-
f.write("}\n\n")
diff --git a/target/hexagon/hex_common.py b/target/hexagon/hex_common.py
index 0da65d6dd6..befe3590c2 100755
--- a/target/hexagon/hex_common.py
+++ b/target/hexagon/hex_common.py
@@ -307,6 +307,16 @@ def is_idef_parser_enabled(tag):
return tag in idef_parser_enabled
+def is_hvx_insn(tag):
+ return "A_CVI" in attribdict[tag]
+
+
+def has_hvx_helper(tag):
+ return (is_hvx_insn(tag) and
+ not skip_qemu_helper(tag) and
+ not is_idef_parser_enabled(tag))
+
+
def imm_name(immlett):
return f"{immlett}iV"
--
2.34.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH 0/3] Hexagon (target/hexagon) Enable more short-circuit packets
2023-11-02 20:10 [PATCH 0/3] Hexagon (target/hexagon) Enable more short-circuit packets Taylor Simpson
` (2 preceding siblings ...)
2023-11-02 20:10 ` [PATCH 3/3] Hexagon (target/hexagon) Enable more short-circuit packets (HVX) Taylor Simpson
@ 2023-11-02 23:44 ` Anton Johansson via
2023-11-03 15:48 ` ltaylorsimpson
3 siblings, 1 reply; 7+ messages in thread
From: Anton Johansson via @ 2023-11-02 23:44 UTC (permalink / raw)
To: Taylor Simpson; +Cc: qemu-devel
Hi, Taylor!:) Always nice to see your name pop up here. The patches seem
to have been sent as attachments for whatever reason.
/ Anton
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH 2/3] Hexagon (target/hexagon) Enable more short-circuit packets (scalar core)
2023-11-03 16:22 Taylor Simpson
@ 2023-11-03 16:22 ` Taylor Simpson
0 siblings, 0 replies; 7+ messages in thread
From: Taylor Simpson @ 2023-11-03 16:22 UTC (permalink / raw)
To: qemu-devel
Cc: bcain, quic_mathbern, richard.henderson, philmd, ale, anjo,
ltaylorsimpson
Look for read-after-write instead of overlap of reads and writes
Here is an example with overalp but no read-after-write:
0x000200fc: 0x38103876 { R0 = add(R0,R1); R6 = add(R6,R7) }
BEFORE:
---- 00000000000200fc
mov_i32 loc2,$0x0
mov_i32 loc2,r0
add_i32 loc3,loc2,r1
mov_i32 loc2,loc3
mov_i32 loc4,$0x0
mov_i32 loc4,r6
add_i32 loc5,loc4,r7
mov_i32 loc4,loc5
mov_i32 r0,loc2
mov_i32 r6,loc4
AFTER:
---- 00000000000200fc
add_i32 loc2,r0,r1
mov_i32 r0,loc2
add_i32 loc3,r6,r7
mov_i32 r6,loc3
We can also short-circuit packets with .new values by reading from the
real destination instead of the temporary.
0x00020100: 0x78005ff3 { R19 = #0xff
0x00020104: 0x2002e204 if (cmp.eq(N19.new,R2)) jump:t PC+8 }
BEFORE:
---- 0000000000020100
mov_i32 pc,$0x20108
mov_i32 loc8,$0x0
mov_i32 loc8,$0xff
setcond_i32 loc10,loc8,r2,eq
mov_i32 loc6,loc10
mov_i32 r19,loc8
add_i32 pkt_cnt,pkt_cnt,$0x2
add_i32 insn_cnt,insn_cnt,$0x4
brcond_i32 loc6,$0x0,eq,$L1
goto_tb $0x0
mov_i32 pc,$0x20108
exit_tb $0x7fbb54000040
set_label $L1
goto_tb $0x1
exit_tb $0x7fbb54000041
set_label $L0
exit_tb $0x7fbb54000043
AFTER:
---- 0000000000020100
mov_i32 pc,$0x20108
mov_i32 r19,$0xff
setcond_i32 loc7,r19,r2,eq
mov_i32 loc4,loc7
add_i32 pkt_cnt,pkt_cnt,$0x2
add_i32 insn_cnt,insn_cnt,$0x4
brcond_i32 loc4,$0x0,eq,$L1
goto_tb $0x0
mov_i32 pc,$0x20108
exit_tb $0x7f9764000040
set_label $L1
goto_tb $0x1
exit_tb $0x7f9764000041
set_label $L0
exit_tb $0x7f9764000043
Signed-off-by: Taylor Simpson <ltaylorsimpson@gmail.com>
---
target/hexagon/translate.h | 13 +++++++------
target/hexagon/translate.c | 19 +++----------------
target/hexagon/gen_tcg_funcs.py | 2 +-
3 files changed, 11 insertions(+), 23 deletions(-)
diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
index b2fe3a048d..7bb19ee672 100644
--- a/target/hexagon/translate.h
+++ b/target/hexagon/translate.h
@@ -38,12 +38,10 @@ typedef struct DisasContext {
int reg_log[REG_WRITES_MAX];
int reg_log_idx;
DECLARE_BITMAP(regs_written, TOTAL_PER_THREAD_REGS);
- DECLARE_BITMAP(regs_read, TOTAL_PER_THREAD_REGS);
DECLARE_BITMAP(predicated_regs, TOTAL_PER_THREAD_REGS);
int preg_log[PRED_WRITES_MAX];
int preg_log_idx;
DECLARE_BITMAP(pregs_written, NUM_PREGS);
- DECLARE_BITMAP(pregs_read, NUM_PREGS);
uint8_t store_width[STORES_MAX];
bool s1_store_processed;
int future_vregs_idx;
@@ -68,6 +66,7 @@ typedef struct DisasContext {
bool is_tight_loop;
bool short_circuit;
bool has_hvx_helper;
+ bool read_after_write;
TCGv new_value[TOTAL_PER_THREAD_REGS];
TCGv new_pred_value[NUM_PREGS];
TCGv pred_written;
@@ -88,13 +87,14 @@ static inline void ctx_log_pred_write(DisasContext *ctx, int pnum)
static inline void ctx_log_pred_read(DisasContext *ctx, int pnum)
{
- set_bit(pnum, ctx->pregs_read);
+ if (test_bit(pnum, ctx->pregs_written)) {
+ ctx->read_after_write = true;
+ }
}
static inline void ctx_log_pred_read_new(DisasContext *ctx, int pnum)
{
g_assert(test_bit(pnum, ctx->pregs_written));
- set_bit(pnum, ctx->pregs_read);
}
static inline void ctx_log_reg_write(DisasContext *ctx, int rnum,
@@ -125,13 +125,14 @@ static inline void ctx_log_reg_write_pair(DisasContext *ctx, int rnum,
static inline void ctx_log_reg_read(DisasContext *ctx, int rnum)
{
- set_bit(rnum, ctx->regs_read);
+ if (test_bit(rnum, ctx->regs_written)) {
+ ctx->read_after_write = true;
+ }
}
static inline void ctx_log_reg_read_new(DisasContext *ctx, int rnum)
{
g_assert(test_bit(rnum, ctx->regs_written));
- set_bit(rnum, ctx->regs_read);
}
static inline void ctx_log_reg_read_pair(DisasContext *ctx, int rnum)
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index 666c061180..9dab26ee17 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -394,20 +394,8 @@ static bool need_commit(DisasContext *ctx)
}
}
- /* Check for overlap between register reads and writes */
- for (int i = 0; i < ctx->reg_log_idx; i++) {
- int rnum = ctx->reg_log[i];
- if (test_bit(rnum, ctx->regs_read)) {
- return true;
- }
- }
-
- /* Check for overlap between predicate reads and writes */
- for (int i = 0; i < ctx->preg_log_idx; i++) {
- int pnum = ctx->preg_log[i];
- if (test_bit(pnum, ctx->pregs_read)) {
- return true;
- }
+ if (ctx->read_after_write) {
+ return true;
}
/* Check for overlap between HVX reads and writes */
@@ -466,6 +454,7 @@ static void analyze_packet(DisasContext *ctx)
{
Packet *pkt = ctx->pkt;
ctx->has_hvx_helper = false;
+ ctx->read_after_write = false;
for (int i = 0; i < pkt->num_insns; i++) {
Insn *insn = &pkt->insn[i];
ctx->insn = insn;
@@ -490,11 +479,9 @@ static void gen_start_packet(DisasContext *ctx)
ctx->next_PC = next_PC;
ctx->reg_log_idx = 0;
bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS);
- bitmap_zero(ctx->regs_read, TOTAL_PER_THREAD_REGS);
bitmap_zero(ctx->predicated_regs, TOTAL_PER_THREAD_REGS);
ctx->preg_log_idx = 0;
bitmap_zero(ctx->pregs_written, NUM_PREGS);
- bitmap_zero(ctx->pregs_read, NUM_PREGS);
ctx->future_vregs_idx = 0;
ctx->tmp_vregs_idx = 0;
ctx->vreg_log_idx = 0;
diff --git a/target/hexagon/gen_tcg_funcs.py b/target/hexagon/gen_tcg_funcs.py
index f5246cee6d..3d24ae7960 100755
--- a/target/hexagon/gen_tcg_funcs.py
+++ b/target/hexagon/gen_tcg_funcs.py
@@ -198,7 +198,7 @@ def genptr_decl_new(f, tag, regtype, regid, regno):
if regid in {"t", "u", "v"}:
f.write(
f" TCGv {regtype}{regid}N = "
- f"ctx->new_pred_value[insn->regno[{regno}]];\n"
+ f"get_result_pred(ctx, insn->regno[{regno}]);\n"
)
else:
hex_common.bad_register(regtype, regid)
--
2.34.1
^ permalink raw reply related [flat|nested] 7+ messages in thread