* [PATCH 1/3] Hexagon (target/hexagon) Analyze reads before writes
2023-11-02 20:10 [PATCH 0/3] Hexagon (target/hexagon) Enable more short-circuit packets Taylor Simpson
@ 2023-11-02 20:10 ` Taylor Simpson
0 siblings, 0 replies; 5+ messages in thread
From: Taylor Simpson @ 2023-11-02 20:10 UTC (permalink / raw)
To: qemu-devel; +Cc: Taylor Simpson
We divide gen_analyze_funcs.py into 3 phases:
    1. Declare the operands
    2. Analyze the register reads
    3. Analyze the register writes
We also create special versions of ctx_log_*_read for new operands.
These check that the operand is written before the read.
This is a precursor to improving the analysis for short-circuiting
the packet semantics in a subsequent commit.
Signed-off-by: Taylor Simpson <ltaylorsimpson@gmail.com>
---
target/hexagon/translate.h | 24 ++-
target/hexagon/README | 7 +-
target/hexagon/gen_analyze_funcs.py | 221 +++++++++++-----------------
3 files changed, 111 insertions(+), 141 deletions(-)
diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
index 4dd59c6726..b2fe3a048d 100644
--- a/target/hexagon/translate.h
+++ b/target/hexagon/translate.h
@@ -75,6 +75,8 @@ typedef struct DisasContext {
TCGv dczero_addr;
} DisasContext;
+bool is_gather_store_insn(DisasContext *ctx);
+
static inline void ctx_log_pred_write(DisasContext *ctx, int pnum)
{
if (!test_bit(pnum, ctx->pregs_written)) {
@@ -89,6 +91,12 @@ static inline void ctx_log_pred_read(DisasContext *ctx, int pnum)
set_bit(pnum, ctx->pregs_read);
}
+static inline void ctx_log_pred_read_new(DisasContext *ctx, int pnum)
+{
+ g_assert(test_bit(pnum, ctx->pregs_written));
+ set_bit(pnum, ctx->pregs_read);
+}
+
static inline void ctx_log_reg_write(DisasContext *ctx, int rnum,
bool is_predicated)
{
@@ -120,6 +128,12 @@ static inline void ctx_log_reg_read(DisasContext *ctx, int rnum)
set_bit(rnum, ctx->regs_read);
}
+static inline void ctx_log_reg_read_new(DisasContext *ctx, int rnum)
+{
+ g_assert(test_bit(rnum, ctx->regs_written));
+ set_bit(rnum, ctx->regs_read);
+}
+
static inline void ctx_log_reg_read_pair(DisasContext *ctx, int rnum)
{
ctx_log_reg_read(ctx, rnum);
@@ -171,6 +185,15 @@ static inline void ctx_log_vreg_read(DisasContext *ctx, int rnum)
set_bit(rnum, ctx->vregs_read);
}
+static inline void ctx_log_vreg_read_new(DisasContext *ctx, int rnum)
+{
+ g_assert(is_gather_store_insn(ctx) ||
+ test_bit(rnum, ctx->vregs_updated) ||
+ test_bit(rnum, ctx->vregs_select) ||
+ test_bit(rnum, ctx->vregs_updated_tmp));
+ set_bit(rnum, ctx->vregs_read);
+}
+
static inline void ctx_log_vreg_read_pair(DisasContext *ctx, int rnum)
{
ctx_log_vreg_read(ctx, rnum ^ 0);
@@ -205,7 +228,6 @@ extern TCGv hex_vstore_addr[VSTORES_MAX];
extern TCGv hex_vstore_size[VSTORES_MAX];
extern TCGv hex_vstore_pending[VSTORES_MAX];
-bool is_gather_store_insn(DisasContext *ctx);
void process_store(DisasContext *ctx, int slot_num);
FIELD(PROBE_PKT_SCALAR_STORE_S0, MMU_IDX, 0, 2)
diff --git a/target/hexagon/README b/target/hexagon/README
index 69b2ffe9bb..7dd74629eb 100644
--- a/target/hexagon/README
+++ b/target/hexagon/README
@@ -183,10 +183,11 @@ when the override is present.
}
We also generate an analyze_<tag> function for each instruction. Currently,
-these functions record the writes to registers by calling ctx_log_*. During
+these functions record the reads and writes to registers by calling ctx_log_*. During
gen_start_packet, we invoke the analyze_<tag> function for each instruction in
-the packet, and we mark the implicit writes. After the analysis is performed,
-we initialize the result register for each of the predicated assignments.
+the packet, and we mark the implicit writes. The analysis determines if the packet
+semantics can be short-circuited. If not, we initialize the result register for each
+of the predicated assignments.
In addition to instruction semantics, we use a generator to create the decode
tree. This generation is also a two step process. The first step is to run
diff --git a/target/hexagon/gen_analyze_funcs.py b/target/hexagon/gen_analyze_funcs.py
index c3b521abef..40b9473c44 100755
--- a/target/hexagon/gen_analyze_funcs.py
+++ b/target/hexagon/gen_analyze_funcs.py
@@ -22,157 +22,90 @@
import string
import hex_common
-
##
## Helpers for gen_analyze_func
##
def is_predicated(tag):
return "A_CONDEXEC" in hex_common.attribdict[tag]
+def vreg_write_type(tag):
+ newv = "EXT_DFL"
+ if hex_common.is_new_result(tag):
+ newv = "EXT_NEW"
+ elif hex_common.is_tmp_result(tag):
+ newv = "EXT_TMP"
+ return newv
-def analyze_opn_old(f, tag, regtype, regid, regno):
+def declare_regn(f, tag, regtype, regid, regno):
regN = f"{regtype}{regid}N"
- predicated = "true" if is_predicated(tag) else "false"
- if regtype == "R":
- if regid in {"ss", "tt"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_reg_read_pair(ctx, {regN});\n")
- elif regid in {"dd", "ee", "xx", "yy"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_reg_write_pair(ctx, {regN}, {predicated});\n")
- elif regid in {"s", "t", "u", "v"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_reg_read(ctx, {regN});\n")
- elif regid in {"d", "e", "x", "y"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_reg_write(ctx, {regN}, {predicated});\n")
- else:
- hex_common.bad_register(regtype, regid)
- elif regtype == "P":
- if regid in {"s", "t", "u", "v"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_pred_read(ctx, {regN});\n")
- elif regid in {"d", "e", "x"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_pred_write(ctx, {regN});\n")
- else:
- hex_common.bad_register(regtype, regid)
- elif regtype == "C":
- if regid == "ss":
- f.write(
- f" const int {regN} = insn->regno[{regno}] "
- "+ HEX_REG_SA0;\n"
- )
+ if regtype == "C":
+ f.write(
+ f" const int {regN} = insn->regno[{regno}] "
+ "+ HEX_REG_SA0;\n"
+ )
+ else:
+ f.write(f" const int {regN} = insn->regno[{regno}];\n")
+
+def analyze_read(f, tag, regtype, regid, regno):
+ regN = f"{regtype}{regid}N"
+ if hex_common.is_pair(regid):
+ if regtype in {"R", "C"}:
f.write(f" ctx_log_reg_read_pair(ctx, {regN});\n")
- elif regid == "dd":
- f.write(f" const int {regN} = insn->regno[{regno}] " "+ HEX_REG_SA0;\n")
- f.write(f" ctx_log_reg_write_pair(ctx, {regN}, {predicated});\n")
- elif regid == "s":
- f.write(
- f" const int {regN} = insn->regno[{regno}] "
- "+ HEX_REG_SA0;\n"
- )
- f.write(f" ctx_log_reg_read(ctx, {regN});\n")
- elif regid == "d":
- f.write(f" const int {regN} = insn->regno[{regno}] " "+ HEX_REG_SA0;\n")
- f.write(f" ctx_log_reg_write(ctx, {regN}, {predicated});\n")
- else:
- hex_common.bad_register(regtype, regid)
- elif regtype == "M":
- if regid == "u":
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_reg_read(ctx, {regN});\n")
- else:
- hex_common.bad_register(regtype, regid)
- elif regtype == "V":
- newv = "EXT_DFL"
- if hex_common.is_new_result(tag):
- newv = "EXT_NEW"
- elif hex_common.is_tmp_result(tag):
- newv = "EXT_TMP"
- if regid in {"dd", "xx"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(
- f" ctx_log_vreg_write_pair(ctx, {regN}, {newv}, " f"{predicated});\n"
- )
- elif regid in {"uu", "vv"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
+ elif regtype == "V":
f.write(f" ctx_log_vreg_read_pair(ctx, {regN});\n")
- elif regid in {"s", "u", "v", "w"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_vreg_read(ctx, {regN});\n")
- elif regid in {"d", "x", "y"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_vreg_write(ctx, {regN}, {newv}, " f"{predicated});\n")
else:
hex_common.bad_register(regtype, regid)
- elif regtype == "Q":
- if regid in {"d", "e", "x"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_qreg_write(ctx, {regN});\n")
- elif regid in {"s", "t", "u", "v"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_qreg_read(ctx, {regN});\n")
- else:
- hex_common.bad_register(regtype, regid)
- elif regtype == "G":
- if regid in {"dd"}:
- f.write(f"// const int {regN} = insn->regno[{regno}];\n")
- elif regid in {"d"}:
- f.write(f"// const int {regN} = insn->regno[{regno}];\n")
- elif regid in {"ss"}:
- f.write(f"// const int {regN} = insn->regno[{regno}];\n")
- elif regid in {"s"}:
- f.write(f"// const int {regN} = insn->regno[{regno}];\n")
- else:
- hex_common.bad_register(regtype, regid)
- elif regtype == "S":
- if regid in {"dd"}:
- f.write(f"// const int {regN} = insn->regno[{regno}];\n")
- elif regid in {"d"}:
- f.write(f"// const int {regN} = insn->regno[{regno}];\n")
- elif regid in {"ss"}:
- f.write(f"// const int {regN} = insn->regno[{regno}];\n")
- elif regid in {"s"}:
- f.write(f"// const int {regN} = insn->regno[{regno}];\n")
+ elif hex_common.is_single(regid):
+ if hex_common.is_old_val(regtype, regid, tag):
+ if regtype in {"R", "C", "M"}:
+ f.write(f" ctx_log_reg_read(ctx, {regN});\n")
+ elif regtype == "P":
+ f.write(f" ctx_log_pred_read(ctx, {regN});\n")
+ elif regtype in {"V", "O"}:
+ f.write(f" ctx_log_vreg_read(ctx, {regN});\n")
+ elif regtype == "Q":
+ f.write(f" ctx_log_qreg_read(ctx, {regN});\n")
+ else:
+ hex_common.bad_register(regtype, regid)
+ elif hex_common.is_new_val(regtype, regid, tag):
+ if regtype == "N":
+ f.write(f" ctx_log_reg_read_new(ctx, {regN});\n")
+ elif regtype == "P":
+ f.write(f" ctx_log_pred_read_new(ctx, {regN});\n")
+ elif regtype == "O":
+ f.write(f" ctx_log_vreg_read_new(ctx, {regN});\n")
+ else:
+ hex_common.bad_register(regtype, regid)
else:
hex_common.bad_register(regtype, regid)
else:
hex_common.bad_register(regtype, regid)
-
-def analyze_opn_new(f, tag, regtype, regid, regno):
+def analyze_write(f, tag, regtype, regid, regno):
regN = f"{regtype}{regid}N"
- if regtype == "N":
- if regid in {"s", "t"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_reg_read(ctx, {regN});\n")
- else:
- hex_common.bad_register(regtype, regid)
- elif regtype == "P":
- if regid in {"t", "u", "v"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_pred_read(ctx, {regN});\n")
- else:
- hex_common.bad_register(regtype, regid)
- elif regtype == "O":
- if regid == "s":
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_vreg_read(ctx, {regN});\n")
+ predicated = "true" if is_predicated(tag) else "false"
+ if hex_common.is_pair(regid):
+ if regtype in {"R", "C"}:
+ f.write(f" ctx_log_reg_write_pair(ctx, {regN}, {predicated});\n")
+ elif regtype == "V":
+ f.write(
+ f" ctx_log_vreg_write_pair(ctx, {regN}, "
+ f"{vreg_write_type(tag)}, {predicated});\n"
+ )
else:
hex_common.bad_register(regtype, regid)
- else:
- hex_common.bad_register(regtype, regid)
-
-
-def analyze_opn(f, tag, regtype, regid, i):
- if hex_common.is_pair(regid):
- analyze_opn_old(f, tag, regtype, regid, i)
elif hex_common.is_single(regid):
- if hex_common.is_old_val(regtype, regid, tag):
- analyze_opn_old(f, tag, regtype, regid, i)
- elif hex_common.is_new_val(regtype, regid, tag):
- analyze_opn_new(f, tag, regtype, regid, i)
+ if regtype in {"R", "C"}:
+ f.write(f" ctx_log_reg_write(ctx, {regN}, {predicated});\n")
+ elif regtype == "P":
+ f.write(f" ctx_log_pred_write(ctx, {regN});\n")
+ elif regtype == "V":
+ f.write(
+ f" ctx_log_vreg_write(ctx, {regN}, "
+ f"{vreg_write_type(tag)}, {predicated});\n"
+ )
+ elif regtype == "Q":
+ f.write(f" ctx_log_qreg_write(ctx, {regN});\n")
else:
hex_common.bad_register(regtype, regid)
else:
@@ -187,11 +120,11 @@ def analyze_opn(f, tag, regtype, regid, i):
## {
## Insn *insn G_GNUC_UNUSED = ctx->insn;
## const int RdN = insn->regno[0];
-## ctx_log_reg_write(ctx, RdN, false);
## const int RsN = insn->regno[1];
-## ctx_log_reg_read(ctx, RsN);
## const int RtN = insn->regno[2];
+## ctx_log_reg_read(ctx, RsN);
## ctx_log_reg_read(ctx, RtN);
+## ctx_log_reg_write(ctx, RdN, false);
## }
##
def gen_analyze_func(f, tag, regs, imms):
@@ -200,10 +133,24 @@ def gen_analyze_func(f, tag, regs, imms):
f.write(" Insn *insn G_GNUC_UNUSED = ctx->insn;\n")
+ ## Declare the operands
+ i = 0
+ for regtype, regid in regs:
+ declare_regn(f, tag, regtype, regid, i)
+ i += 1
+
+ ## Analyze the register reads
+ i = 0
+ for regtype, regid in regs:
+ if hex_common.is_read(regid):
+ analyze_read(f, tag, regtype, regid, i)
+ i += 1
+
+ ## Analyze the register writes
i = 0
- ## Analyze all the registers
for regtype, regid in regs:
- analyze_opn(f, tag, regtype, regid, i)
+ if hex_common.is_written(regid):
+ analyze_write(f, tag, regtype, regid, i)
i += 1
has_generated_helper = not hex_common.skip_qemu_helper(
@@ -240,13 +187,13 @@ def main():
tagimms = hex_common.get_tagimms()
with open(sys.argv[-1], "w") as f:
- f.write("#ifndef HEXAGON_TCG_FUNCS_H\n")
- f.write("#define HEXAGON_TCG_FUNCS_H\n\n")
+ f.write("#ifndef HEXAGON_ANALYZE_FUNCS_H\n")
+ f.write("#define HEXAGON_ANALYZE_FUNCS_H\n\n")
for tag in hex_common.tags:
gen_analyze_func(f, tag, tagregs[tag], tagimms[tag])
- f.write("#endif /* HEXAGON_TCG_FUNCS_H */\n")
+ f.write("#endif /* HEXAGON_ANALYZE_FUNCS_H */\n")
if __name__ == "__main__":
--
2.34.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 0/3] Hexagon (target/hexagon) Enable more short-circuit packets
@ 2023-11-03 16:22 Taylor Simpson
2023-11-03 16:22 ` [PATCH 1/3] Hexagon (target/hexagon) Analyze reads before writes Taylor Simpson
` (2 more replies)
0 siblings, 3 replies; 5+ messages in thread
From: Taylor Simpson @ 2023-11-03 16:22 UTC (permalink / raw)
To: qemu-devel
Cc: bcain, quic_mathbern, richard.henderson, philmd, ale, anjo,
ltaylorsimpson
This patch series improves the set of packets that can short-circuit
the commit packet logic and write the results directly during the
execution of each instruction in the packet.
The key observation is that checking for overlap between register reads
and writes is different from read-after-write. For example, this packet
{ R0 = add(R0,R1); R6 = add(R6,R7) }
has an overlap between the reads and writes without doing a read after a
write. Therefore, it is safe to write directly into the destination
registers during instruction execution.
Another example is a .new register read. These can read from either the
destination register or a temporary location.
HVX instructions with generated helpers require special handling.
The semantics of the helpers are pass-by-reference, so we still need the
overlap check for these.
Taylor Simpson (3):
Hexagon (target/hexagon) Analyze reads before writes
Hexagon (target/hexagon) Enable more short-circuit packets (scalar
core)
Hexagon (target/hexagon) Enable more short-circuit packets (HVX)
target/hexagon/translate.h | 117 ++++++++++---
target/hexagon/translate.c | 75 +--------
target/hexagon/README | 7 +-
target/hexagon/gen_analyze_funcs.py | 252 ++++++++++++----------------
target/hexagon/gen_tcg_funcs.py | 2 +-
target/hexagon/hex_common.py | 10 ++
6 files changed, 227 insertions(+), 236 deletions(-)
--
2.34.1
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 1/3] Hexagon (target/hexagon) Analyze reads before writes
2023-11-03 16:22 [PATCH 0/3] Hexagon (target/hexagon) Enable more short-circuit packets Taylor Simpson
@ 2023-11-03 16:22 ` Taylor Simpson
2023-11-03 16:22 ` [PATCH 2/3] Hexagon (target/hexagon) Enable more short-circuit packets (scalar core) Taylor Simpson
2023-11-03 16:22 ` [PATCH 3/3] Hexagon (target/hexagon) Enable more short-circuit packets (HVX) Taylor Simpson
2 siblings, 0 replies; 5+ messages in thread
From: Taylor Simpson @ 2023-11-03 16:22 UTC (permalink / raw)
To: qemu-devel
Cc: bcain, quic_mathbern, richard.henderson, philmd, ale, anjo,
ltaylorsimpson
We divide gen_analyze_funcs.py into 3 phases:
    1. Declare the operands
    2. Analyze the register reads
    3. Analyze the register writes
We also create special versions of ctx_log_*_read for new operands.
These check that the operand is written before the read.
This is a precursor to improving the analysis for short-circuiting
the packet semantics in a subsequent commit.
Signed-off-by: Taylor Simpson <ltaylorsimpson@gmail.com>
---
target/hexagon/translate.h | 24 ++-
target/hexagon/README | 7 +-
target/hexagon/gen_analyze_funcs.py | 221 +++++++++++-----------------
3 files changed, 111 insertions(+), 141 deletions(-)
diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
index 4dd59c6726..b2fe3a048d 100644
--- a/target/hexagon/translate.h
+++ b/target/hexagon/translate.h
@@ -75,6 +75,8 @@ typedef struct DisasContext {
TCGv dczero_addr;
} DisasContext;
+bool is_gather_store_insn(DisasContext *ctx);
+
static inline void ctx_log_pred_write(DisasContext *ctx, int pnum)
{
if (!test_bit(pnum, ctx->pregs_written)) {
@@ -89,6 +91,12 @@ static inline void ctx_log_pred_read(DisasContext *ctx, int pnum)
set_bit(pnum, ctx->pregs_read);
}
+static inline void ctx_log_pred_read_new(DisasContext *ctx, int pnum)
+{
+ g_assert(test_bit(pnum, ctx->pregs_written));
+ set_bit(pnum, ctx->pregs_read);
+}
+
static inline void ctx_log_reg_write(DisasContext *ctx, int rnum,
bool is_predicated)
{
@@ -120,6 +128,12 @@ static inline void ctx_log_reg_read(DisasContext *ctx, int rnum)
set_bit(rnum, ctx->regs_read);
}
+static inline void ctx_log_reg_read_new(DisasContext *ctx, int rnum)
+{
+ g_assert(test_bit(rnum, ctx->regs_written));
+ set_bit(rnum, ctx->regs_read);
+}
+
static inline void ctx_log_reg_read_pair(DisasContext *ctx, int rnum)
{
ctx_log_reg_read(ctx, rnum);
@@ -171,6 +185,15 @@ static inline void ctx_log_vreg_read(DisasContext *ctx, int rnum)
set_bit(rnum, ctx->vregs_read);
}
+static inline void ctx_log_vreg_read_new(DisasContext *ctx, int rnum)
+{
+ g_assert(is_gather_store_insn(ctx) ||
+ test_bit(rnum, ctx->vregs_updated) ||
+ test_bit(rnum, ctx->vregs_select) ||
+ test_bit(rnum, ctx->vregs_updated_tmp));
+ set_bit(rnum, ctx->vregs_read);
+}
+
static inline void ctx_log_vreg_read_pair(DisasContext *ctx, int rnum)
{
ctx_log_vreg_read(ctx, rnum ^ 0);
@@ -205,7 +228,6 @@ extern TCGv hex_vstore_addr[VSTORES_MAX];
extern TCGv hex_vstore_size[VSTORES_MAX];
extern TCGv hex_vstore_pending[VSTORES_MAX];
-bool is_gather_store_insn(DisasContext *ctx);
void process_store(DisasContext *ctx, int slot_num);
FIELD(PROBE_PKT_SCALAR_STORE_S0, MMU_IDX, 0, 2)
diff --git a/target/hexagon/README b/target/hexagon/README
index 69b2ffe9bb..7dd74629eb 100644
--- a/target/hexagon/README
+++ b/target/hexagon/README
@@ -183,10 +183,11 @@ when the override is present.
}
We also generate an analyze_<tag> function for each instruction. Currently,
-these functions record the writes to registers by calling ctx_log_*. During
+these functions record the reads and writes to registers by calling ctx_log_*. During
gen_start_packet, we invoke the analyze_<tag> function for each instruction in
-the packet, and we mark the implicit writes. After the analysis is performed,
-we initialize the result register for each of the predicated assignments.
+the packet, and we mark the implicit writes. The analysis determines if the packet
+semantics can be short-circuited. If not, we initialize the result register for each
+of the predicated assignments.
In addition to instruction semantics, we use a generator to create the decode
tree. This generation is also a two step process. The first step is to run
diff --git a/target/hexagon/gen_analyze_funcs.py b/target/hexagon/gen_analyze_funcs.py
index c3b521abef..40b9473c44 100755
--- a/target/hexagon/gen_analyze_funcs.py
+++ b/target/hexagon/gen_analyze_funcs.py
@@ -22,157 +22,90 @@
import string
import hex_common
-
##
## Helpers for gen_analyze_func
##
def is_predicated(tag):
return "A_CONDEXEC" in hex_common.attribdict[tag]
+def vreg_write_type(tag):
+ newv = "EXT_DFL"
+ if hex_common.is_new_result(tag):
+ newv = "EXT_NEW"
+ elif hex_common.is_tmp_result(tag):
+ newv = "EXT_TMP"
+ return newv
-def analyze_opn_old(f, tag, regtype, regid, regno):
+def declare_regn(f, tag, regtype, regid, regno):
regN = f"{regtype}{regid}N"
- predicated = "true" if is_predicated(tag) else "false"
- if regtype == "R":
- if regid in {"ss", "tt"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_reg_read_pair(ctx, {regN});\n")
- elif regid in {"dd", "ee", "xx", "yy"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_reg_write_pair(ctx, {regN}, {predicated});\n")
- elif regid in {"s", "t", "u", "v"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_reg_read(ctx, {regN});\n")
- elif regid in {"d", "e", "x", "y"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_reg_write(ctx, {regN}, {predicated});\n")
- else:
- hex_common.bad_register(regtype, regid)
- elif regtype == "P":
- if regid in {"s", "t", "u", "v"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_pred_read(ctx, {regN});\n")
- elif regid in {"d", "e", "x"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_pred_write(ctx, {regN});\n")
- else:
- hex_common.bad_register(regtype, regid)
- elif regtype == "C":
- if regid == "ss":
- f.write(
- f" const int {regN} = insn->regno[{regno}] "
- "+ HEX_REG_SA0;\n"
- )
+ if regtype == "C":
+ f.write(
+ f" const int {regN} = insn->regno[{regno}] "
+ "+ HEX_REG_SA0;\n"
+ )
+ else:
+ f.write(f" const int {regN} = insn->regno[{regno}];\n")
+
+def analyze_read(f, tag, regtype, regid, regno):
+ regN = f"{regtype}{regid}N"
+ if hex_common.is_pair(regid):
+ if regtype in {"R", "C"}:
f.write(f" ctx_log_reg_read_pair(ctx, {regN});\n")
- elif regid == "dd":
- f.write(f" const int {regN} = insn->regno[{regno}] " "+ HEX_REG_SA0;\n")
- f.write(f" ctx_log_reg_write_pair(ctx, {regN}, {predicated});\n")
- elif regid == "s":
- f.write(
- f" const int {regN} = insn->regno[{regno}] "
- "+ HEX_REG_SA0;\n"
- )
- f.write(f" ctx_log_reg_read(ctx, {regN});\n")
- elif regid == "d":
- f.write(f" const int {regN} = insn->regno[{regno}] " "+ HEX_REG_SA0;\n")
- f.write(f" ctx_log_reg_write(ctx, {regN}, {predicated});\n")
- else:
- hex_common.bad_register(regtype, regid)
- elif regtype == "M":
- if regid == "u":
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_reg_read(ctx, {regN});\n")
- else:
- hex_common.bad_register(regtype, regid)
- elif regtype == "V":
- newv = "EXT_DFL"
- if hex_common.is_new_result(tag):
- newv = "EXT_NEW"
- elif hex_common.is_tmp_result(tag):
- newv = "EXT_TMP"
- if regid in {"dd", "xx"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(
- f" ctx_log_vreg_write_pair(ctx, {regN}, {newv}, " f"{predicated});\n"
- )
- elif regid in {"uu", "vv"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
+ elif regtype == "V":
f.write(f" ctx_log_vreg_read_pair(ctx, {regN});\n")
- elif regid in {"s", "u", "v", "w"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_vreg_read(ctx, {regN});\n")
- elif regid in {"d", "x", "y"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_vreg_write(ctx, {regN}, {newv}, " f"{predicated});\n")
else:
hex_common.bad_register(regtype, regid)
- elif regtype == "Q":
- if regid in {"d", "e", "x"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_qreg_write(ctx, {regN});\n")
- elif regid in {"s", "t", "u", "v"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_qreg_read(ctx, {regN});\n")
- else:
- hex_common.bad_register(regtype, regid)
- elif regtype == "G":
- if regid in {"dd"}:
- f.write(f"// const int {regN} = insn->regno[{regno}];\n")
- elif regid in {"d"}:
- f.write(f"// const int {regN} = insn->regno[{regno}];\n")
- elif regid in {"ss"}:
- f.write(f"// const int {regN} = insn->regno[{regno}];\n")
- elif regid in {"s"}:
- f.write(f"// const int {regN} = insn->regno[{regno}];\n")
- else:
- hex_common.bad_register(regtype, regid)
- elif regtype == "S":
- if regid in {"dd"}:
- f.write(f"// const int {regN} = insn->regno[{regno}];\n")
- elif regid in {"d"}:
- f.write(f"// const int {regN} = insn->regno[{regno}];\n")
- elif regid in {"ss"}:
- f.write(f"// const int {regN} = insn->regno[{regno}];\n")
- elif regid in {"s"}:
- f.write(f"// const int {regN} = insn->regno[{regno}];\n")
+ elif hex_common.is_single(regid):
+ if hex_common.is_old_val(regtype, regid, tag):
+ if regtype in {"R", "C", "M"}:
+ f.write(f" ctx_log_reg_read(ctx, {regN});\n")
+ elif regtype == "P":
+ f.write(f" ctx_log_pred_read(ctx, {regN});\n")
+ elif regtype in {"V", "O"}:
+ f.write(f" ctx_log_vreg_read(ctx, {regN});\n")
+ elif regtype == "Q":
+ f.write(f" ctx_log_qreg_read(ctx, {regN});\n")
+ else:
+ hex_common.bad_register(regtype, regid)
+ elif hex_common.is_new_val(regtype, regid, tag):
+ if regtype == "N":
+ f.write(f" ctx_log_reg_read_new(ctx, {regN});\n")
+ elif regtype == "P":
+ f.write(f" ctx_log_pred_read_new(ctx, {regN});\n")
+ elif regtype == "O":
+ f.write(f" ctx_log_vreg_read_new(ctx, {regN});\n")
+ else:
+ hex_common.bad_register(regtype, regid)
else:
hex_common.bad_register(regtype, regid)
else:
hex_common.bad_register(regtype, regid)
-
-def analyze_opn_new(f, tag, regtype, regid, regno):
+def analyze_write(f, tag, regtype, regid, regno):
regN = f"{regtype}{regid}N"
- if regtype == "N":
- if regid in {"s", "t"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_reg_read(ctx, {regN});\n")
- else:
- hex_common.bad_register(regtype, regid)
- elif regtype == "P":
- if regid in {"t", "u", "v"}:
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_pred_read(ctx, {regN});\n")
- else:
- hex_common.bad_register(regtype, regid)
- elif regtype == "O":
- if regid == "s":
- f.write(f" const int {regN} = insn->regno[{regno}];\n")
- f.write(f" ctx_log_vreg_read(ctx, {regN});\n")
+ predicated = "true" if is_predicated(tag) else "false"
+ if hex_common.is_pair(regid):
+ if regtype in {"R", "C"}:
+ f.write(f" ctx_log_reg_write_pair(ctx, {regN}, {predicated});\n")
+ elif regtype == "V":
+ f.write(
+ f" ctx_log_vreg_write_pair(ctx, {regN}, "
+ f"{vreg_write_type(tag)}, {predicated});\n"
+ )
else:
hex_common.bad_register(regtype, regid)
- else:
- hex_common.bad_register(regtype, regid)
-
-
-def analyze_opn(f, tag, regtype, regid, i):
- if hex_common.is_pair(regid):
- analyze_opn_old(f, tag, regtype, regid, i)
elif hex_common.is_single(regid):
- if hex_common.is_old_val(regtype, regid, tag):
- analyze_opn_old(f, tag, regtype, regid, i)
- elif hex_common.is_new_val(regtype, regid, tag):
- analyze_opn_new(f, tag, regtype, regid, i)
+ if regtype in {"R", "C"}:
+ f.write(f" ctx_log_reg_write(ctx, {regN}, {predicated});\n")
+ elif regtype == "P":
+ f.write(f" ctx_log_pred_write(ctx, {regN});\n")
+ elif regtype == "V":
+ f.write(
+ f" ctx_log_vreg_write(ctx, {regN}, "
+ f"{vreg_write_type(tag)}, {predicated});\n"
+ )
+ elif regtype == "Q":
+ f.write(f" ctx_log_qreg_write(ctx, {regN});\n")
else:
hex_common.bad_register(regtype, regid)
else:
@@ -187,11 +120,11 @@ def analyze_opn(f, tag, regtype, regid, i):
## {
## Insn *insn G_GNUC_UNUSED = ctx->insn;
## const int RdN = insn->regno[0];
-## ctx_log_reg_write(ctx, RdN, false);
## const int RsN = insn->regno[1];
-## ctx_log_reg_read(ctx, RsN);
## const int RtN = insn->regno[2];
+## ctx_log_reg_read(ctx, RsN);
## ctx_log_reg_read(ctx, RtN);
+## ctx_log_reg_write(ctx, RdN, false);
## }
##
def gen_analyze_func(f, tag, regs, imms):
@@ -200,10 +133,24 @@ def gen_analyze_func(f, tag, regs, imms):
f.write(" Insn *insn G_GNUC_UNUSED = ctx->insn;\n")
+ ## Declare the operands
+ i = 0
+ for regtype, regid in regs:
+ declare_regn(f, tag, regtype, regid, i)
+ i += 1
+
+ ## Analyze the register reads
+ i = 0
+ for regtype, regid in regs:
+ if hex_common.is_read(regid):
+ analyze_read(f, tag, regtype, regid, i)
+ i += 1
+
+ ## Analyze the register writes
i = 0
- ## Analyze all the registers
for regtype, regid in regs:
- analyze_opn(f, tag, regtype, regid, i)
+ if hex_common.is_written(regid):
+ analyze_write(f, tag, regtype, regid, i)
i += 1
has_generated_helper = not hex_common.skip_qemu_helper(
@@ -240,13 +187,13 @@ def main():
tagimms = hex_common.get_tagimms()
with open(sys.argv[-1], "w") as f:
- f.write("#ifndef HEXAGON_TCG_FUNCS_H\n")
- f.write("#define HEXAGON_TCG_FUNCS_H\n\n")
+ f.write("#ifndef HEXAGON_ANALYZE_FUNCS_H\n")
+ f.write("#define HEXAGON_ANALYZE_FUNCS_H\n\n")
for tag in hex_common.tags:
gen_analyze_func(f, tag, tagregs[tag], tagimms[tag])
- f.write("#endif /* HEXAGON_TCG_FUNCS_H */\n")
+ f.write("#endif /* HEXAGON_ANALYZE_FUNCS_H */\n")
if __name__ == "__main__":
--
2.34.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 2/3] Hexagon (target/hexagon) Enable more short-circuit packets (scalar core)
2023-11-03 16:22 [PATCH 0/3] Hexagon (target/hexagon) Enable more short-circuit packets Taylor Simpson
2023-11-03 16:22 ` [PATCH 1/3] Hexagon (target/hexagon) Analyze reads before writes Taylor Simpson
@ 2023-11-03 16:22 ` Taylor Simpson
2023-11-03 16:22 ` [PATCH 3/3] Hexagon (target/hexagon) Enable more short-circuit packets (HVX) Taylor Simpson
2 siblings, 0 replies; 5+ messages in thread
From: Taylor Simpson @ 2023-11-03 16:22 UTC (permalink / raw)
To: qemu-devel
Cc: bcain, quic_mathbern, richard.henderson, philmd, ale, anjo,
ltaylorsimpson
Look for read-after-write instead of overlap of reads and writes
Here is an example with overlap but no read-after-write:
0x000200fc: 0x38103876 { R0 = add(R0,R1); R6 = add(R6,R7) }
BEFORE:
---- 00000000000200fc
mov_i32 loc2,$0x0
mov_i32 loc2,r0
add_i32 loc3,loc2,r1
mov_i32 loc2,loc3
mov_i32 loc4,$0x0
mov_i32 loc4,r6
add_i32 loc5,loc4,r7
mov_i32 loc4,loc5
mov_i32 r0,loc2
mov_i32 r6,loc4
AFTER:
---- 00000000000200fc
add_i32 loc2,r0,r1
mov_i32 r0,loc2
add_i32 loc3,r6,r7
mov_i32 r6,loc3
We can also short-circuit packets with .new values by reading from the
real destination instead of the temporary.
0x00020100: 0x78005ff3 { R19 = #0xff
0x00020104: 0x2002e204 if (cmp.eq(N19.new,R2)) jump:t PC+8 }
BEFORE:
---- 0000000000020100
mov_i32 pc,$0x20108
mov_i32 loc8,$0x0
mov_i32 loc8,$0xff
setcond_i32 loc10,loc8,r2,eq
mov_i32 loc6,loc10
mov_i32 r19,loc8
add_i32 pkt_cnt,pkt_cnt,$0x2
add_i32 insn_cnt,insn_cnt,$0x4
brcond_i32 loc6,$0x0,eq,$L1
goto_tb $0x0
mov_i32 pc,$0x20108
exit_tb $0x7fbb54000040
set_label $L1
goto_tb $0x1
exit_tb $0x7fbb54000041
set_label $L0
exit_tb $0x7fbb54000043
AFTER:
---- 0000000000020100
mov_i32 pc,$0x20108
mov_i32 r19,$0xff
setcond_i32 loc7,r19,r2,eq
mov_i32 loc4,loc7
add_i32 pkt_cnt,pkt_cnt,$0x2
add_i32 insn_cnt,insn_cnt,$0x4
brcond_i32 loc4,$0x0,eq,$L1
goto_tb $0x0
mov_i32 pc,$0x20108
exit_tb $0x7f9764000040
set_label $L1
goto_tb $0x1
exit_tb $0x7f9764000041
set_label $L0
exit_tb $0x7f9764000043
Signed-off-by: Taylor Simpson <ltaylorsimpson@gmail.com>
---
target/hexagon/translate.h | 13 +++++++------
target/hexagon/translate.c | 19 +++----------------
target/hexagon/gen_tcg_funcs.py | 2 +-
3 files changed, 11 insertions(+), 23 deletions(-)
diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
index b2fe3a048d..7bb19ee672 100644
--- a/target/hexagon/translate.h
+++ b/target/hexagon/translate.h
@@ -38,12 +38,10 @@ typedef struct DisasContext {
int reg_log[REG_WRITES_MAX];
int reg_log_idx;
DECLARE_BITMAP(regs_written, TOTAL_PER_THREAD_REGS);
- DECLARE_BITMAP(regs_read, TOTAL_PER_THREAD_REGS);
DECLARE_BITMAP(predicated_regs, TOTAL_PER_THREAD_REGS);
int preg_log[PRED_WRITES_MAX];
int preg_log_idx;
DECLARE_BITMAP(pregs_written, NUM_PREGS);
- DECLARE_BITMAP(pregs_read, NUM_PREGS);
uint8_t store_width[STORES_MAX];
bool s1_store_processed;
int future_vregs_idx;
@@ -68,6 +66,7 @@ typedef struct DisasContext {
bool is_tight_loop;
bool short_circuit;
bool has_hvx_helper;
+ bool read_after_write;
TCGv new_value[TOTAL_PER_THREAD_REGS];
TCGv new_pred_value[NUM_PREGS];
TCGv pred_written;
@@ -88,13 +87,14 @@ static inline void ctx_log_pred_write(DisasContext *ctx, int pnum)
static inline void ctx_log_pred_read(DisasContext *ctx, int pnum)
{
- set_bit(pnum, ctx->pregs_read);
+ if (test_bit(pnum, ctx->pregs_written)) {
+ ctx->read_after_write = true;
+ }
}
static inline void ctx_log_pred_read_new(DisasContext *ctx, int pnum)
{
g_assert(test_bit(pnum, ctx->pregs_written));
- set_bit(pnum, ctx->pregs_read);
}
static inline void ctx_log_reg_write(DisasContext *ctx, int rnum,
@@ -125,13 +125,14 @@ static inline void ctx_log_reg_write_pair(DisasContext *ctx, int rnum,
static inline void ctx_log_reg_read(DisasContext *ctx, int rnum)
{
- set_bit(rnum, ctx->regs_read);
+ if (test_bit(rnum, ctx->regs_written)) {
+ ctx->read_after_write = true;
+ }
}
static inline void ctx_log_reg_read_new(DisasContext *ctx, int rnum)
{
g_assert(test_bit(rnum, ctx->regs_written));
- set_bit(rnum, ctx->regs_read);
}
static inline void ctx_log_reg_read_pair(DisasContext *ctx, int rnum)
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index 666c061180..9dab26ee17 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -394,20 +394,8 @@ static bool need_commit(DisasContext *ctx)
}
}
- /* Check for overlap between register reads and writes */
- for (int i = 0; i < ctx->reg_log_idx; i++) {
- int rnum = ctx->reg_log[i];
- if (test_bit(rnum, ctx->regs_read)) {
- return true;
- }
- }
-
- /* Check for overlap between predicate reads and writes */
- for (int i = 0; i < ctx->preg_log_idx; i++) {
- int pnum = ctx->preg_log[i];
- if (test_bit(pnum, ctx->pregs_read)) {
- return true;
- }
+ if (ctx->read_after_write) {
+ return true;
}
/* Check for overlap between HVX reads and writes */
@@ -466,6 +454,7 @@ static void analyze_packet(DisasContext *ctx)
{
Packet *pkt = ctx->pkt;
ctx->has_hvx_helper = false;
+ ctx->read_after_write = false;
for (int i = 0; i < pkt->num_insns; i++) {
Insn *insn = &pkt->insn[i];
ctx->insn = insn;
@@ -490,11 +479,9 @@ static void gen_start_packet(DisasContext *ctx)
ctx->next_PC = next_PC;
ctx->reg_log_idx = 0;
bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS);
- bitmap_zero(ctx->regs_read, TOTAL_PER_THREAD_REGS);
bitmap_zero(ctx->predicated_regs, TOTAL_PER_THREAD_REGS);
ctx->preg_log_idx = 0;
bitmap_zero(ctx->pregs_written, NUM_PREGS);
- bitmap_zero(ctx->pregs_read, NUM_PREGS);
ctx->future_vregs_idx = 0;
ctx->tmp_vregs_idx = 0;
ctx->vreg_log_idx = 0;
diff --git a/target/hexagon/gen_tcg_funcs.py b/target/hexagon/gen_tcg_funcs.py
index f5246cee6d..3d24ae7960 100755
--- a/target/hexagon/gen_tcg_funcs.py
+++ b/target/hexagon/gen_tcg_funcs.py
@@ -198,7 +198,7 @@ def genptr_decl_new(f, tag, regtype, regid, regno):
if regid in {"t", "u", "v"}:
f.write(
f" TCGv {regtype}{regid}N = "
- f"ctx->new_pred_value[insn->regno[{regno}]];\n"
+ f"get_result_pred(ctx, insn->regno[{regno}]);\n"
)
else:
hex_common.bad_register(regtype, regid)
--
2.34.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 3/3] Hexagon (target/hexagon) Enable more short-circuit packets (HVX)
2023-11-03 16:22 [PATCH 0/3] Hexagon (target/hexagon) Enable more short-circuit packets Taylor Simpson
2023-11-03 16:22 ` [PATCH 1/3] Hexagon (target/hexagon) Analyze reads before writes Taylor Simpson
2023-11-03 16:22 ` [PATCH 2/3] Hexagon (target/hexagon) Enable more short-circuit packets (scalar core) Taylor Simpson
@ 2023-11-03 16:22 ` Taylor Simpson
2 siblings, 0 replies; 5+ messages in thread
From: Taylor Simpson @ 2023-11-03 16:22 UTC (permalink / raw)
To: qemu-devel
Cc: bcain, quic_mathbern, richard.henderson, philmd, ale, anjo,
ltaylorsimpson
Look for read-after-write instead of overlap of reads and writes.
HVX instructions with helpers have pass-by-reference semantics, so
we check for overlaps of reads and writes within the same instruction.
Signed-off-by: Taylor Simpson <ltaylorsimpson@gmail.com>
---
target/hexagon/translate.h | 88 +++++++++++++++++++++++------
target/hexagon/translate.c | 58 ++-----------------
target/hexagon/gen_analyze_funcs.py | 51 +++++++++++------
target/hexagon/hex_common.py | 10 ++++
4 files changed, 120 insertions(+), 87 deletions(-)
diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
index 7bb19ee672..7f47db71e8 100644
--- a/target/hexagon/translate.h
+++ b/target/hexagon/translate.h
@@ -50,23 +50,27 @@ typedef struct DisasContext {
int tmp_vregs_num[VECTOR_TEMPS_MAX];
int vreg_log[NUM_VREGS];
int vreg_log_idx;
+ DECLARE_BITMAP(vregs_written, NUM_VREGS);
+ DECLARE_BITMAP(insn_vregs_written, NUM_VREGS);
DECLARE_BITMAP(vregs_updated_tmp, NUM_VREGS);
DECLARE_BITMAP(vregs_updated, NUM_VREGS);
DECLARE_BITMAP(vregs_select, NUM_VREGS);
DECLARE_BITMAP(predicated_future_vregs, NUM_VREGS);
DECLARE_BITMAP(predicated_tmp_vregs, NUM_VREGS);
- DECLARE_BITMAP(vregs_read, NUM_VREGS);
+ DECLARE_BITMAP(insn_vregs_read, NUM_VREGS);
int qreg_log[NUM_QREGS];
int qreg_log_idx;
- DECLARE_BITMAP(qregs_read, NUM_QREGS);
+ DECLARE_BITMAP(qregs_written, NUM_QREGS);
+ DECLARE_BITMAP(insn_qregs_written, NUM_QREGS);
+ DECLARE_BITMAP(insn_qregs_read, NUM_QREGS);
bool pre_commit;
bool need_commit;
TCGCond branch_cond;
target_ulong branch_dest;
bool is_tight_loop;
bool short_circuit;
- bool has_hvx_helper;
bool read_after_write;
+ bool has_hvx_overlap;
TCGv new_value[TOTAL_PER_THREAD_REGS];
TCGv new_pred_value[NUM_PREGS];
TCGv pred_written;
@@ -146,10 +150,25 @@ intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum,
intptr_t ctx_tmp_vreg_off(DisasContext *ctx, int regnum,
int num, bool alloc_ok);
+static inline void ctx_start_hvx_insn(DisasContext *ctx)
+{
+ bitmap_zero(ctx->insn_vregs_written, NUM_VREGS);
+ bitmap_zero(ctx->insn_vregs_read, NUM_VREGS);
+ bitmap_zero(ctx->insn_qregs_written, NUM_QREGS);
+ bitmap_zero(ctx->insn_qregs_read, NUM_QREGS);
+}
+
static inline void ctx_log_vreg_write(DisasContext *ctx,
int rnum, VRegWriteType type,
- bool is_predicated)
+ bool is_predicated, bool has_helper)
{
+ if (has_helper) {
+ set_bit(rnum, ctx->insn_vregs_written);
+ if (test_bit(rnum, ctx->insn_vregs_read)) {
+ ctx->has_hvx_overlap = true;
+ }
+ }
+ set_bit(rnum, ctx->vregs_written);
if (type != EXT_TMP) {
if (!test_bit(rnum, ctx->vregs_updated)) {
ctx->vreg_log[ctx->vreg_log_idx] = rnum;
@@ -175,42 +194,77 @@ static inline void ctx_log_vreg_write(DisasContext *ctx,
static inline void ctx_log_vreg_write_pair(DisasContext *ctx,
int rnum, VRegWriteType type,
- bool is_predicated)
+ bool is_predicated, bool has_helper)
{
- ctx_log_vreg_write(ctx, rnum ^ 0, type, is_predicated);
- ctx_log_vreg_write(ctx, rnum ^ 1, type, is_predicated);
+ ctx_log_vreg_write(ctx, rnum ^ 0, type, is_predicated, has_helper);
+ ctx_log_vreg_write(ctx, rnum ^ 1, type, is_predicated, has_helper);
}
-static inline void ctx_log_vreg_read(DisasContext *ctx, int rnum)
+static inline void ctx_log_vreg_read(DisasContext *ctx, int rnum,
+ bool has_helper)
{
- set_bit(rnum, ctx->vregs_read);
+ if (has_helper) {
+ set_bit(rnum, ctx->insn_vregs_read);
+ if (test_bit(rnum, ctx->insn_vregs_written)) {
+ ctx->has_hvx_overlap = true;
+ }
+ }
+ if (test_bit(rnum, ctx->vregs_written)) {
+ ctx->read_after_write = true;
+ }
}
-static inline void ctx_log_vreg_read_new(DisasContext *ctx, int rnum)
+static inline void ctx_log_vreg_read_new(DisasContext *ctx, int rnum,
+ bool has_helper)
{
g_assert(is_gather_store_insn(ctx) ||
test_bit(rnum, ctx->vregs_updated) ||
test_bit(rnum, ctx->vregs_select) ||
test_bit(rnum, ctx->vregs_updated_tmp));
- set_bit(rnum, ctx->vregs_read);
+ if (has_helper) {
+ set_bit(rnum, ctx->insn_vregs_read);
+ if (test_bit(rnum, ctx->insn_vregs_written)) {
+ ctx->has_hvx_overlap = true;
+ }
+ }
+ if (is_gather_store_insn(ctx)) {
+ ctx->read_after_write = true;
+ }
}
-static inline void ctx_log_vreg_read_pair(DisasContext *ctx, int rnum)
+static inline void ctx_log_vreg_read_pair(DisasContext *ctx, int rnum,
+ bool has_helper)
{
- ctx_log_vreg_read(ctx, rnum ^ 0);
- ctx_log_vreg_read(ctx, rnum ^ 1);
+ ctx_log_vreg_read(ctx, rnum ^ 0, has_helper);
+ ctx_log_vreg_read(ctx, rnum ^ 1, has_helper);
}
static inline void ctx_log_qreg_write(DisasContext *ctx,
- int rnum)
+ int rnum, bool has_helper)
{
+ if (has_helper) {
+ set_bit(rnum, ctx->insn_qregs_written);
+ if (test_bit(rnum, ctx->insn_qregs_read)) {
+ ctx->has_hvx_overlap = true;
+ }
+ }
+ set_bit(rnum, ctx->qregs_written);
ctx->qreg_log[ctx->qreg_log_idx] = rnum;
ctx->qreg_log_idx++;
}
-static inline void ctx_log_qreg_read(DisasContext *ctx, int qnum)
+static inline void ctx_log_qreg_read(DisasContext *ctx,
+ int qnum, bool has_helper)
{
- set_bit(qnum, ctx->qregs_read);
+ if (has_helper) {
+ set_bit(qnum, ctx->insn_qregs_read);
+ if (test_bit(qnum, ctx->insn_qregs_written)) {
+ ctx->has_hvx_overlap = true;
+ }
+ }
+ if (test_bit(qnum, ctx->qregs_written)) {
+ ctx->read_after_write = true;
+ }
}
extern TCGv hex_gpr[TOTAL_PER_THREAD_REGS];
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index 9dab26ee17..3545480080 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -378,60 +378,10 @@ static bool need_commit(DisasContext *ctx)
return true;
}
- if (pkt->num_insns == 1) {
- if (pkt->pkt_has_hvx) {
- /*
- * The HVX instructions with generated helpers use
- * pass-by-reference, so they need the read/write overlap
- * check below.
- * The HVX instructions with overrides are OK.
- */
- if (!ctx->has_hvx_helper) {
- return false;
- }
- } else {
- return false;
- }
- }
-
- if (ctx->read_after_write) {
+ if (ctx->read_after_write || ctx->has_hvx_overlap) {
return true;
}
- /* Check for overlap between HVX reads and writes */
- for (int i = 0; i < ctx->vreg_log_idx; i++) {
- int vnum = ctx->vreg_log[i];
- if (test_bit(vnum, ctx->vregs_read)) {
- return true;
- }
- }
- if (!bitmap_empty(ctx->vregs_updated_tmp, NUM_VREGS)) {
- int i = find_first_bit(ctx->vregs_updated_tmp, NUM_VREGS);
- while (i < NUM_VREGS) {
- if (test_bit(i, ctx->vregs_read)) {
- return true;
- }
- i = find_next_bit(ctx->vregs_updated_tmp, NUM_VREGS, i + 1);
- }
- }
- if (!bitmap_empty(ctx->vregs_select, NUM_VREGS)) {
- int i = find_first_bit(ctx->vregs_select, NUM_VREGS);
- while (i < NUM_VREGS) {
- if (test_bit(i, ctx->vregs_read)) {
- return true;
- }
- i = find_next_bit(ctx->vregs_select, NUM_VREGS, i + 1);
- }
- }
-
- /* Check for overlap between HVX predicate reads and writes */
- for (int i = 0; i < ctx->qreg_log_idx; i++) {
- int qnum = ctx->qreg_log[i];
- if (test_bit(qnum, ctx->qregs_read)) {
- return true;
- }
- }
-
return false;
}
@@ -453,8 +403,8 @@ static void mark_implicit_pred_reads(DisasContext *ctx)
static void analyze_packet(DisasContext *ctx)
{
Packet *pkt = ctx->pkt;
- ctx->has_hvx_helper = false;
ctx->read_after_write = false;
+ ctx->has_hvx_overlap = false;
for (int i = 0; i < pkt->num_insns; i++) {
Insn *insn = &pkt->insn[i];
ctx->insn = insn;
@@ -485,13 +435,13 @@ static void gen_start_packet(DisasContext *ctx)
ctx->future_vregs_idx = 0;
ctx->tmp_vregs_idx = 0;
ctx->vreg_log_idx = 0;
+ bitmap_zero(ctx->vregs_written, NUM_VREGS);
bitmap_zero(ctx->vregs_updated_tmp, NUM_VREGS);
bitmap_zero(ctx->vregs_updated, NUM_VREGS);
bitmap_zero(ctx->vregs_select, NUM_VREGS);
bitmap_zero(ctx->predicated_future_vregs, NUM_VREGS);
bitmap_zero(ctx->predicated_tmp_vregs, NUM_VREGS);
- bitmap_zero(ctx->vregs_read, NUM_VREGS);
- bitmap_zero(ctx->qregs_read, NUM_QREGS);
+ bitmap_zero(ctx->qregs_written, NUM_QREGS);
ctx->qreg_log_idx = 0;
for (i = 0; i < STORES_MAX; i++) {
ctx->store_width[i] = 0;
diff --git a/target/hexagon/gen_analyze_funcs.py b/target/hexagon/gen_analyze_funcs.py
index 40b9473c44..4541174590 100755
--- a/target/hexagon/gen_analyze_funcs.py
+++ b/target/hexagon/gen_analyze_funcs.py
@@ -52,7 +52,10 @@ def analyze_read(f, tag, regtype, regid, regno):
if regtype in {"R", "C"}:
f.write(f" ctx_log_reg_read_pair(ctx, {regN});\n")
elif regtype == "V":
- f.write(f" ctx_log_vreg_read_pair(ctx, {regN});\n")
+ f.write(
+ f" ctx_log_vreg_read_pair(ctx, {regN}, "
+ "insn_has_hvx_helper);\n"
+ )
else:
hex_common.bad_register(regtype, regid)
elif hex_common.is_single(regid):
@@ -62,9 +65,15 @@ def analyze_read(f, tag, regtype, regid, regno):
elif regtype == "P":
f.write(f" ctx_log_pred_read(ctx, {regN});\n")
elif regtype in {"V", "O"}:
- f.write(f" ctx_log_vreg_read(ctx, {regN});\n")
+ f.write(
+ f" ctx_log_vreg_read(ctx, {regN}, "
+ "insn_has_hvx_helper);\n"
+ )
elif regtype == "Q":
- f.write(f" ctx_log_qreg_read(ctx, {regN});\n")
+ f.write(
+ f" ctx_log_qreg_read(ctx, {regN}, "
+ "insn_has_hvx_helper);\n"
+ )
else:
hex_common.bad_register(regtype, regid)
elif hex_common.is_new_val(regtype, regid, tag):
@@ -73,7 +82,10 @@ def analyze_read(f, tag, regtype, regid, regno):
elif regtype == "P":
f.write(f" ctx_log_pred_read_new(ctx, {regN});\n")
elif regtype == "O":
- f.write(f" ctx_log_vreg_read_new(ctx, {regN});\n")
+ f.write(
+ f" ctx_log_vreg_read_new(ctx, {regN}, "
+ "insn_has_hvx_helper);\n"
+ )
else:
hex_common.bad_register(regtype, regid)
else:
@@ -90,7 +102,8 @@ def analyze_write(f, tag, regtype, regid, regno):
elif regtype == "V":
f.write(
f" ctx_log_vreg_write_pair(ctx, {regN}, "
- f"{vreg_write_type(tag)}, {predicated});\n"
+ f"{vreg_write_type(tag)}, {predicated}, "
+ "insn_has_hvx_helper);\n"
)
else:
hex_common.bad_register(regtype, regid)
@@ -102,10 +115,14 @@ def analyze_write(f, tag, regtype, regid, regno):
elif regtype == "V":
f.write(
f" ctx_log_vreg_write(ctx, {regN}, "
- f"{vreg_write_type(tag)}, {predicated});\n"
+ f"{vreg_write_type(tag)}, {predicated}, "
+ "insn_has_hvx_helper);\n"
)
elif regtype == "Q":
- f.write(f" ctx_log_qreg_write(ctx, {regN});\n")
+ f.write(
+ f" ctx_log_qreg_write(ctx, {regN}, "
+ "insn_has_hvx_helper);\n"
+ )
else:
hex_common.bad_register(regtype, regid)
else:
@@ -132,6 +149,17 @@ def gen_analyze_func(f, tag, regs, imms):
f.write("{\n")
f.write(" Insn *insn G_GNUC_UNUSED = ctx->insn;\n")
+ if (hex_common.is_hvx_insn(tag)):
+ if hex_common.has_hvx_helper(tag):
+ f.write(
+ " const bool G_GNUC_UNUSED insn_has_hvx_helper = true;\n"
+ )
+ f.write(" ctx_start_hvx_insn(ctx);\n")
+ else:
+ f.write(
+ " const bool G_GNUC_UNUSED insn_has_hvx_helper = false;\n"
+ )
+
## Declare the operands
i = 0
@@ -153,15 +181,6 @@ def gen_analyze_func(f, tag, regs, imms):
analyze_write(f, tag, regtype, regid, i)
i += 1
- has_generated_helper = not hex_common.skip_qemu_helper(
- tag
- ) and not hex_common.is_idef_parser_enabled(tag)
-
- ## Mark HVX instructions with generated helpers
- if (has_generated_helper and
- "A_CVI" in hex_common.attribdict[tag]):
- f.write(" ctx->has_hvx_helper = true;\n")
-
f.write("}\n\n")
diff --git a/target/hexagon/hex_common.py b/target/hexagon/hex_common.py
index 0da65d6dd6..befe3590c2 100755
--- a/target/hexagon/hex_common.py
+++ b/target/hexagon/hex_common.py
@@ -307,6 +307,16 @@ def is_idef_parser_enabled(tag):
return tag in idef_parser_enabled
+def is_hvx_insn(tag):
+ return "A_CVI" in attribdict[tag]
+
+
+def has_hvx_helper(tag):
+ return (is_hvx_insn(tag) and
+ not skip_qemu_helper(tag) and
+ not is_idef_parser_enabled(tag))
+
+
def imm_name(immlett):
return f"{immlett}iV"
--
2.34.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
end of thread, other threads:[~2023-11-03 16:24 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-03 16:22 [PATCH 0/3] Hexagon (target/hexagon) Enable more short-circuit packets Taylor Simpson
2023-11-03 16:22 ` [PATCH 1/3] Hexagon (target/hexagon) Analyze reads before writes Taylor Simpson
2023-11-03 16:22 ` [PATCH 2/3] Hexagon (target/hexagon) Enable more short-circuit packets (scalar core) Taylor Simpson
2023-11-03 16:22 ` [PATCH 3/3] Hexagon (target/hexagon) Enable more short-circuit packets (HVX) Taylor Simpson
-- strict thread matches above, loose matches on Subject: below --
2023-11-02 20:10 [PATCH 0/3] Hexagon (target/hexagon) Enable more short-circuit packets Taylor Simpson
2023-11-02 20:10 ` [PATCH 1/3] Hexagon (target/hexagon) Analyze reads before writes Taylor Simpson
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).