qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 10/21] Hexagon (target/hexagon) Mark registers as read during packet analysis
@ 2023-04-26  0:42 Taylor Simpson
  2023-04-26  0:42 ` [PATCH 11/21] Hexagon (target/hexagon) Short-circuit packet register writes Taylor Simpson
                   ` (9 more replies)
  0 siblings, 10 replies; 21+ messages in thread
From: Taylor Simpson @ 2023-04-26  0:42 UTC (permalink / raw)
  To: qemu-devel
  Cc: tsimpson, richard.henderson, philmd, ale, anjo, bcain,
	quic_mathbern

Have gen_analyze_funcs mark the registers that are read by the
instruction.  We also mark the implicit reads using instruction
attributes.

Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
---
 target/hexagon/translate.h          | 36 +++++++++++++++++++++++
 target/hexagon/attribs_def.h.inc    |  6 +++-
 target/hexagon/translate.c          | 20 +++++++++++++
 target/hexagon/gen_analyze_funcs.py | 44 ++++++++++++++++++++---------
 target/hexagon/hex_common.py        |  6 ++++
 5 files changed, 97 insertions(+), 15 deletions(-)

diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
index 4b9f21c41d..f72228859f 100644
--- a/target/hexagon/translate.h
+++ b/target/hexagon/translate.h
@@ -38,10 +38,12 @@ typedef struct DisasContext {
     int reg_log[REG_WRITES_MAX];
     int reg_log_idx;
     DECLARE_BITMAP(regs_written, TOTAL_PER_THREAD_REGS);
+    DECLARE_BITMAP(regs_read, TOTAL_PER_THREAD_REGS);
     DECLARE_BITMAP(predicated_regs, TOTAL_PER_THREAD_REGS);
     int preg_log[PRED_WRITES_MAX];
     int preg_log_idx;
     DECLARE_BITMAP(pregs_written, NUM_PREGS);
+    DECLARE_BITMAP(pregs_read, NUM_PREGS);
     uint8_t store_width[STORES_MAX];
     bool s1_store_processed;
     int future_vregs_idx;
@@ -55,8 +57,10 @@ typedef struct DisasContext {
     DECLARE_BITMAP(vregs_select, NUM_VREGS);
     DECLARE_BITMAP(predicated_future_vregs, NUM_VREGS);
     DECLARE_BITMAP(predicated_tmp_vregs, NUM_VREGS);
+    DECLARE_BITMAP(vregs_read, NUM_VREGS);
     int qreg_log[NUM_QREGS];
     int qreg_log_idx;
+    DECLARE_BITMAP(qregs_read, NUM_QREGS);
     bool pre_commit;
     TCGCond branch_cond;
     target_ulong branch_dest;
@@ -73,6 +77,11 @@ static inline void ctx_log_pred_write(DisasContext *ctx, int pnum)
     }
 }
 
+static inline void ctx_log_pred_read(DisasContext *ctx, int pnum)
+{
+    set_bit(pnum, ctx->pregs_read);
+}
+
 static inline void ctx_log_reg_write(DisasContext *ctx, int rnum,
                                      bool is_predicated)
 {
@@ -99,6 +108,17 @@ static inline void ctx_log_reg_write_pair(DisasContext *ctx, int rnum,
     ctx_log_reg_write(ctx, rnum + 1, is_predicated);
 }
 
+static inline void ctx_log_reg_read(DisasContext *ctx, int rnum)
+{
+    set_bit(rnum, ctx->regs_read);
+}
+
+static inline void ctx_log_reg_read_pair(DisasContext *ctx, int rnum)
+{
+    ctx_log_reg_read(ctx, rnum);
+    ctx_log_reg_read(ctx, rnum + 1);
+}
+
 intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum,
                              int num, bool alloc_ok);
 intptr_t ctx_tmp_vreg_off(DisasContext *ctx, int regnum,
@@ -139,6 +159,17 @@ static inline void ctx_log_vreg_write_pair(DisasContext *ctx,
     ctx_log_vreg_write(ctx, rnum ^ 1, type, is_predicated);
 }
 
+static inline void ctx_log_vreg_read(DisasContext *ctx, int rnum)
+{
+    set_bit(rnum, ctx->vregs_read);
+}
+
+static inline void ctx_log_vreg_read_pair(DisasContext *ctx, int rnum)
+{
+    ctx_log_vreg_read(ctx, rnum ^ 0);
+    ctx_log_vreg_read(ctx, rnum ^ 1);
+}
+
 static inline void ctx_log_qreg_write(DisasContext *ctx,
                                       int rnum)
 {
@@ -146,6 +177,11 @@ static inline void ctx_log_qreg_write(DisasContext *ctx,
     ctx->qreg_log_idx++;
 }
 
+static inline void ctx_log_qreg_read(DisasContext *ctx, int qnum)
+{
+    set_bit(qnum, ctx->qregs_read);
+}
+
 extern TCGv hex_gpr[TOTAL_PER_THREAD_REGS];
 extern TCGv hex_pred[NUM_PREGS];
 extern TCGv hex_this_PC;
diff --git a/target/hexagon/attribs_def.h.inc b/target/hexagon/attribs_def.h.inc
index 9874d1658f..17f86e1c32 100644
--- a/target/hexagon/attribs_def.h.inc
+++ b/target/hexagon/attribs_def.h.inc
@@ -1,5 +1,5 @@
 /*
- *  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -102,6 +102,10 @@ DEF_ATTRIB(IMPLICIT_WRITES_P1, "Writes Predicate 1", "", "UREG.P1")
 DEF_ATTRIB(IMPLICIT_WRITES_P2, "Writes Predicate 1", "", "UREG.P2")
 DEF_ATTRIB(IMPLICIT_WRITES_P3, "May write Predicate 3", "", "UREG.P3")
 DEF_ATTRIB(IMPLICIT_READS_PC, "Reads the PC register", "", "")
+DEF_ATTRIB(IMPLICIT_READS_P0, "Reads the P0 register", "", "")
+DEF_ATTRIB(IMPLICIT_READS_P1, "Reads the P1 register", "", "")
+DEF_ATTRIB(IMPLICIT_READS_P2, "Reads the P2 register", "", "")
+DEF_ATTRIB(IMPLICIT_READS_P3, "Reads the P3 register", "", "")
 DEF_ATTRIB(IMPLICIT_WRITES_USR, "May write USR", "", "")
 DEF_ATTRIB(WRITES_PRED_REG, "Writes a predicate register", "", "")
 DEF_ATTRIB(COMMUTES, "The operation is communitive", "", "")
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index 1f04559f91..004d91f108 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -336,6 +336,21 @@ static void mark_implicit_pred_writes(DisasContext *ctx)
     mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P3, 3);
 }
 
+static void mark_implicit_pred_read(DisasContext *ctx, int attrib, int pnum)
+{
+    if (GET_ATTRIB(ctx->insn->opcode, attrib)) {
+        ctx_log_pred_read(ctx, pnum);
+    }
+}
+
+static void mark_implicit_pred_reads(DisasContext *ctx)
+{
+    mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P0, 0);
+    mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P1, 1);
+    mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P3, 2);
+    mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P3, 3);
+}
+
 static void analyze_packet(DisasContext *ctx)
 {
     Packet *pkt = ctx->pkt;
@@ -348,6 +363,7 @@ static void analyze_packet(DisasContext *ctx)
         }
         mark_implicit_reg_writes(ctx);
         mark_implicit_pred_writes(ctx);
+        mark_implicit_pred_reads(ctx);
     }
 }
 
@@ -361,9 +377,11 @@ static void gen_start_packet(DisasContext *ctx)
     ctx->next_PC = next_PC;
     ctx->reg_log_idx = 0;
     bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS);
+    bitmap_zero(ctx->regs_read, TOTAL_PER_THREAD_REGS);
     bitmap_zero(ctx->predicated_regs, TOTAL_PER_THREAD_REGS);
     ctx->preg_log_idx = 0;
     bitmap_zero(ctx->pregs_written, NUM_PREGS);
+    bitmap_zero(ctx->pregs_read, NUM_PREGS);
     ctx->future_vregs_idx = 0;
     ctx->tmp_vregs_idx = 0;
     ctx->vreg_log_idx = 0;
@@ -372,6 +390,8 @@ static void gen_start_packet(DisasContext *ctx)
     bitmap_zero(ctx->vregs_select, NUM_VREGS);
     bitmap_zero(ctx->predicated_future_vregs, NUM_VREGS);
     bitmap_zero(ctx->predicated_tmp_vregs, NUM_VREGS);
+    bitmap_zero(ctx->vregs_read, NUM_VREGS);
+    bitmap_zero(ctx->qregs_read, NUM_QREGS);
     ctx->qreg_log_idx = 0;
     for (i = 0; i < STORES_MAX; i++) {
         ctx->store_width[i] = 0;
diff --git a/target/hexagon/gen_analyze_funcs.py b/target/hexagon/gen_analyze_funcs.py
index c74443da78..86aec5ac4b 100755
--- a/target/hexagon/gen_analyze_funcs.py
+++ b/target/hexagon/gen_analyze_funcs.py
@@ -35,12 +35,14 @@ def analyze_opn_old(f, tag, regtype, regid, regno):
     predicated = "true" if is_predicated(tag) else "false"
     if regtype == "R":
         if regid in {"ss", "tt"}:
-            f.write(f"//    const int {regN} = insn->regno[{regno}];\n")
+            f.write(f"    const int {regN} = insn->regno[{regno}];\n")
+            f.write(f"    ctx_log_reg_read_pair(ctx, {regN});\n")
         elif regid in {"dd", "ee", "xx", "yy"}:
             f.write(f"    const int {regN} = insn->regno[{regno}];\n")
             f.write(f"    ctx_log_reg_write_pair(ctx, {regN}, {predicated});\n")
         elif regid in {"s", "t", "u", "v"}:
-            f.write(f"//    const int {regN} = insn->regno[{regno}];\n")
+            f.write(f"    const int {regN} = insn->regno[{regno}];\n")
+            f.write(f"    ctx_log_reg_read(ctx, {regN});\n")
         elif regid in {"d", "e", "x", "y"}:
             f.write(f"    const int {regN} = insn->regno[{regno}];\n")
             f.write(f"    ctx_log_reg_write(ctx, {regN}, {predicated});\n")
@@ -48,7 +50,8 @@ def analyze_opn_old(f, tag, regtype, regid, regno):
             print("Bad register parse: ", regtype, regid)
     elif regtype == "P":
         if regid in {"s", "t", "u", "v"}:
-            f.write(f"//    const int {regN} = insn->regno[{regno}];\n")
+            f.write(f"    const int {regN} = insn->regno[{regno}];\n")
+            f.write(f"    ctx_log_pred_read(ctx, {regN});\n")
         elif regid in {"d", "e", "x"}:
             f.write(f"    const int {regN} = insn->regno[{regno}];\n")
             f.write(f"    ctx_log_pred_write(ctx, {regN});\n")
@@ -57,15 +60,19 @@ def analyze_opn_old(f, tag, regtype, regid, regno):
     elif regtype == "C":
         if regid == "ss":
             f.write(
-                f"//    const int {regN} = insn->regno[{regno}] " "+ HEX_REG_SA0;\n"
+                f"    const int {regN} = insn->regno[{regno}] "
+                "+ HEX_REG_SA0;\n"
             )
+            f.write(f"    ctx_log_reg_read_pair(ctx, {regN});\n")
         elif regid == "dd":
             f.write(f"    const int {regN} = insn->regno[{regno}] " "+ HEX_REG_SA0;\n")
             f.write(f"    ctx_log_reg_write_pair(ctx, {regN}, {predicated});\n")
         elif regid == "s":
             f.write(
-                f"//    const int {regN} = insn->regno[{regno}] " "+ HEX_REG_SA0;\n"
+                f"    const int {regN} = insn->regno[{regno}] "
+                "+ HEX_REG_SA0;\n"
             )
+            f.write(f"    ctx_log_reg_read(ctx, {regN});\n")
         elif regid == "d":
             f.write(f"    const int {regN} = insn->regno[{regno}] " "+ HEX_REG_SA0;\n")
             f.write(f"    ctx_log_reg_write(ctx, {regN}, {predicated});\n")
@@ -73,7 +80,8 @@ def analyze_opn_old(f, tag, regtype, regid, regno):
             print("Bad register parse: ", regtype, regid)
     elif regtype == "M":
         if regid == "u":
-            f.write(f"//    const int {regN} = insn->regno[{regno}];\n")
+            f.write(f"    const int {regN} = insn->regno[{regno}];\n")
+            f.write(f"    ctx_log_reg_read(ctx, {regN});\n")
         else:
             print("Bad register parse: ", regtype, regid)
     elif regtype == "V":
@@ -88,9 +96,11 @@ def analyze_opn_old(f, tag, regtype, regid, regno):
                 f"    ctx_log_vreg_write_pair(ctx, {regN}, {newv}, " f"{predicated});\n"
             )
         elif regid in {"uu", "vv"}:
-            f.write(f"//    const int {regN} = insn->regno[{regno}];\n")
+            f.write(f"    const int {regN} = insn->regno[{regno}];\n")
+            f.write(f"    ctx_log_vreg_read_pair(ctx, {regN});\n")
         elif regid in {"s", "u", "v", "w"}:
-            f.write(f"//    const int {regN} = insn->regno[{regno}];\n")
+            f.write(f"    const int {regN} = insn->regno[{regno}];\n")
+            f.write(f"    ctx_log_vreg_read(ctx, {regN});\n")
         elif regid in {"d", "x", "y"}:
             f.write(f"    const int {regN} = insn->regno[{regno}];\n")
             f.write(f"    ctx_log_vreg_write(ctx, {regN}, {newv}, " f"{predicated});\n")
@@ -101,7 +111,8 @@ def analyze_opn_old(f, tag, regtype, regid, regno):
             f.write(f"    const int {regN} = insn->regno[{regno}];\n")
             f.write(f"    ctx_log_qreg_write(ctx, {regN});\n")
         elif regid in {"s", "t", "u", "v"}:
-            f.write(f"//    const int {regN} = insn->regno[{regno}];\n")
+            f.write(f"    const int {regN} = insn->regno[{regno}];\n")
+            f.write(f"    ctx_log_qreg_read(ctx, {regN});\n")
         else:
             print("Bad register parse: ", regtype, regid)
     elif regtype == "G":
@@ -134,17 +145,20 @@ def analyze_opn_new(f, tag, regtype, regid, regno):
     regN = f"{regtype}{regid}N"
     if regtype == "N":
         if regid in {"s", "t"}:
-            f.write(f"//    const int {regN} = insn->regno[{regno}];\n")
+            f.write(f"    const int {regN} = insn->regno[{regno}];\n")
+            f.write(f"    ctx_log_reg_read(ctx, {regN});\n")
         else:
             print("Bad register parse: ", regtype, regid)
     elif regtype == "P":
         if regid in {"t", "u", "v"}:
-            f.write(f"//    const int {regN} = insn->regno[{regno}];\n")
+            f.write(f"    const int {regN} = insn->regno[{regno}];\n")
+            f.write(f"    ctx_log_pred_read(ctx, {regN});\n")
         else:
             print("Bad register parse: ", regtype, regid)
     elif regtype == "O":
         if regid == "s":
-            f.write(f"//    const int {regN} = insn->regno[{regno}];\n")
+            f.write(f"    const int {regN} = insn->regno[{regno}];\n")
+            f.write(f"    ctx_log_vreg_read(ctx, {regN});\n")
         else:
             print("Bad register parse: ", regtype, regid)
     else:
@@ -174,8 +188,10 @@ def analyze_opn(f, tag, regtype, regid, toss, numregs, i):
 ##         Insn *insn G_GNUC_UNUSED = ctx->insn;
 ##         const int RdN = insn->regno[0];
 ##         ctx_log_reg_write(ctx, RdN, false);
-##     //    const int RsN = insn->regno[1];
-##     //    const int RtN = insn->regno[2];
+##         const int RsN = insn->regno[1];
+##         ctx_log_reg_read(ctx, RsN);
+##         const int RtN = insn->regno[2];
+##         ctx_log_reg_read(ctx, RtN);
 ##     }
 ##
 def gen_analyze_func(f, tag, regs, imms):
diff --git a/target/hexagon/hex_common.py b/target/hexagon/hex_common.py
index 40f28ca933..232c6e2c20 100755
--- a/target/hexagon/hex_common.py
+++ b/target/hexagon/hex_common.py
@@ -97,6 +97,12 @@ def calculate_attribs():
     add_qemu_macro_attrib("fSET_LPCFG", "A_IMPLICIT_WRITES_USR")
     add_qemu_macro_attrib("fLOAD", "A_SCALAR_LOAD")
     add_qemu_macro_attrib("fSTORE", "A_SCALAR_STORE")
+    add_qemu_macro_attrib('fLSBNEW0', 'A_IMPLICIT_READS_P0')
+    add_qemu_macro_attrib('fLSBNEW0NOT', 'A_IMPLICIT_READS_P0')
+    add_qemu_macro_attrib('fREAD_P0', 'A_IMPLICIT_READS_P0')
+    add_qemu_macro_attrib('fLSBNEW1', 'A_IMPLICIT_READS_P1')
+    add_qemu_macro_attrib('fLSBNEW1NOT', 'A_IMPLICIT_READS_P1')
+    add_qemu_macro_attrib('fREAD_P3', 'A_IMPLICIT_READS_P3')
 
     # Recurse down macros, find attributes from sub-macros
     macroValues = list(macros.values())
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 11/21] Hexagon (target/hexagon) Short-circuit packet register writes
  2023-04-26  0:42 [PATCH 10/21] Hexagon (target/hexagon) Mark registers as read during packet analysis Taylor Simpson
@ 2023-04-26  0:42 ` Taylor Simpson
  2023-04-27 10:40   ` Richard Henderson
  2023-04-26  0:42 ` [PATCH 12/21] Hexagon (target/hexagon) Short-circuit packet predicate writes Taylor Simpson
                   ` (8 subsequent siblings)
  9 siblings, 1 reply; 21+ messages in thread
From: Taylor Simpson @ 2023-04-26  0:42 UTC (permalink / raw)
  To: qemu-devel
  Cc: tsimpson, richard.henderson, philmd, ale, anjo, bcain,
	quic_mathbern

In certain cases, we can avoid the overhead of writing to hex_new_value
and write directly to hex_gpr.  We add need_commit field to DisasContext
indicating if the end-of-packet commit is needed.  If it is not needed,
get_result_gpr() and get_result_gpr_pair() can return hex_gpr.

We pass the ctx->need_commit to helpers when needed.

Finally, we can early-exit from gen_reg_writes during packet commit.

There are a few instructions whose semantics write to the result before
reading all the inputs.  Therefore, the idef-parser generated code is
incompatible with short-circuit.  We tell idef-parser to skip them.

For debugging purposes, we add a cpu property to turn off short-circuit.
When the short-circuit property is false, we skip the analysis and force
the end-of-packet commit.

Here's a simple example of the TCG generated for
0x004000b4:  0x7800c020 {       R0 = #0x1 }

BEFORE:
 ---- 004000b4
 movi_i32 new_r0,$0x1
 mov_i32 r0,new_r0

AFTER:
 ---- 004000b4
 movi_i32 r0,$0x1

Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
---
 target/hexagon/cpu.h                    |  1 +
 target/hexagon/gen_tcg.h                |  3 +-
 target/hexagon/genptr.h                 |  2 +
 target/hexagon/helper.h                 |  2 +-
 target/hexagon/macros.h                 | 13 ++++-
 target/hexagon/translate.h              |  2 +
 target/hexagon/arch.c                   |  3 +-
 target/hexagon/cpu.c                    |  5 +-
 target/hexagon/genptr.c                 | 30 +++++-------
 target/hexagon/op_helper.c              |  5 +-
 target/hexagon/translate.c              | 65 ++++++++++++++++++++++++-
 target/hexagon/gen_helper_funcs.py      |  2 +
 target/hexagon/gen_helper_protos.py     | 10 +++-
 target/hexagon/gen_idef_parser_funcs.py |  7 +++
 target/hexagon/gen_tcg_funcs.py         |  5 ++
 target/hexagon/hex_common.py            |  3 ++
 16 files changed, 128 insertions(+), 30 deletions(-)

diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h
index 81b663ecfb..9252055a38 100644
--- a/target/hexagon/cpu.h
+++ b/target/hexagon/cpu.h
@@ -146,6 +146,7 @@ struct ArchCPU {
 
     bool lldb_compat;
     target_ulong lldb_stack_adjust;
+    bool short_circuit;
 };
 
 #include "cpu_bits.h"
diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h
index dbb64b9259..ec388629b3 100644
--- a/target/hexagon/gen_tcg.h
+++ b/target/hexagon/gen_tcg.h
@@ -592,7 +592,8 @@
 #define fGEN_TCG_A5_ACS(SHORTCODE) \
     do { \
         gen_helper_vacsh_pred(PeV, cpu_env, RxxV, RssV, RttV); \
-        gen_helper_vacsh_val(RxxV, cpu_env, RxxV, RssV, RttV); \
+        gen_helper_vacsh_val(RxxV, cpu_env, RxxV, RssV, RttV, \
+                             tcg_constant_tl(ctx->need_commit)); \
     } while (0)
 
 #define fGEN_TCG_S2_cabacdecbin(SHORTCODE) \
diff --git a/target/hexagon/genptr.h b/target/hexagon/genptr.h
index 75d0fc262d..420867f934 100644
--- a/target/hexagon/genptr.h
+++ b/target/hexagon/genptr.h
@@ -58,4 +58,6 @@ void gen_set_half(int N, TCGv result, TCGv src);
 void gen_set_half_i64(int N, TCGv_i64 result, TCGv src);
 void probe_noshuf_load(TCGv va, int s, int mi);
 
+extern const target_ulong reg_immut_masks[TOTAL_PER_THREAD_REGS];
+
 #endif
diff --git a/target/hexagon/helper.h b/target/hexagon/helper.h
index 73849e3d49..4b750d0351 100644
--- a/target/hexagon/helper.h
+++ b/target/hexagon/helper.h
@@ -29,7 +29,7 @@ DEF_HELPER_FLAGS_4(fcircadd, TCG_CALL_NO_RWG_SE, s32, s32, s32, s32, s32)
 DEF_HELPER_FLAGS_1(fbrev, TCG_CALL_NO_RWG_SE, i32, i32)
 DEF_HELPER_3(sfrecipa, i64, env, f32, f32)
 DEF_HELPER_2(sfinvsqrta, i64, env, f32)
-DEF_HELPER_4(vacsh_val, s64, env, s64, s64, s64)
+DEF_HELPER_5(vacsh_val, s64, env, s64, s64, s64, i32)
 DEF_HELPER_FLAGS_4(vacsh_pred, TCG_CALL_NO_RWG_SE, s32, env, s64, s64, s64)
 DEF_HELPER_FLAGS_2(cabacdecbin_val, TCG_CALL_NO_RWG_SE, s64, s64, s64)
 DEF_HELPER_FLAGS_2(cabacdecbin_pred, TCG_CALL_NO_RWG_SE, s32, s64, s64)
diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h
index 16e72ed0d5..a68446a367 100644
--- a/target/hexagon/macros.h
+++ b/target/hexagon/macros.h
@@ -44,8 +44,17 @@
                    reg_field_info[FIELD].offset)
 
 #define SET_USR_FIELD(FIELD, VAL) \
-    fINSERT_BITS(env->new_value[HEX_REG_USR], reg_field_info[FIELD].width, \
-                 reg_field_info[FIELD].offset, (VAL))
+    do { \
+        if (pkt_need_commit) { \
+            fINSERT_BITS(env->new_value[HEX_REG_USR], \
+                        reg_field_info[FIELD].width, \
+                        reg_field_info[FIELD].offset, (VAL)); \
+        } else { \
+            fINSERT_BITS(env->gpr[HEX_REG_USR], \
+                        reg_field_info[FIELD].width, \
+                        reg_field_info[FIELD].offset, (VAL)); \
+        } \
+    } while (0)
 #endif
 
 #ifdef QEMU_GENERATE
diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
index f72228859f..3f6fd3452c 100644
--- a/target/hexagon/translate.h
+++ b/target/hexagon/translate.h
@@ -62,10 +62,12 @@ typedef struct DisasContext {
     int qreg_log_idx;
     DECLARE_BITMAP(qregs_read, NUM_QREGS);
     bool pre_commit;
+    bool need_commit;
     TCGCond branch_cond;
     target_ulong branch_dest;
     bool is_tight_loop;
     bool need_pkt_has_store_s1;
+    bool short_circuit;
 } DisasContext;
 
 static inline void ctx_log_pred_write(DisasContext *ctx, int pnum)
diff --git a/target/hexagon/arch.c b/target/hexagon/arch.c
index da79b41c4d..d053d68487 100644
--- a/target/hexagon/arch.c
+++ b/target/hexagon/arch.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -224,6 +224,7 @@ void arch_fpop_start(CPUHexagonState *env)
 
 void arch_fpop_end(CPUHexagonState *env)
 {
+    const bool pkt_need_commit = true;
     int flags = get_float_exception_flags(&env->fp_status);
     if (flags != 0) {
         SOFTFLOAT_TEST_FLAG(float_flag_inexact, FPINPF, FPINPE);
diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c
index ab40cfc283..4adf90dcfa 100644
--- a/target/hexagon/cpu.c
+++ b/target/hexagon/cpu.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -52,6 +52,8 @@ static Property hexagon_lldb_compat_property =
 static Property hexagon_lldb_stack_adjust_property =
     DEFINE_PROP_UNSIGNED("lldb-stack-adjust", HexagonCPU, lldb_stack_adjust,
                          0, qdev_prop_uint32, target_ulong);
+static Property hexagon_short_circuit_property =
+    DEFINE_PROP_BOOL("short-circuit", HexagonCPU, short_circuit, true);
 
 const char * const hexagon_regnames[TOTAL_PER_THREAD_REGS] = {
    "r0", "r1",  "r2",  "r3",  "r4",   "r5",  "r6",  "r7",
@@ -328,6 +330,7 @@ static void hexagon_cpu_init(Object *obj)
     cpu_set_cpustate_pointers(cpu);
     qdev_property_add_static(DEVICE(obj), &hexagon_lldb_compat_property);
     qdev_property_add_static(DEVICE(obj), &hexagon_lldb_stack_adjust_property);
+    qdev_property_add_static(DEVICE(obj), &hexagon_short_circuit_property);
 }
 
 #include "hw/core/tcg-cpu-ops.h"
diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c
index e4c116edc0..9431beebc3 100644
--- a/target/hexagon/genptr.c
+++ b/target/hexagon/genptr.c
@@ -45,7 +45,7 @@ TCGv gen_read_preg(TCGv pred, uint8_t num)
 
 #define IMMUTABLE (~0)
 
-static const target_ulong reg_immut_masks[TOTAL_PER_THREAD_REGS] = {
+const target_ulong reg_immut_masks[TOTAL_PER_THREAD_REGS] = {
     [HEX_REG_USR] = 0xc13000c0,
     [HEX_REG_PC] = IMMUTABLE,
     [HEX_REG_GP] = 0x3f,
@@ -70,14 +70,18 @@ static inline void gen_masked_reg_write(TCGv new_val, TCGv cur_val,
 
 static TCGv get_result_gpr(DisasContext *ctx, int rnum)
 {
-    return hex_new_value[rnum];
+    if (ctx->need_commit) {
+        return hex_new_value[rnum];
+    } else {
+        return hex_gpr[rnum];
+    }
 }
 
 static TCGv_i64 get_result_gpr_pair(DisasContext *ctx, int rnum)
 {
     TCGv_i64 result = tcg_temp_new_i64();
-    tcg_gen_concat_i32_i64(result, hex_new_value[rnum],
-                                   hex_new_value[rnum + 1]);
+    tcg_gen_concat_i32_i64(result, get_result_gpr(ctx, rnum),
+                                   get_result_gpr(ctx, rnum + 1));
     return result;
 }
 
@@ -86,7 +90,7 @@ void gen_log_reg_write(DisasContext *ctx, int rnum, TCGv val)
     const target_ulong reg_mask = reg_immut_masks[rnum];
 
     gen_masked_reg_write(val, hex_gpr[rnum], reg_mask);
-    tcg_gen_mov_tl(hex_new_value[rnum], val);
+    tcg_gen_mov_tl(get_result_gpr(ctx, rnum), val);
     if (HEX_DEBUG) {
         /* Do this so HELPER(debug_commit_end) will know */
         tcg_gen_movi_tl(hex_reg_written[rnum], 1);
@@ -95,27 +99,15 @@ void gen_log_reg_write(DisasContext *ctx, int rnum, TCGv val)
 
 static void gen_log_reg_write_pair(DisasContext *ctx, int rnum, TCGv_i64 val)
 {
-    const target_ulong reg_mask_low = reg_immut_masks[rnum];
-    const target_ulong reg_mask_high = reg_immut_masks[rnum + 1];
     TCGv val32 = tcg_temp_new();
 
     /* Low word */
     tcg_gen_extrl_i64_i32(val32, val);
-    gen_masked_reg_write(val32, hex_gpr[rnum], reg_mask_low);
-    tcg_gen_mov_tl(hex_new_value[rnum], val32);
-    if (HEX_DEBUG) {
-        /* Do this so HELPER(debug_commit_end) will know */
-        tcg_gen_movi_tl(hex_reg_written[rnum], 1);
-    }
+    gen_log_reg_write(ctx, rnum, val32);
 
     /* High word */
     tcg_gen_extrh_i64_i32(val32, val);
-    gen_masked_reg_write(val32, hex_gpr[rnum + 1], reg_mask_high);
-    tcg_gen_mov_tl(hex_new_value[rnum + 1], val32);
-    if (HEX_DEBUG) {
-        /* Do this so HELPER(debug_commit_end) will know */
-        tcg_gen_movi_tl(hex_reg_written[rnum + 1], 1);
-    }
+    gen_log_reg_write(ctx, rnum + 1, val32);
 }
 
 void gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val)
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
index 46ccc59106..fc5c30a141 100644
--- a/target/hexagon/op_helper.c
+++ b/target/hexagon/op_helper.c
@@ -220,7 +220,7 @@ void HELPER(debug_commit_end)(CPUHexagonState *env, int has_st0, int has_st1)
                 reg_printed = true;
             }
             HEX_DEBUG_LOG("\tr%d = " TARGET_FMT_ld " (0x" TARGET_FMT_lx ")\n",
-                          i, env->new_value[i], env->new_value[i]);
+                          i, env->gpr[i], env->gpr[i]);
         }
     }
 
@@ -352,7 +352,8 @@ uint64_t HELPER(sfinvsqrta)(CPUHexagonState *env, float32 RsV)
 }
 
 int64_t HELPER(vacsh_val)(CPUHexagonState *env,
-                           int64_t RxxV, int64_t RssV, int64_t RttV)
+                           int64_t RxxV, int64_t RssV, int64_t RttV,
+                           uint32_t pkt_need_commit)
 {
     for (int i = 0; i < 4; i++) {
         int xv = sextract64(RxxV, i * 16, 16);
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index 004d91f108..a87f7e7130 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -27,6 +27,7 @@
 #include "insn.h"
 #include "decode.h"
 #include "translate.h"
+#include "genptr.h"
 #include "printinsn.h"
 
 #include "analyze_funcs_generated.c.inc"
@@ -336,6 +337,58 @@ static void mark_implicit_pred_writes(DisasContext *ctx)
     mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P3, 3);
 }
 
+static bool pkt_raises_exception(Packet *pkt)
+{
+    if (check_for_attrib(pkt, A_LOAD) ||
+        check_for_attrib(pkt, A_STORE)) {
+        return true;
+    }
+    return false;
+}
+
+static bool need_commit(DisasContext *ctx)
+{
+    Packet *pkt = ctx->pkt;
+
+    /*
+     * If the short-circuit property is set to false, we'll always do the commit
+     */
+    if (!ctx->short_circuit) {
+        return true;
+    }
+
+    if (pkt_raises_exception(pkt)) {
+        return true;
+    }
+
+    /* Registers with immutability flags require new_value */
+    for (int i = 0; i < ctx->reg_log_idx; i++) {
+        int rnum = ctx->reg_log[i];
+        if (reg_immut_masks[rnum]) {
+            return true;
+        }
+    }
+
+    /* Floating point instructions are hard-coded to use new_value */
+    if (check_for_attrib(pkt, A_FPOP)) {
+        return true;
+    }
+
+    if (pkt->num_insns == 1) {
+        return false;
+    }
+
+    /* Check for overlap between register reads and writes */
+    for (int i = 0; i < ctx->reg_log_idx; i++) {
+        int rnum = ctx->reg_log[i];
+        if (test_bit(rnum, ctx->regs_read)) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
 static void mark_implicit_pred_read(DisasContext *ctx, int attrib, int pnum)
 {
     if (GET_ATTRIB(ctx->insn->opcode, attrib)) {
@@ -365,6 +418,8 @@ static void analyze_packet(DisasContext *ctx)
         mark_implicit_pred_writes(ctx);
         mark_implicit_pred_reads(ctx);
     }
+
+    ctx->need_commit = need_commit(ctx);
 }
 
 static void gen_start_packet(DisasContext *ctx)
@@ -434,7 +489,8 @@ static void gen_start_packet(DisasContext *ctx)
     }
 
     /* Preload the predicated registers into hex_new_value[i] */
-    if (!bitmap_empty(ctx->predicated_regs, TOTAL_PER_THREAD_REGS)) {
+    if (ctx->need_commit &&
+        !bitmap_empty(ctx->predicated_regs, TOTAL_PER_THREAD_REGS)) {
         int i = find_first_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS);
         while (i < TOTAL_PER_THREAD_REGS) {
             tcg_gen_mov_tl(hex_new_value[i], hex_gpr[i]);
@@ -541,6 +597,11 @@ static void gen_reg_writes(DisasContext *ctx)
 {
     int i;
 
+    /* Early exit if not needed */
+    if (!ctx->need_commit) {
+        return;
+    }
+
     for (i = 0; i < ctx->reg_log_idx; i++) {
         int reg_num = ctx->reg_log[i];
 
@@ -919,6 +980,7 @@ static void hexagon_tr_init_disas_context(DisasContextBase *dcbase,
                                           CPUState *cs)
 {
     DisasContext *ctx = container_of(dcbase, DisasContext, base);
+    HexagonCPU *hex_cpu = env_archcpu(cs->env_ptr);
     uint32_t hex_flags = dcbase->tb->flags;
 
     ctx->mem_idx = MMU_USER_IDX;
@@ -927,6 +989,7 @@ static void hexagon_tr_init_disas_context(DisasContextBase *dcbase,
     ctx->num_hvx_insns = 0;
     ctx->branch_cond = TCG_COND_NEVER;
     ctx->is_tight_loop = FIELD_EX32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP);
+    ctx->short_circuit = hex_cpu->short_circuit;
 }
 
 static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu)
diff --git a/target/hexagon/gen_helper_funcs.py b/target/hexagon/gen_helper_funcs.py
index c73d792580..e259ea3d03 100755
--- a/target/hexagon/gen_helper_funcs.py
+++ b/target/hexagon/gen_helper_funcs.py
@@ -287,6 +287,8 @@ def gen_helper_function(f, tag, tagregs, tagimms):
 
         if hex_common.need_pkt_has_multi_cof(tag):
             f.write(", uint32_t pkt_has_multi_cof")
+        if (hex_common.need_pkt_need_commit(tag)):
+            f.write(", uint32_t pkt_need_commit")
 
         if hex_common.need_PC(tag):
             if i > 0:
diff --git a/target/hexagon/gen_helper_protos.py b/target/hexagon/gen_helper_protos.py
index 187cd6e04e..c5ecb85294 100755
--- a/target/hexagon/gen_helper_protos.py
+++ b/target/hexagon/gen_helper_protos.py
@@ -86,6 +86,8 @@ def gen_helper_prototype(f, tag, tagregs, tagimms):
             def_helper_size = len(regs) + len(imms) + numscalarreadwrite + 1
             if hex_common.need_pkt_has_multi_cof(tag):
                 def_helper_size += 1
+            if hex_common.need_pkt_need_commit(tag):
+                def_helper_size += 1
             if hex_common.need_part1(tag):
                 def_helper_size += 1
             if hex_common.need_slot(tag):
@@ -103,6 +105,8 @@ def gen_helper_prototype(f, tag, tagregs, tagimms):
             def_helper_size = len(regs) + len(imms) + numscalarreadwrite
             if hex_common.need_pkt_has_multi_cof(tag):
                 def_helper_size += 1
+            if hex_common.need_pkt_need_commit(tag):
+                def_helper_size += 1
             if hex_common.need_part1(tag):
                 def_helper_size += 1
             if hex_common.need_slot(tag):
@@ -156,10 +160,12 @@ def gen_helper_prototype(f, tag, tagregs, tagimms):
         for immlett, bits, immshift in imms:
             f.write(", s32")
 
-        ## Add the arguments for the instruction pkt_has_multi_cof, slot and
-        ## part1 (if needed)
+        ## Add the arguments for the instruction pkt_has_multi_cof,
+        ## pkt_needs_commit, PC, next_PC, slot, and part1 (if needed)
         if hex_common.need_pkt_has_multi_cof(tag):
             f.write(", i32")
+        if hex_common.need_pkt_need_commit(tag):
+            f.write(', i32')
         if hex_common.need_PC(tag):
             f.write(", i32")
         if hex_common.helper_needs_next_PC(tag):
diff --git a/target/hexagon/gen_idef_parser_funcs.py b/target/hexagon/gen_idef_parser_funcs.py
index afe68bdb6f..b7f2df0f36 100644
--- a/target/hexagon/gen_idef_parser_funcs.py
+++ b/target/hexagon/gen_idef_parser_funcs.py
@@ -109,6 +109,13 @@ def main():
                 continue
             if "A_COF" in hex_common.attribdict[tag]:
                 continue
+            ## Skip instructions that are incompatible with short-circuit
+            ## packet register writes
+            if ( tag == 'S2_insert' or
+                 tag == 'S2_insert_rp' or
+                 tag == 'S2_asr_r_svw_trun' or
+                 tag == 'A2_swiz' ):
+                continue
 
             regs = tagregs[tag]
             imms = tagimms[tag]
diff --git a/target/hexagon/gen_tcg_funcs.py b/target/hexagon/gen_tcg_funcs.py
index d9ccbe63f6..0e45d43685 100755
--- a/target/hexagon/gen_tcg_funcs.py
+++ b/target/hexagon/gen_tcg_funcs.py
@@ -550,6 +550,9 @@ def gen_tcg_func(f, tag, regs, imms):
         if hex_common.need_pkt_has_multi_cof(tag):
             f.write("    TCGv pkt_has_multi_cof = ")
             f.write("tcg_constant_tl(ctx->pkt->pkt_has_multi_cof);\n")
+        if hex_common.need_pkt_need_commit(tag):
+            f.write("    TCGv pkt_need_commit = ")
+            f.write("tcg_constant_tl(ctx->need_commit);\n")
         if hex_common.need_part1(tag):
             f.write("    TCGv part1 = tcg_constant_tl(insn->part1);\n")
         if hex_common.need_slot(tag):
@@ -596,6 +599,8 @@ def gen_tcg_func(f, tag, regs, imms):
 
         if hex_common.need_pkt_has_multi_cof(tag):
             f.write(", pkt_has_multi_cof")
+        if hex_common.need_pkt_need_commit(tag):
+            f.write(", pkt_need_commit")
         if hex_common.need_PC(tag):
             f.write(", PC")
         if hex_common.helper_needs_next_PC(tag):
diff --git a/target/hexagon/hex_common.py b/target/hexagon/hex_common.py
index 232c6e2c20..29c0508f66 100755
--- a/target/hexagon/hex_common.py
+++ b/target/hexagon/hex_common.py
@@ -276,6 +276,9 @@ def need_pkt_has_multi_cof(tag):
     return "A_COF" in attribdict[tag]
 
 
+def need_pkt_need_commit(tag):
+    return 'A_IMPLICIT_WRITES_USR' in attribdict[tag]
+
 def need_condexec_reg(tag, regs):
     if "A_CONDEXEC" in attribdict[tag]:
         for regtype, regid, toss, numregs in regs:
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 12/21] Hexagon (target/hexagon) Short-circuit packet predicate writes
  2023-04-26  0:42 [PATCH 10/21] Hexagon (target/hexagon) Mark registers as read during packet analysis Taylor Simpson
  2023-04-26  0:42 ` [PATCH 11/21] Hexagon (target/hexagon) Short-circuit packet register writes Taylor Simpson
@ 2023-04-26  0:42 ` Taylor Simpson
  2023-04-27 10:41   ` Richard Henderson
  2023-04-26  0:42 ` [PATCH 13/21] Hexagon (target/hexagon) Short-circuit packet HVX writes Taylor Simpson
                   ` (7 subsequent siblings)
  9 siblings, 1 reply; 21+ messages in thread
From: Taylor Simpson @ 2023-04-26  0:42 UTC (permalink / raw)
  To: qemu-devel
  Cc: tsimpson, richard.henderson, philmd, ale, anjo, bcain,
	quic_mathbern

In certain cases, we can avoid the overhead of writing to hex_new_pred_value
and write directly to hex_pred.  We consider predicate reads/writes when
computing ctx->need_commit.  The get_result_pred() function uses this
field to decide between hex_new_pred_value and hex_pred.  Then, we can
early-exit from gen_pred_writes.

Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
---
 target/hexagon/genptr.h    |  1 +
 target/hexagon/genptr.c    | 15 ++++++++++++---
 target/hexagon/translate.c | 14 +++++++++++---
 3 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/target/hexagon/genptr.h b/target/hexagon/genptr.h
index 420867f934..e11ccc2358 100644
--- a/target/hexagon/genptr.h
+++ b/target/hexagon/genptr.h
@@ -35,6 +35,7 @@ void gen_store4i(TCGv_env cpu_env, TCGv vaddr, int32_t src, uint32_t slot);
 void gen_store8i(TCGv_env cpu_env, TCGv vaddr, int64_t src, uint32_t slot);
 TCGv gen_read_reg(TCGv result, int num);
 TCGv gen_read_preg(TCGv pred, uint8_t num);
+TCGv get_result_pred(DisasContext *ctx, int pnum);
 void gen_log_reg_write(DisasContext *ctx, int rnum, TCGv val);
 void gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val);
 void gen_set_usr_field(DisasContext *ctx, int field, TCGv val);
diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c
index 9431beebc3..da68d19ed3 100644
--- a/target/hexagon/genptr.c
+++ b/target/hexagon/genptr.c
@@ -110,8 +110,18 @@ static void gen_log_reg_write_pair(DisasContext *ctx, int rnum, TCGv_i64 val)
     gen_log_reg_write(ctx, rnum + 1, val32);
 }
 
+TCGv get_result_pred(DisasContext *ctx, int pnum)
+{
+    if (ctx->need_commit) {
+        return hex_new_pred_value[pnum];
+    } else {
+        return hex_pred[pnum];
+    }
+}
+
 void gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val)
 {
+    TCGv pred = get_result_pred(ctx, pnum);
     TCGv base_val = tcg_temp_new();
 
     tcg_gen_andi_tl(base_val, val, 0xff);
@@ -124,10 +134,9 @@ void gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val)
      * straight assignment.  Otherwise, do an and.
      */
     if (!test_bit(pnum, ctx->pregs_written)) {
-        tcg_gen_mov_tl(hex_new_pred_value[pnum], base_val);
+        tcg_gen_mov_tl(pred, base_val);
     } else {
-        tcg_gen_and_tl(hex_new_pred_value[pnum],
-                       hex_new_pred_value[pnum], base_val);
+        tcg_gen_and_tl(pred, pred, base_val);
     }
     if (HEX_DEBUG) {
         tcg_gen_ori_tl(hex_pred_written, hex_pred_written, 1 << pnum);
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index a87f7e7130..07ed36f6a8 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -386,6 +386,14 @@ static bool need_commit(DisasContext *ctx)
         }
     }
 
+    /* Check for overlap between predicate reads and writes */
+    for (int i = 0; i < ctx->preg_log_idx; i++) {
+        int pnum = ctx->preg_log[i];
+        if (test_bit(pnum, ctx->pregs_read)) {
+            return true;
+        }
+    }
+
     return false;
 }
 
@@ -503,7 +511,7 @@ static void gen_start_packet(DisasContext *ctx)
      * Preload the predicated pred registers into hex_new_pred_value[pred_num]
      * Only endloop instructions conditionally write to pred registers
      */
-    if (pkt->pkt_has_endloop) {
+    if (ctx->need_commit && pkt->pkt_has_endloop) {
         for (int i = 0; i < ctx->preg_log_idx; i++) {
             int pred_num = ctx->preg_log[i];
             tcg_gen_mov_tl(hex_new_pred_value[pred_num], hex_pred[pred_num]);
@@ -619,8 +627,8 @@ static void gen_reg_writes(DisasContext *ctx)
 
 static void gen_pred_writes(DisasContext *ctx)
 {
-    /* Early exit if the log is empty */
-    if (!ctx->preg_log_idx) {
+    /* Early exit if not needed or the log is empty */
+    if (!ctx->need_commit || !ctx->preg_log_idx) {
         return;
     }
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 13/21] Hexagon (target/hexagon) Short-circuit packet HVX writes
  2023-04-26  0:42 [PATCH 10/21] Hexagon (target/hexagon) Mark registers as read during packet analysis Taylor Simpson
  2023-04-26  0:42 ` [PATCH 11/21] Hexagon (target/hexagon) Short-circuit packet register writes Taylor Simpson
  2023-04-26  0:42 ` [PATCH 12/21] Hexagon (target/hexagon) Short-circuit packet predicate writes Taylor Simpson
@ 2023-04-26  0:42 ` Taylor Simpson
  2023-04-27 10:42   ` Richard Henderson
  2023-04-26  0:42 ` [PATCH 14/21] Hexagon (target/hexagon) Short-circuit more HVX single instruction packets Taylor Simpson
                   ` (6 subsequent siblings)
  9 siblings, 1 reply; 21+ messages in thread
From: Taylor Simpson @ 2023-04-26  0:42 UTC (permalink / raw)
  To: qemu-devel
  Cc: tsimpson, richard.henderson, philmd, ale, anjo, bcain,
	quic_mathbern

In certain cases, we can avoid the overhead of writing to future_VRegs
and write directly to VRegs.  We consider HVX reads/writes when computing
ctx->need_commit.  Then, we can early-exit from gen_commit_hvx.

Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
---
 target/hexagon/genptr.c    |  6 ++++-
 target/hexagon/translate.c | 46 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 50 insertions(+), 2 deletions(-)

diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c
index da68d19ed3..8e5afab931 100644
--- a/target/hexagon/genptr.c
+++ b/target/hexagon/genptr.c
@@ -1101,7 +1101,11 @@ static void gen_log_vreg_write_pair(DisasContext *ctx, intptr_t srcoff, int num,
 
 static intptr_t get_result_qreg(DisasContext *ctx, int qnum)
 {
-    return  offsetof(CPUHexagonState, future_QRegs[qnum]);
+    if (ctx->need_commit) {
+        return  offsetof(CPUHexagonState, future_QRegs[qnum]);
+    } else {
+        return  offsetof(CPUHexagonState, QRegs[qnum]);
+    }
 }
 
 static void gen_vreg_load(DisasContext *ctx, intptr_t dstoff, TCGv src,
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index 07ed36f6a8..8e024b2cd2 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -70,6 +70,10 @@ intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum,
 {
     intptr_t offset;
 
+    if (!ctx->need_commit) {
+        return offsetof(CPUHexagonState, VRegs[regnum]);
+    }
+
     /* See if it is already allocated */
     for (int i = 0; i < ctx->future_vregs_idx; i++) {
         if (ctx->future_vregs_num[i] == regnum) {
@@ -374,7 +378,7 @@ static bool need_commit(DisasContext *ctx)
         return true;
     }
 
-    if (pkt->num_insns == 1) {
+    if (pkt->num_insns == 1 && !pkt->pkt_has_hvx) {
         return false;
     }
 
@@ -394,6 +398,40 @@ static bool need_commit(DisasContext *ctx)
         }
     }
 
+    /* Check for overlap between HVX reads and writes */
+    for (int i = 0; i < ctx->vreg_log_idx; i++) {
+        int vnum = ctx->vreg_log[i];
+        if (test_bit(vnum, ctx->vregs_read)) {
+            return true;
+        }
+    }
+    if (!bitmap_empty(ctx->vregs_updated_tmp, NUM_VREGS)) {
+        int i = find_first_bit(ctx->vregs_updated_tmp, NUM_VREGS);
+        while (i < NUM_VREGS) {
+            if (test_bit(i, ctx->vregs_read)) {
+                return true;
+            }
+            i = find_next_bit(ctx->vregs_updated_tmp, NUM_VREGS, i + 1);
+        }
+    }
+    if (!bitmap_empty(ctx->vregs_select, NUM_VREGS)) {
+        int i = find_first_bit(ctx->vregs_select, NUM_VREGS);
+        while (i < NUM_VREGS) {
+            if (test_bit(i, ctx->vregs_read)) {
+                return true;
+            }
+            i = find_next_bit(ctx->vregs_select, NUM_VREGS, i + 1);
+        }
+    }
+
+    /* Check for overlap between HVX predicate reads and writes */
+    for (int i = 0; i < ctx->qreg_log_idx; i++) {
+        int qnum = ctx->qreg_log[i];
+        if (test_bit(qnum, ctx->qregs_read)) {
+            return true;
+        }
+    }
+
     return false;
 }
 
@@ -787,6 +825,12 @@ static void gen_commit_hvx(DisasContext *ctx)
 {
     int i;
 
+    /* Early exit if not needed */
+    if (!ctx->need_commit) {
+        g_assert(!pkt_has_hvx_store(ctx->pkt));
+        return;
+    }
+
     /*
      *    for (i = 0; i < ctx->vreg_log_idx; i++) {
      *        int rnum = ctx->vreg_log[i];
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 14/21] Hexagon (target/hexagon) Short-circuit more HVX single instruction packets
  2023-04-26  0:42 [PATCH 10/21] Hexagon (target/hexagon) Mark registers as read during packet analysis Taylor Simpson
                   ` (2 preceding siblings ...)
  2023-04-26  0:42 ` [PATCH 13/21] Hexagon (target/hexagon) Short-circuit packet HVX writes Taylor Simpson
@ 2023-04-26  0:42 ` Taylor Simpson
  2023-04-27 10:44   ` Richard Henderson
  2023-04-26  0:42 ` [PATCH 15/21] Hexagon (target/hexagon) Add overrides for disabled idef-parser insns Taylor Simpson
                   ` (5 subsequent siblings)
  9 siblings, 1 reply; 21+ messages in thread
From: Taylor Simpson @ 2023-04-26  0:42 UTC (permalink / raw)
  To: qemu-devel
  Cc: tsimpson, richard.henderson, philmd, ale, anjo, bcain,
	quic_mathbern

The generated helpers for HVX use pass-by-reference, so they can't
short-circuit when the reads/writes overlap.  The instructions with
overrides are OK because they use tcg_gen_gvec_*.

We add a flag has_hvx_helper to DisasContext and extend gen_analyze_funcs
to set the flag when the instruction is an HVX instruction with a
generated helper.

We add an override for V6_vcombine so that it can be short-circuited
along with a test case in tests/tcg/hexagon/hvx_misc.c

Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
---
 target/hexagon/gen_tcg_hvx.h        | 23 +++++++++++++++++++++++
 target/hexagon/translate.h          |  1 +
 target/hexagon/translate.c          | 17 +++++++++++++++--
 tests/tcg/hexagon/hvx_misc.c        | 21 +++++++++++++++++++++
 target/hexagon/gen_analyze_funcs.py |  5 +++++
 5 files changed, 65 insertions(+), 2 deletions(-)

diff --git a/target/hexagon/gen_tcg_hvx.h b/target/hexagon/gen_tcg_hvx.h
index d4aefe8e3f..19680d8505 100644
--- a/target/hexagon/gen_tcg_hvx.h
+++ b/target/hexagon/gen_tcg_hvx.h
@@ -128,6 +128,29 @@ static inline void assert_vhist_tmp(DisasContext *ctx)
     tcg_gen_gvec_mov(MO_64, VdV_off, VuV_off, \
                      sizeof(MMVector), sizeof(MMVector))
 
+/*
+ * Vector combine
+ *
+ * Be careful that the source and dest don't overlap
+ */
+#define fGEN_TCG_V6_vcombine(SHORTCODE) \
+    do { \
+        if (VddV_off != VuV_off) { \
+            tcg_gen_gvec_mov(MO_64, VddV_off, VvV_off, \
+                             sizeof(MMVector), sizeof(MMVector)); \
+            tcg_gen_gvec_mov(MO_64, VddV_off + sizeof(MMVector), VuV_off, \
+                             sizeof(MMVector), sizeof(MMVector)); \
+        } else { \
+            intptr_t tmpoff = offsetof(CPUHexagonState, vtmp); \
+            tcg_gen_gvec_mov(MO_64, tmpoff, VuV_off, \
+                             sizeof(MMVector), sizeof(MMVector)); \
+            tcg_gen_gvec_mov(MO_64, VddV_off, VvV_off, \
+                             sizeof(MMVector), sizeof(MMVector)); \
+            tcg_gen_gvec_mov(MO_64, VddV_off + sizeof(MMVector), tmpoff, \
+                             sizeof(MMVector), sizeof(MMVector)); \
+        } \
+    } while (0)
+
 /* Vector conditional move */
 #define fGEN_TCG_VEC_CMOV(PRED) \
     do { \
diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
index 3f6fd3452c..26bcae0395 100644
--- a/target/hexagon/translate.h
+++ b/target/hexagon/translate.h
@@ -68,6 +68,7 @@ typedef struct DisasContext {
     bool is_tight_loop;
     bool need_pkt_has_store_s1;
     bool short_circuit;
+    bool has_hvx_helper;
 } DisasContext;
 
 static inline void ctx_log_pred_write(DisasContext *ctx, int pnum)
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index 8e024b2cd2..267e6453ac 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -378,8 +378,20 @@ static bool need_commit(DisasContext *ctx)
         return true;
     }
 
-    if (pkt->num_insns == 1 && !pkt->pkt_has_hvx) {
-        return false;
+    if (pkt->num_insns == 1) {
+        if (pkt->pkt_has_hvx) {
+            /*
+             * The HVX instructions with generated helpers use
+             * pass-by-reference, so they need the read/write overlap
+             * check below.
+             * The HVX instructions with overrides are OK.
+             */
+            if (!ctx->has_hvx_helper) {
+                return false;
+            }
+        } else {
+            return false;
+        }
     }
 
     /* Check for overlap between register reads and writes */
@@ -454,6 +466,7 @@ static void analyze_packet(DisasContext *ctx)
 {
     Packet *pkt = ctx->pkt;
     ctx->need_pkt_has_store_s1 = false;
+    ctx->has_hvx_helper = false;
     for (int i = 0; i < pkt->num_insns; i++) {
         Insn *insn = &pkt->insn[i];
         ctx->insn = insn;
diff --git a/tests/tcg/hexagon/hvx_misc.c b/tests/tcg/hexagon/hvx_misc.c
index d0e64e035f..c89fe0253d 100644
--- a/tests/tcg/hexagon/hvx_misc.c
+++ b/tests/tcg/hexagon/hvx_misc.c
@@ -454,6 +454,25 @@ static void test_load_cur_predicated(void)
     check_output_w(__LINE__, BUFSIZE);
 }
 
+static void test_vcombine(void)
+{
+    for (int i = 0; i < BUFSIZE / 2; i++) {
+        asm volatile("v2 = vsplat(%0)\n\t"
+                     "v3 = vsplat(%1)\n\t"
+                     "v3:2 = vcombine(v2, v3)\n\t"
+                     "vmem(%2+#0) = v2\n\t"
+                     "vmem(%2+#1) = v3\n\t"
+                     :
+                     : "r"(2 * i), "r"(2 * i + 1), "r"(&output[2 * i])
+                     : "v2", "v3", "memory");
+        for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
+            expect[2 * i].w[j] = 2 * i + 1;
+            expect[2 * i + 1].w[j] = 2 * i;
+        }
+    }
+    check_output_w(__LINE__, BUFSIZE);
+}
+
 int main()
 {
     init_buffers();
@@ -494,6 +513,8 @@ int main()
     test_load_tmp_predicated();
     test_load_cur_predicated();
 
+    test_vcombine();
+
     puts(err ? "FAIL" : "PASS");
     return err ? 1 : 0;
 }
diff --git a/target/hexagon/gen_analyze_funcs.py b/target/hexagon/gen_analyze_funcs.py
index 86aec5ac4b..36da669450 100755
--- a/target/hexagon/gen_analyze_funcs.py
+++ b/target/hexagon/gen_analyze_funcs.py
@@ -212,6 +212,11 @@ def gen_analyze_func(f, tag, regs, imms):
     if has_generated_helper and "A_SCALAR_LOAD" in hex_common.attribdict[tag]:
         f.write("    ctx->need_pkt_has_store_s1 = true;\n")
 
+    ## Mark HVX instructions with generated helpers
+    if (has_generated_helper and
+        "A_CVI" in hex_common.attribdict[tag]):
+        f.write("    ctx->has_hvx_helper = true;\n")
+
     f.write("}\n\n")
 
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 15/21] Hexagon (target/hexagon) Add overrides for disabled idef-parser insns
  2023-04-26  0:42 [PATCH 10/21] Hexagon (target/hexagon) Mark registers as read during packet analysis Taylor Simpson
                   ` (3 preceding siblings ...)
  2023-04-26  0:42 ` [PATCH 14/21] Hexagon (target/hexagon) Short-circuit more HVX single instruction packets Taylor Simpson
@ 2023-04-26  0:42 ` Taylor Simpson
  2023-04-27 10:52   ` Richard Henderson
  2023-04-26  0:42 ` [PATCH 16/21] Hexagon (target/hexagon) Make special new_value for USR Taylor Simpson
                   ` (4 subsequent siblings)
  9 siblings, 1 reply; 21+ messages in thread
From: Taylor Simpson @ 2023-04-26  0:42 UTC (permalink / raw)
  To: qemu-devel
  Cc: tsimpson, richard.henderson, philmd, ale, anjo, bcain,
	quic_mathbern

The following have overrides
    S2_insert
    S2_insert_rp
    S2_asr_r_svw_trun
    A2_swiz

These instructions have semantics that write to the destination
before all the operand reads have been completed.  Therefore,
the idef-parser versions were disabled with the short-circuit patch.

Test cases added to tests/tcg/hexagon/read_write_overlap.c

Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
---
 target/hexagon/gen_tcg.h               |  18 ++++
 target/hexagon/genptr.c                | 100 ++++++++++++++++++
 tests/tcg/hexagon/read_write_overlap.c | 136 +++++++++++++++++++++++++
 tests/tcg/hexagon/Makefile.target      |   1 +
 4 files changed, 255 insertions(+)
 create mode 100644 tests/tcg/hexagon/read_write_overlap.c

diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h
index ec388629b3..63d3d05bcc 100644
--- a/target/hexagon/gen_tcg.h
+++ b/target/hexagon/gen_tcg.h
@@ -1183,6 +1183,24 @@
         tcg_gen_extrl_i64_i32(RdV, tmp); \
     } while (0)
 
+#define fGEN_TCG_S2_insert(SHORTCODE) \
+    do { \
+        int width = uiV; \
+        int offset = UiV; \
+        if (width != 0) { \
+            if (offset + width > 32) { \
+                width = 32 - offset; \
+            } \
+            tcg_gen_deposit_tl(RxV, RxV, RsV, offset, width); \
+        } \
+    } while (0)
+#define fGEN_TCG_S2_insert_rp(SHORTCODE) \
+    gen_insert_rp(ctx, RxV, RsV, RttV)
+#define fGEN_TCG_S2_asr_r_svw_trun(SHORTCODE) \
+    gen_asr_r_svw_trun(ctx, RdV, RssV, RtV)
+#define fGEN_TCG_A2_swiz(SHORTCODE) \
+    tcg_gen_bswap_tl(RdV, RsV)
+
 /* Floating point */
 #define fGEN_TCG_F2_conv_sf2df(SHORTCODE) \
     gen_helper_conv_sf2df(RddV, cpu_env, RsV)
diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c
index 8e5afab931..a4aa3b8cc2 100644
--- a/target/hexagon/genptr.c
+++ b/target/hexagon/genptr.c
@@ -1062,6 +1062,106 @@ static void gen_asl_r_r_sat(DisasContext *ctx, TCGv RdV, TCGv RsV, TCGv RtV)
     gen_set_label(done);
 }
 
+static void gen_insert_rp(DisasContext *ctx, TCGv RxV, TCGv RsV, TCGv_i64 RttV)
+{
+    /*
+     * int width = fZXTN(6, 32, (fGETWORD(1, RttV)));
+     * int offset = fSXTN(7, 32, (fGETWORD(0, RttV)));
+     * size8u_t mask = ((fCONSTLL(1) << width) - 1);
+     * if (offset < 0) {
+     *     RxV = 0;
+     * } else {
+     *     RxV &= ~(mask << offset);
+     *     RxV |= ((RsV & mask) << offset);
+     * }
+     */
+
+    TCGv width = tcg_temp_new();
+    TCGv offset = tcg_temp_new();
+    TCGv_i64 mask = tcg_temp_new_i64();
+    TCGv_i64 result = tcg_temp_new_i64();
+    TCGv_i64 tmp = tcg_temp_new_i64();
+    TCGv_i64 offset64 = tcg_temp_new_i64();
+    TCGLabel *label = gen_new_label();
+    TCGLabel *done = gen_new_label();
+
+    tcg_gen_extrh_i64_i32(width, RttV);
+    tcg_gen_extract_tl(width, width, 0, 6);
+    tcg_gen_extrl_i64_i32(offset, RttV);
+    tcg_gen_sextract_tl(offset, offset, 0, 7);
+    /* Possible values for offset are -64 .. 63 */
+    tcg_gen_brcondi_tl(TCG_COND_GE, offset, 0, label);
+    /* For negative offsets, zero out the result */
+    tcg_gen_movi_tl(RxV, 0);
+    tcg_gen_br(done);
+    gen_set_label(label);
+    /* At this point, possible values of offset are 0 .. 63 */
+    tcg_gen_ext_i32_i64(mask, width);
+    tcg_gen_shl_i64(mask, tcg_constant_i64(1), mask);
+    tcg_gen_subi_i64(mask, mask, 1);
+    tcg_gen_extu_i32_i64(result, RxV);
+    tcg_gen_ext_i32_i64(tmp, offset);
+    tcg_gen_shl_i64(tmp, mask, tmp);
+    tcg_gen_not_i64(tmp, tmp);
+    tcg_gen_and_i64(result, result, tmp);
+    tcg_gen_extu_i32_i64(tmp, RsV);
+    tcg_gen_and_i64(tmp, tmp, mask);
+    tcg_gen_extu_i32_i64(offset64, offset);
+    tcg_gen_shl_i64(tmp, tmp, offset64);
+    tcg_gen_or_i64(result, result, tmp);
+    tcg_gen_extrl_i64_i32(RxV, result);
+    gen_set_label(done);
+}
+
+static void gen_asr_r_svw_trun(DisasContext *ctx, TCGv RdV,
+                               TCGv_i64 RssV, TCGv RtV)
+{
+    /*
+     * for (int i = 0; i < 2; i++) {
+     *     fSETHALF(i, RdV, fGETHALF(0, ((fSXTN(7, 32, RtV) > 0) ?
+     *         (fCAST4_8s(fGETWORD(i, RssV)) >> fSXTN(7, 32, RtV)) :
+     *         (fCAST4_8s(fGETWORD(i, RssV)) << -fSXTN(7, 32, RtV)))));
+     * }
+     */
+    TCGv shift_amt32 = tcg_temp_new();
+    TCGv_i64 shift_amt64 = tcg_temp_new_i64();
+    TCGv_i64 tmp64 = tcg_temp_new_i64();
+    TCGv tmp32 = tcg_temp_new();
+    TCGLabel *label = gen_new_label();
+    TCGLabel *zero = gen_new_label();
+    TCGLabel *done =  gen_new_label();
+
+    tcg_gen_sextract_tl(shift_amt32, RtV, 0, 7);
+    /* Possible values of shift_amt32 are -64 .. 63 */
+    tcg_gen_brcondi_tl(TCG_COND_LE, shift_amt32, 0, label);
+    /* After branch, possible values of shift_amt32 are 1 .. 63 */
+    tcg_gen_ext_i32_i64(shift_amt64, shift_amt32);
+    for (int i = 0; i < 2; i++) {
+        tcg_gen_sextract_i64(tmp64, RssV, i * 32, 32);
+        tcg_gen_sar_i64(tmp64, tmp64, shift_amt64);
+        tcg_gen_extrl_i64_i32(tmp32, tmp64);
+        tcg_gen_deposit_tl(RdV, RdV, tmp32, i * 16, 16);
+    }
+    tcg_gen_br(done);
+    gen_set_label(label);
+    tcg_gen_neg_tl(shift_amt32, shift_amt32);
+    /*At this point, possible values of shift_amt32 are 0 .. 64 */
+    tcg_gen_brcondi_tl(TCG_COND_GT, shift_amt32, 63, zero);
+    /*At this point, possible values of shift_amt32 are 0 .. 63 */
+    tcg_gen_ext_i32_i64(shift_amt64, shift_amt32);
+    for (int i = 0; i < 2; i++) {
+        tcg_gen_sextract_i64(tmp64, RssV, i * 32, 32);
+        tcg_gen_shl_i64(tmp64, tmp64, shift_amt64);
+        tcg_gen_extrl_i64_i32(tmp32, tmp64);
+        tcg_gen_deposit_tl(RdV, RdV, tmp32, i * 16, 16);
+    }
+    tcg_gen_br(done);
+    gen_set_label(zero);
+    /* When the shift_amt is 64, zero out the result */
+    tcg_gen_movi_tl(RdV, 0);
+    gen_set_label(done);
+}
+
 static intptr_t vreg_src_off(DisasContext *ctx, int num)
 {
     intptr_t offset = offsetof(CPUHexagonState, VRegs[num]);
diff --git a/tests/tcg/hexagon/read_write_overlap.c b/tests/tcg/hexagon/read_write_overlap.c
new file mode 100644
index 0000000000..a75fc11dc4
--- /dev/null
+++ b/tests/tcg/hexagon/read_write_overlap.c
@@ -0,0 +1,136 @@
+/*
+ *  Copyright(c) 2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Test instructions where the semantics write to the destination
+ * before all the operand reads have been completed.
+ *
+ * These instructions are problematic when we short-circuit the
+ * register writes because the destination and source operands could
+ * be the same TCGv.
+ *
+ * We test by forcing the read and write to be register r7.
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+int err;
+
+static void __check(const char *filename, int line, int x, int expect)
+{
+    if (x != expect) {
+        printf("ERROR %s:%d - 0x%08x != 0x%08x\n",
+               filename, line, x, expect);
+        err++;
+    }
+}
+
+#define check(x, expect) __check(__FILE__, __LINE__, (x), (expect))
+
+#define insert(RES, X, WIDTH, OFFSET) \
+    asm("r7 = %1\n\t" \
+        "r7 = insert(r7, #" #WIDTH ", #" #OFFSET ")\n\t" \
+        "%0 = r7\n\t" \
+        : "=r"(RES) : "r"(X) : "r7")
+
+static void test_insert(void)
+{
+    uint32_t res;
+
+    insert(res, 0x12345678, 8, 1);
+    check(res, 0x123456f0);
+    insert(res, 0x12345678, 0, 1);
+    check(res, 0x12345678);
+    insert(res, 0x12345678, 20, 16);
+    check(res, 0x56785678);
+}
+
+static inline uint32_t insert_rp(uint32_t x, uint32_t width, uint32_t offset)
+{
+    uint64_t width_offset = (uint64_t)width << 32 | offset;
+    uint32_t res;
+    asm("r7 = %1\n\t"
+        "r7 = insert(r7, %2)\n\t"
+        "%0 = r7\n\t"
+        : "=r"(res) : "r"(x), "r"(width_offset) : "r7");
+    return res;
+
+}
+
+static void test_insert_rp(void)
+{
+    check(insert_rp(0x12345678,   8,  1), 0x123456f0);
+    check(insert_rp(0x12345678,  63,  8), 0x34567878);
+    check(insert_rp(0x12345678, 127,  8), 0x34567878);
+    check(insert_rp(0x12345678,   8, 24), 0x78345678);
+    check(insert_rp(0x12345678,   8, 63), 0x12345678);
+    check(insert_rp(0x12345678,   8, 64), 0x00000000);
+}
+
+static inline uint32_t asr_r_svw_trun(uint64_t x, uint32_t y)
+{
+    uint32_t res;
+    asm("r7 = %2\n\t"
+        "r7 = vasrw(%1, r7)\n\t"
+        "%0 = r7\n\t"
+        : "=r"(res) : "r"(x), "r"(y) : "r7");
+    return res;
+}
+
+static void test_asr_r_svw_trun(void)
+{
+    check(asr_r_svw_trun(0x1111111122222222ULL, 5),
+          0x88881111);
+    check(asr_r_svw_trun(0x1111111122222222ULL, 63),
+          0x00000000);
+    check(asr_r_svw_trun(0x1111111122222222ULL, 64),
+          0x00000000);
+    check(asr_r_svw_trun(0x1111111122222222ULL, 127),
+          0x22224444);
+    check(asr_r_svw_trun(0x1111111122222222ULL, 128),
+          0x11112222);
+    check(asr_r_svw_trun(0xffffffff22222222ULL, 128),
+          0xffff2222);
+}
+
+static inline uint32_t swiz(uint32_t x)
+{
+    uint32_t res;
+    asm("r7 = %1\n\t"
+        "r7 = swiz(r7)\n\t"
+        "%0 = r7\n\t"
+        : "=r"(res) : "r"(x) : "r7");
+    return res;
+}
+
+static void test_swiz(void)
+{
+    check(swiz(0x11223344), 0x44332211);
+}
+
+int main()
+{
+    test_insert();
+    test_insert_rp();
+    test_asr_r_svw_trun();
+    test_swiz();
+
+    puts(err ? "FAIL" : "PASS");
+    return err ? EXIT_FAILURE : EXIT_SUCCESS;
+}
diff --git a/tests/tcg/hexagon/Makefile.target b/tests/tcg/hexagon/Makefile.target
index 7c94db4bc4..d8d3793732 100644
--- a/tests/tcg/hexagon/Makefile.target
+++ b/tests/tcg/hexagon/Makefile.target
@@ -45,6 +45,7 @@ HEX_TESTS += fpstuff
 HEX_TESTS += overflow
 HEX_TESTS += signal_context
 HEX_TESTS += reg_mut
+HEX_TESTS += read_write_overlap
 HEX_TESTS += vector_add_int
 HEX_TESTS += scatter_gather
 HEX_TESTS += hvx_misc
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 16/21] Hexagon (target/hexagon) Make special new_value for USR
  2023-04-26  0:42 [PATCH 10/21] Hexagon (target/hexagon) Mark registers as read during packet analysis Taylor Simpson
                   ` (4 preceding siblings ...)
  2023-04-26  0:42 ` [PATCH 15/21] Hexagon (target/hexagon) Add overrides for disabled idef-parser insns Taylor Simpson
@ 2023-04-26  0:42 ` Taylor Simpson
  2023-04-27 10:57   ` Richard Henderson
  2023-04-26  0:42 ` [PATCH 17/21] Hexagon (target/hexagon) Move new_value to DisasContext Taylor Simpson
                   ` (3 subsequent siblings)
  9 siblings, 1 reply; 21+ messages in thread
From: Taylor Simpson @ 2023-04-26  0:42 UTC (permalink / raw)
  To: qemu-devel
  Cc: tsimpson, richard.henderson, philmd, ale, anjo, bcain,
	quic_mathbern

Precursor to moving new_value from the global state to DisasContext

USR will need to stay in the global state because some helpers will
set it's value

Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
---
 target/hexagon/cpu.h            |  1 +
 target/hexagon/genptr.h         |  1 +
 target/hexagon/macros.h         |  2 +-
 target/hexagon/translate.h      |  1 +
 target/hexagon/genptr.c         |  8 ++++++--
 target/hexagon/translate.c      | 22 +++++++++++++++-------
 target/hexagon/README           |  2 +-
 target/hexagon/gen_tcg_funcs.py |  2 +-
 8 files changed, 27 insertions(+), 12 deletions(-)

diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h
index 9252055a38..3687f2caa2 100644
--- a/target/hexagon/cpu.h
+++ b/target/hexagon/cpu.h
@@ -86,6 +86,7 @@ typedef struct CPUArchState {
 
     uint8_t slot_cancelled;
     target_ulong new_value[TOTAL_PER_THREAD_REGS];
+    target_ulong new_value_usr;
 
     /*
      * Only used when HEX_DEBUG is on, but unconditionally included
diff --git a/target/hexagon/genptr.h b/target/hexagon/genptr.h
index e11ccc2358..a4b43c2910 100644
--- a/target/hexagon/genptr.h
+++ b/target/hexagon/genptr.h
@@ -35,6 +35,7 @@ void gen_store4i(TCGv_env cpu_env, TCGv vaddr, int32_t src, uint32_t slot);
 void gen_store8i(TCGv_env cpu_env, TCGv vaddr, int64_t src, uint32_t slot);
 TCGv gen_read_reg(TCGv result, int num);
 TCGv gen_read_preg(TCGv pred, uint8_t num);
+TCGv get_result_gpr(DisasContext *ctx, int rnum);
 TCGv get_result_pred(DisasContext *ctx, int pnum);
 void gen_log_reg_write(DisasContext *ctx, int rnum, TCGv val);
 void gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val);
diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h
index a68446a367..27172193a0 100644
--- a/target/hexagon/macros.h
+++ b/target/hexagon/macros.h
@@ -46,7 +46,7 @@
 #define SET_USR_FIELD(FIELD, VAL) \
     do { \
         if (pkt_need_commit) { \
-            fINSERT_BITS(env->new_value[HEX_REG_USR], \
+            fINSERT_BITS(env->new_value_usr, \
                         reg_field_info[FIELD].width, \
                         reg_field_info[FIELD].offset, (VAL)); \
         } else { \
diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
index 26bcae0395..4c17433a6f 100644
--- a/target/hexagon/translate.h
+++ b/target/hexagon/translate.h
@@ -191,6 +191,7 @@ extern TCGv hex_this_PC;
 extern TCGv hex_slot_cancelled;
 extern TCGv hex_branch_taken;
 extern TCGv hex_new_value[TOTAL_PER_THREAD_REGS];
+extern TCGv hex_new_value_usr;
 extern TCGv hex_reg_written[TOTAL_PER_THREAD_REGS];
 extern TCGv hex_new_pred_value[NUM_PREGS];
 extern TCGv hex_pred_written;
diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c
index a4aa3b8cc2..e6327425cd 100644
--- a/target/hexagon/genptr.c
+++ b/target/hexagon/genptr.c
@@ -68,10 +68,14 @@ static inline void gen_masked_reg_write(TCGv new_val, TCGv cur_val,
     }
 }
 
-static TCGv get_result_gpr(DisasContext *ctx, int rnum)
+TCGv get_result_gpr(DisasContext *ctx, int rnum)
 {
     if (ctx->need_commit) {
-        return hex_new_value[rnum];
+        if (rnum == HEX_REG_USR) {
+            return hex_new_value_usr;
+        } else {
+            return hex_new_value[rnum];
+        }
     } else {
         return hex_gpr[rnum];
     }
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index 267e6453ac..0afb3a0993 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -45,6 +45,7 @@ TCGv hex_this_PC;
 TCGv hex_slot_cancelled;
 TCGv hex_branch_taken;
 TCGv hex_new_value[TOTAL_PER_THREAD_REGS];
+TCGv hex_new_value_usr;
 TCGv hex_reg_written[TOTAL_PER_THREAD_REGS];
 TCGv hex_new_pred_value[NUM_PREGS];
 TCGv hex_pred_written;
@@ -547,12 +548,12 @@ static void gen_start_packet(DisasContext *ctx)
         tcg_gen_movi_tl(hex_pred_written, 0);
     }
 
-    /* Preload the predicated registers into hex_new_value[i] */
+    /* Preload the predicated registers into get_result_gpr(ctx, i) */
     if (ctx->need_commit &&
         !bitmap_empty(ctx->predicated_regs, TOTAL_PER_THREAD_REGS)) {
         int i = find_first_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS);
         while (i < TOTAL_PER_THREAD_REGS) {
-            tcg_gen_mov_tl(hex_new_value[i], hex_gpr[i]);
+            tcg_gen_mov_tl(get_result_gpr(ctx, i), hex_gpr[i]);
             i = find_next_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS,
                               i + 1);
         }
@@ -664,7 +665,7 @@ static void gen_reg_writes(DisasContext *ctx)
     for (i = 0; i < ctx->reg_log_idx; i++) {
         int reg_num = ctx->reg_log[i];
 
-        tcg_gen_mov_tl(hex_gpr[reg_num], hex_new_value[reg_num]);
+        tcg_gen_mov_tl(hex_gpr[reg_num], get_result_gpr(ctx, reg_num));
 
         /*
          * ctx->is_tight_loop is set when SA0 points to the beginning of the TB.
@@ -1177,10 +1178,14 @@ void hexagon_translate_init(void)
             offsetof(CPUHexagonState, gpr[i]),
             hexagon_regnames[i]);
 
-        snprintf(new_value_names[i], NAME_LEN, "new_%s", hexagon_regnames[i]);
-        hex_new_value[i] = tcg_global_mem_new(cpu_env,
-            offsetof(CPUHexagonState, new_value[i]),
-            new_value_names[i]);
+        if (i == HEX_REG_USR) {
+            hex_new_value[i] = NULL;
+        } else {
+            snprintf(new_value_names[i], NAME_LEN, "new_%s", hexagon_regnames[i]);
+            hex_new_value[i] = tcg_global_mem_new(cpu_env,
+                offsetof(CPUHexagonState, new_value[i]),
+                new_value_names[i]);
+        }
 
         if (HEX_DEBUG) {
             snprintf(reg_written_names[i], NAME_LEN, "reg_written_%s",
@@ -1190,6 +1195,9 @@ void hexagon_translate_init(void)
                 reg_written_names[i]);
         }
     }
+    hex_new_value_usr = tcg_global_mem_new(cpu_env,
+        offsetof(CPUHexagonState, new_value_usr), "new_value_usr");
+
     for (i = 0; i < NUM_PREGS; i++) {
         hex_pred[i] = tcg_global_mem_new(cpu_env,
             offsetof(CPUHexagonState, pred[i]),
diff --git a/target/hexagon/README b/target/hexagon/README
index fe90df63e8..a9a517cfc8 100644
--- a/target/hexagon/README
+++ b/target/hexagon/README
@@ -186,7 +186,7 @@ We also generate an analyze_<tag> function for each instruction.  Currently,
 these functions record the writes to registers by calling ctx_log_*.  During
 gen_start_packet, we invoke the analyze_<tag> function for each instruction in
 the packet, and we mark the implicit writes.  After the analysis is performed,
-we initialize hex_new_value for each of the predicated assignments.
+we initialize the result register for each of the predicated assignments.
 
 In addition to instruction semantics, we use a generator to create the decode
 tree.  This generation is also a two step process.  The first step is to run
diff --git a/target/hexagon/gen_tcg_funcs.py b/target/hexagon/gen_tcg_funcs.py
index 0e45d43685..a36117d57f 100755
--- a/target/hexagon/gen_tcg_funcs.py
+++ b/target/hexagon/gen_tcg_funcs.py
@@ -190,7 +190,7 @@ def genptr_decl_new(f, tag, regtype, regid, regno):
         if regid in {"s", "t"}:
             f.write(
                 f"    TCGv {regtype}{regid}N = "
-                f"hex_new_value[insn->regno[{regno}]];\n"
+                f"get_result_gpr(ctx, insn->regno[{regno}]);\n"
             )
         else:
             print("Bad register parse: ", regtype, regid)
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 17/21] Hexagon (target/hexagon) Move new_value to DisasContext
  2023-04-26  0:42 [PATCH 10/21] Hexagon (target/hexagon) Mark registers as read during packet analysis Taylor Simpson
                   ` (5 preceding siblings ...)
  2023-04-26  0:42 ` [PATCH 16/21] Hexagon (target/hexagon) Make special new_value for USR Taylor Simpson
@ 2023-04-26  0:42 ` Taylor Simpson
  2023-04-27 11:01   ` Richard Henderson
  2023-04-26  0:42 ` [PATCH 18/21] Hexagon (target/hexagon) Move new_pred_value " Taylor Simpson
                   ` (2 subsequent siblings)
  9 siblings, 1 reply; 21+ messages in thread
From: Taylor Simpson @ 2023-04-26  0:42 UTC (permalink / raw)
  To: qemu-devel
  Cc: tsimpson, richard.henderson, philmd, ale, anjo, bcain,
	quic_mathbern

The new_value array in the CPUHexagonState is only used for bookkeeping
within the translation of a packet.  With recent changes that eliminate
the need to free TCGv variables, these make more sense to be transient
and kept in DisasContext.

Suggested-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
---
 target/hexagon/cpu.h       |  1 -
 target/hexagon/translate.h |  2 +-
 target/hexagon/genptr.c    |  6 +++++-
 target/hexagon/translate.c | 14 +++-----------
 4 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h
index 3687f2caa2..22aba20be2 100644
--- a/target/hexagon/cpu.h
+++ b/target/hexagon/cpu.h
@@ -85,7 +85,6 @@ typedef struct CPUArchState {
     target_ulong stack_start;
 
     uint8_t slot_cancelled;
-    target_ulong new_value[TOTAL_PER_THREAD_REGS];
     target_ulong new_value_usr;
 
     /*
diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
index 4c17433a6f..6dde487566 100644
--- a/target/hexagon/translate.h
+++ b/target/hexagon/translate.h
@@ -69,6 +69,7 @@ typedef struct DisasContext {
     bool need_pkt_has_store_s1;
     bool short_circuit;
     bool has_hvx_helper;
+    TCGv new_value[TOTAL_PER_THREAD_REGS];
 } DisasContext;
 
 static inline void ctx_log_pred_write(DisasContext *ctx, int pnum)
@@ -190,7 +191,6 @@ extern TCGv hex_pred[NUM_PREGS];
 extern TCGv hex_this_PC;
 extern TCGv hex_slot_cancelled;
 extern TCGv hex_branch_taken;
-extern TCGv hex_new_value[TOTAL_PER_THREAD_REGS];
 extern TCGv hex_new_value_usr;
 extern TCGv hex_reg_written[TOTAL_PER_THREAD_REGS];
 extern TCGv hex_new_pred_value[NUM_PREGS];
diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c
index e6327425cd..47d472f586 100644
--- a/target/hexagon/genptr.c
+++ b/target/hexagon/genptr.c
@@ -74,7 +74,11 @@ TCGv get_result_gpr(DisasContext *ctx, int rnum)
         if (rnum == HEX_REG_USR) {
             return hex_new_value_usr;
         } else {
-            return hex_new_value[rnum];
+            if (ctx->new_value[rnum] == NULL) {
+                ctx->new_value[rnum] = tcg_temp_new();
+                tcg_gen_movi_tl(ctx->new_value[rnum], 0);
+            }
+            return ctx->new_value[rnum];
         }
     } else {
         return hex_gpr[rnum];
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index 0afb3a0993..3d6b22a577 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -44,7 +44,6 @@ TCGv hex_pred[NUM_PREGS];
 TCGv hex_this_PC;
 TCGv hex_slot_cancelled;
 TCGv hex_branch_taken;
-TCGv hex_new_value[TOTAL_PER_THREAD_REGS];
 TCGv hex_new_value_usr;
 TCGv hex_reg_written[TOTAL_PER_THREAD_REGS];
 TCGv hex_new_pred_value[NUM_PREGS];
@@ -513,6 +512,9 @@ static void gen_start_packet(DisasContext *ctx)
     }
     ctx->s1_store_processed = false;
     ctx->pre_commit = true;
+    for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) {
+        ctx->new_value[i] = NULL;
+    }
 
     analyze_packet(ctx);
 
@@ -1156,7 +1158,6 @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
 }
 
 #define NAME_LEN               64
-static char new_value_names[TOTAL_PER_THREAD_REGS][NAME_LEN];
 static char reg_written_names[TOTAL_PER_THREAD_REGS][NAME_LEN];
 static char new_pred_value_names[NUM_PREGS][NAME_LEN];
 static char store_addr_names[STORES_MAX][NAME_LEN];
@@ -1178,15 +1179,6 @@ void hexagon_translate_init(void)
             offsetof(CPUHexagonState, gpr[i]),
             hexagon_regnames[i]);
 
-        if (i == HEX_REG_USR) {
-            hex_new_value[i] = NULL;
-        } else {
-            snprintf(new_value_names[i], NAME_LEN, "new_%s", hexagon_regnames[i]);
-            hex_new_value[i] = tcg_global_mem_new(cpu_env,
-                offsetof(CPUHexagonState, new_value[i]),
-                new_value_names[i]);
-        }
-
         if (HEX_DEBUG) {
             snprintf(reg_written_names[i], NAME_LEN, "reg_written_%s",
                      hexagon_regnames[i]);
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 18/21] Hexagon (target/hexagon) Move new_pred_value to DisasContext
  2023-04-26  0:42 [PATCH 10/21] Hexagon (target/hexagon) Mark registers as read during packet analysis Taylor Simpson
                   ` (6 preceding siblings ...)
  2023-04-26  0:42 ` [PATCH 17/21] Hexagon (target/hexagon) Move new_value to DisasContext Taylor Simpson
@ 2023-04-26  0:42 ` Taylor Simpson
  2023-04-27 11:03   ` Richard Henderson
  2023-04-26  0:42 ` [PATCH 19/21] Hexagon (target/hexagon) Move pred_written " Taylor Simpson
  2023-04-27  8:04 ` [PATCH 10/21] Hexagon (target/hexagon) Mark registers as read during packet analysis Richard Henderson
  9 siblings, 1 reply; 21+ messages in thread
From: Taylor Simpson @ 2023-04-26  0:42 UTC (permalink / raw)
  To: qemu-devel
  Cc: tsimpson, richard.henderson, philmd, ale, anjo, bcain,
	quic_mathbern

The new_pred_value array in the CPUHexagonState is only used for
bookkeeping within the translation of a packet.  With recent changes
that eliminate the need to free TCGv variables, these make more sense
to be transient and kept in DisasContext.

Suggested-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
---
 target/hexagon/cpu.h                        |  1 -
 target/hexagon/gen_tcg.h                    | 12 ++++++------
 target/hexagon/translate.h                  |  2 +-
 target/hexagon/genptr.c                     | 10 +++++++---
 target/hexagon/idef-parser/parser-helpers.c |  2 +-
 target/hexagon/op_helper.c                  |  2 +-
 target/hexagon/translate.c                  | 16 ++++++----------
 target/hexagon/gen_tcg_funcs.py             |  2 +-
 8 files changed, 23 insertions(+), 24 deletions(-)

diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h
index 22aba20be2..8ce2ceeee4 100644
--- a/target/hexagon/cpu.h
+++ b/target/hexagon/cpu.h
@@ -94,7 +94,6 @@ typedef struct CPUArchState {
     target_ulong this_PC;
     target_ulong reg_written[TOTAL_PER_THREAD_REGS];
 
-    target_ulong new_pred_value[NUM_PREGS];
     target_ulong pred_written;
 
     MemLog mem_log_stores[STORES_MAX];
diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h
index 63d3d05bcc..32a6d9bd7b 100644
--- a/target/hexagon/gen_tcg.h
+++ b/target/hexagon/gen_tcg.h
@@ -581,9 +581,9 @@
 #define fGEN_TCG_SL2_return_f(SHORTCODE) \
     gen_cond_return_subinsn(ctx, TCG_COND_NE, hex_pred[0])
 #define fGEN_TCG_SL2_return_tnew(SHORTCODE) \
-    gen_cond_return_subinsn(ctx, TCG_COND_EQ, hex_new_pred_value[0])
+    gen_cond_return_subinsn(ctx, TCG_COND_EQ, ctx->new_pred_value[0])
 #define fGEN_TCG_SL2_return_fnew(SHORTCODE) \
-    gen_cond_return_subinsn(ctx, TCG_COND_NE, hex_new_pred_value[0])
+    gen_cond_return_subinsn(ctx, TCG_COND_NE, ctx->new_pred_value[0])
 
 /*
  * Mathematical operations with more than one definition require
@@ -1118,7 +1118,7 @@
 #define fGEN_TCG_SA1_clrtnew(SHORTCODE) \
     do { \
         TCGLabel *skip = gen_new_label(); \
-        tcg_gen_brcondi_tl(TCG_COND_EQ, hex_new_pred_value[0], 0, skip); \
+        tcg_gen_brcondi_tl(TCG_COND_EQ, ctx->new_pred_value[0], 0, skip); \
         tcg_gen_movi_tl(RdV, 0); \
         gen_set_label(skip); \
     } while (0)
@@ -1127,7 +1127,7 @@
 #define fGEN_TCG_SA1_clrfnew(SHORTCODE) \
     do { \
         TCGLabel *skip = gen_new_label(); \
-        tcg_gen_brcondi_tl(TCG_COND_NE, hex_new_pred_value[0], 0, skip); \
+        tcg_gen_brcondi_tl(TCG_COND_NE, ctx->new_pred_value[0], 0, skip); \
         tcg_gen_movi_tl(RdV, 0); \
         gen_set_label(skip); \
     } while (0)
@@ -1155,9 +1155,9 @@
     gen_cond_jumpr31(ctx, TCG_COND_NE, hex_pred[0])
 
 #define fGEN_TCG_SL2_jumpr31_tnew(SHORTCODE) \
-    gen_cond_jumpr31(ctx, TCG_COND_EQ, hex_new_pred_value[0])
+    gen_cond_jumpr31(ctx, TCG_COND_EQ, ctx->new_pred_value[0])
 #define fGEN_TCG_SL2_jumpr31_fnew(SHORTCODE) \
-    gen_cond_jumpr31(ctx, TCG_COND_NE, hex_new_pred_value[0])
+    gen_cond_jumpr31(ctx, TCG_COND_NE, ctx->new_pred_value[0])
 
 /* Count trailing zeros/ones */
 #define fGEN_TCG_S2_ct0(SHORTCODE) \
diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
index 6dde487566..fdfa1b6fe3 100644
--- a/target/hexagon/translate.h
+++ b/target/hexagon/translate.h
@@ -70,6 +70,7 @@ typedef struct DisasContext {
     bool short_circuit;
     bool has_hvx_helper;
     TCGv new_value[TOTAL_PER_THREAD_REGS];
+    TCGv new_pred_value[NUM_PREGS];
 } DisasContext;
 
 static inline void ctx_log_pred_write(DisasContext *ctx, int pnum)
@@ -193,7 +194,6 @@ extern TCGv hex_slot_cancelled;
 extern TCGv hex_branch_taken;
 extern TCGv hex_new_value_usr;
 extern TCGv hex_reg_written[TOTAL_PER_THREAD_REGS];
-extern TCGv hex_new_pred_value[NUM_PREGS];
 extern TCGv hex_pred_written;
 extern TCGv hex_store_addr[STORES_MAX];
 extern TCGv hex_store_width[STORES_MAX];
diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c
index 47d472f586..e58054e7c9 100644
--- a/target/hexagon/genptr.c
+++ b/target/hexagon/genptr.c
@@ -121,7 +121,11 @@ static void gen_log_reg_write_pair(DisasContext *ctx, int rnum, TCGv_i64 val)
 TCGv get_result_pred(DisasContext *ctx, int pnum)
 {
     if (ctx->need_commit) {
-        return hex_new_pred_value[pnum];
+        if (ctx->new_pred_value[pnum] == NULL) {
+            ctx->new_pred_value[pnum] = tcg_temp_new();
+            tcg_gen_movi_tl(ctx->new_pred_value[pnum], 0);
+        }
+        return ctx->new_pred_value[pnum];
     } else {
         return hex_pred[pnum];
     }
@@ -607,7 +611,7 @@ static void gen_cmpnd_cmp_jmp(DisasContext *ctx,
         gen_log_pred_write(ctx, pnum, pred);
     } else {
         TCGv pred = tcg_temp_new();
-        tcg_gen_mov_tl(pred, hex_new_pred_value[pnum]);
+        tcg_gen_mov_tl(pred, ctx->new_pred_value[pnum]);
         gen_cond_jump(ctx, cond2, pred, pc_off);
     }
 }
@@ -664,7 +668,7 @@ static void gen_cmpnd_tstbit0_jmp(DisasContext *ctx,
         gen_log_pred_write(ctx, pnum, pred);
     } else {
         TCGv pred = tcg_temp_new();
-        tcg_gen_mov_tl(pred, hex_new_pred_value[pnum]);
+        tcg_gen_mov_tl(pred, ctx->new_pred_value[pnum]);
         gen_cond_jump(ctx, cond, pred, pc_off);
     }
 }
diff --git a/target/hexagon/idef-parser/parser-helpers.c b/target/hexagon/idef-parser/parser-helpers.c
index ae0f60ada4..75c3b3efed 100644
--- a/target/hexagon/idef-parser/parser-helpers.c
+++ b/target/hexagon/idef-parser/parser-helpers.c
@@ -1856,7 +1856,7 @@ HexValue gen_rvalue_pred(Context *c, YYLTYPE *locp, HexValue *pred)
         *pred = gen_tmp(c, locp, 32, UNSIGNED);
         if (is_dotnew) {
             OUT(c, locp, "tcg_gen_mov_i32(", pred,
-                ", hex_new_pred_value[");
+                ", ctx->new_pred_value[");
             OUT(c, locp, pred_str, "]);\n");
         } else {
             OUT(c, locp, "gen_read_preg(", pred, ", ", pred_str, ");\n");
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
index fc5c30a141..26fba9f5d6 100644
--- a/target/hexagon/op_helper.c
+++ b/target/hexagon/op_helper.c
@@ -231,7 +231,7 @@ void HELPER(debug_commit_end)(CPUHexagonState *env, int has_st0, int has_st1)
                 pred_printed = true;
             }
             HEX_DEBUG_LOG("\tp%d = 0x" TARGET_FMT_lx "\n",
-                          i, env->new_pred_value[i]);
+                          i, env->pred[i]);
         }
     }
 
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index 3d6b22a577..c2e6557957 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -46,7 +46,6 @@ TCGv hex_slot_cancelled;
 TCGv hex_branch_taken;
 TCGv hex_new_value_usr;
 TCGv hex_reg_written[TOTAL_PER_THREAD_REGS];
-TCGv hex_new_pred_value[NUM_PREGS];
 TCGv hex_pred_written;
 TCGv hex_store_addr[STORES_MAX];
 TCGv hex_store_width[STORES_MAX];
@@ -515,6 +514,9 @@ static void gen_start_packet(DisasContext *ctx)
     for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) {
         ctx->new_value[i] = NULL;
     }
+    for (i = 0; i < NUM_PREGS; i++) {
+        ctx->new_pred_value[i] = NULL;
+    }
 
     analyze_packet(ctx);
 
@@ -568,7 +570,8 @@ static void gen_start_packet(DisasContext *ctx)
     if (ctx->need_commit && pkt->pkt_has_endloop) {
         for (int i = 0; i < ctx->preg_log_idx; i++) {
             int pred_num = ctx->preg_log[i];
-            tcg_gen_mov_tl(hex_new_pred_value[pred_num], hex_pred[pred_num]);
+            ctx->new_pred_value[pred_num] = tcg_temp_new();
+            tcg_gen_mov_tl(ctx->new_pred_value[pred_num], hex_pred[pred_num]);
         }
     }
 
@@ -688,7 +691,7 @@ static void gen_pred_writes(DisasContext *ctx)
 
     for (int i = 0; i < ctx->preg_log_idx; i++) {
         int pred_num = ctx->preg_log[i];
-        tcg_gen_mov_tl(hex_pred[pred_num], hex_new_pred_value[pred_num]);
+        tcg_gen_mov_tl(hex_pred[pred_num], ctx->new_pred_value[pred_num]);
     }
 }
 
@@ -1159,7 +1162,6 @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
 
 #define NAME_LEN               64
 static char reg_written_names[TOTAL_PER_THREAD_REGS][NAME_LEN];
-static char new_pred_value_names[NUM_PREGS][NAME_LEN];
 static char store_addr_names[STORES_MAX][NAME_LEN];
 static char store_width_names[STORES_MAX][NAME_LEN];
 static char store_val32_names[STORES_MAX][NAME_LEN];
@@ -1194,12 +1196,6 @@ void hexagon_translate_init(void)
         hex_pred[i] = tcg_global_mem_new(cpu_env,
             offsetof(CPUHexagonState, pred[i]),
             hexagon_prednames[i]);
-
-        snprintf(new_pred_value_names[i], NAME_LEN, "new_pred_%s",
-                 hexagon_prednames[i]);
-        hex_new_pred_value[i] = tcg_global_mem_new(cpu_env,
-            offsetof(CPUHexagonState, new_pred_value[i]),
-            new_pred_value_names[i]);
     }
     hex_pred_written = tcg_global_mem_new(cpu_env,
         offsetof(CPUHexagonState, pred_written), "pred_written");
diff --git a/target/hexagon/gen_tcg_funcs.py b/target/hexagon/gen_tcg_funcs.py
index a36117d57f..0403547387 100755
--- a/target/hexagon/gen_tcg_funcs.py
+++ b/target/hexagon/gen_tcg_funcs.py
@@ -198,7 +198,7 @@ def genptr_decl_new(f, tag, regtype, regid, regno):
         if regid in {"t", "u", "v"}:
             f.write(
                 f"    TCGv {regtype}{regid}N = "
-                f"hex_new_pred_value[insn->regno[{regno}]];\n"
+                f"ctx->new_pred_value[insn->regno[{regno}]];\n"
             )
         else:
             print("Bad register parse: ", regtype, regid)
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 19/21] Hexagon (target/hexagon) Move pred_written to DisasContext
  2023-04-26  0:42 [PATCH 10/21] Hexagon (target/hexagon) Mark registers as read during packet analysis Taylor Simpson
                   ` (7 preceding siblings ...)
  2023-04-26  0:42 ` [PATCH 18/21] Hexagon (target/hexagon) Move new_pred_value " Taylor Simpson
@ 2023-04-26  0:42 ` Taylor Simpson
  2023-04-27 11:05   ` Richard Henderson
  2023-04-27  8:04 ` [PATCH 10/21] Hexagon (target/hexagon) Mark registers as read during packet analysis Richard Henderson
  9 siblings, 1 reply; 21+ messages in thread
From: Taylor Simpson @ 2023-04-26  0:42 UTC (permalink / raw)
  To: qemu-devel
  Cc: tsimpson, richard.henderson, philmd, ale, anjo, bcain,
	quic_mathbern

The pred_written variable in the CPUHexagonState is only used for
bookkeeping within the translation of a packet.  With recent changes
that eliminate the need to free TCGv variables, these make more sense
to be transient and kept in DisasContext.

Suggested-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
---
 target/hexagon/cpu.h       | 2 --
 target/hexagon/helper.h    | 2 +-
 target/hexagon/translate.h | 2 +-
 target/hexagon/genptr.c    | 2 +-
 target/hexagon/op_helper.c | 5 +++--
 target/hexagon/translate.c | 9 ++++-----
 6 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h
index 8ce2ceeee4..26952cddcb 100644
--- a/target/hexagon/cpu.h
+++ b/target/hexagon/cpu.h
@@ -94,8 +94,6 @@ typedef struct CPUArchState {
     target_ulong this_PC;
     target_ulong reg_written[TOTAL_PER_THREAD_REGS];
 
-    target_ulong pred_written;
-
     MemLog mem_log_stores[STORES_MAX];
     target_ulong pkt_has_store_s1;
     target_ulong dczero_addr;
diff --git a/target/hexagon/helper.h b/target/hexagon/helper.h
index 4b750d0351..f3b298beee 100644
--- a/target/hexagon/helper.h
+++ b/target/hexagon/helper.h
@@ -21,7 +21,7 @@
 DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_RETURN, noreturn, env, i32)
 DEF_HELPER_1(debug_start_packet, void, env)
 DEF_HELPER_FLAGS_3(debug_check_store_width, TCG_CALL_NO_WG, void, env, int, int)
-DEF_HELPER_FLAGS_3(debug_commit_end, TCG_CALL_NO_WG, void, env, int, int)
+DEF_HELPER_FLAGS_4(debug_commit_end, TCG_CALL_NO_WG, void, env, int, int, int)
 DEF_HELPER_2(commit_store, void, env, int)
 DEF_HELPER_3(gather_store, void, env, i32, int)
 DEF_HELPER_1(commit_hvx_stores, void, env)
diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
index fdfa1b6fe3..a9f1ccee24 100644
--- a/target/hexagon/translate.h
+++ b/target/hexagon/translate.h
@@ -71,6 +71,7 @@ typedef struct DisasContext {
     bool has_hvx_helper;
     TCGv new_value[TOTAL_PER_THREAD_REGS];
     TCGv new_pred_value[NUM_PREGS];
+    TCGv pred_written;
 } DisasContext;
 
 static inline void ctx_log_pred_write(DisasContext *ctx, int pnum)
@@ -194,7 +195,6 @@ extern TCGv hex_slot_cancelled;
 extern TCGv hex_branch_taken;
 extern TCGv hex_new_value_usr;
 extern TCGv hex_reg_written[TOTAL_PER_THREAD_REGS];
-extern TCGv hex_pred_written;
 extern TCGv hex_store_addr[STORES_MAX];
 extern TCGv hex_store_width[STORES_MAX];
 extern TCGv hex_store_val32[STORES_MAX];
diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c
index e58054e7c9..b2fa91c5a3 100644
--- a/target/hexagon/genptr.c
+++ b/target/hexagon/genptr.c
@@ -151,7 +151,7 @@ void gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val)
         tcg_gen_and_tl(pred, pred, base_val);
     }
     if (HEX_DEBUG) {
-        tcg_gen_ori_tl(hex_pred_written, hex_pred_written, 1 << pnum);
+        tcg_gen_ori_tl(ctx->pred_written, ctx->pred_written, 1 << pnum);
     }
     set_bit(pnum, ctx->pregs_written);
 }
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
index 26fba9f5d6..f9021efc7e 100644
--- a/target/hexagon/op_helper.c
+++ b/target/hexagon/op_helper.c
@@ -203,7 +203,8 @@ static void print_store(CPUHexagonState *env, int slot)
 }
 
 /* This function is a handy place to set a breakpoint */
-void HELPER(debug_commit_end)(CPUHexagonState *env, int has_st0, int has_st1)
+void HELPER(debug_commit_end)(CPUHexagonState *env,
+                              int pred_written, int has_st0, int has_st1)
 {
     bool reg_printed = false;
     bool pred_printed = false;
@@ -225,7 +226,7 @@ void HELPER(debug_commit_end)(CPUHexagonState *env, int has_st0, int has_st1)
     }
 
     for (i = 0; i < NUM_PREGS; i++) {
-        if (env->pred_written & (1 << i)) {
+        if (pred_written & (1 << i)) {
             if (!pred_printed) {
                 HEX_DEBUG_LOG("Predicates written\n");
                 pred_printed = true;
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index c2e6557957..7636c604c9 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -46,7 +46,6 @@ TCGv hex_slot_cancelled;
 TCGv hex_branch_taken;
 TCGv hex_new_value_usr;
 TCGv hex_reg_written[TOTAL_PER_THREAD_REGS];
-TCGv hex_pred_written;
 TCGv hex_store_addr[STORES_MAX];
 TCGv hex_store_width[STORES_MAX];
 TCGv hex_store_val32[STORES_MAX];
@@ -549,7 +548,8 @@ static void gen_start_packet(DisasContext *ctx)
         }
     }
     if (HEX_DEBUG) {
-        tcg_gen_movi_tl(hex_pred_written, 0);
+        ctx->pred_written = tcg_temp_new();
+        tcg_gen_movi_tl(ctx->pred_written, 0);
     }
 
     /* Preload the predicated registers into get_result_gpr(ctx, i) */
@@ -1004,7 +1004,8 @@ static void gen_commit_packet(DisasContext *ctx)
             tcg_constant_tl(pkt->pkt_has_store_s1 && !pkt->pkt_has_dczeroa);
 
         /* Handy place to set a breakpoint at the end of execution */
-        gen_helper_debug_commit_end(cpu_env, has_st0, has_st1);
+        gen_helper_debug_commit_end(cpu_env, ctx->pred_written,
+                                    has_st0, has_st1);
     }
 
     if (pkt->vhist_insn != NULL) {
@@ -1197,8 +1198,6 @@ void hexagon_translate_init(void)
             offsetof(CPUHexagonState, pred[i]),
             hexagon_prednames[i]);
     }
-    hex_pred_written = tcg_global_mem_new(cpu_env,
-        offsetof(CPUHexagonState, pred_written), "pred_written");
     hex_this_PC = tcg_global_mem_new(cpu_env,
         offsetof(CPUHexagonState, this_PC), "this_PC");
     hex_slot_cancelled = tcg_global_mem_new(cpu_env,
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH 10/21] Hexagon (target/hexagon) Mark registers as read during packet analysis
  2023-04-26  0:42 [PATCH 10/21] Hexagon (target/hexagon) Mark registers as read during packet analysis Taylor Simpson
                   ` (8 preceding siblings ...)
  2023-04-26  0:42 ` [PATCH 19/21] Hexagon (target/hexagon) Move pred_written " Taylor Simpson
@ 2023-04-27  8:04 ` Richard Henderson
  9 siblings, 0 replies; 21+ messages in thread
From: Richard Henderson @ 2023-04-27  8:04 UTC (permalink / raw)
  To: Taylor Simpson, qemu-devel; +Cc: philmd, ale, anjo, bcain, quic_mathbern

On 4/26/23 01:42, Taylor Simpson wrote:
> Have gen_analyze_funcs mark the registers that are read by the
> instruction.  We also mark the implicit reads using instruction
> attributes.
> 
> Signed-off-by: Taylor Simpson<tsimpson@quicinc.com>
> ---
>   target/hexagon/translate.h          | 36 +++++++++++++++++++++++
>   target/hexagon/attribs_def.h.inc    |  6 +++-
>   target/hexagon/translate.c          | 20 +++++++++++++
>   target/hexagon/gen_analyze_funcs.py | 44 ++++++++++++++++++++---------
>   target/hexagon/hex_common.py        |  6 ++++
>   5 files changed, 97 insertions(+), 15 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

r~


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 11/21] Hexagon (target/hexagon) Short-circuit packet register writes
  2023-04-26  0:42 ` [PATCH 11/21] Hexagon (target/hexagon) Short-circuit packet register writes Taylor Simpson
@ 2023-04-27 10:40   ` Richard Henderson
  0 siblings, 0 replies; 21+ messages in thread
From: Richard Henderson @ 2023-04-27 10:40 UTC (permalink / raw)
  To: Taylor Simpson, qemu-devel; +Cc: philmd, ale, anjo, bcain, quic_mathbern

On 4/26/23 01:42, Taylor Simpson wrote:
> In certain cases, we can avoid the overhead of writing to hex_new_value
> and write directly to hex_gpr.  We add need_commit field to DisasContext
> indicating if the end-of-packet commit is needed.  If it is not needed,
> get_result_gpr() and get_result_gpr_pair() can return hex_gpr.
> 
> We pass the ctx->need_commit to helpers when needed.
> 
> Finally, we can early-exit from gen_reg_writes during packet commit.
> 
> There are a few instructions whose semantics write to the result before
> reading all the inputs.  Therefore, the idef-parser generated code is
> incompatible with short-circuit.  We tell idef-parser to skip them.
> 
> For debugging purposes, we add a cpu property to turn off short-circuit.
> When the short-circuit property is false, we skip the analysis and force
> the end-of-packet commit.
> 
> Here's a simple example of the TCG generated for
> 0x004000b4:  0x7800c020 {       R0 = #0x1 }
> 
> BEFORE:
>   ---- 004000b4
>   movi_i32 new_r0,$0x1
>   mov_i32 r0,new_r0
> 
> AFTER:
>   ---- 004000b4
>   movi_i32 r0,$0x1
> 
> Signed-off-by: Taylor Simpson<tsimpson@quicinc.com>
> ---

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

r~


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 12/21] Hexagon (target/hexagon) Short-circuit packet predicate writes
  2023-04-26  0:42 ` [PATCH 12/21] Hexagon (target/hexagon) Short-circuit packet predicate writes Taylor Simpson
@ 2023-04-27 10:41   ` Richard Henderson
  0 siblings, 0 replies; 21+ messages in thread
From: Richard Henderson @ 2023-04-27 10:41 UTC (permalink / raw)
  To: Taylor Simpson, qemu-devel; +Cc: philmd, ale, anjo, bcain, quic_mathbern

On 4/26/23 01:42, Taylor Simpson wrote:
> In certain cases, we can avoid the overhead of writing to hex_new_pred_value
> and write directly to hex_pred.  We consider predicate reads/writes when
> computing ctx->need_commit.  The get_result_pred() function uses this
> field to decide between hex_new_pred_value and hex_pred.  Then, we can
> early-exit from gen_pred_writes.
> 
> Signed-off-by: Taylor Simpson<tsimpson@quicinc.com>
> ---
>   target/hexagon/genptr.h    |  1 +
>   target/hexagon/genptr.c    | 15 ++++++++++++---
>   target/hexagon/translate.c | 14 +++++++++++---
>   3 files changed, 24 insertions(+), 6 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

r~


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 13/21] Hexagon (target/hexagon) Short-circuit packet HVX writes
  2023-04-26  0:42 ` [PATCH 13/21] Hexagon (target/hexagon) Short-circuit packet HVX writes Taylor Simpson
@ 2023-04-27 10:42   ` Richard Henderson
  0 siblings, 0 replies; 21+ messages in thread
From: Richard Henderson @ 2023-04-27 10:42 UTC (permalink / raw)
  To: Taylor Simpson, qemu-devel; +Cc: philmd, ale, anjo, bcain, quic_mathbern

On 4/26/23 01:42, Taylor Simpson wrote:
> In certain cases, we can avoid the overhead of writing to future_VRegs
> and write directly to VRegs.  We consider HVX reads/writes when computing
> ctx->need_commit.  Then, we can early-exit from gen_commit_hvx.
> 
> Signed-off-by: Taylor Simpson<tsimpson@quicinc.com>
> ---
>   target/hexagon/genptr.c    |  6 ++++-
>   target/hexagon/translate.c | 46 +++++++++++++++++++++++++++++++++++++-
>   2 files changed, 50 insertions(+), 2 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

r~


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 14/21] Hexagon (target/hexagon) Short-circuit more HVX single instruction packets
  2023-04-26  0:42 ` [PATCH 14/21] Hexagon (target/hexagon) Short-circuit more HVX single instruction packets Taylor Simpson
@ 2023-04-27 10:44   ` Richard Henderson
  0 siblings, 0 replies; 21+ messages in thread
From: Richard Henderson @ 2023-04-27 10:44 UTC (permalink / raw)
  To: Taylor Simpson, qemu-devel; +Cc: philmd, ale, anjo, bcain, quic_mathbern

On 4/26/23 01:42, Taylor Simpson wrote:
> The generated helpers for HVX use pass-by-reference, so they can't
> short-circuit when the reads/writes overlap.  The instructions with
> overrides are OK because they use tcg_gen_gvec_*.
> 
> We add a flag has_hvx_helper to DisasContext and extend gen_analyze_funcs
> to set the flag when the instruction is an HVX instruction with a
> generated helper.
> 
> We add an override for V6_vcombine so that it can be short-circuited
> along with a test case in tests/tcg/hexagon/hvx_misc.c
> 
> Signed-off-by: Taylor Simpson<tsimpson@quicinc.com>
> ---
>   target/hexagon/gen_tcg_hvx.h        | 23 +++++++++++++++++++++++
>   target/hexagon/translate.h          |  1 +
>   target/hexagon/translate.c          | 17 +++++++++++++++--
>   tests/tcg/hexagon/hvx_misc.c        | 21 +++++++++++++++++++++
>   target/hexagon/gen_analyze_funcs.py |  5 +++++
>   5 files changed, 65 insertions(+), 2 deletions(-)

Ideally the vcombine override would be a separate patch.

Otherwise,
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 15/21] Hexagon (target/hexagon) Add overrides for disabled idef-parser insns
  2023-04-26  0:42 ` [PATCH 15/21] Hexagon (target/hexagon) Add overrides for disabled idef-parser insns Taylor Simpson
@ 2023-04-27 10:52   ` Richard Henderson
  0 siblings, 0 replies; 21+ messages in thread
From: Richard Henderson @ 2023-04-27 10:52 UTC (permalink / raw)
  To: Taylor Simpson, qemu-devel; +Cc: philmd, ale, anjo, bcain, quic_mathbern

On 4/26/23 01:42, Taylor Simpson wrote:
> +    tcg_gen_not_i64(tmp, tmp);
> +    tcg_gen_and_i64(result, result, tmp);

tcg_gen_andc_i64(result, result, tmp);

Otherwise,
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 16/21] Hexagon (target/hexagon) Make special new_value for USR
  2023-04-26  0:42 ` [PATCH 16/21] Hexagon (target/hexagon) Make special new_value for USR Taylor Simpson
@ 2023-04-27 10:57   ` Richard Henderson
  0 siblings, 0 replies; 21+ messages in thread
From: Richard Henderson @ 2023-04-27 10:57 UTC (permalink / raw)
  To: Taylor Simpson, qemu-devel; +Cc: philmd, ale, anjo, bcain, quic_mathbern

On 4/26/23 01:42, Taylor Simpson wrote:
> Precursor to moving new_value from the global state to DisasContext
> 
> USR will need to stay in the global state because some helpers will
> set it's value
> 
> Signed-off-by: Taylor Simpson<tsimpson@quicinc.com>
> ---
>   target/hexagon/cpu.h            |  1 +
>   target/hexagon/genptr.h         |  1 +
>   target/hexagon/macros.h         |  2 +-
>   target/hexagon/translate.h      |  1 +
>   target/hexagon/genptr.c         |  8 ++++++--
>   target/hexagon/translate.c      | 22 +++++++++++++++-------
>   target/hexagon/README           |  2 +-
>   target/hexagon/gen_tcg_funcs.py |  2 +-
>   8 files changed, 27 insertions(+), 12 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

r~


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 17/21] Hexagon (target/hexagon) Move new_value to DisasContext
  2023-04-26  0:42 ` [PATCH 17/21] Hexagon (target/hexagon) Move new_value to DisasContext Taylor Simpson
@ 2023-04-27 11:01   ` Richard Henderson
  2023-04-27 11:03     ` Richard Henderson
  0 siblings, 1 reply; 21+ messages in thread
From: Richard Henderson @ 2023-04-27 11:01 UTC (permalink / raw)
  To: Taylor Simpson, qemu-devel; +Cc: philmd, ale, anjo, bcain, quic_mathbern

On 4/26/23 01:42, Taylor Simpson wrote:
> +    for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) {
> +        ctx->new_value[i] = NULL;
> +    }

Perhaps

   memset(ctx->new_value, 0, sizeof(ctx->new_value));

Though probably the compiler would make that transformation.
Or perhaps

-    DisasContext ctx;
+    DisasContext ctx = { 0 };

in gen_intermediate_code, and eliminate other 0 init in gen_start_packet?

But it's not wrong as-is, so,
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 17/21] Hexagon (target/hexagon) Move new_value to DisasContext
  2023-04-27 11:01   ` Richard Henderson
@ 2023-04-27 11:03     ` Richard Henderson
  0 siblings, 0 replies; 21+ messages in thread
From: Richard Henderson @ 2023-04-27 11:03 UTC (permalink / raw)
  To: Taylor Simpson, qemu-devel; +Cc: philmd, ale, anjo, bcain, quic_mathbern

On 4/27/23 12:01, Richard Henderson wrote:
> On 4/26/23 01:42, Taylor Simpson wrote:
>> +    for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) {
>> +        ctx->new_value[i] = NULL;
>> +    }
> 
> Perhaps
> 
>    memset(ctx->new_value, 0, sizeof(ctx->new_value));
> 
> Though probably the compiler would make that transformation.
> Or perhaps
> 
> -    DisasContext ctx;
> +    DisasContext ctx = { 0 };
> 
> in gen_intermediate_code, and eliminate other 0 init in gen_start_packet?

Duh, start packet is called more than once per TB.


r~


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 18/21] Hexagon (target/hexagon) Move new_pred_value to DisasContext
  2023-04-26  0:42 ` [PATCH 18/21] Hexagon (target/hexagon) Move new_pred_value " Taylor Simpson
@ 2023-04-27 11:03   ` Richard Henderson
  0 siblings, 0 replies; 21+ messages in thread
From: Richard Henderson @ 2023-04-27 11:03 UTC (permalink / raw)
  To: Taylor Simpson, qemu-devel; +Cc: philmd, ale, anjo, bcain, quic_mathbern

On 4/26/23 01:42, Taylor Simpson wrote:
> The new_pred_value array in the CPUHexagonState is only used for
> bookkeeping within the translation of a packet.  With recent changes
> that eliminate the need to free TCGv variables, these make more sense
> to be transient and kept in DisasContext.
> 
> Suggested-by: Richard Henderson<richard.henderson@linaro.org>
> Signed-off-by: Taylor Simpson<tsimpson@quicinc.com>
> ---
>   target/hexagon/cpu.h                        |  1 -
>   target/hexagon/gen_tcg.h                    | 12 ++++++------
>   target/hexagon/translate.h                  |  2 +-
>   target/hexagon/genptr.c                     | 10 +++++++---
>   target/hexagon/idef-parser/parser-helpers.c |  2 +-
>   target/hexagon/op_helper.c                  |  2 +-
>   target/hexagon/translate.c                  | 16 ++++++----------
>   target/hexagon/gen_tcg_funcs.py             |  2 +-
>   8 files changed, 23 insertions(+), 24 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

r~


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 19/21] Hexagon (target/hexagon) Move pred_written to DisasContext
  2023-04-26  0:42 ` [PATCH 19/21] Hexagon (target/hexagon) Move pred_written " Taylor Simpson
@ 2023-04-27 11:05   ` Richard Henderson
  0 siblings, 0 replies; 21+ messages in thread
From: Richard Henderson @ 2023-04-27 11:05 UTC (permalink / raw)
  To: Taylor Simpson, qemu-devel; +Cc: philmd, ale, anjo, bcain, quic_mathbern

On 4/26/23 01:42, Taylor Simpson wrote:
> The pred_written variable in the CPUHexagonState is only used for
> bookkeeping within the translation of a packet.  With recent changes
> that eliminate the need to free TCGv variables, these make more sense
> to be transient and kept in DisasContext.
> 
> Suggested-by: Richard Henderson<richard.henderson@linaro.org>
> Signed-off-by: Taylor Simpson<tsimpson@quicinc.com>
> ---
>   target/hexagon/cpu.h       | 2 --
>   target/hexagon/helper.h    | 2 +-
>   target/hexagon/translate.h | 2 +-
>   target/hexagon/genptr.c    | 2 +-
>   target/hexagon/op_helper.c | 5 +++--
>   target/hexagon/translate.c | 9 ++++-----
>   6 files changed, 10 insertions(+), 12 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

r~


^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2023-04-27 11:06 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-04-26  0:42 [PATCH 10/21] Hexagon (target/hexagon) Mark registers as read during packet analysis Taylor Simpson
2023-04-26  0:42 ` [PATCH 11/21] Hexagon (target/hexagon) Short-circuit packet register writes Taylor Simpson
2023-04-27 10:40   ` Richard Henderson
2023-04-26  0:42 ` [PATCH 12/21] Hexagon (target/hexagon) Short-circuit packet predicate writes Taylor Simpson
2023-04-27 10:41   ` Richard Henderson
2023-04-26  0:42 ` [PATCH 13/21] Hexagon (target/hexagon) Short-circuit packet HVX writes Taylor Simpson
2023-04-27 10:42   ` Richard Henderson
2023-04-26  0:42 ` [PATCH 14/21] Hexagon (target/hexagon) Short-circuit more HVX single instruction packets Taylor Simpson
2023-04-27 10:44   ` Richard Henderson
2023-04-26  0:42 ` [PATCH 15/21] Hexagon (target/hexagon) Add overrides for disabled idef-parser insns Taylor Simpson
2023-04-27 10:52   ` Richard Henderson
2023-04-26  0:42 ` [PATCH 16/21] Hexagon (target/hexagon) Make special new_value for USR Taylor Simpson
2023-04-27 10:57   ` Richard Henderson
2023-04-26  0:42 ` [PATCH 17/21] Hexagon (target/hexagon) Move new_value to DisasContext Taylor Simpson
2023-04-27 11:01   ` Richard Henderson
2023-04-27 11:03     ` Richard Henderson
2023-04-26  0:42 ` [PATCH 18/21] Hexagon (target/hexagon) Move new_pred_value " Taylor Simpson
2023-04-27 11:03   ` Richard Henderson
2023-04-26  0:42 ` [PATCH 19/21] Hexagon (target/hexagon) Move pred_written " Taylor Simpson
2023-04-27 11:05   ` Richard Henderson
2023-04-27  8:04 ` [PATCH 10/21] Hexagon (target/hexagon) Mark registers as read during packet analysis Richard Henderson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).