qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 00/20] tcg/optimize: Track and use known 1's
@ 2025-05-05 20:27 Richard Henderson
  2025-05-05 20:27 ` [PATCH 01/20] tcg/optimize: Introduce arg_const_val Richard Henderson
                   ` (19 more replies)
  0 siblings, 20 replies; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
  To: qemu-devel

We already track and use known 0's; do the same for 1's.
This actually simplifies some of the logical operations,
which previously required checking for a constant value.


r~


Richard Henderson (20):
  tcg/optimize: Introduce arg_const_val
  tcg/optimize: Add one's mask to TempOptInfo
  tcg/optimize: Build and use o_bits in fold_and
  tcg/optimize: Build and use o_bits in fold_andc
  tcg/optimize: Build and use z_bits and o_bits in fold_eqv
  tcg/optimize: Build and use z_bits and o_bits in fold_nand
  tcg/optimize: Build and use z_bits and o_bits in fold_nor
  tcg/optimize: Build and use z_bits and o_bits in fold_not
  tcg/optimize: Build and use one and affected bits in fold_or
  tcg/optimize: Build and use zero, one and affected bits in fold_orc
  tcg/optimize: Build and use o_bits in fold_xor
  tcg/optimize: Build and use o_bits in fold_bswap
  tcg/optimize: Build and use o_bits in fold_deposit
  tcg/optimize: Build and use o_bits in fold_extract
  tcg/optimize: Build and use z_bits and o_bits in fold_extract2
  tcg/optimize: Build and use o_bits in fold_exts
  tcg/optimize: Build and use o_bits in fold_extu
  tcg/optimize: Build and use o_bits in fold_movcond
  tcg/optimize: Build and use o_bits in fold_sextract
  tcg/optimize: Build and use o_bits in fold_shift

 tcg/optimize.c | 359 +++++++++++++++++++++++++++++++------------------
 1 file changed, 226 insertions(+), 133 deletions(-)

-- 
2.43.0



^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PATCH 01/20] tcg/optimize: Introduce arg_const_val
  2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
  2025-06-24 22:21   ` Pierrick Bouvier
  2025-05-05 20:27 ` [PATCH 02/20] tcg/optimize: Add one's mask to TempOptInfo Richard Henderson
                   ` (18 subsequent siblings)
  19 siblings, 1 reply; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
  To: qemu-devel

Use arg_const_val instead of direct access to the TempOptInfo val
member.  Rename both val and is_const to catch all direct accesses.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 78 ++++++++++++++++++++++++++------------------------
 1 file changed, 41 insertions(+), 37 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 10a76c5461..73a272eeb3 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -39,11 +39,11 @@ typedef struct MemCopyInfo {
 } MemCopyInfo;
 
 typedef struct TempOptInfo {
-    bool is_const;
+    bool is_const_;
     TCGTemp *prev_copy;
     TCGTemp *next_copy;
     QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy;
-    uint64_t val;
+    uint64_t val_;
     uint64_t z_mask;  /* mask bit is 0 if and only if value bit is 0 */
     uint64_t s_mask;  /* mask bit is 1 if value bit matches msb */
 } TempOptInfo;
@@ -73,12 +73,12 @@ static inline TempOptInfo *arg_info(TCGArg arg)
 
 static inline bool ti_is_const(TempOptInfo *ti)
 {
-    return ti->is_const;
+    return ti->is_const_;
 }
 
 static inline uint64_t ti_const_val(TempOptInfo *ti)
 {
-    return ti->val;
+    return ti->val_;
 }
 
 static inline bool ti_is_const_val(TempOptInfo *ti, uint64_t val)
@@ -101,6 +101,11 @@ static inline bool arg_is_const(TCGArg arg)
     return ts_is_const(arg_temp(arg));
 }
 
+static inline uint64_t arg_const_val(TCGArg arg)
+{
+    return ti_const_val(arg_info(arg));
+}
+
 static inline bool arg_is_const_val(TCGArg arg, uint64_t val)
 {
     return ts_is_const_val(arg_temp(arg), val);
@@ -137,12 +142,12 @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts)
     ti->prev_copy = ts;
     QSIMPLEQ_INIT(&ti->mem_copy);
     if (ts->kind == TEMP_CONST) {
-        ti->is_const = true;
-        ti->val = ts->val;
+        ti->is_const_ = true;
+        ti->val_ = ts->val;
         ti->z_mask = ts->val;
         ti->s_mask = INT64_MIN >> clrsb64(ts->val);
     } else {
-        ti->is_const = false;
+        ti->is_const_ = false;
         ti->z_mask = -1;
         ti->s_mask = 0;
     }
@@ -229,7 +234,7 @@ static void reset_ts(OptContext *ctx, TCGTemp *ts)
     pi->next_copy = ti->next_copy;
     ti->next_copy = ts;
     ti->prev_copy = ts;
-    ti->is_const = false;
+    ti->is_const_ = false;
     ti->z_mask = -1;
     ti->s_mask = 0;
 
@@ -394,8 +399,8 @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
         di->prev_copy = src_ts;
         ni->prev_copy = dst_ts;
         si->next_copy = dst_ts;
-        di->is_const = si->is_const;
-        di->val = si->val;
+        di->is_const_ = si->is_const_;
+        di->val_ = si->val_;
 
         if (!QSIMPLEQ_EMPTY(&si->mem_copy)
             && cmp_better_copy(src_ts, dst_ts) == dst_ts) {
@@ -687,8 +692,8 @@ static int do_constant_folding_cond(TCGType type, TCGArg x,
                                     TCGArg y, TCGCond c)
 {
     if (arg_is_const(x) && arg_is_const(y)) {
-        uint64_t xv = arg_info(x)->val;
-        uint64_t yv = arg_info(y)->val;
+        uint64_t xv = arg_const_val(x);
+        uint64_t yv = arg_const_val(y);
 
         switch (type) {
         case TCG_TYPE_I32:
@@ -801,14 +806,14 @@ static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest,
      * TSTNE x,i -> NE x,0 if i includes all nonzero bits of x
      */
     if (args_are_copies(*p1, *p2) ||
-        (arg_is_const(*p2) && (i1->z_mask & ~arg_info(*p2)->val) == 0)) {
+        (arg_is_const(*p2) && (i1->z_mask & ~arg_const_val(*p2)) == 0)) {
         *p2 = arg_new_constant(ctx, 0);
         *pcond = tcg_tst_eqne_cond(cond);
         return -1;
     }
 
     /* TSTNE x,i -> LT x,0 if i only includes sign bit copies */
-    if (arg_is_const(*p2) && (arg_info(*p2)->val & ~i1->s_mask) == 0) {
+    if (arg_is_const(*p2) && (arg_const_val(*p2) & ~i1->s_mask) == 0) {
         *p2 = arg_new_constant(ctx, 0);
         *pcond = tcg_tst_ltge_cond(cond);
         return -1;
@@ -849,13 +854,13 @@ static int do_constant_folding_cond2(OptContext *ctx, TCGOp *op, TCGArg *args)
     bh = args[3];
 
     if (arg_is_const(bl) && arg_is_const(bh)) {
-        tcg_target_ulong blv = arg_info(bl)->val;
-        tcg_target_ulong bhv = arg_info(bh)->val;
+        tcg_target_ulong blv = arg_const_val(bl);
+        tcg_target_ulong bhv = arg_const_val(bh);
         uint64_t b = deposit64(blv, 32, 32, bhv);
 
         if (arg_is_const(al) && arg_is_const(ah)) {
-            tcg_target_ulong alv = arg_info(al)->val;
-            tcg_target_ulong ahv = arg_info(ah)->val;
+            tcg_target_ulong alv = arg_const_val(al);
+            tcg_target_ulong ahv = arg_const_val(ah);
             uint64_t a = deposit64(alv, 32, 32, ahv);
 
             r = do_constant_folding_cond_64(a, b, c);
@@ -989,9 +994,8 @@ static bool finish_folding(OptContext *ctx, TCGOp *op)
 static bool fold_const1(OptContext *ctx, TCGOp *op)
 {
     if (arg_is_const(op->args[1])) {
-        uint64_t t;
+        uint64_t t = arg_const_val(op->args[1]);
 
-        t = arg_info(op->args[1])->val;
         t = do_constant_folding(op->opc, ctx->type, t, 0);
         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
     }
@@ -1001,8 +1005,8 @@ static bool fold_const1(OptContext *ctx, TCGOp *op)
 static bool fold_const2(OptContext *ctx, TCGOp *op)
 {
     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
-        uint64_t t1 = arg_info(op->args[1])->val;
-        uint64_t t2 = arg_info(op->args[2])->val;
+        uint64_t t1 = arg_const_val(op->args[1]);
+        uint64_t t2 = arg_const_val(op->args[2]);
 
         t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
         return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
@@ -1486,8 +1490,8 @@ static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
     }
 
     if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
-        uint64_t tv = arg_info(op->args[2])->val;
-        uint64_t fv = arg_info(op->args[3])->val;
+        uint64_t tv = arg_const_val(op->args[2]);
+        uint64_t fv = arg_const_val(op->args[3]);
 
         if (tv == -1 && fv == 0) {
             return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
@@ -1504,7 +1508,7 @@ static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
         }
     }
     if (arg_is_const(op->args[2])) {
-        uint64_t tv = arg_info(op->args[2])->val;
+        uint64_t tv = arg_const_val(op->args[2]);
         if (tv == -1) {
             op->opc = INDEX_op_or_vec;
             op->args[2] = op->args[3];
@@ -1518,7 +1522,7 @@ static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
         }
     }
     if (arg_is_const(op->args[3])) {
-        uint64_t fv = arg_info(op->args[3])->val;
+        uint64_t fv = arg_const_val(op->args[3]);
         if (fv == 0) {
             op->opc = INDEX_op_and_vec;
             return fold_and(ctx, op);
@@ -1876,7 +1880,7 @@ static bool fold_divide(OptContext *ctx, TCGOp *op)
 static bool fold_dup(OptContext *ctx, TCGOp *op)
 {
     if (arg_is_const(op->args[1])) {
-        uint64_t t = arg_info(op->args[1])->val;
+        uint64_t t = arg_const_val(op->args[1]);
         t = dup_const(TCGOP_VECE(op), t);
         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
     }
@@ -1886,8 +1890,8 @@ static bool fold_dup(OptContext *ctx, TCGOp *op)
 static bool fold_dup2(OptContext *ctx, TCGOp *op)
 {
     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
-        uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32,
-                               arg_info(op->args[2])->val);
+        uint64_t t = deposit64(arg_const_val(op->args[1]), 32, 32,
+                               arg_const_val(op->args[2]));
         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
     }
 
@@ -1958,8 +1962,8 @@ static bool fold_extract(OptContext *ctx, TCGOp *op)
 static bool fold_extract2(OptContext *ctx, TCGOp *op)
 {
     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
-        uint64_t v1 = arg_info(op->args[1])->val;
-        uint64_t v2 = arg_info(op->args[2])->val;
+        uint64_t v1 = arg_const_val(op->args[1]);
+        uint64_t v2 = arg_const_val(op->args[2]);
         int shr = op->args[3];
 
         if (ctx->type == TCG_TYPE_I32) {
@@ -2127,8 +2131,8 @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op)
     swap_commutative(op->args[0], &op->args[2], &op->args[3]);
 
     if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
-        uint64_t a = arg_info(op->args[2])->val;
-        uint64_t b = arg_info(op->args[3])->val;
+        uint64_t a = arg_const_val(op->args[2]);
+        uint64_t b = arg_const_val(op->args[3]);
         uint64_t h, l;
         TCGArg rl, rh;
         TCGOp *op2;
@@ -2330,7 +2334,7 @@ static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
     }
 
     a_zmask = arg_info(op->args[1])->z_mask;
-    b_val = arg_info(op->args[2])->val;
+    b_val = arg_const_val(op->args[2]);
     cond = op->args[3];
 
     if (ctx->type == TCG_TYPE_I32) {
@@ -2418,7 +2422,7 @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
     }
 
     src2 = op->args[2];
-    val = arg_info(src2)->val;
+    val = arg_const_val(src2);
     if (!is_power_of_2(val)) {
         return;
     }
@@ -2669,7 +2673,7 @@ static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
     TCGOpcode neg_op;
     bool have_neg;
 
-    if (!arg_is_const(op->args[1]) || arg_info(op->args[1])->val != 0) {
+    if (!arg_is_const_val(op->args[1], 0)) {
         return false;
     }
 
@@ -2719,7 +2723,7 @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
 
     /* Fold sub r,x,i to add r,x,-i */
     if (arg_is_const(op->args[2])) {
-        uint64_t val = arg_info(op->args[2])->val;
+        uint64_t val = arg_const_val(op->args[2]);
 
         op->opc = INDEX_op_add;
         op->args[2] = arg_new_constant(ctx, -val);
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 02/20] tcg/optimize: Add one's mask to TempOptInfo
  2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
  2025-05-05 20:27 ` [PATCH 01/20] tcg/optimize: Introduce arg_const_val Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
  2025-05-06 11:05   ` Paolo Bonzini
  2025-06-24 22:38   ` Pierrick Bouvier
  2025-05-05 20:27 ` [PATCH 03/20] tcg/optimize: Build and use o_bits in fold_and Richard Henderson
                   ` (17 subsequent siblings)
  19 siblings, 2 replies; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
  To: qemu-devel

Add o_mask mirroring z_mask, but for 1's instead of 0's.
Drop is_const and val fields, which now logically overlap.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 51 ++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 37 insertions(+), 14 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 73a272eeb3..395ad8232a 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -39,12 +39,11 @@ typedef struct MemCopyInfo {
 } MemCopyInfo;
 
 typedef struct TempOptInfo {
-    bool is_const_;
     TCGTemp *prev_copy;
     TCGTemp *next_copy;
     QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy;
-    uint64_t val_;
     uint64_t z_mask;  /* mask bit is 0 if and only if value bit is 0 */
+    uint64_t o_mask;  /* mask bit is 1 if and only if value bit is 1 */
     uint64_t s_mask;  /* mask bit is 1 if value bit matches msb */
 } TempOptInfo;
 
@@ -73,12 +72,14 @@ static inline TempOptInfo *arg_info(TCGArg arg)
 
 static inline bool ti_is_const(TempOptInfo *ti)
 {
-    return ti->is_const_;
+    /* If all bits that are not known zeros are known ones, it's constant. */
+    return ti->z_mask == ti->o_mask;
 }
 
 static inline uint64_t ti_const_val(TempOptInfo *ti)
 {
-    return ti->val_;
+    /* If constant, both z_mask and o_mask contain the value. */
+    return ti->z_mask;
 }
 
 static inline bool ti_is_const_val(TempOptInfo *ti, uint64_t val)
@@ -142,13 +143,12 @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts)
     ti->prev_copy = ts;
     QSIMPLEQ_INIT(&ti->mem_copy);
     if (ts->kind == TEMP_CONST) {
-        ti->is_const_ = true;
-        ti->val_ = ts->val;
         ti->z_mask = ts->val;
+        ti->o_mask = ts->val;
         ti->s_mask = INT64_MIN >> clrsb64(ts->val);
     } else {
-        ti->is_const_ = false;
         ti->z_mask = -1;
+        ti->o_mask = 0;
         ti->s_mask = 0;
     }
 }
@@ -234,8 +234,8 @@ static void reset_ts(OptContext *ctx, TCGTemp *ts)
     pi->next_copy = ti->next_copy;
     ti->next_copy = ts;
     ti->prev_copy = ts;
-    ti->is_const_ = false;
     ti->z_mask = -1;
+    ti->o_mask = 0;
     ti->s_mask = 0;
 
     if (!QSIMPLEQ_EMPTY(&ti->mem_copy)) {
@@ -390,6 +390,7 @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
     op->args[1] = src;
 
     di->z_mask = si->z_mask;
+    di->o_mask = si->o_mask;
     di->s_mask = si->s_mask;
 
     if (src_ts->type == dst_ts->type) {
@@ -399,13 +400,19 @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
         di->prev_copy = src_ts;
         ni->prev_copy = dst_ts;
         si->next_copy = dst_ts;
-        di->is_const_ = si->is_const_;
-        di->val_ = si->val_;
 
         if (!QSIMPLEQ_EMPTY(&si->mem_copy)
             && cmp_better_copy(src_ts, dst_ts) == dst_ts) {
             move_mem_copies(dst_ts, src_ts);
         }
+    } else if (dst_ts->type == TCG_TYPE_I32) {
+        di->z_mask = (int32_t)di->z_mask;
+        di->o_mask = (int32_t)di->o_mask;
+        di->s_mask |= INT32_MIN;
+    } else {
+        di->z_mask |= MAKE_64BIT_MASK(32, 32);
+        di->o_mask = (uint32_t)di->o_mask;
+        di->s_mask = INT64_MIN;
     }
     return true;
 }
@@ -1032,8 +1039,8 @@ static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
  * If z_mask allows, fold the output to constant zero.
  * The passed s_mask may be augmented by z_mask.
  */
-static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
-                          uint64_t z_mask, int64_t s_mask)
+static bool fold_masks_zos(OptContext *ctx, TCGOp *op, uint64_t z_mask,
+                           uint64_t o_mask, int64_t s_mask)
 {
     const TCGOpDef *def = &tcg_op_defs[op->opc];
     TCGTemp *ts;
@@ -1052,9 +1059,18 @@ static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
      */
     if (ctx->type == TCG_TYPE_I32) {
         z_mask = (int32_t)z_mask;
+        o_mask = (int32_t)o_mask;
         s_mask |= INT32_MIN;
     }
 
+    /* Bits that are known 1 and bits that are known 0 must not overlap. */
+    tcg_debug_assert((o_mask & ~z_mask) == 0);
+
+    /* If all bits that are not known zero are known one, it's a constant. */
+    if (z_mask == o_mask) {
+        return tcg_opt_gen_movi(ctx, op, op->args[0], o_mask);
+    }
+    /* All bits known zero is zero. */
     if (z_mask == 0) {
         return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
     }
@@ -1068,20 +1084,27 @@ static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
     /* Canonicalize s_mask and incorporate data from z_mask. */
     rep = clz64(~s_mask);
     rep = MAX(rep, clz64(z_mask));
+    rep = MAX(rep, clz64(~o_mask));
     rep = MAX(rep - 1, 0);
     ti->s_mask = INT64_MIN >> rep;
 
     return true;
 }
 
+static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
+                          uint64_t z_mask, uint64_t s_mask)
+{
+    return fold_masks_zos(ctx, op, z_mask, 0, s_mask);
+}
+
 static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask)
 {
-    return fold_masks_zs(ctx, op, z_mask, 0);
+    return fold_masks_zos(ctx, op, z_mask, 0, 0);
 }
 
 static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask)
 {
-    return fold_masks_zs(ctx, op, -1, s_mask);
+    return fold_masks_zos(ctx, op, -1, 0, s_mask);
 }
 
 /*
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 03/20] tcg/optimize: Build and use o_bits in fold_and
  2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
  2025-05-05 20:27 ` [PATCH 01/20] tcg/optimize: Introduce arg_const_val Richard Henderson
  2025-05-05 20:27 ` [PATCH 02/20] tcg/optimize: Add one's mask to TempOptInfo Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
  2025-06-24 22:42   ` Pierrick Bouvier
  2025-05-05 20:27 ` [PATCH 04/20] tcg/optimize: Build and use o_bits in fold_andc Richard Henderson
                   ` (16 subsequent siblings)
  19 siblings, 1 reply; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
  To: qemu-devel

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 395ad8232a..c9f0f46b83 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -1420,7 +1420,7 @@ static bool fold_addco(OptContext *ctx, TCGOp *op)
 
 static bool fold_and(OptContext *ctx, TCGOp *op)
 {
-    uint64_t z1, z2, z_mask, s_mask;
+    uint64_t z_mask, o_mask, s_mask;
     TempOptInfo *t1, *t2;
 
     if (fold_const2_commutative(ctx, op) ||
@@ -1432,26 +1432,21 @@ static bool fold_and(OptContext *ctx, TCGOp *op)
 
     t1 = arg_info(op->args[1]);
     t2 = arg_info(op->args[2]);
-    z1 = t1->z_mask;
-    z2 = t2->z_mask;
 
-    /*
-     * Known-zeros does not imply known-ones.  Therefore unless
-     * arg2 is constant, we can't infer affected bits from it.
-     */
-    if (ti_is_const(t2) && fold_affected_mask(ctx, op, z1 & ~z2)) {
+    /* Affected bits are those not known zero, masked by those known one. */
+    if (fold_affected_mask(ctx, op, t1->z_mask & ~t2->o_mask)) {
         return true;
     }
 
-    z_mask = z1 & z2;
-
+    z_mask = t1->z_mask & t2->z_mask;
+    o_mask = t1->o_mask & t2->o_mask;
     /*
      * Sign repetitions are perforce all identical, whether they are 1 or 0.
      * Bitwise operations preserve the relative quantity of the repetitions.
      */
     s_mask = t1->s_mask & t2->s_mask;
 
-    return fold_masks_zs(ctx, op, z_mask, s_mask);
+    return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
 }
 
 static bool fold_andc(OptContext *ctx, TCGOp *op)
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 04/20] tcg/optimize: Build and use o_bits in fold_andc
  2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
                   ` (2 preceding siblings ...)
  2025-05-05 20:27 ` [PATCH 03/20] tcg/optimize: Build and use o_bits in fold_and Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
  2025-06-24 23:20   ` Pierrick Bouvier
  2025-05-05 20:27 ` [PATCH 05/20] tcg/optimize: Build and use z_bits and o_bits in fold_eqv Richard Henderson
                   ` (15 subsequent siblings)
  19 siblings, 1 reply; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
  To: qemu-devel

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index c9f0f46b83..faee3e8580 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -1451,7 +1451,7 @@ static bool fold_and(OptContext *ctx, TCGOp *op)
 
 static bool fold_andc(OptContext *ctx, TCGOp *op)
 {
-    uint64_t z_mask, s_mask;
+    uint64_t z_mask, o_mask, s_mask;
     TempOptInfo *t1, *t2;
 
     if (fold_const2(ctx, op) ||
@@ -1463,7 +1463,6 @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
 
     t1 = arg_info(op->args[1]);
     t2 = arg_info(op->args[2]);
-    z_mask = t1->z_mask;
 
     if (ti_is_const(t2)) {
         /* Fold andc r,x,i to and r,x,~i. */
@@ -1484,20 +1483,16 @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
         return fold_and(ctx, op);
     }
 
-    /*
-     * Known-zeros does not imply known-ones.  Therefore unless
-     * arg2 is constant, we can't infer anything from it.
-     */
-    if (ti_is_const(t2)) {
-        uint64_t v2 = ti_const_val(t2);
-        if (fold_affected_mask(ctx, op, z_mask & v2)) {
-            return true;
-        }
-        z_mask &= ~v2;
+    /* Affected bits are those not known zero, masked by those known zero. */
+    if (fold_affected_mask(ctx, op, t1->z_mask & t2->z_mask)) {
+        return true;
     }
 
+    z_mask = t1->z_mask & ~t2->o_mask;
+    o_mask = t1->o_mask & ~t2->z_mask;
     s_mask = t1->s_mask & t2->s_mask;
-    return fold_masks_zs(ctx, op, z_mask, s_mask);
+
+    return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
 }
 
 static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 05/20] tcg/optimize: Build and use z_bits and o_bits in fold_eqv
  2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
                   ` (3 preceding siblings ...)
  2025-05-05 20:27 ` [PATCH 04/20] tcg/optimize: Build and use o_bits in fold_andc Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
  2025-06-24 23:55   ` Pierrick Bouvier
  2025-05-05 20:27 ` [PATCH 06/20] tcg/optimize: Build and use z_bits and o_bits in fold_nand Richard Henderson
                   ` (14 subsequent siblings)
  19 siblings, 1 reply; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
  To: qemu-devel

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index faee3e8580..08d15e5395 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -1917,7 +1917,7 @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
 
 static bool fold_eqv(OptContext *ctx, TCGOp *op)
 {
-    uint64_t s_mask;
+    uint64_t z_mask, o_mask, s_mask;
     TempOptInfo *t1, *t2;
 
     if (fold_const2_commutative(ctx, op) ||
@@ -1947,8 +1947,12 @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
     }
 
     t1 = arg_info(op->args[1]);
+
+    z_mask = (t1->z_mask | ~t2->o_mask) & (t2->z_mask | ~t1->o_mask);
+    o_mask = ~(t1->z_mask | t2->z_mask) | (t1->o_mask & t2->o_mask);
     s_mask = t1->s_mask & t2->s_mask;
-    return fold_masks_s(ctx, op, s_mask);
+
+    return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
 }
 
 static bool fold_extract(OptContext *ctx, TCGOp *op)
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 06/20] tcg/optimize: Build and use z_bits and o_bits in fold_nand
  2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
                   ` (4 preceding siblings ...)
  2025-05-05 20:27 ` [PATCH 05/20] tcg/optimize: Build and use z_bits and o_bits in fold_eqv Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
  2025-05-05 20:27 ` [PATCH 07/20] tcg/optimize: Build and use z_bits and o_bits in fold_nor Richard Henderson
                   ` (13 subsequent siblings)
  19 siblings, 0 replies; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
  To: qemu-devel

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 08d15e5395..9721c80943 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -2192,16 +2192,22 @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op)
 
 static bool fold_nand(OptContext *ctx, TCGOp *op)
 {
-    uint64_t s_mask;
+    uint64_t z_mask, o_mask, s_mask;
+    TempOptInfo *t1, *t2;
 
     if (fold_const2_commutative(ctx, op) ||
         fold_xi_to_not(ctx, op, -1)) {
         return true;
     }
 
-    s_mask = arg_info(op->args[1])->s_mask
-           & arg_info(op->args[2])->s_mask;
-    return fold_masks_s(ctx, op, s_mask);
+    t1 = arg_info(op->args[1]);
+    t2 = arg_info(op->args[2]);
+
+    z_mask = ~(t1->o_mask & t2->o_mask);
+    o_mask = ~(t1->z_mask & t2->z_mask);
+    s_mask = t1->s_mask & t2->s_mask;
+
+    return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
 }
 
 static bool fold_neg_no_const(OptContext *ctx, TCGOp *op)
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 07/20] tcg/optimize: Build and use z_bits and o_bits in fold_nor
  2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
                   ` (5 preceding siblings ...)
  2025-05-05 20:27 ` [PATCH 06/20] tcg/optimize: Build and use z_bits and o_bits in fold_nand Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
  2025-05-05 20:27 ` [PATCH 08/20] tcg/optimize: Build and use z_bits and o_bits in fold_not Richard Henderson
                   ` (12 subsequent siblings)
  19 siblings, 0 replies; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
  To: qemu-devel

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 9721c80943..d5256aa02c 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -2226,16 +2226,22 @@ static bool fold_neg(OptContext *ctx, TCGOp *op)
 
 static bool fold_nor(OptContext *ctx, TCGOp *op)
 {
-    uint64_t s_mask;
+    uint64_t z_mask, o_mask, s_mask;
+    TempOptInfo *t1, *t2;
 
     if (fold_const2_commutative(ctx, op) ||
         fold_xi_to_not(ctx, op, 0)) {
         return true;
     }
 
-    s_mask = arg_info(op->args[1])->s_mask
-           & arg_info(op->args[2])->s_mask;
-    return fold_masks_s(ctx, op, s_mask);
+    t1 = arg_info(op->args[1]);
+    t2 = arg_info(op->args[2]);
+
+    z_mask = ~(t1->o_mask | t2->o_mask);
+    o_mask = ~(t1->z_mask | t2->z_mask);
+    s_mask = t1->s_mask & t2->s_mask;
+
+    return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
 }
 
 static bool fold_not(OptContext *ctx, TCGOp *op)
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 08/20] tcg/optimize: Build and use z_bits and o_bits in fold_not
  2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
                   ` (6 preceding siblings ...)
  2025-05-05 20:27 ` [PATCH 07/20] tcg/optimize: Build and use z_bits and o_bits in fold_nor Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
  2025-05-05 20:27 ` [PATCH 09/20] tcg/optimize: Build and use one and affected bits in fold_or Richard Henderson
                   ` (11 subsequent siblings)
  19 siblings, 0 replies; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
  To: qemu-devel

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index d5256aa02c..8fbf682e6d 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -2246,10 +2246,14 @@ static bool fold_nor(OptContext *ctx, TCGOp *op)
 
 static bool fold_not(OptContext *ctx, TCGOp *op)
 {
+    TempOptInfo *t1;
+
     if (fold_const1(ctx, op)) {
         return true;
     }
-    return fold_masks_s(ctx, op, arg_info(op->args[1])->s_mask);
+
+    t1 = arg_info(op->args[1]);
+    return fold_masks_zos(ctx, op, ~t1->o_mask, ~t1->z_mask, t1->s_mask);
 }
 
 static bool fold_or(OptContext *ctx, TCGOp *op)
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 09/20] tcg/optimize: Build and use one and affected bits in fold_or
  2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
                   ` (7 preceding siblings ...)
  2025-05-05 20:27 ` [PATCH 08/20] tcg/optimize: Build and use z_bits and o_bits in fold_not Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
  2025-05-05 20:27 ` [PATCH 10/20] tcg/optimize: Build and use zero, one and affected bits in fold_orc Richard Henderson
                   ` (10 subsequent siblings)
  19 siblings, 0 replies; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
  To: qemu-devel

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 8fbf682e6d..22d302c9bf 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -2258,7 +2258,7 @@ static bool fold_not(OptContext *ctx, TCGOp *op)
 
 static bool fold_or(OptContext *ctx, TCGOp *op)
 {
-    uint64_t z_mask, s_mask;
+    uint64_t z_mask, o_mask, s_mask;
     TempOptInfo *t1, *t2;
 
     if (fold_const2_commutative(ctx, op) ||
@@ -2269,9 +2269,17 @@ static bool fold_or(OptContext *ctx, TCGOp *op)
 
     t1 = arg_info(op->args[1]);
     t2 = arg_info(op->args[2]);
+
+    /* Affected bits are those not known one, masked by those known zero. */
+    if (fold_affected_mask(ctx, op, ~t1->o_mask & t2->z_mask)) {
+        return true;
+    }
+
     z_mask = t1->z_mask | t2->z_mask;
+    o_mask = t1->o_mask | t2->o_mask;
     s_mask = t1->s_mask & t2->s_mask;
-    return fold_masks_zs(ctx, op, z_mask, s_mask);
+
+    return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
 }
 
 static bool fold_orc(OptContext *ctx, TCGOp *op)
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 10/20] tcg/optimize: Build and use zero, one and affected bits in fold_orc
  2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
                   ` (8 preceding siblings ...)
  2025-05-05 20:27 ` [PATCH 09/20] tcg/optimize: Build and use one and affected bits in fold_or Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
  2025-05-05 20:27 ` [PATCH 11/20] tcg/optimize: Build and use o_bits in fold_xor Richard Henderson
                   ` (9 subsequent siblings)
  19 siblings, 0 replies; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
  To: qemu-devel

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 22d302c9bf..5081e3db1f 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -2284,7 +2284,7 @@ static bool fold_or(OptContext *ctx, TCGOp *op)
 
 static bool fold_orc(OptContext *ctx, TCGOp *op)
 {
-    uint64_t s_mask;
+    uint64_t z_mask, o_mask, s_mask;
     TempOptInfo *t1, *t2;
 
     if (fold_const2(ctx, op) ||
@@ -2315,8 +2315,17 @@ static bool fold_orc(OptContext *ctx, TCGOp *op)
     }
 
     t1 = arg_info(op->args[1]);
+
+    /* Affected bits are those not known one, masked by those known one. */
+    if (fold_affected_mask(ctx, op, ~t1->o_mask & t2->o_mask)) {
+        return true;
+    }
+
+    z_mask = t1->z_mask | ~t2->o_mask;
+    o_mask = t1->o_mask | ~t2->z_mask;
     s_mask = t1->s_mask & t2->s_mask;
-    return fold_masks_s(ctx, op, s_mask);
+
+    return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
 }
 
 static bool fold_qemu_ld_1reg(OptContext *ctx, TCGOp *op)
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 11/20] tcg/optimize: Build and use o_bits in fold_xor
  2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
                   ` (9 preceding siblings ...)
  2025-05-05 20:27 ` [PATCH 10/20] tcg/optimize: Build and use zero, one and affected bits in fold_orc Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
  2025-05-05 20:27 ` [PATCH 12/20] tcg/optimize: Build and use o_bits in fold_bswap Richard Henderson
                   ` (8 subsequent siblings)
  19 siblings, 0 replies; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
  To: qemu-devel

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 5081e3db1f..a61c7ca376 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -3038,7 +3038,7 @@ static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
 
 static bool fold_xor(OptContext *ctx, TCGOp *op)
 {
-    uint64_t z_mask, s_mask;
+    uint64_t z_mask, o_mask, s_mask;
     TempOptInfo *t1, *t2;
 
     if (fold_const2_commutative(ctx, op) ||
@@ -3050,9 +3050,12 @@ static bool fold_xor(OptContext *ctx, TCGOp *op)
 
     t1 = arg_info(op->args[1]);
     t2 = arg_info(op->args[2]);
-    z_mask = t1->z_mask | t2->z_mask;
+
+    z_mask = (t1->z_mask | t2->z_mask) & ~(t1->o_mask & t2->o_mask);
+    o_mask = (t1->o_mask & ~t2->z_mask) | (t2->o_mask & ~t1->z_mask);
     s_mask = t1->s_mask & t2->s_mask;
-    return fold_masks_zs(ctx, op, z_mask, s_mask);
+
+    return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
 }
 
 /* Propagate constants and copies, fold constant expressions. */
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 12/20] tcg/optimize: Build and use o_bits in fold_bswap
  2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
                   ` (10 preceding siblings ...)
  2025-05-05 20:27 ` [PATCH 11/20] tcg/optimize: Build and use o_bits in fold_xor Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
  2025-05-05 20:27 ` [PATCH 13/20] tcg/optimize: Build and use o_bits in fold_deposit Richard Henderson
                   ` (7 subsequent siblings)
  19 siblings, 0 replies; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
  To: qemu-devel

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 49 ++++++++++++++++++++++++-------------------------
 1 file changed, 24 insertions(+), 25 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index a61c7ca376..2898a3f913 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -1666,53 +1666,52 @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
 
 static bool fold_bswap(OptContext *ctx, TCGOp *op)
 {
-    uint64_t z_mask, s_mask, sign;
+    uint64_t z_mask, o_mask, s_mask;
     TempOptInfo *t1 = arg_info(op->args[1]);
+    int flags = op->args[2];
 
     if (ti_is_const(t1)) {
         return tcg_opt_gen_movi(ctx, op, op->args[0],
                                 do_constant_folding(op->opc, ctx->type,
-                                                    ti_const_val(t1),
-                                                    op->args[2]));
+                                                    ti_const_val(t1), flags));
     }
 
     z_mask = t1->z_mask;
+    o_mask = t1->o_mask;
+    s_mask = 0;
+
     switch (op->opc) {
     case INDEX_op_bswap16:
         z_mask = bswap16(z_mask);
-        sign = INT16_MIN;
+        o_mask = bswap16(o_mask);
+        if (flags & TCG_BSWAP_OS) {
+            z_mask = (int16_t)z_mask;
+            o_mask = (int16_t)o_mask;
+            s_mask = INT16_MIN;
+        } else if (!(flags & TCG_BSWAP_OZ)) {
+            z_mask |= MAKE_64BIT_MASK(16, 48);
+        }
         break;
     case INDEX_op_bswap32:
         z_mask = bswap32(z_mask);
-        sign = INT32_MIN;
+        o_mask = bswap32(o_mask);
+        if (flags & TCG_BSWAP_OS) {
+            z_mask = (int32_t)z_mask;
+            o_mask = (int32_t)o_mask;
+            s_mask = INT32_MIN;
+        } else if (!(flags & TCG_BSWAP_OZ)) {
+            z_mask |= MAKE_64BIT_MASK(32, 32);
+        }
         break;
     case INDEX_op_bswap64:
         z_mask = bswap64(z_mask);
-        sign = INT64_MIN;
+        o_mask = bswap64(o_mask);
         break;
     default:
         g_assert_not_reached();
     }
 
-    s_mask = 0;
-    switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
-    case TCG_BSWAP_OZ:
-        break;
-    case TCG_BSWAP_OS:
-        /* If the sign bit may be 1, force all the bits above to 1. */
-        if (z_mask & sign) {
-            z_mask |= sign;
-        }
-        /* The value and therefore s_mask is explicitly sign-extended. */
-        s_mask = sign;
-        break;
-    default:
-        /* The high bits are undefined: force all bits above the sign to 1. */
-        z_mask |= sign << 1;
-        break;
-    }
-
-    return fold_masks_zs(ctx, op, z_mask, s_mask);
+    return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
 }
 
 static bool fold_call(OptContext *ctx, TCGOp *op)
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 13/20] tcg/optimize: Build and use o_bits in fold_deposit
  2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
                   ` (11 preceding siblings ...)
  2025-05-05 20:27 ` [PATCH 12/20] tcg/optimize: Build and use o_bits in fold_bswap Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
  2025-05-05 20:27 ` [PATCH 14/20] tcg/optimize: Build and use o_bits in fold_extract Richard Henderson
                   ` (6 subsequent siblings)
  19 siblings, 0 replies; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
  To: qemu-devel

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 2898a3f913..886947b82b 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -1842,7 +1842,7 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
     int ofs = op->args[3];
     int len = op->args[4];
     int width = 8 * tcg_type_size(ctx->type);
-    uint64_t z_mask, s_mask;
+    uint64_t z_mask, o_mask, s_mask;
 
     if (ti_is_const(t1) && ti_is_const(t2)) {
         return tcg_opt_gen_movi(ctx, op, op->args[0],
@@ -1877,7 +1877,9 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
     }
 
     z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask);
-    return fold_masks_zs(ctx, op, z_mask, s_mask);
+    o_mask = deposit64(t1->o_mask, ofs, len, t2->o_mask);
+
+    return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
 }
 
 static bool fold_divide(OptContext *ctx, TCGOp *op)
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 14/20] tcg/optimize: Build and use o_bits in fold_extract
  2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
                   ` (12 preceding siblings ...)
  2025-05-05 20:27 ` [PATCH 13/20] tcg/optimize: Build and use o_bits in fold_deposit Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
  2025-05-05 20:27 ` [PATCH 15/20] tcg/optimize: Build and use z_bits and o_bits in fold_extract2 Richard Henderson
                   ` (5 subsequent siblings)
  19 siblings, 0 replies; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
  To: qemu-devel

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 886947b82b..c5243392af 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -1091,6 +1091,12 @@ static bool fold_masks_zos(OptContext *ctx, TCGOp *op, uint64_t z_mask,
     return true;
 }
 
+static bool fold_masks_zo(OptContext *ctx, TCGOp *op,
+                          uint64_t z_mask, uint64_t o_mask)
+{
+    return fold_masks_zos(ctx, op, z_mask, o_mask, 0);
+}
+
 static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
                           uint64_t z_mask, uint64_t s_mask)
 {
@@ -1958,7 +1964,7 @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
 
 static bool fold_extract(OptContext *ctx, TCGOp *op)
 {
-    uint64_t z_mask_old, z_mask;
+    uint64_t z_mask_old, z_mask, o_mask;
     TempOptInfo *t1 = arg_info(op->args[1]);
     int pos = op->args[2];
     int len = op->args[3];
@@ -1974,7 +1980,8 @@ static bool fold_extract(OptContext *ctx, TCGOp *op)
         return true;
     }
 
-    return fold_masks_z(ctx, op, z_mask);
+    o_mask = extract64(t1->o_mask, pos, len);
+    return fold_masks_zo(ctx, op, z_mask, o_mask);
 }
 
 static bool fold_extract2(OptContext *ctx, TCGOp *op)
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 15/20] tcg/optimize: Build and use z_bits and o_bits in fold_extract2
  2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
                   ` (13 preceding siblings ...)
  2025-05-05 20:27 ` [PATCH 14/20] tcg/optimize: Build and use o_bits in fold_extract Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
  2025-05-06 11:12   ` Paolo Bonzini
  2025-05-05 20:27 ` [PATCH 16/20] tcg/optimize: Build and use o_bits in fold_exts Richard Henderson
                   ` (4 subsequent siblings)
  19 siblings, 1 reply; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
  To: qemu-devel

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 40 ++++++++++++++++++++++++++++------------
 1 file changed, 28 insertions(+), 12 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index c5243392af..424fd57922 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -1986,21 +1986,37 @@ static bool fold_extract(OptContext *ctx, TCGOp *op)
 
 static bool fold_extract2(OptContext *ctx, TCGOp *op)
 {
-    if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
-        uint64_t v1 = arg_const_val(op->args[1]);
-        uint64_t v2 = arg_const_val(op->args[2]);
-        int shr = op->args[3];
+    TempOptInfo *t1 = arg_info(op->args[1]);
+    TempOptInfo *t2 = arg_info(op->args[2]);
+    uint64_t v1 = ti_const_val(t1);
+    uint64_t v2 = ti_const_val(t2);
+    uint64_t z1 = t1->z_mask;
+    uint64_t z2 = t2->z_mask;
+    uint64_t o1 = t1->o_mask;
+    uint64_t o2 = t2->o_mask;
+    int shr = op->args[3];
 
-        if (ctx->type == TCG_TYPE_I32) {
-            v1 = (uint32_t)v1 >> shr;
-            v2 = (uint64_t)((int32_t)v2 << (32 - shr));
-        } else {
-            v1 >>= shr;
-            v2 <<= 64 - shr;
-        }
+    if (ctx->type == TCG_TYPE_I32) {
+        v1 = (uint32_t)v1 >> shr;
+        z1 = (uint32_t)z1 >> shr;
+        o1 = (uint32_t)o1 >> shr;
+        v2 = (uint64_t)((int32_t)v2 << (32 - shr));
+        z2 = (uint64_t)((int32_t)z2 << (32 - shr));
+        o2 = (uint64_t)((int32_t)o2 << (32 - shr));
+    } else {
+        v1 >>= shr;
+        z1 >>= shr;
+        o1 >>= shr;
+        v2 <<= 64 - shr;
+        z2 <<= 64 - shr;
+        o2 <<= 64 - shr;
+    }
+
+    if (ti_is_const(t1) && ti_is_const(t2)) {
         return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
     }
-    return finish_folding(ctx, op);
+
+    return fold_masks_zo(ctx, op, z1 | z2, o1 | o2);
 }
 
 static bool fold_exts(OptContext *ctx, TCGOp *op)
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 16/20] tcg/optimize: Build and use o_bits in fold_exts
  2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
                   ` (14 preceding siblings ...)
  2025-05-05 20:27 ` [PATCH 15/20] tcg/optimize: Build and use z_bits and o_bits in fold_extract2 Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
  2025-05-05 20:27 ` [PATCH 17/20] tcg/optimize: Build and use o_bits in fold_extu Richard Henderson
                   ` (3 subsequent siblings)
  19 siblings, 0 replies; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
  To: qemu-devel

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 424fd57922..d7f017accf 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -2021,7 +2021,7 @@ static bool fold_extract2(OptContext *ctx, TCGOp *op)
 
 static bool fold_exts(OptContext *ctx, TCGOp *op)
 {
-    uint64_t s_mask, z_mask;
+    uint64_t z_mask, o_mask, s_mask;
     TempOptInfo *t1;
 
     if (fold_const1(ctx, op)) {
@@ -2030,17 +2030,19 @@ static bool fold_exts(OptContext *ctx, TCGOp *op)
 
     t1 = arg_info(op->args[1]);
     z_mask = t1->z_mask;
+    o_mask = t1->o_mask;
     s_mask = t1->s_mask;
 
     switch (op->opc) {
     case INDEX_op_ext_i32_i64:
         s_mask |= INT32_MIN;
         z_mask = (int32_t)z_mask;
+        o_mask = (int32_t)o_mask;
         break;
     default:
         g_assert_not_reached();
     }
-    return fold_masks_zs(ctx, op, z_mask, s_mask);
+    return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
 }
 
 static bool fold_extu(OptContext *ctx, TCGOp *op)
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 17/20] tcg/optimize: Build and use o_bits in fold_extu
  2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
                   ` (15 preceding siblings ...)
  2025-05-05 20:27 ` [PATCH 16/20] tcg/optimize: Build and use o_bits in fold_exts Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
  2025-05-05 20:27 ` [PATCH 18/20] tcg/optimize: Build and use o_bits in fold_movcond Richard Henderson
                   ` (2 subsequent siblings)
  19 siblings, 0 replies; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
  To: qemu-devel

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index d7f017accf..1d722bebc2 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -2047,25 +2047,31 @@ static bool fold_exts(OptContext *ctx, TCGOp *op)
 
 static bool fold_extu(OptContext *ctx, TCGOp *op)
 {
-    uint64_t z_mask;
+    uint64_t z_mask, o_mask;
+    TempOptInfo *t1;
 
     if (fold_const1(ctx, op)) {
         return true;
     }
 
-    z_mask = arg_info(op->args[1])->z_mask;
+    t1 = arg_info(op->args[1]);
+    z_mask = t1->z_mask;
+    o_mask = t1->o_mask;
+
     switch (op->opc) {
     case INDEX_op_extrl_i64_i32:
     case INDEX_op_extu_i32_i64:
         z_mask = (uint32_t)z_mask;
+        o_mask = (uint32_t)o_mask;
         break;
     case INDEX_op_extrh_i64_i32:
         z_mask >>= 32;
+        o_mask >>= 32;
         break;
     default:
         g_assert_not_reached();
     }
-    return fold_masks_z(ctx, op, z_mask);
+    return fold_masks_zo(ctx, op, z_mask, o_mask);
 }
 
 static bool fold_mb(OptContext *ctx, TCGOp *op)
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 18/20] tcg/optimize: Build and use o_bits in fold_movcond
  2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
                   ` (16 preceding siblings ...)
  2025-05-05 20:27 ` [PATCH 17/20] tcg/optimize: Build and use o_bits in fold_extu Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
  2025-05-05 20:27 ` [PATCH 19/20] tcg/optimize: Build and use o_bits in fold_sextract Richard Henderson
  2025-05-05 20:27 ` [PATCH 20/20] tcg/optimize: Build and use o_bits in fold_shift Richard Henderson
  19 siblings, 0 replies; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
  To: qemu-devel

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 1d722bebc2..1040e67220 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -2105,7 +2105,7 @@ static bool fold_mov(OptContext *ctx, TCGOp *op)
 
 static bool fold_movcond(OptContext *ctx, TCGOp *op)
 {
-    uint64_t z_mask, s_mask;
+    uint64_t z_mask, o_mask, s_mask;
     TempOptInfo *tt, *ft;
     int i;
 
@@ -2131,6 +2131,7 @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
     tt = arg_info(op->args[3]);
     ft = arg_info(op->args[4]);
     z_mask = tt->z_mask | ft->z_mask;
+    o_mask = tt->o_mask & ft->o_mask;
     s_mask = tt->s_mask & ft->s_mask;
 
     if (ti_is_const(tt) && ti_is_const(ft)) {
@@ -2153,7 +2154,7 @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
         }
     }
 
-    return fold_masks_zs(ctx, op, z_mask, s_mask);
+    return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
 }
 
 static bool fold_mul(OptContext *ctx, TCGOp *op)
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 19/20] tcg/optimize: Build and use o_bits in fold_sextract
  2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
                   ` (17 preceding siblings ...)
  2025-05-05 20:27 ` [PATCH 18/20] tcg/optimize: Build and use o_bits in fold_movcond Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
  2025-05-05 20:27 ` [PATCH 20/20] tcg/optimize: Build and use o_bits in fold_shift Richard Henderson
  19 siblings, 0 replies; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
  To: qemu-devel

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 1040e67220..f36636ed02 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -2681,7 +2681,7 @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
 
 static bool fold_sextract(OptContext *ctx, TCGOp *op)
 {
-    uint64_t z_mask, s_mask, s_mask_old;
+    uint64_t z_mask, o_mask, s_mask, s_mask_old;
     TempOptInfo *t1 = arg_info(op->args[1]);
     int pos = op->args[2];
     int len = op->args[3];
@@ -2700,7 +2700,8 @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
     }
 
     z_mask = sextract64(t1->z_mask, pos, len);
-    return fold_masks_zs(ctx, op, z_mask, s_mask);
+    o_mask = sextract64(t1->o_mask, pos, len);
+    return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
 }
 
 static bool fold_shift(OptContext *ctx, TCGOp *op)
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 20/20] tcg/optimize: Build and use o_bits in fold_shift
  2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
                   ` (18 preceding siblings ...)
  2025-05-05 20:27 ` [PATCH 19/20] tcg/optimize: Build and use o_bits in fold_sextract Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
  19 siblings, 0 replies; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
  To: qemu-devel

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index f36636ed02..ab05206ee5 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -2706,7 +2706,7 @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
 
 static bool fold_shift(OptContext *ctx, TCGOp *op)
 {
-    uint64_t s_mask, z_mask;
+    uint64_t s_mask, z_mask, o_mask;
     TempOptInfo *t1, *t2;
 
     if (fold_const2(ctx, op) ||
@@ -2719,14 +2719,16 @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
     t2 = arg_info(op->args[2]);
     s_mask = t1->s_mask;
     z_mask = t1->z_mask;
+    o_mask = t1->o_mask;
 
     if (ti_is_const(t2)) {
         int sh = ti_const_val(t2);
 
         z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
+        o_mask = do_constant_folding(op->opc, ctx->type, o_mask, sh);
         s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
 
-        return fold_masks_zs(ctx, op, z_mask, s_mask);
+        return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
     }
 
     switch (op->opc) {
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 30+ messages in thread

* Re: [PATCH 02/20] tcg/optimize: Add one's mask to TempOptInfo
  2025-05-05 20:27 ` [PATCH 02/20] tcg/optimize: Add one's mask to TempOptInfo Richard Henderson
@ 2025-05-06 11:05   ` Paolo Bonzini
  2025-05-06 14:12     ` Richard Henderson
  2025-06-24 22:38   ` Pierrick Bouvier
  1 sibling, 1 reply; 30+ messages in thread
From: Paolo Bonzini @ 2025-05-06 11:05 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel

On 5/5/25 22:27, Richard Henderson wrote:
> +    /* Bits that are known 1 and bits that are known 0 must not overlap. */
> +    tcg_debug_assert((o_mask & ~z_mask) == 0);
> +
> +    /* All bits that are not known zero are known one is a constant. */
> +    if (z_mask == o_mask) {
> +        return tcg_opt_gen_movi(ctx, op, op->args[0], o_mask);
> +    }
> +    /* All bits known zero is zero. */
>       if (z_mask == 0) {
>           return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
>       }
Just a nit, but this second check is now redundant.  z_mask == 0 implies
that o_mask == 0 (otherwise o_mask & ~z_mask is not zero, violating the
assertion above); therefore the "if (z_mask == o_mask)" must already have
returned from fold_masks_zos().

Really nice!

Paolo



^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 15/20] tcg/optimize: Build and use z_bits and o_bits in fold_extract2
  2025-05-05 20:27 ` [PATCH 15/20] tcg/optimize: Build and use z_bits and o_bits in fold_extract2 Richard Henderson
@ 2025-05-06 11:12   ` Paolo Bonzini
  2025-05-06 14:15     ` Richard Henderson
  0 siblings, 1 reply; 30+ messages in thread
From: Paolo Bonzini @ 2025-05-06 11:12 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel

On 5/5/25 22:27, Richard Henderson wrote:
> +    uint64_t v1 = ti_const_val(t1);
> +    uint64_t v2 = ti_const_val(t2);

Are v1 and v2 needed at all?

If starting from z1==o1 and z2==o2, you will always end up with z1|z2 == 
o1|o2 after these:

> +        z1 = (uint32_t)z1 >> shr;
> +        o1 = (uint32_t)o1 >> shr;
> +        z2 = (uint64_t)((int32_t)z2 << (32 - shr));
> +        o2 = (uint64_t)((int32_t)o2 << (32 - shr));

or these:

> +        z1 >>= shr;
> +        o1 >>= shr;
> +        z2 <<= 64 - shr;
> +        o2 <<= 64 - shr;

so fold_masks_zo would do the job.

Paolo



^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 02/20] tcg/optimize: Add one's mask to TempOptInfo
  2025-05-06 11:05   ` Paolo Bonzini
@ 2025-05-06 14:12     ` Richard Henderson
  0 siblings, 0 replies; 30+ messages in thread
From: Richard Henderson @ 2025-05-06 14:12 UTC (permalink / raw)
  To: Paolo Bonzini, qemu-devel

On 5/6/25 04:05, Paolo Bonzini wrote:
> On 5/5/25 22:27, Richard Henderson wrote:
>> +    /* Bits that are known 1 and bits that are known 0 must not overlap. */
>> +    tcg_debug_assert((o_mask & ~z_mask) == 0);
>> +
>> +    /* All bits that are not known zero are known one is a constant. */
>> +    if (z_mask == o_mask) {
>> +        return tcg_opt_gen_movi(ctx, op, op->args[0], o_mask);
>> +    }
>> +    /* All bits known zero is zero. */
>>       if (z_mask == 0) {
>>           return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
>>       }
> Just a nit, but this second check is now redundant.  z_mask == 0 implies that o_mask == 0
> (otherwise o_mask & ~z_mask is not zero); therefore the "if (z_mask == o_mask)" must have
> returned from fold_masks_zos().

Yep, thanks.


r~


^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 15/20] tcg/optimize: Build and use z_bits and o_bits in fold_extract2
  2025-05-06 11:12   ` Paolo Bonzini
@ 2025-05-06 14:15     ` Richard Henderson
  0 siblings, 0 replies; 30+ messages in thread
From: Richard Henderson @ 2025-05-06 14:15 UTC (permalink / raw)
  To: Paolo Bonzini, qemu-devel

On 5/6/25 04:12, Paolo Bonzini wrote:
> On 5/5/25 22:27, Richard Henderson wrote:
>> +    uint64_t v1 = ti_const_val(t1);
>> +    uint64_t v2 = ti_const_val(t2);
> 
> Are v1 and v2 needed at all?
> 
> If starting from z1==o1 and z2==o2, you will always end up with z1|z2 == o1|o2 after these:
> 
>> +        z1 = (uint32_t)z1 >> shr;
>> +        o1 = (uint32_t)o1 >> shr;
>> +        z2 = (uint64_t)((int32_t)z2 << (32 - shr));
>> +        o2 = (uint64_t)((int32_t)o2 << (32 - shr));
> 
> or these:
> 
>> +        z1 >>= shr;
>> +        o1 >>= shr;
>> +        z2 <<= 64 - shr;
>> +        o2 <<= 64 - shr;
> 
> so fold_masks_zo would do the job.

Yes indeed, thanks again.

r~


^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 01/20] tcg/optimize: Introduce arg_const_val
  2025-05-05 20:27 ` [PATCH 01/20] tcg/optimize: Introduce arg_const_val Richard Henderson
@ 2025-06-24 22:21   ` Pierrick Bouvier
  0 siblings, 0 replies; 30+ messages in thread
From: Pierrick Bouvier @ 2025-06-24 22:21 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel

On 5/5/25 1:27 PM, Richard Henderson wrote:
> Use arg_const_val instead of direct access to the TempOptInfo val
> member.  Rename both val and is_const to catch all direct accesses.
> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>   tcg/optimize.c | 78 ++++++++++++++++++++++++++------------------------
>   1 file changed, 41 insertions(+), 37 deletions(-)
>

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>



^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 02/20] tcg/optimize: Add one's mask to TempOptInfo
  2025-05-05 20:27 ` [PATCH 02/20] tcg/optimize: Add one's mask to TempOptInfo Richard Henderson
  2025-05-06 11:05   ` Paolo Bonzini
@ 2025-06-24 22:38   ` Pierrick Bouvier
  1 sibling, 0 replies; 30+ messages in thread
From: Pierrick Bouvier @ 2025-06-24 22:38 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel

On 5/5/25 1:27 PM, Richard Henderson wrote:
> Add o_mask mirroring z_mask, but for 1's instead of 0's.
> Drop is_const and val fields, which now logically overlap.
> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>   tcg/optimize.c | 51 ++++++++++++++++++++++++++++++++++++--------------
>   1 file changed, 37 insertions(+), 14 deletions(-)

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>



^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 03/20] tcg/optimize: Build and use o_bits in fold_and
  2025-05-05 20:27 ` [PATCH 03/20] tcg/optimize: Build and use o_bits in fold_and Richard Henderson
@ 2025-06-24 22:42   ` Pierrick Bouvier
  0 siblings, 0 replies; 30+ messages in thread
From: Pierrick Bouvier @ 2025-06-24 22:42 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel

On 5/5/25 1:27 PM, Richard Henderson wrote:
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>   tcg/optimize.c | 17 ++++++-----------
>   1 file changed, 6 insertions(+), 11 deletions(-)

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>



^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 04/20] tcg/optimize: Build and use o_bits in fold_andc
  2025-05-05 20:27 ` [PATCH 04/20] tcg/optimize: Build and use o_bits in fold_andc Richard Henderson
@ 2025-06-24 23:20   ` Pierrick Bouvier
  0 siblings, 0 replies; 30+ messages in thread
From: Pierrick Bouvier @ 2025-06-24 23:20 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel

On 5/5/25 1:27 PM, Richard Henderson wrote:
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>   tcg/optimize.c | 21 ++++++++-------------
>   1 file changed, 8 insertions(+), 13 deletions(-)

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>



^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 05/20] tcg/optimize: Build and use z_bits and o_bits in fold_eqv
  2025-05-05 20:27 ` [PATCH 05/20] tcg/optimize: Build and use z_bits and o_bits in fold_eqv Richard Henderson
@ 2025-06-24 23:55   ` Pierrick Bouvier
  0 siblings, 0 replies; 30+ messages in thread
From: Pierrick Bouvier @ 2025-06-24 23:55 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel

On 5/5/25 1:27 PM, Richard Henderson wrote:
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>   tcg/optimize.c | 8 ++++++--
>   1 file changed, 6 insertions(+), 2 deletions(-)
> 
> diff --git a/tcg/optimize.c b/tcg/optimize.c
> index faee3e8580..08d15e5395 100644
> --- a/tcg/optimize.c
> +++ b/tcg/optimize.c
> @@ -1917,7 +1917,7 @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
>   
>   static bool fold_eqv(OptContext *ctx, TCGOp *op)
>   {
> -    uint64_t s_mask;
> +    uint64_t z_mask, o_mask, s_mask;
>       TempOptInfo *t1, *t2;
>   
>       if (fold_const2_commutative(ctx, op) ||
> @@ -1947,8 +1947,12 @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
>       }
>   
>       t1 = arg_info(op->args[1]);
> +
> +    z_mask = (t1->z_mask | ~t2->o_mask) & (t2->z_mask | ~t1->o_mask);
> +    o_mask = ~(t1->z_mask | t2->z_mask) | (t1->o_mask & t2->o_mask);
>       s_mask = t1->s_mask & t2->s_mask;

Even after writing the truth table for eqv(t1, t2) = ~(t1 ^ t2), I'm not 
sure I directly understand how z_mask and o_mask are derived.

In this case, we have:
t1 | t2 | ~(t1 ^ t2)
0  | 0  | 1
0  | 1  | 0
1  | 0  | 0
1  | 1  | 1

In this commit, and in the series, it's confusing for me to have mask 
values set as 0 for 0, and 1 for 1. When mixing that with bitwise 
operations, it starts to get hard to follow, always having to remember 
if you deal with 0 or 1.

It could really help to have simple helpers for (known) zeroes(v) and 
ones(v). I feel as well some comments would be removed because it would 
become explicit what we are dealing with.

let:
   zeroes(v) = ~v->z_mask
   ones(v) = v->o_mask

res_zeroes = zeroes(t1) & ones(t2) | ones(t1) & zeroes(t2);
z_mask = ~res_zeroes;

which gives:
z_mask = ~res_zeroes
        = ~((~t1->z & t2->o) | (t1->o & ~t2->z))
        = ~(~t1->z & t2->o) & ~(t1->o & ~t2->z)
        = (t1->z | ~t2->o) & (~t1->o | t2->z)
which is the code we have here.

> -    return fold_masks_s(ctx, op, s_mask);
> +
> +    return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
>   }
>   
>   static bool fold_extract(OptContext *ctx, TCGOp *op)

I'm not necessarily forcing a change, but I don't see myself rewriting 
truth tables and expanding expressions on paper for every operation to 
verify that they are correct.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>



^ permalink raw reply	[flat|nested] 30+ messages in thread

end of thread, other threads:[~2025-06-24 23:56 UTC | newest]

Thread overview: 30+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
2025-05-05 20:27 ` [PATCH 01/20] tcg/optimize: Introduce arg_const_val Richard Henderson
2025-06-24 22:21   ` Pierrick Bouvier
2025-05-05 20:27 ` [PATCH 02/20] tcg/optimize: Add one's mask to TempOptInfo Richard Henderson
2025-05-06 11:05   ` Paolo Bonzini
2025-05-06 14:12     ` Richard Henderson
2025-06-24 22:38   ` Pierrick Bouvier
2025-05-05 20:27 ` [PATCH 03/20] tcg/optimize: Build and use o_bits in fold_and Richard Henderson
2025-06-24 22:42   ` Pierrick Bouvier
2025-05-05 20:27 ` [PATCH 04/20] tcg/optimize: Build and use o_bits in fold_andc Richard Henderson
2025-06-24 23:20   ` Pierrick Bouvier
2025-05-05 20:27 ` [PATCH 05/20] tcg/optimize: Build and use z_bits and o_bits in fold_eqv Richard Henderson
2025-06-24 23:55   ` Pierrick Bouvier
2025-05-05 20:27 ` [PATCH 06/20] tcg/optimize: Build and use z_bits and o_bits in fold_nand Richard Henderson
2025-05-05 20:27 ` [PATCH 07/20] tcg/optimize: Build and use z_bits and o_bits in fold_nor Richard Henderson
2025-05-05 20:27 ` [PATCH 08/20] tcg/optimize: Build and use z_bits and o_bits in fold_not Richard Henderson
2025-05-05 20:27 ` [PATCH 09/20] tcg/optimize: Build and use one and affected bits in fold_or Richard Henderson
2025-05-05 20:27 ` [PATCH 10/20] tcg/optimize: Build and use zero, one and affected bits in fold_orc Richard Henderson
2025-05-05 20:27 ` [PATCH 11/20] tcg/optimize: Build and use o_bits in fold_xor Richard Henderson
2025-05-05 20:27 ` [PATCH 12/20] tcg/optimize: Build and use o_bits in fold_bswap Richard Henderson
2025-05-05 20:27 ` [PATCH 13/20] tcg/optimize: Build and use o_bits in fold_deposit Richard Henderson
2025-05-05 20:27 ` [PATCH 14/20] tcg/optimize: Build and use o_bits in fold_extract Richard Henderson
2025-05-05 20:27 ` [PATCH 15/20] tcg/optimize: Build and use z_bits and o_bits in fold_extract2 Richard Henderson
2025-05-06 11:12   ` Paolo Bonzini
2025-05-06 14:15     ` Richard Henderson
2025-05-05 20:27 ` [PATCH 16/20] tcg/optimize: Build and use o_bits in fold_exts Richard Henderson
2025-05-05 20:27 ` [PATCH 17/20] tcg/optimize: Build and use o_bits in fold_extu Richard Henderson
2025-05-05 20:27 ` [PATCH 18/20] tcg/optimize: Build and use o_bits in fold_movcond Richard Henderson
2025-05-05 20:27 ` [PATCH 19/20] tcg/optimize: Build and use o_bits in fold_sextract Richard Henderson
2025-05-05 20:27 ` [PATCH 20/20] tcg/optimize: Build and use o_bits in fold_shift Richard Henderson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).