* [PATCH 00/20] tcg/optimize: Track and use known 1's
@ 2025-05-05 20:27 Richard Henderson
2025-05-05 20:27 ` [PATCH 01/20] tcg/optimize: Introduce arg_const_val Richard Henderson
` (19 more replies)
0 siblings, 20 replies; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
To: qemu-devel
We already track and use known 0's; do the same for 1's.
This actually simplifies some of the logical operations,
which previously required checking for a constant value.
r~
Richard Henderson (20):
tcg/optimize: Introduce arg_const_val
tcg/optimize: Add one's mask to TempOptInfo
tcg/optimize: Build and use o_bits in fold_and
tcg/optimize: Build and use o_bits in fold_andc
tcg/optimize: Build and use z_bits and o_bits in fold_eqv
tcg/optimize: Build and use z_bits and o_bits in fold_nand
tcg/optimize: Build and use z_bits and o_bits in fold_nor
tcg/optimize: Build and use z_bits and o_bits in fold_not
tcg/optimize: Build and use one and affected bits in fold_or
tcg/optimize: Build and use zero, one and affected bits in fold_orc
tcg/optimize: Build and use o_bits in fold_xor
tcg/optimize: Build and use o_bits in fold_bswap
tcg/optimize: Build and use o_bits in fold_deposit
tcg/optimize: Build and use o_bits in fold_extract
tcg/optimize: Build and use z_bits and o_bits in fold_extract2
tcg/optimize: Build and use o_bits in fold_exts
tcg/optimize: Build and use o_bits in fold_extu
tcg/optimize: Build and use o_bits in fold_movcond
tcg/optimize: Build and use o_bits in fold_sextract
tcg/optimize: Build and use o_bits in fold_shift
tcg/optimize.c | 359 +++++++++++++++++++++++++++++++------------------
1 file changed, 226 insertions(+), 133 deletions(-)
--
2.43.0
^ permalink raw reply [flat|nested] 30+ messages in thread
* [PATCH 01/20] tcg/optimize: Introduce arg_const_val
2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
2025-06-24 22:21 ` Pierrick Bouvier
2025-05-05 20:27 ` [PATCH 02/20] tcg/optimize: Add one's mask to TempOptInfo Richard Henderson
` (18 subsequent siblings)
19 siblings, 1 reply; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
To: qemu-devel
Use arg_const_val instead of direct access to the TempOptInfo val
member. Rename both val and is_const to catch all direct accesses.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 78 ++++++++++++++++++++++++++------------------------
1 file changed, 41 insertions(+), 37 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 10a76c5461..73a272eeb3 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -39,11 +39,11 @@ typedef struct MemCopyInfo {
} MemCopyInfo;
typedef struct TempOptInfo {
- bool is_const;
+ bool is_const_;
TCGTemp *prev_copy;
TCGTemp *next_copy;
QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy;
- uint64_t val;
+ uint64_t val_;
uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
uint64_t s_mask; /* mask bit is 1 if value bit matches msb */
} TempOptInfo;
@@ -73,12 +73,12 @@ static inline TempOptInfo *arg_info(TCGArg arg)
static inline bool ti_is_const(TempOptInfo *ti)
{
- return ti->is_const;
+ return ti->is_const_;
}
static inline uint64_t ti_const_val(TempOptInfo *ti)
{
- return ti->val;
+ return ti->val_;
}
static inline bool ti_is_const_val(TempOptInfo *ti, uint64_t val)
@@ -101,6 +101,11 @@ static inline bool arg_is_const(TCGArg arg)
return ts_is_const(arg_temp(arg));
}
+static inline uint64_t arg_const_val(TCGArg arg)
+{
+ return ti_const_val(arg_info(arg));
+}
+
static inline bool arg_is_const_val(TCGArg arg, uint64_t val)
{
return ts_is_const_val(arg_temp(arg), val);
@@ -137,12 +142,12 @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts)
ti->prev_copy = ts;
QSIMPLEQ_INIT(&ti->mem_copy);
if (ts->kind == TEMP_CONST) {
- ti->is_const = true;
- ti->val = ts->val;
+ ti->is_const_ = true;
+ ti->val_ = ts->val;
ti->z_mask = ts->val;
ti->s_mask = INT64_MIN >> clrsb64(ts->val);
} else {
- ti->is_const = false;
+ ti->is_const_ = false;
ti->z_mask = -1;
ti->s_mask = 0;
}
@@ -229,7 +234,7 @@ static void reset_ts(OptContext *ctx, TCGTemp *ts)
pi->next_copy = ti->next_copy;
ti->next_copy = ts;
ti->prev_copy = ts;
- ti->is_const = false;
+ ti->is_const_ = false;
ti->z_mask = -1;
ti->s_mask = 0;
@@ -394,8 +399,8 @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
di->prev_copy = src_ts;
ni->prev_copy = dst_ts;
si->next_copy = dst_ts;
- di->is_const = si->is_const;
- di->val = si->val;
+ di->is_const_ = si->is_const_;
+ di->val_ = si->val_;
if (!QSIMPLEQ_EMPTY(&si->mem_copy)
&& cmp_better_copy(src_ts, dst_ts) == dst_ts) {
@@ -687,8 +692,8 @@ static int do_constant_folding_cond(TCGType type, TCGArg x,
TCGArg y, TCGCond c)
{
if (arg_is_const(x) && arg_is_const(y)) {
- uint64_t xv = arg_info(x)->val;
- uint64_t yv = arg_info(y)->val;
+ uint64_t xv = arg_const_val(x);
+ uint64_t yv = arg_const_val(y);
switch (type) {
case TCG_TYPE_I32:
@@ -801,14 +806,14 @@ static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest,
* TSTNE x,i -> NE x,0 if i includes all nonzero bits of x
*/
if (args_are_copies(*p1, *p2) ||
- (arg_is_const(*p2) && (i1->z_mask & ~arg_info(*p2)->val) == 0)) {
+ (arg_is_const(*p2) && (i1->z_mask & ~arg_const_val(*p2)) == 0)) {
*p2 = arg_new_constant(ctx, 0);
*pcond = tcg_tst_eqne_cond(cond);
return -1;
}
/* TSTNE x,i -> LT x,0 if i only includes sign bit copies */
- if (arg_is_const(*p2) && (arg_info(*p2)->val & ~i1->s_mask) == 0) {
+ if (arg_is_const(*p2) && (arg_const_val(*p2) & ~i1->s_mask) == 0) {
*p2 = arg_new_constant(ctx, 0);
*pcond = tcg_tst_ltge_cond(cond);
return -1;
@@ -849,13 +854,13 @@ static int do_constant_folding_cond2(OptContext *ctx, TCGOp *op, TCGArg *args)
bh = args[3];
if (arg_is_const(bl) && arg_is_const(bh)) {
- tcg_target_ulong blv = arg_info(bl)->val;
- tcg_target_ulong bhv = arg_info(bh)->val;
+ tcg_target_ulong blv = arg_const_val(bl);
+ tcg_target_ulong bhv = arg_const_val(bh);
uint64_t b = deposit64(blv, 32, 32, bhv);
if (arg_is_const(al) && arg_is_const(ah)) {
- tcg_target_ulong alv = arg_info(al)->val;
- tcg_target_ulong ahv = arg_info(ah)->val;
+ tcg_target_ulong alv = arg_const_val(al);
+ tcg_target_ulong ahv = arg_const_val(ah);
uint64_t a = deposit64(alv, 32, 32, ahv);
r = do_constant_folding_cond_64(a, b, c);
@@ -989,9 +994,8 @@ static bool finish_folding(OptContext *ctx, TCGOp *op)
static bool fold_const1(OptContext *ctx, TCGOp *op)
{
if (arg_is_const(op->args[1])) {
- uint64_t t;
+ uint64_t t = arg_const_val(op->args[1]);
- t = arg_info(op->args[1])->val;
t = do_constant_folding(op->opc, ctx->type, t, 0);
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
}
@@ -1001,8 +1005,8 @@ static bool fold_const1(OptContext *ctx, TCGOp *op)
static bool fold_const2(OptContext *ctx, TCGOp *op)
{
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
- uint64_t t1 = arg_info(op->args[1])->val;
- uint64_t t2 = arg_info(op->args[2])->val;
+ uint64_t t1 = arg_const_val(op->args[1]);
+ uint64_t t2 = arg_const_val(op->args[2]);
t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
@@ -1486,8 +1490,8 @@ static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
}
if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
- uint64_t tv = arg_info(op->args[2])->val;
- uint64_t fv = arg_info(op->args[3])->val;
+ uint64_t tv = arg_const_val(op->args[2]);
+ uint64_t fv = arg_const_val(op->args[3]);
if (tv == -1 && fv == 0) {
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
@@ -1504,7 +1508,7 @@ static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
}
}
if (arg_is_const(op->args[2])) {
- uint64_t tv = arg_info(op->args[2])->val;
+ uint64_t tv = arg_const_val(op->args[2]);
if (tv == -1) {
op->opc = INDEX_op_or_vec;
op->args[2] = op->args[3];
@@ -1518,7 +1522,7 @@ static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
}
}
if (arg_is_const(op->args[3])) {
- uint64_t fv = arg_info(op->args[3])->val;
+ uint64_t fv = arg_const_val(op->args[3]);
if (fv == 0) {
op->opc = INDEX_op_and_vec;
return fold_and(ctx, op);
@@ -1876,7 +1880,7 @@ static bool fold_divide(OptContext *ctx, TCGOp *op)
static bool fold_dup(OptContext *ctx, TCGOp *op)
{
if (arg_is_const(op->args[1])) {
- uint64_t t = arg_info(op->args[1])->val;
+ uint64_t t = arg_const_val(op->args[1]);
t = dup_const(TCGOP_VECE(op), t);
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
}
@@ -1886,8 +1890,8 @@ static bool fold_dup(OptContext *ctx, TCGOp *op)
static bool fold_dup2(OptContext *ctx, TCGOp *op)
{
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
- uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32,
- arg_info(op->args[2])->val);
+ uint64_t t = deposit64(arg_const_val(op->args[1]), 32, 32,
+ arg_const_val(op->args[2]));
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
}
@@ -1958,8 +1962,8 @@ static bool fold_extract(OptContext *ctx, TCGOp *op)
static bool fold_extract2(OptContext *ctx, TCGOp *op)
{
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
- uint64_t v1 = arg_info(op->args[1])->val;
- uint64_t v2 = arg_info(op->args[2])->val;
+ uint64_t v1 = arg_const_val(op->args[1]);
+ uint64_t v2 = arg_const_val(op->args[2]);
int shr = op->args[3];
if (ctx->type == TCG_TYPE_I32) {
@@ -2127,8 +2131,8 @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op)
swap_commutative(op->args[0], &op->args[2], &op->args[3]);
if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
- uint64_t a = arg_info(op->args[2])->val;
- uint64_t b = arg_info(op->args[3])->val;
+ uint64_t a = arg_const_val(op->args[2]);
+ uint64_t b = arg_const_val(op->args[3]);
uint64_t h, l;
TCGArg rl, rh;
TCGOp *op2;
@@ -2330,7 +2334,7 @@ static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
}
a_zmask = arg_info(op->args[1])->z_mask;
- b_val = arg_info(op->args[2])->val;
+ b_val = arg_const_val(op->args[2]);
cond = op->args[3];
if (ctx->type == TCG_TYPE_I32) {
@@ -2418,7 +2422,7 @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
}
src2 = op->args[2];
- val = arg_info(src2)->val;
+ val = arg_const_val(src2);
if (!is_power_of_2(val)) {
return;
}
@@ -2669,7 +2673,7 @@ static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
TCGOpcode neg_op;
bool have_neg;
- if (!arg_is_const(op->args[1]) || arg_info(op->args[1])->val != 0) {
+ if (!arg_is_const_val(op->args[1], 0)) {
return false;
}
@@ -2719,7 +2723,7 @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
/* Fold sub r,x,i to add r,x,-i */
if (arg_is_const(op->args[2])) {
- uint64_t val = arg_info(op->args[2])->val;
+ uint64_t val = arg_const_val(op->args[2]);
op->opc = INDEX_op_add;
op->args[2] = arg_new_constant(ctx, -val);
--
2.43.0
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 02/20] tcg/optimize: Add one's mask to TempOptInfo
2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
2025-05-05 20:27 ` [PATCH 01/20] tcg/optimize: Introduce arg_const_val Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
2025-05-06 11:05 ` Paolo Bonzini
2025-06-24 22:38 ` Pierrick Bouvier
2025-05-05 20:27 ` [PATCH 03/20] tcg/optimize: Build and use o_bits in fold_and Richard Henderson
` (17 subsequent siblings)
19 siblings, 2 replies; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
To: qemu-devel
Add o_mask mirroring z_mask, but for 1's instead of 0's.
Drop is_const and val fields, which now logically overlap.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 51 ++++++++++++++++++++++++++++++++++++--------------
1 file changed, 37 insertions(+), 14 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 73a272eeb3..395ad8232a 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -39,12 +39,11 @@ typedef struct MemCopyInfo {
} MemCopyInfo;
typedef struct TempOptInfo {
- bool is_const_;
TCGTemp *prev_copy;
TCGTemp *next_copy;
QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy;
- uint64_t val_;
uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
+ uint64_t o_mask; /* mask bit is 1 if and only if value bit is 1 */
uint64_t s_mask; /* mask bit is 1 if value bit matches msb */
} TempOptInfo;
@@ -73,12 +72,14 @@ static inline TempOptInfo *arg_info(TCGArg arg)
static inline bool ti_is_const(TempOptInfo *ti)
{
- return ti->is_const_;
+ /* If all bits that are not known zeros are known ones, it's constant. */
+ return ti->z_mask == ti->o_mask;
}
static inline uint64_t ti_const_val(TempOptInfo *ti)
{
- return ti->val_;
+ /* If constant, both z_mask and o_mask contain the value. */
+ return ti->z_mask;
}
static inline bool ti_is_const_val(TempOptInfo *ti, uint64_t val)
@@ -142,13 +143,12 @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts)
ti->prev_copy = ts;
QSIMPLEQ_INIT(&ti->mem_copy);
if (ts->kind == TEMP_CONST) {
- ti->is_const_ = true;
- ti->val_ = ts->val;
ti->z_mask = ts->val;
+ ti->o_mask = ts->val;
ti->s_mask = INT64_MIN >> clrsb64(ts->val);
} else {
- ti->is_const_ = false;
ti->z_mask = -1;
+ ti->o_mask = 0;
ti->s_mask = 0;
}
}
@@ -234,8 +234,8 @@ static void reset_ts(OptContext *ctx, TCGTemp *ts)
pi->next_copy = ti->next_copy;
ti->next_copy = ts;
ti->prev_copy = ts;
- ti->is_const_ = false;
ti->z_mask = -1;
+ ti->o_mask = 0;
ti->s_mask = 0;
if (!QSIMPLEQ_EMPTY(&ti->mem_copy)) {
@@ -390,6 +390,7 @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
op->args[1] = src;
di->z_mask = si->z_mask;
+ di->o_mask = si->o_mask;
di->s_mask = si->s_mask;
if (src_ts->type == dst_ts->type) {
@@ -399,13 +400,19 @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
di->prev_copy = src_ts;
ni->prev_copy = dst_ts;
si->next_copy = dst_ts;
- di->is_const_ = si->is_const_;
- di->val_ = si->val_;
if (!QSIMPLEQ_EMPTY(&si->mem_copy)
&& cmp_better_copy(src_ts, dst_ts) == dst_ts) {
move_mem_copies(dst_ts, src_ts);
}
+ } else if (dst_ts->type == TCG_TYPE_I32) {
+ di->z_mask = (int32_t)di->z_mask;
+ di->o_mask = (int32_t)di->o_mask;
+ di->s_mask |= INT32_MIN;
+ } else {
+ di->z_mask |= MAKE_64BIT_MASK(32, 32);
+ di->o_mask = (uint32_t)di->o_mask;
+ di->s_mask = INT64_MIN;
}
return true;
}
@@ -1032,8 +1039,8 @@ static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
* If z_mask allows, fold the output to constant zero.
* The passed s_mask may be augmented by z_mask.
*/
-static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
- uint64_t z_mask, int64_t s_mask)
+static bool fold_masks_zos(OptContext *ctx, TCGOp *op, uint64_t z_mask,
+ uint64_t o_mask, int64_t s_mask)
{
const TCGOpDef *def = &tcg_op_defs[op->opc];
TCGTemp *ts;
@@ -1052,9 +1059,18 @@ static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
*/
if (ctx->type == TCG_TYPE_I32) {
z_mask = (int32_t)z_mask;
+ o_mask = (int32_t)o_mask;
s_mask |= INT32_MIN;
}
+ /* Bits that are known 1 and bits that are known 0 must not overlap. */
+ tcg_debug_assert((o_mask & ~z_mask) == 0);
+
+ /* If all bits that are not known zero are known one, it's a constant. */
+ if (z_mask == o_mask) {
+ return tcg_opt_gen_movi(ctx, op, op->args[0], o_mask);
+ }
+ /* All bits known zero is zero. */
if (z_mask == 0) {
return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
}
@@ -1068,20 +1084,27 @@ static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
/* Canonicalize s_mask and incorporate data from z_mask. */
rep = clz64(~s_mask);
rep = MAX(rep, clz64(z_mask));
+ rep = MAX(rep, clz64(~o_mask));
rep = MAX(rep - 1, 0);
ti->s_mask = INT64_MIN >> rep;
return true;
}
+static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
+ uint64_t z_mask, uint64_t s_mask)
+{
+ return fold_masks_zos(ctx, op, z_mask, 0, s_mask);
+}
+
static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask)
{
- return fold_masks_zs(ctx, op, z_mask, 0);
+ return fold_masks_zos(ctx, op, z_mask, 0, 0);
}
static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask)
{
- return fold_masks_zs(ctx, op, -1, s_mask);
+ return fold_masks_zos(ctx, op, -1, 0, s_mask);
}
/*
--
2.43.0
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 03/20] tcg/optimize: Build and use o_bits in fold_and
2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
2025-05-05 20:27 ` [PATCH 01/20] tcg/optimize: Introduce arg_const_val Richard Henderson
2025-05-05 20:27 ` [PATCH 02/20] tcg/optimize: Add one's mask to TempOptInfo Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
2025-06-24 22:42 ` Pierrick Bouvier
2025-05-05 20:27 ` [PATCH 04/20] tcg/optimize: Build and use o_bits in fold_andc Richard Henderson
` (16 subsequent siblings)
19 siblings, 1 reply; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
To: qemu-devel
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 17 ++++++-----------
1 file changed, 6 insertions(+), 11 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 395ad8232a..c9f0f46b83 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -1420,7 +1420,7 @@ static bool fold_addco(OptContext *ctx, TCGOp *op)
static bool fold_and(OptContext *ctx, TCGOp *op)
{
- uint64_t z1, z2, z_mask, s_mask;
+ uint64_t z_mask, o_mask, s_mask;
TempOptInfo *t1, *t2;
if (fold_const2_commutative(ctx, op) ||
@@ -1432,26 +1432,21 @@ static bool fold_and(OptContext *ctx, TCGOp *op)
t1 = arg_info(op->args[1]);
t2 = arg_info(op->args[2]);
- z1 = t1->z_mask;
- z2 = t2->z_mask;
- /*
- * Known-zeros does not imply known-ones. Therefore unless
- * arg2 is constant, we can't infer affected bits from it.
- */
- if (ti_is_const(t2) && fold_affected_mask(ctx, op, z1 & ~z2)) {
+ /* Affected bits are those not known zero, masked by those known one. */
+ if (fold_affected_mask(ctx, op, t1->z_mask & ~t2->o_mask)) {
return true;
}
- z_mask = z1 & z2;
-
+ z_mask = t1->z_mask & t2->z_mask;
+ o_mask = t1->o_mask & t2->o_mask;
/*
* Sign repetitions are perforce all identical, whether they are 1 or 0.
* Bitwise operations preserve the relative quantity of the repetitions.
*/
s_mask = t1->s_mask & t2->s_mask;
- return fold_masks_zs(ctx, op, z_mask, s_mask);
+ return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
}
static bool fold_andc(OptContext *ctx, TCGOp *op)
--
2.43.0
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 04/20] tcg/optimize: Build and use o_bits in fold_andc
2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
` (2 preceding siblings ...)
2025-05-05 20:27 ` [PATCH 03/20] tcg/optimize: Build and use o_bits in fold_and Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
2025-06-24 23:20 ` Pierrick Bouvier
2025-05-05 20:27 ` [PATCH 05/20] tcg/optimize: Build and use z_bits and o_bits in fold_eqv Richard Henderson
` (15 subsequent siblings)
19 siblings, 1 reply; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
To: qemu-devel
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 21 ++++++++-------------
1 file changed, 8 insertions(+), 13 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index c9f0f46b83..faee3e8580 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -1451,7 +1451,7 @@ static bool fold_and(OptContext *ctx, TCGOp *op)
static bool fold_andc(OptContext *ctx, TCGOp *op)
{
- uint64_t z_mask, s_mask;
+ uint64_t z_mask, o_mask, s_mask;
TempOptInfo *t1, *t2;
if (fold_const2(ctx, op) ||
@@ -1463,7 +1463,6 @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
t1 = arg_info(op->args[1]);
t2 = arg_info(op->args[2]);
- z_mask = t1->z_mask;
if (ti_is_const(t2)) {
/* Fold andc r,x,i to and r,x,~i. */
@@ -1484,20 +1483,16 @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
return fold_and(ctx, op);
}
- /*
- * Known-zeros does not imply known-ones. Therefore unless
- * arg2 is constant, we can't infer anything from it.
- */
- if (ti_is_const(t2)) {
- uint64_t v2 = ti_const_val(t2);
- if (fold_affected_mask(ctx, op, z_mask & v2)) {
- return true;
- }
- z_mask &= ~v2;
+ /* Affected bits are those not known zero, masked by those known zero. */
+ if (fold_affected_mask(ctx, op, t1->z_mask & t2->z_mask)) {
+ return true;
}
+ z_mask = t1->z_mask & ~t2->o_mask;
+ o_mask = t1->o_mask & ~t2->z_mask;
s_mask = t1->s_mask & t2->s_mask;
- return fold_masks_zs(ctx, op, z_mask, s_mask);
+
+ return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
}
static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
--
2.43.0
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 05/20] tcg/optimize: Build and use z_bits and o_bits in fold_eqv
2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
` (3 preceding siblings ...)
2025-05-05 20:27 ` [PATCH 04/20] tcg/optimize: Build and use o_bits in fold_andc Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
2025-06-24 23:55 ` Pierrick Bouvier
2025-05-05 20:27 ` [PATCH 06/20] tcg/optimize: Build and use z_bits and o_bits in fold_nand Richard Henderson
` (14 subsequent siblings)
19 siblings, 1 reply; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
To: qemu-devel
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index faee3e8580..08d15e5395 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -1917,7 +1917,7 @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
static bool fold_eqv(OptContext *ctx, TCGOp *op)
{
- uint64_t s_mask;
+ uint64_t z_mask, o_mask, s_mask;
TempOptInfo *t1, *t2;
if (fold_const2_commutative(ctx, op) ||
@@ -1947,8 +1947,12 @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
}
t1 = arg_info(op->args[1]);
+
+ z_mask = (t1->z_mask | ~t2->o_mask) & (t2->z_mask | ~t1->o_mask);
+ o_mask = ~(t1->z_mask | t2->z_mask) | (t1->o_mask & t2->o_mask);
s_mask = t1->s_mask & t2->s_mask;
- return fold_masks_s(ctx, op, s_mask);
+
+ return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
}
static bool fold_extract(OptContext *ctx, TCGOp *op)
--
2.43.0
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 06/20] tcg/optimize: Build and use z_bits and o_bits in fold_nand
2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
` (4 preceding siblings ...)
2025-05-05 20:27 ` [PATCH 05/20] tcg/optimize: Build and use z_bits and o_bits in fold_eqv Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
2025-05-05 20:27 ` [PATCH 07/20] tcg/optimize: Build and use z_bits and o_bits in fold_nor Richard Henderson
` (13 subsequent siblings)
19 siblings, 0 replies; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
To: qemu-devel
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 08d15e5395..9721c80943 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -2192,16 +2192,22 @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op)
static bool fold_nand(OptContext *ctx, TCGOp *op)
{
- uint64_t s_mask;
+ uint64_t z_mask, o_mask, s_mask;
+ TempOptInfo *t1, *t2;
if (fold_const2_commutative(ctx, op) ||
fold_xi_to_not(ctx, op, -1)) {
return true;
}
- s_mask = arg_info(op->args[1])->s_mask
- & arg_info(op->args[2])->s_mask;
- return fold_masks_s(ctx, op, s_mask);
+ t1 = arg_info(op->args[1]);
+ t2 = arg_info(op->args[2]);
+
+ z_mask = ~(t1->o_mask & t2->o_mask);
+ o_mask = ~(t1->z_mask & t2->z_mask);
+ s_mask = t1->s_mask & t2->s_mask;
+
+ return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
}
static bool fold_neg_no_const(OptContext *ctx, TCGOp *op)
--
2.43.0
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 07/20] tcg/optimize: Build and use z_bits and o_bits in fold_nor
2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
` (5 preceding siblings ...)
2025-05-05 20:27 ` [PATCH 06/20] tcg/optimize: Build and use z_bits and o_bits in fold_nand Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
2025-05-05 20:27 ` [PATCH 08/20] tcg/optimize: Build and use z_bits and o_bits in fold_not Richard Henderson
` (12 subsequent siblings)
19 siblings, 0 replies; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
To: qemu-devel
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 9721c80943..d5256aa02c 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -2226,16 +2226,22 @@ static bool fold_neg(OptContext *ctx, TCGOp *op)
static bool fold_nor(OptContext *ctx, TCGOp *op)
{
- uint64_t s_mask;
+ uint64_t z_mask, o_mask, s_mask;
+ TempOptInfo *t1, *t2;
if (fold_const2_commutative(ctx, op) ||
fold_xi_to_not(ctx, op, 0)) {
return true;
}
- s_mask = arg_info(op->args[1])->s_mask
- & arg_info(op->args[2])->s_mask;
- return fold_masks_s(ctx, op, s_mask);
+ t1 = arg_info(op->args[1]);
+ t2 = arg_info(op->args[2]);
+
+ z_mask = ~(t1->o_mask | t2->o_mask);
+ o_mask = ~(t1->z_mask | t2->z_mask);
+ s_mask = t1->s_mask & t2->s_mask;
+
+ return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
}
static bool fold_not(OptContext *ctx, TCGOp *op)
--
2.43.0
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 08/20] tcg/optimize: Build and use z_bits and o_bits in fold_not
2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
` (6 preceding siblings ...)
2025-05-05 20:27 ` [PATCH 07/20] tcg/optimize: Build and use z_bits and o_bits in fold_nor Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
2025-05-05 20:27 ` [PATCH 09/20] tcg/optimize: Build and use one and affected bits in fold_or Richard Henderson
` (11 subsequent siblings)
19 siblings, 0 replies; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
To: qemu-devel
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index d5256aa02c..8fbf682e6d 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -2246,10 +2246,14 @@ static bool fold_nor(OptContext *ctx, TCGOp *op)
static bool fold_not(OptContext *ctx, TCGOp *op)
{
+ TempOptInfo *t1;
+
if (fold_const1(ctx, op)) {
return true;
}
- return fold_masks_s(ctx, op, arg_info(op->args[1])->s_mask);
+
+ t1 = arg_info(op->args[1]);
+ return fold_masks_zos(ctx, op, ~t1->o_mask, ~t1->z_mask, t1->s_mask);
}
static bool fold_or(OptContext *ctx, TCGOp *op)
--
2.43.0
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 09/20] tcg/optimize: Build and use one and affected bits in fold_or
2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
` (7 preceding siblings ...)
2025-05-05 20:27 ` [PATCH 08/20] tcg/optimize: Build and use z_bits and o_bits in fold_not Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
2025-05-05 20:27 ` [PATCH 10/20] tcg/optimize: Build and use zero, one and affected bits in fold_orc Richard Henderson
` (10 subsequent siblings)
19 siblings, 0 replies; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
To: qemu-devel
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 8fbf682e6d..22d302c9bf 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -2258,7 +2258,7 @@ static bool fold_not(OptContext *ctx, TCGOp *op)
static bool fold_or(OptContext *ctx, TCGOp *op)
{
- uint64_t z_mask, s_mask;
+ uint64_t z_mask, o_mask, s_mask;
TempOptInfo *t1, *t2;
if (fold_const2_commutative(ctx, op) ||
@@ -2269,9 +2269,17 @@ static bool fold_or(OptContext *ctx, TCGOp *op)
t1 = arg_info(op->args[1]);
t2 = arg_info(op->args[2]);
+
+ /* Affected bits are those not known one, masked by those known zero. */
+ if (fold_affected_mask(ctx, op, ~t1->o_mask & t2->z_mask)) {
+ return true;
+ }
+
z_mask = t1->z_mask | t2->z_mask;
+ o_mask = t1->o_mask | t2->o_mask;
s_mask = t1->s_mask & t2->s_mask;
- return fold_masks_zs(ctx, op, z_mask, s_mask);
+
+ return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
}
static bool fold_orc(OptContext *ctx, TCGOp *op)
--
2.43.0
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 10/20] tcg/optimize: Build and use zero, one and affected bits in fold_orc
2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
` (8 preceding siblings ...)
2025-05-05 20:27 ` [PATCH 09/20] tcg/optimize: Build and use one and affected bits in fold_or Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
2025-05-05 20:27 ` [PATCH 11/20] tcg/optimize: Build and use o_bits in fold_xor Richard Henderson
` (9 subsequent siblings)
19 siblings, 0 replies; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
To: qemu-devel
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 22d302c9bf..5081e3db1f 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -2284,7 +2284,7 @@ static bool fold_or(OptContext *ctx, TCGOp *op)
static bool fold_orc(OptContext *ctx, TCGOp *op)
{
- uint64_t s_mask;
+ uint64_t z_mask, o_mask, s_mask;
TempOptInfo *t1, *t2;
if (fold_const2(ctx, op) ||
@@ -2315,8 +2315,17 @@ static bool fold_orc(OptContext *ctx, TCGOp *op)
}
t1 = arg_info(op->args[1]);
+
+ /* Affected bits are those not known one, masked by those known one. */
+ if (fold_affected_mask(ctx, op, ~t1->o_mask & t2->o_mask)) {
+ return true;
+ }
+
+ z_mask = t1->z_mask | ~t2->o_mask;
+ o_mask = t1->o_mask | ~t2->z_mask;
s_mask = t1->s_mask & t2->s_mask;
- return fold_masks_s(ctx, op, s_mask);
+
+ return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
}
static bool fold_qemu_ld_1reg(OptContext *ctx, TCGOp *op)
--
2.43.0
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 11/20] tcg/optimize: Build and use o_bits in fold_xor
2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
` (9 preceding siblings ...)
2025-05-05 20:27 ` [PATCH 10/20] tcg/optimize: Build and use zero, one and affected bits in fold_orc Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
2025-05-05 20:27 ` [PATCH 12/20] tcg/optimize: Build and use o_bits in fold_bswap Richard Henderson
` (8 subsequent siblings)
19 siblings, 0 replies; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
To: qemu-devel
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 5081e3db1f..a61c7ca376 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -3038,7 +3038,7 @@ static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
static bool fold_xor(OptContext *ctx, TCGOp *op)
{
- uint64_t z_mask, s_mask;
+ uint64_t z_mask, o_mask, s_mask;
TempOptInfo *t1, *t2;
if (fold_const2_commutative(ctx, op) ||
@@ -3050,9 +3050,12 @@ static bool fold_xor(OptContext *ctx, TCGOp *op)
t1 = arg_info(op->args[1]);
t2 = arg_info(op->args[2]);
- z_mask = t1->z_mask | t2->z_mask;
+
+ z_mask = (t1->z_mask | t2->z_mask) & ~(t1->o_mask & t2->o_mask);
+ o_mask = (t1->o_mask & ~t2->z_mask) | (t2->o_mask & ~t1->z_mask);
s_mask = t1->s_mask & t2->s_mask;
- return fold_masks_zs(ctx, op, z_mask, s_mask);
+
+ return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
}
/* Propagate constants and copies, fold constant expressions. */
--
2.43.0
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 12/20] tcg/optimize: Build and use o_bits in fold_bswap
2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
` (10 preceding siblings ...)
2025-05-05 20:27 ` [PATCH 11/20] tcg/optimize: Build and use o_bits in fold_xor Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
2025-05-05 20:27 ` [PATCH 13/20] tcg/optimize: Build and use o_bits in fold_deposit Richard Henderson
` (7 subsequent siblings)
19 siblings, 0 replies; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
To: qemu-devel
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 49 ++++++++++++++++++++++++-------------------------
1 file changed, 24 insertions(+), 25 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index a61c7ca376..2898a3f913 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -1666,53 +1666,52 @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
static bool fold_bswap(OptContext *ctx, TCGOp *op)
{
- uint64_t z_mask, s_mask, sign;
+ uint64_t z_mask, o_mask, s_mask;
TempOptInfo *t1 = arg_info(op->args[1]);
+ int flags = op->args[2];
if (ti_is_const(t1)) {
return tcg_opt_gen_movi(ctx, op, op->args[0],
do_constant_folding(op->opc, ctx->type,
- ti_const_val(t1),
- op->args[2]));
+ ti_const_val(t1), flags));
}
z_mask = t1->z_mask;
+ o_mask = t1->o_mask;
+ s_mask = 0;
+
switch (op->opc) {
case INDEX_op_bswap16:
z_mask = bswap16(z_mask);
- sign = INT16_MIN;
+ o_mask = bswap16(o_mask);
+ if (flags & TCG_BSWAP_OS) {
+ z_mask = (int16_t)z_mask;
+ o_mask = (int16_t)o_mask;
+ s_mask = INT16_MIN;
+ } else if (!(flags & TCG_BSWAP_OZ)) {
+ z_mask |= MAKE_64BIT_MASK(16, 48);
+ }
break;
case INDEX_op_bswap32:
z_mask = bswap32(z_mask);
- sign = INT32_MIN;
+ o_mask = bswap32(o_mask);
+ if (flags & TCG_BSWAP_OS) {
+ z_mask = (int32_t)z_mask;
+ o_mask = (int32_t)o_mask;
+ s_mask = INT32_MIN;
+ } else if (!(flags & TCG_BSWAP_OZ)) {
+ z_mask |= MAKE_64BIT_MASK(32, 32);
+ }
break;
case INDEX_op_bswap64:
z_mask = bswap64(z_mask);
- sign = INT64_MIN;
+ o_mask = bswap64(o_mask);
break;
default:
g_assert_not_reached();
}
- s_mask = 0;
- switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
- case TCG_BSWAP_OZ:
- break;
- case TCG_BSWAP_OS:
- /* If the sign bit may be 1, force all the bits above to 1. */
- if (z_mask & sign) {
- z_mask |= sign;
- }
- /* The value and therefore s_mask is explicitly sign-extended. */
- s_mask = sign;
- break;
- default:
- /* The high bits are undefined: force all bits above the sign to 1. */
- z_mask |= sign << 1;
- break;
- }
-
- return fold_masks_zs(ctx, op, z_mask, s_mask);
+ return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
}
static bool fold_call(OptContext *ctx, TCGOp *op)
--
2.43.0
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 13/20] tcg/optimize: Build and use o_bits in fold_deposit
2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
` (11 preceding siblings ...)
2025-05-05 20:27 ` [PATCH 12/20] tcg/optimize: Build and use o_bits in fold_bswap Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
2025-05-05 20:27 ` [PATCH 14/20] tcg/optimize: Build and use o_bits in fold_extract Richard Henderson
` (6 subsequent siblings)
19 siblings, 0 replies; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
To: qemu-devel
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 2898a3f913..886947b82b 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -1842,7 +1842,7 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
int ofs = op->args[3];
int len = op->args[4];
int width = 8 * tcg_type_size(ctx->type);
- uint64_t z_mask, s_mask;
+ uint64_t z_mask, o_mask, s_mask;
if (ti_is_const(t1) && ti_is_const(t2)) {
return tcg_opt_gen_movi(ctx, op, op->args[0],
@@ -1877,7 +1877,9 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
}
z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask);
- return fold_masks_zs(ctx, op, z_mask, s_mask);
+ o_mask = deposit64(t1->o_mask, ofs, len, t2->o_mask);
+
+ return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
}
static bool fold_divide(OptContext *ctx, TCGOp *op)
--
2.43.0
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 14/20] tcg/optimize: Build and use o_bits in fold_extract
2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
` (12 preceding siblings ...)
2025-05-05 20:27 ` [PATCH 13/20] tcg/optimize: Build and use o_bits in fold_deposit Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
2025-05-05 20:27 ` [PATCH 15/20] tcg/optimize: Build and use z_bits and o_bits in fold_extract2 Richard Henderson
` (5 subsequent siblings)
19 siblings, 0 replies; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
To: qemu-devel
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 886947b82b..c5243392af 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -1091,6 +1091,12 @@ static bool fold_masks_zos(OptContext *ctx, TCGOp *op, uint64_t z_mask,
return true;
}
+static bool fold_masks_zo(OptContext *ctx, TCGOp *op,
+ uint64_t z_mask, uint64_t o_mask)
+{
+ return fold_masks_zos(ctx, op, z_mask, o_mask, 0);
+}
+
static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
uint64_t z_mask, uint64_t s_mask)
{
@@ -1958,7 +1964,7 @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
static bool fold_extract(OptContext *ctx, TCGOp *op)
{
- uint64_t z_mask_old, z_mask;
+ uint64_t z_mask_old, z_mask, o_mask;
TempOptInfo *t1 = arg_info(op->args[1]);
int pos = op->args[2];
int len = op->args[3];
@@ -1974,7 +1980,8 @@ static bool fold_extract(OptContext *ctx, TCGOp *op)
return true;
}
- return fold_masks_z(ctx, op, z_mask);
+ o_mask = extract64(t1->o_mask, pos, len);
+ return fold_masks_zo(ctx, op, z_mask, o_mask);
}
static bool fold_extract2(OptContext *ctx, TCGOp *op)
--
2.43.0
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 15/20] tcg/optimize: Build and use z_bits and o_bits in fold_extract2
2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
` (13 preceding siblings ...)
2025-05-05 20:27 ` [PATCH 14/20] tcg/optimize: Build and use o_bits in fold_extract Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
2025-05-06 11:12 ` Paolo Bonzini
2025-05-05 20:27 ` [PATCH 16/20] tcg/optimize: Build and use o_bits in fold_exts Richard Henderson
` (4 subsequent siblings)
19 siblings, 1 reply; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
To: qemu-devel
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 40 ++++++++++++++++++++++++++++------------
1 file changed, 28 insertions(+), 12 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index c5243392af..424fd57922 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -1986,21 +1986,37 @@ static bool fold_extract(OptContext *ctx, TCGOp *op)
static bool fold_extract2(OptContext *ctx, TCGOp *op)
{
- if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
- uint64_t v1 = arg_const_val(op->args[1]);
- uint64_t v2 = arg_const_val(op->args[2]);
- int shr = op->args[3];
+ TempOptInfo *t1 = arg_info(op->args[1]);
+ TempOptInfo *t2 = arg_info(op->args[2]);
+ uint64_t v1 = ti_const_val(t1);
+ uint64_t v2 = ti_const_val(t2);
+ uint64_t z1 = t1->z_mask;
+ uint64_t z2 = t2->z_mask;
+ uint64_t o1 = t1->o_mask;
+ uint64_t o2 = t2->o_mask;
+ int shr = op->args[3];
- if (ctx->type == TCG_TYPE_I32) {
- v1 = (uint32_t)v1 >> shr;
- v2 = (uint64_t)((int32_t)v2 << (32 - shr));
- } else {
- v1 >>= shr;
- v2 <<= 64 - shr;
- }
+ if (ctx->type == TCG_TYPE_I32) {
+ v1 = (uint32_t)v1 >> shr;
+ z1 = (uint32_t)z1 >> shr;
+ o1 = (uint32_t)o1 >> shr;
+ v2 = (uint64_t)((int32_t)v2 << (32 - shr));
+ z2 = (uint64_t)((int32_t)z2 << (32 - shr));
+ o2 = (uint64_t)((int32_t)o2 << (32 - shr));
+ } else {
+ v1 >>= shr;
+ z1 >>= shr;
+ o1 >>= shr;
+ v2 <<= 64 - shr;
+ z2 <<= 64 - shr;
+ o2 <<= 64 - shr;
+ }
+
+ if (ti_is_const(t1) && ti_is_const(t2)) {
return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
}
- return finish_folding(ctx, op);
+
+ return fold_masks_zo(ctx, op, z1 | z2, o1 | o2);
}
static bool fold_exts(OptContext *ctx, TCGOp *op)
--
2.43.0
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 16/20] tcg/optimize: Build and use o_bits in fold_exts
2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
` (14 preceding siblings ...)
2025-05-05 20:27 ` [PATCH 15/20] tcg/optimize: Build and use z_bits and o_bits in fold_extract2 Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
2025-05-05 20:27 ` [PATCH 17/20] tcg/optimize: Build and use o_bits in fold_extu Richard Henderson
` (3 subsequent siblings)
19 siblings, 0 replies; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
To: qemu-devel
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 424fd57922..d7f017accf 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -2021,7 +2021,7 @@ static bool fold_extract2(OptContext *ctx, TCGOp *op)
static bool fold_exts(OptContext *ctx, TCGOp *op)
{
- uint64_t s_mask, z_mask;
+ uint64_t z_mask, o_mask, s_mask;
TempOptInfo *t1;
if (fold_const1(ctx, op)) {
@@ -2030,17 +2030,19 @@ static bool fold_exts(OptContext *ctx, TCGOp *op)
t1 = arg_info(op->args[1]);
z_mask = t1->z_mask;
+ o_mask = t1->o_mask;
s_mask = t1->s_mask;
switch (op->opc) {
case INDEX_op_ext_i32_i64:
s_mask |= INT32_MIN;
z_mask = (int32_t)z_mask;
+ o_mask = (int32_t)o_mask;
break;
default:
g_assert_not_reached();
}
- return fold_masks_zs(ctx, op, z_mask, s_mask);
+ return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
}
static bool fold_extu(OptContext *ctx, TCGOp *op)
--
2.43.0
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 17/20] tcg/optimize: Build and use o_bits in fold_extu
2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
` (15 preceding siblings ...)
2025-05-05 20:27 ` [PATCH 16/20] tcg/optimize: Build and use o_bits in fold_exts Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
2025-05-05 20:27 ` [PATCH 18/20] tcg/optimize: Build and use o_bits in fold_movcond Richard Henderson
` (2 subsequent siblings)
19 siblings, 0 replies; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
To: qemu-devel
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index d7f017accf..1d722bebc2 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -2047,25 +2047,31 @@ static bool fold_exts(OptContext *ctx, TCGOp *op)
static bool fold_extu(OptContext *ctx, TCGOp *op)
{
- uint64_t z_mask;
+ uint64_t z_mask, o_mask;
+ TempOptInfo *t1;
if (fold_const1(ctx, op)) {
return true;
}
- z_mask = arg_info(op->args[1])->z_mask;
+ t1 = arg_info(op->args[1]);
+ z_mask = t1->z_mask;
+ o_mask = t1->o_mask;
+
switch (op->opc) {
case INDEX_op_extrl_i64_i32:
case INDEX_op_extu_i32_i64:
z_mask = (uint32_t)z_mask;
+ o_mask = (uint32_t)o_mask;
break;
case INDEX_op_extrh_i64_i32:
z_mask >>= 32;
+ o_mask >>= 32;
break;
default:
g_assert_not_reached();
}
- return fold_masks_z(ctx, op, z_mask);
+ return fold_masks_zo(ctx, op, z_mask, o_mask);
}
static bool fold_mb(OptContext *ctx, TCGOp *op)
--
2.43.0
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 18/20] tcg/optimize: Build and use o_bits in fold_movcond
2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
` (16 preceding siblings ...)
2025-05-05 20:27 ` [PATCH 17/20] tcg/optimize: Build and use o_bits in fold_extu Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
2025-05-05 20:27 ` [PATCH 19/20] tcg/optimize: Build and use o_bits in fold_sextract Richard Henderson
2025-05-05 20:27 ` [PATCH 20/20] tcg/optimize: Build and use o_bits in fold_shift Richard Henderson
19 siblings, 0 replies; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
To: qemu-devel
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 1d722bebc2..1040e67220 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -2105,7 +2105,7 @@ static bool fold_mov(OptContext *ctx, TCGOp *op)
static bool fold_movcond(OptContext *ctx, TCGOp *op)
{
- uint64_t z_mask, s_mask;
+ uint64_t z_mask, o_mask, s_mask;
TempOptInfo *tt, *ft;
int i;
@@ -2131,6 +2131,7 @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
tt = arg_info(op->args[3]);
ft = arg_info(op->args[4]);
z_mask = tt->z_mask | ft->z_mask;
+ o_mask = tt->o_mask & ft->o_mask;
s_mask = tt->s_mask & ft->s_mask;
if (ti_is_const(tt) && ti_is_const(ft)) {
@@ -2153,7 +2154,7 @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
}
}
- return fold_masks_zs(ctx, op, z_mask, s_mask);
+ return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
}
static bool fold_mul(OptContext *ctx, TCGOp *op)
--
2.43.0
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 19/20] tcg/optimize: Build and use o_bits in fold_sextract
2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
` (17 preceding siblings ...)
2025-05-05 20:27 ` [PATCH 18/20] tcg/optimize: Build and use o_bits in fold_movcond Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
2025-05-05 20:27 ` [PATCH 20/20] tcg/optimize: Build and use o_bits in fold_shift Richard Henderson
19 siblings, 0 replies; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
To: qemu-devel
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 1040e67220..f36636ed02 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -2681,7 +2681,7 @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
static bool fold_sextract(OptContext *ctx, TCGOp *op)
{
- uint64_t z_mask, s_mask, s_mask_old;
+ uint64_t z_mask, o_mask, s_mask, s_mask_old;
TempOptInfo *t1 = arg_info(op->args[1]);
int pos = op->args[2];
int len = op->args[3];
@@ -2700,7 +2700,8 @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
}
z_mask = sextract64(t1->z_mask, pos, len);
- return fold_masks_zs(ctx, op, z_mask, s_mask);
+ o_mask = sextract64(t1->o_mask, pos, len);
+ return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
}
static bool fold_shift(OptContext *ctx, TCGOp *op)
--
2.43.0
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 20/20] tcg/optimize: Build and use o_bits in fold_shift
2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
` (18 preceding siblings ...)
2025-05-05 20:27 ` [PATCH 19/20] tcg/optimize: Build and use o_bits in fold_sextract Richard Henderson
@ 2025-05-05 20:27 ` Richard Henderson
19 siblings, 0 replies; 30+ messages in thread
From: Richard Henderson @ 2025-05-05 20:27 UTC (permalink / raw)
To: qemu-devel
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index f36636ed02..ab05206ee5 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -2706,7 +2706,7 @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
static bool fold_shift(OptContext *ctx, TCGOp *op)
{
- uint64_t s_mask, z_mask;
+ uint64_t s_mask, z_mask, o_mask;
TempOptInfo *t1, *t2;
if (fold_const2(ctx, op) ||
@@ -2719,14 +2719,16 @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
t2 = arg_info(op->args[2]);
s_mask = t1->s_mask;
z_mask = t1->z_mask;
+ o_mask = t1->o_mask;
if (ti_is_const(t2)) {
int sh = ti_const_val(t2);
z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
+ o_mask = do_constant_folding(op->opc, ctx->type, o_mask, sh);
s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
- return fold_masks_zs(ctx, op, z_mask, s_mask);
+ return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
}
switch (op->opc) {
--
2.43.0
^ permalink raw reply related [flat|nested] 30+ messages in thread
* Re: [PATCH 02/20] tcg/optimize: Add one's mask to TempOptInfo
2025-05-05 20:27 ` [PATCH 02/20] tcg/optimize: Add one's mask to TempOptInfo Richard Henderson
@ 2025-05-06 11:05 ` Paolo Bonzini
2025-05-06 14:12 ` Richard Henderson
2025-06-24 22:38 ` Pierrick Bouvier
1 sibling, 1 reply; 30+ messages in thread
From: Paolo Bonzini @ 2025-05-06 11:05 UTC (permalink / raw)
To: Richard Henderson, qemu-devel
On 5/5/25 22:27, Richard Henderson wrote:
> + /* Bits that are known 1 and bits that are known 0 must not overlap. */
> + tcg_debug_assert((o_mask & ~z_mask) == 0);
> +
> + /* A value where all bits that are not known zero are known one is a constant. */
> + if (z_mask == o_mask) {
> + return tcg_opt_gen_movi(ctx, op, op->args[0], o_mask);
> + }
> + /* All bits known zero is zero. */
> if (z_mask == 0) {
> return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
> }
Just a nit, but this second check is now redundant. z_mask == 0 implies
that o_mask == 0 (otherwise o_mask & z_mask is not zero); therefore the
"if (z_mask == o_mask)" must have returned from fold_masks_zos().
Really nice!
Paolo
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 15/20] tcg/optimize: Build and use z_bits and o_bits in fold_extract2
2025-05-05 20:27 ` [PATCH 15/20] tcg/optimize: Build and use z_bits and o_bits in fold_extract2 Richard Henderson
@ 2025-05-06 11:12 ` Paolo Bonzini
2025-05-06 14:15 ` Richard Henderson
0 siblings, 1 reply; 30+ messages in thread
From: Paolo Bonzini @ 2025-05-06 11:12 UTC (permalink / raw)
To: Richard Henderson, qemu-devel
On 5/5/25 22:27, Richard Henderson wrote:
> + uint64_t v1 = ti_const_val(t1);
> + uint64_t v2 = ti_const_val(t2);
Are v1 and v2 needed at all?
If starting from z1==o1 and z2==o2, you will always end up with z1|z2 ==
o1|o2 after these:
> + z1 = (uint32_t)z1 >> shr;
> + o1 = (uint32_t)o1 >> shr;
> + z2 = (uint64_t)((int32_t)z2 << (32 - shr));
> + o2 = (uint64_t)((int32_t)o2 << (32 - shr));
or these:
> + z1 >>= shr;
> + o1 >>= shr;
> + z2 <<= 64 - shr;
> + o2 <<= 64 - shr;
so fold_masks_zo would do the job.
Paolo
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 02/20] tcg/optimize: Add one's mask to TempOptInfo
2025-05-06 11:05 ` Paolo Bonzini
@ 2025-05-06 14:12 ` Richard Henderson
0 siblings, 0 replies; 30+ messages in thread
From: Richard Henderson @ 2025-05-06 14:12 UTC (permalink / raw)
To: Paolo Bonzini, qemu-devel
On 5/6/25 04:05, Paolo Bonzini wrote:
> On 5/5/25 22:27, Richard Henderson wrote:
>> + /* Bits that are known 1 and bits that are known 0 must not overlap. */
>> + tcg_debug_assert((o_mask & ~z_mask) == 0);
>> +
>> + /* All bits that are not known zero are known one is a constant. */
>> + if (z_mask == o_mask) {
>> + return tcg_opt_gen_movi(ctx, op, op->args[0], o_mask);
>> + }
>> + /* All bits known zero is zero. */
>> if (z_mask == 0) {
>> return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
>> }
> Just a nit, but this second check is now redundant. z_mask == 0 implies that o_mask == 0
> (otherwise o_mask & z_mask is not zero); therefore the "if (z_mask == o_mask)" must have
> returned from fold_masks_zos().
Yep, thanks.
r~
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 15/20] tcg/optimize: Build and use z_bits and o_bits in fold_extract2
2025-05-06 11:12 ` Paolo Bonzini
@ 2025-05-06 14:15 ` Richard Henderson
0 siblings, 0 replies; 30+ messages in thread
From: Richard Henderson @ 2025-05-06 14:15 UTC (permalink / raw)
To: Paolo Bonzini, qemu-devel
On 5/6/25 04:12, Paolo Bonzini wrote:
> On 5/5/25 22:27, Richard Henderson wrote:
>> + uint64_t v1 = ti_const_val(t1);
>> + uint64_t v2 = ti_const_val(t2);
>
> Are v1 and v2 needed at all?
>
> If starting from z1==o1 and z2==o2, you will always end up with z1|z2 == o1|o2 after these:
>
>> + z1 = (uint32_t)z1 >> shr;
>> + o1 = (uint32_t)o1 >> shr;
>> + z2 = (uint64_t)((int32_t)z2 << (32 - shr));
>> + o2 = (uint64_t)((int32_t)o2 << (32 - shr));
>
> or these:
>
>> + z1 >>= shr;
>> + o1 >>= shr;
>> + z2 <<= 64 - shr;
>> + o2 <<= 64 - shr;
>
> so fold_masks_zo would do the job.
Yes indeed, thanks again.
r~
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 01/20] tcg/optimize: Introduce arg_const_val
2025-05-05 20:27 ` [PATCH 01/20] tcg/optimize: Introduce arg_const_val Richard Henderson
@ 2025-06-24 22:21 ` Pierrick Bouvier
0 siblings, 0 replies; 30+ messages in thread
From: Pierrick Bouvier @ 2025-06-24 22:21 UTC (permalink / raw)
To: Richard Henderson, qemu-devel
On 5/5/25 1:27 PM, Richard Henderson wrote:
> Use arg_const_val instead of direct access to the TempOptInfo val
> member. Rename both val and is_const to catch all direct accesses.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> tcg/optimize.c | 78 ++++++++++++++++++++++++++------------------------
> 1 file changed, 41 insertions(+), 37 deletions(-)
>
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 02/20] tcg/optimize: Add one's mask to TempOptInfo
2025-05-05 20:27 ` [PATCH 02/20] tcg/optimize: Add one's mask to TempOptInfo Richard Henderson
2025-05-06 11:05 ` Paolo Bonzini
@ 2025-06-24 22:38 ` Pierrick Bouvier
1 sibling, 0 replies; 30+ messages in thread
From: Pierrick Bouvier @ 2025-06-24 22:38 UTC (permalink / raw)
To: Richard Henderson, qemu-devel
On 5/5/25 1:27 PM, Richard Henderson wrote:
> Add o_mask mirroring z_mask, but for 1's instead of 0's.
> Drop is_const and val fields, which now logically overlap.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> tcg/optimize.c | 51 ++++++++++++++++++++++++++++++++++++--------------
> 1 file changed, 37 insertions(+), 14 deletions(-)
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 03/20] tcg/optimize: Build and use o_bits in fold_and
2025-05-05 20:27 ` [PATCH 03/20] tcg/optimize: Build and use o_bits in fold_and Richard Henderson
@ 2025-06-24 22:42 ` Pierrick Bouvier
0 siblings, 0 replies; 30+ messages in thread
From: Pierrick Bouvier @ 2025-06-24 22:42 UTC (permalink / raw)
To: Richard Henderson, qemu-devel
On 5/5/25 1:27 PM, Richard Henderson wrote:
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> tcg/optimize.c | 17 ++++++-----------
> 1 file changed, 6 insertions(+), 11 deletions(-)
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 04/20] tcg/optimize: Build and use o_bits in fold_andc
2025-05-05 20:27 ` [PATCH 04/20] tcg/optimize: Build and use o_bits in fold_andc Richard Henderson
@ 2025-06-24 23:20 ` Pierrick Bouvier
0 siblings, 0 replies; 30+ messages in thread
From: Pierrick Bouvier @ 2025-06-24 23:20 UTC (permalink / raw)
To: Richard Henderson, qemu-devel
On 5/5/25 1:27 PM, Richard Henderson wrote:
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> tcg/optimize.c | 21 ++++++++-------------
> 1 file changed, 8 insertions(+), 13 deletions(-)
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 05/20] tcg/optimize: Build and use z_bits and o_bits in fold_eqv
2025-05-05 20:27 ` [PATCH 05/20] tcg/optimize: Build and use z_bits and o_bits in fold_eqv Richard Henderson
@ 2025-06-24 23:55 ` Pierrick Bouvier
0 siblings, 0 replies; 30+ messages in thread
From: Pierrick Bouvier @ 2025-06-24 23:55 UTC (permalink / raw)
To: Richard Henderson, qemu-devel
On 5/5/25 1:27 PM, Richard Henderson wrote:
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> tcg/optimize.c | 8 ++++++--
> 1 file changed, 6 insertions(+), 2 deletions(-)
>
> diff --git a/tcg/optimize.c b/tcg/optimize.c
> index faee3e8580..08d15e5395 100644
> --- a/tcg/optimize.c
> +++ b/tcg/optimize.c
> @@ -1917,7 +1917,7 @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
>
> static bool fold_eqv(OptContext *ctx, TCGOp *op)
> {
> - uint64_t s_mask;
> + uint64_t z_mask, o_mask, s_mask;
> TempOptInfo *t1, *t2;
>
> if (fold_const2_commutative(ctx, op) ||
> @@ -1947,8 +1947,12 @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
> }
>
> t1 = arg_info(op->args[1]);
> +
> + z_mask = (t1->z_mask | ~t2->o_mask) & (t2->z_mask | ~t1->o_mask);
> + o_mask = ~(t1->z_mask | t2->z_mask) | (t1->o_mask & t2->o_mask);
> s_mask = t1->s_mask & t2->s_mask;
Even after writing the truth table for eqv(t1, t2) = ~(t1 ^ t2), I'm not
sure I directly understand how z_mask and o_mask are derived.
In this case, we have:
t1 | t2 | ~(t1 ^ t2)
0 | 0 | 1
0 | 1 | 0
1 | 0 | 0
1 | 1 | 1
In this commit, and in the series, it's confusing for me to have mask
values set as 0 for 0, and 1 for 1. When mixing that with bitwise
operations, it starts to get hard to follow, always having to remember
if you deal with 0 or 1.
It could really help to have simple helpers for (known) zeroes(v) and
ones(v). I feel as well some comments would be removed because it would
become explicit what we are dealing with.
let:
zeroes(v) = ~v->z_mask
ones(v) = v->o_mask
res_zeroes = zeroes(t1) & ones(t2) | ones(t1) & zeroes(t2);
z_mask = ~res_zeroes;
which gives:
z_mask = ~res_zeroes
= ~((~t1->z & t2->o) | (t1->o & ~t2->z))
= ~(~t1->z & t2->o) & ~(t1->o & ~t2->z)
= (t1->z | ~t2->o) & (~t1->o | t2->z)
which is the code we have here.
> - return fold_masks_s(ctx, op, s_mask);
> +
> + return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
> }
>
> static bool fold_extract(OptContext *ctx, TCGOp *op)
I'm not necessarily forcing a change, but I don't see myself rewriting
truth tables and developing expressions on paper for all operations to
review they are correct.
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
^ permalink raw reply [flat|nested] 30+ messages in thread
end of thread, other threads:[~2025-06-24 23:56 UTC | newest]
Thread overview: 30+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-05-05 20:27 [PATCH 00/20] tcg/optimize: Track and use known 1's Richard Henderson
2025-05-05 20:27 ` [PATCH 01/20] tcg/optimize: Introduce arg_const_val Richard Henderson
2025-06-24 22:21 ` Pierrick Bouvier
2025-05-05 20:27 ` [PATCH 02/20] tcg/optimize: Add one's mask to TempOptInfo Richard Henderson
2025-05-06 11:05 ` Paolo Bonzini
2025-05-06 14:12 ` Richard Henderson
2025-06-24 22:38 ` Pierrick Bouvier
2025-05-05 20:27 ` [PATCH 03/20] tcg/optimize: Build and use o_bits in fold_and Richard Henderson
2025-06-24 22:42 ` Pierrick Bouvier
2025-05-05 20:27 ` [PATCH 04/20] tcg/optimize: Build and use o_bits in fold_andc Richard Henderson
2025-06-24 23:20 ` Pierrick Bouvier
2025-05-05 20:27 ` [PATCH 05/20] tcg/optimize: Build and use z_bits and o_bits in fold_eqv Richard Henderson
2025-06-24 23:55 ` Pierrick Bouvier
2025-05-05 20:27 ` [PATCH 06/20] tcg/optimize: Build and use z_bits and o_bits in fold_nand Richard Henderson
2025-05-05 20:27 ` [PATCH 07/20] tcg/optimize: Build and use z_bits and o_bits in fold_nor Richard Henderson
2025-05-05 20:27 ` [PATCH 08/20] tcg/optimize: Build and use z_bits and o_bits in fold_not Richard Henderson
2025-05-05 20:27 ` [PATCH 09/20] tcg/optimize: Build and use one and affected bits in fold_or Richard Henderson
2025-05-05 20:27 ` [PATCH 10/20] tcg/optimize: Build and use zero, one and affected bits in fold_orc Richard Henderson
2025-05-05 20:27 ` [PATCH 11/20] tcg/optimize: Build and use o_bits in fold_xor Richard Henderson
2025-05-05 20:27 ` [PATCH 12/20] tcg/optimize: Build and use o_bits in fold_bswap Richard Henderson
2025-05-05 20:27 ` [PATCH 13/20] tcg/optimize: Build and use o_bits in fold_deposit Richard Henderson
2025-05-05 20:27 ` [PATCH 14/20] tcg/optimize: Build and use o_bits in fold_extract Richard Henderson
2025-05-05 20:27 ` [PATCH 15/20] tcg/optimize: Build and use z_bits and o_bits in fold_extract2 Richard Henderson
2025-05-06 11:12 ` Paolo Bonzini
2025-05-06 14:15 ` Richard Henderson
2025-05-05 20:27 ` [PATCH 16/20] tcg/optimize: Build and use o_bits in fold_exts Richard Henderson
2025-05-05 20:27 ` [PATCH 17/20] tcg/optimize: Build and use o_bits in fold_extu Richard Henderson
2025-05-05 20:27 ` [PATCH 18/20] tcg/optimize: Build and use o_bits in fold_movcond Richard Henderson
2025-05-05 20:27 ` [PATCH 19/20] tcg/optimize: Build and use o_bits in fold_sextract Richard Henderson
2025-05-05 20:27 ` [PATCH 20/20] tcg/optimize: Build and use o_bits in fold_shift Richard Henderson
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).