[PATCH 0/3] tcg/i386: Improvements to deposit

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

* [PATCH 0/3] tcg/i386: Improvements to deposit
@ 2023-08-16 14:55 Richard Henderson
  2023-08-16 14:55 ` [PATCH 1/3] tcg/i386: Drop BYTEH deposits for 64-bit Richard Henderson
                   ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Richard Henderson @ 2023-08-16 14:55 UTC (permalink / raw)
  To: qemu-devel

Richard Henderson (3):
  tcg/i386: Drop BYTEH deposits for 64-bit
  tcg: Fold deposit with zero to and
  tcg/i386: Allow immediate as input to deposit_*

 tcg/i386/tcg-target-con-set.h |  2 +-
 tcg/i386/tcg-target-con-str.h |  1 -
 tcg/i386/tcg-target.h         |  4 ++--
 tcg/optimize.c                | 35 +++++++++++++++++++++++++++++++++++
 tcg/i386/tcg-target.c.inc     | 31 ++++++++++++++++++++++++-------
 5 files changed, 62 insertions(+), 11 deletions(-)

-- 
2.34.1



^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 1/3] tcg/i386: Drop BYTEH deposits for 64-bit
  2023-08-16 14:55 [PATCH 0/3] tcg/i386: Improvements to deposit Richard Henderson
@ 2023-08-16 14:55 ` Richard Henderson
  2023-08-17 15:44   ` Peter Maydell
  2023-08-16 14:55 ` [PATCH 2/3] tcg: Fold deposit with zero to and Richard Henderson
  2023-08-16 14:55 ` [PATCH 3/3] tcg/i386: Allow immediate as input to deposit_* Richard Henderson
  2 siblings, 1 reply; 11+ messages in thread
From: Richard Henderson @ 2023-08-16 14:55 UTC (permalink / raw)
  To: qemu-devel

It is more useful to allow low-part deposits into all registers
than to restrict allocation for high-byte deposits.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/i386/tcg-target-con-set.h | 2 +-
 tcg/i386/tcg-target-con-str.h | 1 -
 tcg/i386/tcg-target.h         | 4 ++--
 tcg/i386/tcg-target.c.inc     | 7 +++----
 4 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/tcg/i386/tcg-target-con-set.h b/tcg/i386/tcg-target-con-set.h
index 5ea3a292f0..3949d49538 100644
--- a/tcg/i386/tcg-target-con-set.h
+++ b/tcg/i386/tcg-target-con-set.h
@@ -33,7 +33,7 @@ C_O1_I1(r, q)
 C_O1_I1(r, r)
 C_O1_I1(x, r)
 C_O1_I1(x, x)
-C_O1_I2(Q, 0, Q)
+C_O1_I2(q, 0, q)
 C_O1_I2(q, r, re)
 C_O1_I2(r, 0, ci)
 C_O1_I2(r, 0, r)
diff --git a/tcg/i386/tcg-target-con-str.h b/tcg/i386/tcg-target-con-str.h
index 24e6bcb80d..95a30e58cd 100644
--- a/tcg/i386/tcg-target-con-str.h
+++ b/tcg/i386/tcg-target-con-str.h
@@ -19,7 +19,6 @@ REGS('D', 1u << TCG_REG_EDI)
 REGS('r', ALL_GENERAL_REGS)
 REGS('x', ALL_VECTOR_REGS)
 REGS('q', ALL_BYTEL_REGS)     /* regs that can be used as a byte operand */
-REGS('Q', ALL_BYTEH_REGS)     /* regs with a second byte (e.g. %ah) */
 REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)  /* qemu_ld/st */
 REGS('s', ALL_BYTEL_REGS & ~SOFTMMU_RESERVE_REGS)    /* qemu_st8_i32 data */
 
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 2a2e3fffa8..30cce01ca4 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -227,8 +227,8 @@ typedef enum {
 #define TCG_TARGET_HAS_cmpsel_vec       -1
 
 #define TCG_TARGET_deposit_i32_valid(ofs, len) \
-    (((ofs) == 0 && (len) == 8) || ((ofs) == 8 && (len) == 8) || \
-     ((ofs) == 0 && (len) == 16))
+    (((ofs) == 0 && ((len) == 8 || (len) == 16)) || \
+     (TCG_TARGET_REG_BITS == 32 && (ofs) == 8 && (len) == 8))
 #define TCG_TARGET_deposit_i64_valid    TCG_TARGET_deposit_i32_valid
 
 /* Check for the possibility of high-byte extraction and, for 64-bit,
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index a6b2eae995..ba40dd0f4d 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -144,7 +144,6 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
 # define TCG_REG_L1 TCG_REG_EDX
 #endif
 
-#define ALL_BYTEH_REGS         0x0000000fu
 #if TCG_TARGET_REG_BITS == 64
 # define ALL_GENERAL_REGS      0x0000ffffu
 # define ALL_VECTOR_REGS       0xffff0000u
@@ -152,7 +151,7 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
 #else
 # define ALL_GENERAL_REGS      0x000000ffu
 # define ALL_VECTOR_REGS       0x00ff0000u
-# define ALL_BYTEL_REGS        ALL_BYTEH_REGS
+# define ALL_BYTEL_REGS        0x0000000fu
 #endif
 #ifdef CONFIG_SOFTMMU
 # define SOFTMMU_RESERVE_REGS  ((1 << TCG_REG_L0) | (1 << TCG_REG_L1))
@@ -2752,7 +2751,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         if (args[3] == 0 && args[4] == 8) {
             /* load bits 0..7 */
             tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM, a2, a0);
-        } else if (args[3] == 8 && args[4] == 8) {
+        } else if (TCG_TARGET_REG_BITS == 32 && args[3] == 8 && args[4] == 8) {
             /* load bits 8..15 */
             tcg_out_modrm(s, OPC_MOVB_EvGv, a2, a0 + 4);
         } else if (args[3] == 0 && args[4] == 16) {
@@ -3312,7 +3311,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 
     case INDEX_op_deposit_i32:
     case INDEX_op_deposit_i64:
-        return C_O1_I2(Q, 0, Q);
+        return C_O1_I2(q, 0, q);
 
     case INDEX_op_setcond_i32:
     case INDEX_op_setcond_i64:
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 2/3] tcg: Fold deposit with zero to and
  2023-08-16 14:55 [PATCH 0/3] tcg/i386: Improvements to deposit Richard Henderson
  2023-08-16 14:55 ` [PATCH 1/3] tcg/i386: Drop BYTEH deposits for 64-bit Richard Henderson
@ 2023-08-16 14:55 ` Richard Henderson
  2023-08-17 15:50   ` Peter Maydell
  2023-08-21 12:05   ` Philippe Mathieu-Daudé
  2023-08-16 14:55 ` [PATCH 3/3] tcg/i386: Allow immediate as input to deposit_* Richard Henderson
  2 siblings, 2 replies; 11+ messages in thread
From: Richard Henderson @ 2023-08-16 14:55 UTC (permalink / raw)
  To: qemu-devel

Inserting a zero into a value, or inserting a value
into zero at offset 0 may be implemented with AND.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index d2156367a3..956114b631 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -1279,6 +1279,8 @@ static bool fold_ctpop(OptContext *ctx, TCGOp *op)
 
 static bool fold_deposit(OptContext *ctx, TCGOp *op)
 {
+    TCGOpcode and_opc;
+
     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
         uint64_t t1 = arg_info(op->args[1])->val;
         uint64_t t2 = arg_info(op->args[2])->val;
@@ -1287,6 +1289,39 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
         return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
     }
 
+    switch (ctx->type) {
+    case TCG_TYPE_I32:
+        and_opc = INDEX_op_and_i32;
+        break;
+    case TCG_TYPE_I64:
+        and_opc = INDEX_op_and_i64;
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    if (arg_is_const(op->args[1])
+        && arg_info(op->args[1])->val == 0
+        && op->args[3] == 0) {
+        uint64_t mask = MAKE_64BIT_MASK(0, op->args[4]);
+
+        op->opc = and_opc;
+        op->args[1] = op->args[2];
+        op->args[2] = temp_arg(tcg_constant_internal(ctx->type, mask));
+        ctx->z_mask = mask & arg_info(op->args[1])->z_mask;
+        return false;
+    }
+
+    if (arg_is_const(op->args[2])
+        && arg_info(op->args[2])->val == 0) {
+        uint64_t mask = deposit64(-1, op->args[3], op->args[4], 0);
+
+        op->opc = and_opc;
+        op->args[2] = temp_arg(tcg_constant_internal(ctx->type, mask));
+        ctx->z_mask = mask & arg_info(op->args[1])->z_mask;
+        return false;
+    }
+
     ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask,
                             op->args[3], op->args[4],
                             arg_info(op->args[2])->z_mask);
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 3/3] tcg/i386: Allow immediate as input to deposit_*
  2023-08-16 14:55 [PATCH 0/3] tcg/i386: Improvements to deposit Richard Henderson
  2023-08-16 14:55 ` [PATCH 1/3] tcg/i386: Drop BYTEH deposits for 64-bit Richard Henderson
  2023-08-16 14:55 ` [PATCH 2/3] tcg: Fold deposit with zero to and Richard Henderson
@ 2023-08-16 14:55 ` Richard Henderson
  2023-08-17 15:55   ` Peter Maydell
  2 siblings, 1 reply; 11+ messages in thread
From: Richard Henderson @ 2023-08-16 14:55 UTC (permalink / raw)
  To: qemu-devel

We can use MOVB and MOVW with an immediate just as easily
as with a register input.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/i386/tcg-target-con-set.h |  2 +-
 tcg/i386/tcg-target.c.inc     | 26 ++++++++++++++++++++++----
 2 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/tcg/i386/tcg-target-con-set.h b/tcg/i386/tcg-target-con-set.h
index 3949d49538..7d00a7dde8 100644
--- a/tcg/i386/tcg-target-con-set.h
+++ b/tcg/i386/tcg-target-con-set.h
@@ -33,7 +33,7 @@ C_O1_I1(r, q)
 C_O1_I1(r, r)
 C_O1_I1(x, r)
 C_O1_I1(x, x)
-C_O1_I2(q, 0, q)
+C_O1_I2(q, 0, qi)
 C_O1_I2(q, r, re)
 C_O1_I2(r, 0, ci)
 C_O1_I2(r, 0, r)
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index ba40dd0f4d..3045b56002 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -276,6 +276,7 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
 #define OPC_MOVL_GvEv	(0x8b)		/* loads, more or less */
 #define OPC_MOVB_EvIz   (0xc6)
 #define OPC_MOVL_EvIz	(0xc7)
+#define OPC_MOVB_Ib     (0xb0)
 #define OPC_MOVL_Iv     (0xb8)
 #define OPC_MOVBE_GyMy  (0xf0 | P_EXT38)
 #define OPC_MOVBE_MyGy  (0xf1 | P_EXT38)
@@ -2750,13 +2751,30 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     OP_32_64(deposit):
         if (args[3] == 0 && args[4] == 8) {
             /* load bits 0..7 */
-            tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM, a2, a0);
+            if (const_a2) {
+                tcg_out_opc(s, OPC_MOVB_Ib | P_REXB_RM | LOWREGMASK(a0),
+                            0, a0, 0);
+                tcg_out8(s, a2);
+            } else {
+                tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM, a2, a0);
+            }
         } else if (TCG_TARGET_REG_BITS == 32 && args[3] == 8 && args[4] == 8) {
             /* load bits 8..15 */
-            tcg_out_modrm(s, OPC_MOVB_EvGv, a2, a0 + 4);
+            if (const_a2) {
+                tcg_out8(s, OPC_MOVB_Ib + a0 + 4);
+                tcg_out8(s, a2);
+            } else {
+                tcg_out_modrm(s, OPC_MOVB_EvGv, a2, a0 + 4);
+            }
         } else if (args[3] == 0 && args[4] == 16) {
             /* load bits 0..15 */
-            tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, a2, a0);
+            if (const_a2) {
+                tcg_out_opc(s, OPC_MOVL_Iv | P_DATA16 | LOWREGMASK(a0),
+                            0, a0, 0);
+                tcg_out16(s, a2);
+            } else {
+                tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, a2, a0);
+            }
         } else {
             g_assert_not_reached();
         }
@@ -3311,7 +3329,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 
     case INDEX_op_deposit_i32:
     case INDEX_op_deposit_i64:
-        return C_O1_I2(q, 0, q);
+        return C_O1_I2(q, 0, qi);
 
     case INDEX_op_setcond_i32:
     case INDEX_op_setcond_i64:
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/3] tcg/i386: Drop BYTEH deposits for 64-bit
  2023-08-16 14:55 ` [PATCH 1/3] tcg/i386: Drop BYTEH deposits for 64-bit Richard Henderson
@ 2023-08-17 15:44   ` Peter Maydell
  2023-08-17 22:04     ` Richard Henderson
  0 siblings, 1 reply; 11+ messages in thread
From: Peter Maydell @ 2023-08-17 15:44 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel

On Wed, 16 Aug 2023 at 16:01, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> It is more useful to allow low-part deposits into all registers
> than to restrict allocation for high-byte deposits.

>  #define TCG_TARGET_deposit_i32_valid(ofs, len) \
> -    (((ofs) == 0 && (len) == 8) || ((ofs) == 8 && (len) == 8) || \
> -     ((ofs) == 0 && (len) == 16))
> +    (((ofs) == 0 && ((len) == 8 || (len) == 16)) || \
> +     (TCG_TARGET_REG_BITS == 32 && (ofs) == 8 && (len) == 8))
>  #define TCG_TARGET_deposit_i64_valid    TCG_TARGET_deposit_i32_valid


> @@ -2752,7 +2751,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>          if (args[3] == 0 && args[4] == 8) {
>              /* load bits 0..7 */
>              tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM, a2, a0);
> -        } else if (args[3] == 8 && args[4] == 8) {
> +        } else if (TCG_TARGET_REG_BITS == 32 && args[3] == 8 && args[4] == 8) {

Should we assert(TCG_TARGET_REG_BITS == 32) rather than making it part of the
condition? If I understand the change to the deposit_i32_valid macro above, we
should never get here with 8, 8 if TCG_TARGET_REG_BITS is 64.

>              /* load bits 8..15 */
>              tcg_out_modrm(s, OPC_MOVB_EvGv, a2, a0 + 4);
>          } else if (args[3] == 0 && args[4] == 16) {

Otherwise
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

thanks
-- PMM


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/3] tcg: Fold deposit with zero to and
  2023-08-16 14:55 ` [PATCH 2/3] tcg: Fold deposit with zero to and Richard Henderson
@ 2023-08-17 15:50   ` Peter Maydell
  2023-08-17 22:07     ` Richard Henderson
  2023-08-21 12:05   ` Philippe Mathieu-Daudé
  1 sibling, 1 reply; 11+ messages in thread
From: Peter Maydell @ 2023-08-17 15:50 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel

On Wed, 16 Aug 2023 at 15:58, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Inserting a zero into a value, or inserting a value
> into zero at offset 0 my be implemented with AND.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  tcg/optimize.c | 35 +++++++++++++++++++++++++++++++++++
>  1 file changed, 35 insertions(+)
>
> diff --git a/tcg/optimize.c b/tcg/optimize.c
> index d2156367a3..956114b631 100644
> --- a/tcg/optimize.c
> +++ b/tcg/optimize.c
> @@ -1279,6 +1279,8 @@ static bool fold_ctpop(OptContext *ctx, TCGOp *op)
>
>  static bool fold_deposit(OptContext *ctx, TCGOp *op)
>  {
> +    TCGOpcode and_opc;
> +
>      if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
>          uint64_t t1 = arg_info(op->args[1])->val;
>          uint64_t t2 = arg_info(op->args[2])->val;
> @@ -1287,6 +1289,39 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
>          return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
>      }
>
> +    switch (ctx->type) {
> +    case TCG_TYPE_I32:
> +        and_opc = INDEX_op_and_i32;
> +        break;
> +    case TCG_TYPE_I64:
> +        and_opc = INDEX_op_and_i64;
> +        break;
> +    default:
> +        g_assert_not_reached();
> +    }
> +
> +    if (arg_is_const(op->args[1])
> +        && arg_info(op->args[1])->val == 0
> +        && op->args[3] == 0) {
> +        uint64_t mask = MAKE_64BIT_MASK(0, op->args[4]);

The docs for the TCG deposit op don't say what the restrictions on the
immediate args are, but this will be UB for QEMU if args[4] is 0.
Have we already sanitized those somewhere?

> +
> +        op->opc = and_opc;
> +        op->args[1] = op->args[2];
> +        op->args[2] = temp_arg(tcg_constant_internal(ctx->type, mask));
> +        ctx->z_mask = mask & arg_info(op->args[1])->z_mask;
> +        return false;
> +    }
> +
> +    if (arg_is_const(op->args[2])
> +        && arg_info(op->args[2])->val == 0) {
> +        uint64_t mask = deposit64(-1, op->args[3], op->args[4], 0);
> +
> +        op->opc = and_opc;
> +        op->args[2] = temp_arg(tcg_constant_internal(ctx->type, mask));
> +        ctx->z_mask = mask & arg_info(op->args[1])->z_mask;
> +        return false;
> +    }
> +
>      ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask,
>                              op->args[3], op->args[4],
>                              arg_info(op->args[2])->z_mask);
> --

thanks
-- PMM


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/3] tcg/i386: Allow immediate as input to deposit_*
  2023-08-16 14:55 ` [PATCH 3/3] tcg/i386: Allow immediate as input to deposit_* Richard Henderson
@ 2023-08-17 15:55   ` Peter Maydell
  0 siblings, 0 replies; 11+ messages in thread
From: Peter Maydell @ 2023-08-17 15:55 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel

On Wed, 16 Aug 2023 at 15:58, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> We can use MOVB and MOVW with an immediate just as easily
> as with a register input.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---


Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

thanks
-- PMM


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/3] tcg/i386: Drop BYTEH deposits for 64-bit
  2023-08-17 15:44   ` Peter Maydell
@ 2023-08-17 22:04     ` Richard Henderson
  0 siblings, 0 replies; 11+ messages in thread
From: Richard Henderson @ 2023-08-17 22:04 UTC (permalink / raw)
  To: Peter Maydell; +Cc: qemu-devel

On 8/17/23 08:44, Peter Maydell wrote:
> On Wed, 16 Aug 2023 at 16:01, Richard Henderson
> <richard.henderson@linaro.org> wrote:
>>
>> It is more useful to allow low-part deposits into all registers
>> than to restrict allocation for high-byte deposits.
> 
>>   #define TCG_TARGET_deposit_i32_valid(ofs, len) \
>> -    (((ofs) == 0 && (len) == 8) || ((ofs) == 8 && (len) == 8) || \
>> -     ((ofs) == 0 && (len) == 16))
>> +    (((ofs) == 0 && ((len) == 8 || (len) == 16)) || \
>> +     (TCG_TARGET_REG_BITS == 32 && (ofs) == 8 && (len) == 8))
>>   #define TCG_TARGET_deposit_i64_valid    TCG_TARGET_deposit_i32_valid
> 
> 
>> @@ -2752,7 +2751,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>>           if (args[3] == 0 && args[4] == 8) {
>>               /* load bits 0..7 */
>>               tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM, a2, a0);
>> -        } else if (args[3] == 8 && args[4] == 8) {
>> +        } else if (TCG_TARGET_REG_BITS == 32 && args[3] == 8 && args[4] == 8) {
> 
> Should we assert(TCG_TARGET_REG_BITS == 32) rather than making it part of the
> condition?

The if/else chain ends in g_assert_not_reached().

> If I understand the change to the deposit_i32_valid macro above, we
> should never get here with 8, 8 if TCG_TARGET_REG_BITS is 64.

Correct.


r~


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/3] tcg: Fold deposit with zero to and
  2023-08-17 15:50   ` Peter Maydell
@ 2023-08-17 22:07     ` Richard Henderson
  2023-08-18  8:51       ` Peter Maydell
  0 siblings, 1 reply; 11+ messages in thread
From: Richard Henderson @ 2023-08-17 22:07 UTC (permalink / raw)
  To: Peter Maydell; +Cc: qemu-devel

On 8/17/23 08:50, Peter Maydell wrote:
>> +    if (arg_is_const(op->args[1])
>> +        && arg_info(op->args[1])->val == 0
>> +        && op->args[3] == 0) {
>> +        uint64_t mask = MAKE_64BIT_MASK(0, op->args[4]);
> 
> The docs for the TCG deposit op don't say what the restrictions on the
> immediate args are, but this will be UB for QEMU if args[4] is 0.
> Have we already sanitized those somewhere?

tcg_gen_deposit_{i32,i64} do so.


r~


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/3] tcg: Fold deposit with zero to and
  2023-08-17 22:07     ` Richard Henderson
@ 2023-08-18  8:51       ` Peter Maydell
  0 siblings, 0 replies; 11+ messages in thread
From: Peter Maydell @ 2023-08-18  8:51 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel

On Thu, 17 Aug 2023 at 23:07, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> On 8/17/23 08:50, Peter Maydell wrote:
> >> +    if (arg_is_const(op->args[1])
> >> +        && arg_info(op->args[1])->val == 0
> >> +        && op->args[3] == 0) {
> >> +        uint64_t mask = MAKE_64BIT_MASK(0, op->args[4]);
> >
> > The docs for the TCG deposit op don't say what the restrictions on the
> > immediate args are, but this will be UB for QEMU if args[4] is 0.
> > Have we already sanitized those somewhere?
>
> tcg_gen_deposit_{i32,i64} do so.

Cool.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

thanks
-- PMM


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/3] tcg: Fold deposit with zero to and
  2023-08-16 14:55 ` [PATCH 2/3] tcg: Fold deposit with zero to and Richard Henderson
  2023-08-17 15:50   ` Peter Maydell
@ 2023-08-21 12:05   ` Philippe Mathieu-Daudé
  1 sibling, 0 replies; 11+ messages in thread
From: Philippe Mathieu-Daudé @ 2023-08-21 12:05 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel

On 16/8/23 16:55, Richard Henderson wrote:
> Inserting a zero into a value, or inserting a value
> into zero at offset 0 my be implemented with AND.

Typo: "my" should be "may".

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>   tcg/optimize.c | 35 +++++++++++++++++++++++++++++++++++
>   1 file changed, 35 insertions(+)
> 
> diff --git a/tcg/optimize.c b/tcg/optimize.c
> index d2156367a3..956114b631 100644
> --- a/tcg/optimize.c
> +++ b/tcg/optimize.c
> @@ -1279,6 +1279,8 @@ static bool fold_ctpop(OptContext *ctx, TCGOp *op)
>   
>   static bool fold_deposit(OptContext *ctx, TCGOp *op)
>   {
> +    TCGOpcode and_opc;
> +
>       if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
>           uint64_t t1 = arg_info(op->args[1])->val;
>           uint64_t t2 = arg_info(op->args[2])->val;
> @@ -1287,6 +1289,39 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
>           return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
>       }
>   
> +    switch (ctx->type) {
> +    case TCG_TYPE_I32:
> +        and_opc = INDEX_op_and_i32;
> +        break;
> +    case TCG_TYPE_I64:
> +        and_opc = INDEX_op_and_i64;
> +        break;
> +    default:
> +        g_assert_not_reached();
> +    }
> +
> +    if (arg_is_const(op->args[1])
> +        && arg_info(op->args[1])->val == 0
> +        && op->args[3] == 0) {

            /* Inserting a value into zero at offset 0. */

> +        uint64_t mask = MAKE_64BIT_MASK(0, op->args[4]);
> +
> +        op->opc = and_opc;
> +        op->args[1] = op->args[2];
> +        op->args[2] = temp_arg(tcg_constant_internal(ctx->type, mask));
> +        ctx->z_mask = mask & arg_info(op->args[1])->z_mask;
> +        return false;
> +    }
> +
> +    if (arg_is_const(op->args[2])
> +        && arg_info(op->args[2])->val == 0) {

            /* Inserting a zero into a value. */

> +        uint64_t mask = deposit64(-1, op->args[3], op->args[4], 0);
> +
> +        op->opc = and_opc;
> +        op->args[2] = temp_arg(tcg_constant_internal(ctx->type, mask));
> +        ctx->z_mask = mask & arg_info(op->args[1])->z_mask;
> +        return false;
> +    }

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>




^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2023-08-21 12:05 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-08-16 14:55 [PATCH 0/3] tcg/i386: Improvements to deposit Richard Henderson
2023-08-16 14:55 ` [PATCH 1/3] tcg/i386: Drop BYTEH deposits for 64-bit Richard Henderson
2023-08-17 15:44   ` Peter Maydell
2023-08-17 22:04     ` Richard Henderson
2023-08-16 14:55 ` [PATCH 2/3] tcg: Fold deposit with zero to and Richard Henderson
2023-08-17 15:50   ` Peter Maydell
2023-08-17 22:07     ` Richard Henderson
2023-08-18  8:51       ` Peter Maydell
2023-08-21 12:05   ` Philippe Mathieu-Daudé
2023-08-16 14:55 ` [PATCH 3/3] tcg/i386: Allow immediate as input to deposit_* Richard Henderson
2023-08-17 15:55   ` Peter Maydell

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).