qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH 1/2] tcg-ia64: implement mulu2_i32/i64 and muls2_i32/i64
@ 2013-03-30 15:33 Aurelien Jarno
  2013-03-30 15:33 ` [Qemu-devel] [PATCH 2/2] tcg-ia64: implement add2_i32/i64 and sub2_i32/i64 Aurelien Jarno
  2013-03-30 16:00 ` [Qemu-devel] [PATCH 1/2] tcg-ia64: implement mulu2_i32/i64 and muls2_i32/i64 Richard Henderson
  0 siblings, 2 replies; 6+ messages in thread
From: Aurelien Jarno @ 2013-03-30 15:33 UTC (permalink / raw)
  To: qemu-devel; +Cc: Aurelien Jarno

Add 32-bit and 64-bit mulu2 and muls2 TCG ops.

On IA64, 32-bit ops should just ignore the 32 most significant bits of
registers, and can leave them with non-zero values. This means registers
should be zero/sign extended before doing the actual multiplying. This
leave some slots in the bundle to possibly load a constant.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/ia64/tcg-target.c |   93 +++++++++++++++++++++++++++++++++++++++++++++++++
 tcg/ia64/tcg-target.h |    8 ++---
 2 files changed, 97 insertions(+), 4 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index 2373d9e..a46234d 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -278,6 +278,8 @@ enum {
     OPC_SUB_A3                = 0x10128000000ull,
     OPC_UNPACK4_L_I2          = 0x0f860000000ull,
     OPC_XMA_L_F2              = 0x1d000000000ull,
+    OPC_XMA_H_F2              = 0x1dc00000000ull,
+    OPC_XMA_HU_F2             = 0x1d800000000ull,
     OPC_XOR_A1                = 0x10078000000ull,
     OPC_ZXT1_I29              = 0x00080000000ull,
     OPC_ZXT2_I29              = 0x00088000000ull,
@@ -1098,6 +1100,79 @@ static inline void tcg_out_mul(TCGContext *s, TCGArg ret,
                    tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
 }
 
+static inline void tcg_out_mul2_i32(TCGContext *s, TCGArg retl, TCGArg reth,
+                                    TCGArg arg1, int const_arg1,
+                                    TCGArg arg2, int const_arg2,
+                                    int is_signed)
+{
+    uint64_t opc2, opc3;
+
+    if (const_arg1 && arg1 != 0) {
+        opc2 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
+                          TCG_REG_R2, arg1, TCG_REG_R0);
+    } else if (is_signed) {
+        opc2 = tcg_opc_i29(TCG_REG_P0, OPC_SXT4_I29, TCG_REG_R2, arg1);
+    } else {
+        opc2 = tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R2, arg1);
+    }
+    if (const_arg2 && arg2 != 0) {
+        opc3 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
+                          TCG_REG_R3, arg2, TCG_REG_R0);
+    } else if (is_signed) {
+        opc3 = tcg_opc_i29(TCG_REG_P0, OPC_SXT4_I29, TCG_REG_R3, arg2);
+    } else {
+        opc3 = tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R3, arg2);
+    }
+
+    tcg_out_bundle(s, miI,
+                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   opc2,
+                   opc3);
+    tcg_out_bundle(s, mmI,
+                   tcg_opc_m18(TCG_REG_P0, OPC_SETF_SIG_M18,
+                               TCG_REG_F6, TCG_REG_R2),
+                   tcg_opc_m18(TCG_REG_P0, OPC_SETF_SIG_M18,
+                               TCG_REG_F7, TCG_REG_R3),
+                   tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+    tcg_out_bundle(s, mmF,
+                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   tcg_opc_f2 (TCG_REG_P0, OPC_XMA_L_F2, TCG_REG_F6, TCG_REG_F6,
+                               TCG_REG_F7, TCG_REG_F0));
+    tcg_out_bundle(s, MmI,
+                   tcg_opc_m19(TCG_REG_P0, OPC_GETF_SIG_M19, retl, TCG_REG_F6),
+                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, reth, retl, 32, 31));
+}
+
+static inline void tcg_out_mul2_i64(TCGContext *s, TCGArg retl,
+                                    TCGArg reth, TCGArg arg1, TCGArg arg2,
+                                    int is_signed)
+{
+    uint64_t opc_xma_h_f2;
+
+    opc_xma_h_f2 = is_signed ? OPC_XMA_H_F2 : OPC_XMA_HU_F2;
+
+    tcg_out_bundle(s, mmI,
+                   tcg_opc_m18(TCG_REG_P0, OPC_SETF_SIG_M18, TCG_REG_F8, arg1),
+                   tcg_opc_m18(TCG_REG_P0, OPC_SETF_SIG_M18, TCG_REG_F9, arg2),
+                   tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+    tcg_out_bundle(s, mmf,
+                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   tcg_opc_f2 (TCG_REG_P0, OPC_XMA_L_F2, TCG_REG_F6, TCG_REG_F8,
+                               TCG_REG_F9, TCG_REG_F0));
+    tcg_out_bundle(s, mmF,
+                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   tcg_opc_f2 (TCG_REG_P0, opc_xma_h_f2, TCG_REG_F7, TCG_REG_F8,
+                               TCG_REG_F9, TCG_REG_F0));
+    tcg_out_bundle(s, mmI,
+                   tcg_opc_m19(TCG_REG_P0, OPC_GETF_SIG_M19, retl, TCG_REG_F6),
+                   tcg_opc_m19(TCG_REG_P0, OPC_GETF_SIG_M19, reth, TCG_REG_F7),
+                   tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+}
+
 static inline void tcg_out_sar_i32(TCGContext *s, TCGArg ret, TCGArg arg1,
                                    TCGArg arg2, int const_arg2)
 {
@@ -2107,6 +2182,20 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_mul_i64:
         tcg_out_mul(s, args[0], args[1], args[2]);
         break;
+    case INDEX_op_mulu2_i32:
+        tcg_out_mul2_i32(s, args[0], args[1], args[2], const_args[2],
+                         args[3], const_args[3], 0);
+        break;
+    case INDEX_op_mulu2_i64:
+        tcg_out_mul2_i64(s, args[0], args[1], args[2], args[3], 0);
+        break;
+    case INDEX_op_muls2_i32:
+        tcg_out_mul2_i32(s, args[0], args[1], args[2], const_args[2],
+                         args[3], const_args[3], 1);
+        break;
+    case INDEX_op_muls2_i64:
+        tcg_out_mul2_i64(s, args[0], args[1], args[2], args[3], 1);
+        break;
 
     case INDEX_op_sar_i32:
         tcg_out_sar_i32(s, args[0], args[1], args[2], const_args[2]);
@@ -2275,6 +2364,8 @@ static const TCGTargetOpDef ia64_op_defs[] = {
     { INDEX_op_xor_i32, { "r", "rI", "rI" } },
 
     { INDEX_op_mul_i32, { "r", "rZ", "rZ" } },
+    { INDEX_op_mulu2_i32, { "r", "r", "rI", "rI" } },
+    { INDEX_op_muls2_i32, { "r", "r", "rI", "rI" } },
 
     { INDEX_op_sar_i32, { "r", "rZ", "ri" } },
     { INDEX_op_shl_i32, { "r", "rZ", "ri" } },
@@ -2322,6 +2413,8 @@ static const TCGTargetOpDef ia64_op_defs[] = {
     { INDEX_op_xor_i64, { "r", "rI", "rI" } },
 
     { INDEX_op_mul_i64, { "r", "rZ", "rZ" } },
+    { INDEX_op_mulu2_i64, { "r", "r", "rZ", "rZ" } },
+    { INDEX_op_muls2_i64, { "r", "r", "rZ", "rZ" } },
 
     { INDEX_op_sar_i64, { "r", "rZ", "ri" } },
     { INDEX_op_shl_i64, { "r", "rZ", "ri" } },
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index e3d72ea..75e357e 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -140,10 +140,10 @@ typedef enum {
 #define TCG_TARGET_HAS_add2_i64         0
 #define TCG_TARGET_HAS_sub2_i32         0
 #define TCG_TARGET_HAS_sub2_i64         0
-#define TCG_TARGET_HAS_mulu2_i32        0
-#define TCG_TARGET_HAS_mulu2_i64        0
-#define TCG_TARGET_HAS_muls2_i32        0
-#define TCG_TARGET_HAS_muls2_i64        0
+#define TCG_TARGET_HAS_mulu2_i32        1
+#define TCG_TARGET_HAS_mulu2_i64        1
+#define TCG_TARGET_HAS_muls2_i32        1
+#define TCG_TARGET_HAS_muls2_i64        1
 
 #define TCG_TARGET_deposit_i32_valid(ofs, len) ((len) <= 16)
 #define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16)
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [Qemu-devel] [PATCH 2/2] tcg-ia64: implement add2_i32/i64 and sub2_i32/i64
  2013-03-30 15:33 [Qemu-devel] [PATCH 1/2] tcg-ia64: implement mulu2_i32/i64 and muls2_i32/i64 Aurelien Jarno
@ 2013-03-30 15:33 ` Aurelien Jarno
  2013-03-30 15:54   ` Richard Henderson
  2013-03-30 16:00 ` [Qemu-devel] [PATCH 1/2] tcg-ia64: implement mulu2_i32/i64 and muls2_i32/i64 Richard Henderson
  1 sibling, 1 reply; 6+ messages in thread
From: Aurelien Jarno @ 2013-03-30 15:33 UTC (permalink / raw)
  To: qemu-devel; +Cc: Aurelien Jarno

Add 32-bit and 64-bit add2 and sub2 TCG ops.

On IA64, 32-bit ops should just ignore the 32 most significant bits of
registers, and can leave them with non-zero values. This means a 32-bit
comparison instruction should be used for add2_i32 and sub32_i32.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/ia64/tcg-target.c |   55 +++++++++++++++++++++++++++++++++++++++++++++++++
 tcg/ia64/tcg-target.h |    8 +++----
 2 files changed, 59 insertions(+), 4 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index a46234d..552992e 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -211,6 +211,7 @@ enum {
 
 enum {
     OPC_ADD_A1                = 0x10000000000ull,
+    OPC_ADD1_A1               = 0x10008000000ull,
     OPC_AND_A1                = 0x10060000000ull,
     OPC_AND_A3                = 0x10160000000ull,
     OPC_ANDCM_A1              = 0x10068000000ull,
@@ -276,6 +277,7 @@ enum {
     OPC_ST8_M4                = 0x08cc0000000ull,
     OPC_SUB_A1                = 0x10028000000ull,
     OPC_SUB_A3                = 0x10128000000ull,
+    OPC_SUB1_A1               = 0x10020000000ull,
     OPC_UNPACK4_L_I2          = 0x0f860000000ull,
     OPC_XMA_L_F2              = 0x1d000000000ull,
     OPC_XMA_H_F2              = 0x1dc00000000ull,
@@ -1564,6 +1566,38 @@ static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret,
                    opc2);
 }
 
+static inline void tcg_out_add2(TCGContext *s, TCGArg retl, TCGArg reth,
+                                TCGArg arg1l, TCGArg arg1h,
+                                TCGArg arg2l, TCGArg arg2h,
+                                int cmp4)
+{
+    tcg_out_bundle(s, MmI,
+                   tcg_opc_a1(TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, arg1l, arg2l),
+                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   tcg_opc_cmp_a(TCG_REG_P0, TCG_COND_LTU,
+                                 TCG_REG_R2, arg1l, cmp4));
+    tcg_out_bundle(s, mII,
+                   tcg_opc_a1(TCG_REG_P6, OPC_ADD1_A1, reth, arg1h, arg2h),
+                   tcg_opc_a1(TCG_REG_P7, OPC_ADD_A1, reth, arg1h, arg2h),
+                   tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, retl, 0, TCG_REG_R2));
+}
+
+static inline void tcg_out_sub2(TCGContext *s, TCGArg retl, TCGArg reth,
+                                TCGArg arg1l, TCGArg arg1h,
+                                TCGArg arg2l, TCGArg arg2h,
+                                int cmp4)
+{
+    tcg_out_bundle(s, MmI,
+                   tcg_opc_a1(TCG_REG_P0, OPC_SUB_A1, TCG_REG_R2, arg1l, arg2l),
+                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   tcg_opc_cmp_a(TCG_REG_P0, TCG_COND_LTU,
+                                 arg1l, TCG_REG_R2, cmp4));
+    tcg_out_bundle(s, mII,
+                   tcg_opc_a1(TCG_REG_P6, OPC_SUB1_A1, reth, arg1h, arg2h),
+                   tcg_opc_a1(TCG_REG_P7, OPC_SUB_A1, reth, arg1h, arg2h),
+                   tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, retl, 0, TCG_REG_R2));
+}
+
 #if defined(CONFIG_SOFTMMU)
 
 #include "exec/softmmu_defs.h"
@@ -2131,6 +2165,23 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_alu(s, OPC_ADD_A1, args[0], args[1], const_args[1],
                     args[2], const_args[2]);
         break;
+    case INDEX_op_add2_i32:
+        tcg_out_add2(s, args[0], args[1], args[2],
+                        args[3], args[4], args[5], 1);
+        break;
+    case INDEX_op_add2_i64:
+        tcg_out_add2(s, args[0], args[1], args[2],
+                        args[3], args[4], args[5], 0);
+        break;
+    case INDEX_op_sub2_i32:
+        tcg_out_sub2(s, args[0], args[1], args[2],
+                        args[3], args[4], args[5], 1);
+        break;
+    case INDEX_op_sub2_i64:
+        tcg_out_sub2(s, args[0], args[1], args[2],
+                        args[3], args[4], args[5], 0);
+        break;
+
     case INDEX_op_sub_i32:
     case INDEX_op_sub_i64:
         tcg_out_alu(s, OPC_SUB_A1, args[0], args[1], const_args[1],
@@ -2352,7 +2403,9 @@ static const TCGTargetOpDef ia64_op_defs[] = {
     { INDEX_op_st_i32, { "rZ", "r" } },
 
     { INDEX_op_add_i32, { "r", "rI", "rI" } },
+    { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rZ", "rZ" } },
     { INDEX_op_sub_i32, { "r", "rI", "rI" } },
+    { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rZ", "rZ" } },
 
     { INDEX_op_and_i32, { "r", "rI", "rI" } },
     { INDEX_op_andc_i32, { "r", "rI", "rI" } },
@@ -2401,7 +2454,9 @@ static const TCGTargetOpDef ia64_op_defs[] = {
     { INDEX_op_st_i64, { "rZ", "r" } },
 
     { INDEX_op_add_i64, { "r", "rI", "rI" } },
+    { INDEX_op_add2_i64, { "r", "r", "rZ", "rZ", "rZ", "rZ" } },
     { INDEX_op_sub_i64, { "r", "rI", "rI" } },
+    { INDEX_op_sub2_i64, { "r", "r", "rZ", "rZ", "rZ", "rZ" } },
 
     { INDEX_op_and_i64, { "r", "rI", "rI" } },
     { INDEX_op_andc_i64, { "r", "rI", "rI" } },
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index 75e357e..6c70e41 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -136,10 +136,10 @@ typedef enum {
 #define TCG_TARGET_HAS_movcond_i64      1
 #define TCG_TARGET_HAS_deposit_i32      1
 #define TCG_TARGET_HAS_deposit_i64      1
-#define TCG_TARGET_HAS_add2_i32         0
-#define TCG_TARGET_HAS_add2_i64         0
-#define TCG_TARGET_HAS_sub2_i32         0
-#define TCG_TARGET_HAS_sub2_i64         0
+#define TCG_TARGET_HAS_add2_i32         1
+#define TCG_TARGET_HAS_add2_i64         1
+#define TCG_TARGET_HAS_sub2_i32         1
+#define TCG_TARGET_HAS_sub2_i64         1
 #define TCG_TARGET_HAS_mulu2_i32        1
 #define TCG_TARGET_HAS_mulu2_i64        1
 #define TCG_TARGET_HAS_muls2_i32        1
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [Qemu-devel] [PATCH 2/2] tcg-ia64: implement add2_i32/i64 and sub2_i32/i64
  2013-03-30 15:33 ` [Qemu-devel] [PATCH 2/2] tcg-ia64: implement add2_i32/i64 and sub2_i32/i64 Aurelien Jarno
@ 2013-03-30 15:54   ` Richard Henderson
  2013-03-30 22:05     ` Aurelien Jarno
  0 siblings, 1 reply; 6+ messages in thread
From: Richard Henderson @ 2013-03-30 15:54 UTC (permalink / raw)
  To: Aurelien Jarno; +Cc: qemu-devel

On 03/30/2013 08:33 AM, Aurelien Jarno wrote:
> +static inline void tcg_out_add2(TCGContext *s, TCGArg retl, TCGArg reth,
> +                                TCGArg arg1l, TCGArg arg1h,
> +                                TCGArg arg2l, TCGArg arg2h,
> +                                int cmp4)
> +{
> +    tcg_out_bundle(s, MmI,
> +                   tcg_opc_a1(TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, arg1l, arg2l),
> +                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> +                   tcg_opc_cmp_a(TCG_REG_P0, TCG_COND_LTU,
> +                                 TCG_REG_R2, arg1l, cmp4));

I seem to recall a 1-cycle cross-unit delay, going between M to I units?
Maybe that was just the itanic1, it's been so long...?

Anyway, in this case it's easy to avoid by putting the nop first and using
an mII bundle.

That said, the code looks correct.

Reviewed-by: Richard Henderson <rth@twiddle.net>


r~

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Qemu-devel] [PATCH 1/2] tcg-ia64: implement mulu2_i32/i64 and muls2_i32/i64
  2013-03-30 15:33 [Qemu-devel] [PATCH 1/2] tcg-ia64: implement mulu2_i32/i64 and muls2_i32/i64 Aurelien Jarno
  2013-03-30 15:33 ` [Qemu-devel] [PATCH 2/2] tcg-ia64: implement add2_i32/i64 and sub2_i32/i64 Aurelien Jarno
@ 2013-03-30 16:00 ` Richard Henderson
  2013-03-30 22:04   ` Aurelien Jarno
  1 sibling, 1 reply; 6+ messages in thread
From: Richard Henderson @ 2013-03-30 16:00 UTC (permalink / raw)
  To: Aurelien Jarno; +Cc: qemu-devel

On 03/30/2013 08:33 AM, Aurelien Jarno wrote:
> +    if (const_arg1 && arg1 != 0) {
> +        opc2 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
> +                          TCG_REG_R2, arg1, TCG_REG_R0);
> +    } else if (is_signed) {
> +        opc2 = tcg_opc_i29(TCG_REG_P0, OPC_SXT4_I29, TCG_REG_R2, arg1);
> +    } else {
> +        opc2 = tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R2, arg1);
> +    }
> +    if (const_arg2 && arg2 != 0) {
> +        opc3 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
> +                          TCG_REG_R3, arg2, TCG_REG_R0);
> +    } else if (is_signed) {
> +        opc3 = tcg_opc_i29(TCG_REG_P0, OPC_SXT4_I29, TCG_REG_R3, arg2);
> +    } else {
> +        opc3 = tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R3, arg2);
> +    }
> +

Why the check form arg[12] != 0?  Seems like a wasted test, especially if
the optimizer is enabled and we ought never see it.

I would have said you shouldn't bother implementing the _i32 versions for
ia64, because the generic fallback would produce the same code.  But in
this case it's more about doing the job with fewer bundles.

Otherwise, the code looks correct,

Reviewed-by: Richard Henderson <rth@twiddle.net>


r~

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Qemu-devel] [PATCH 1/2] tcg-ia64: implement mulu2_i32/i64 and muls2_i32/i64
  2013-03-30 16:00 ` [Qemu-devel] [PATCH 1/2] tcg-ia64: implement mulu2_i32/i64 and muls2_i32/i64 Richard Henderson
@ 2013-03-30 22:04   ` Aurelien Jarno
  0 siblings, 0 replies; 6+ messages in thread
From: Aurelien Jarno @ 2013-03-30 22:04 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel

On Sat, Mar 30, 2013 at 09:00:40AM -0700, Richard Henderson wrote:
> On 03/30/2013 08:33 AM, Aurelien Jarno wrote:
> > +    if (const_arg1 && arg1 != 0) {
> > +        opc2 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
> > +                          TCG_REG_R2, arg1, TCG_REG_R0);
> > +    } else if (is_signed) {
> > +        opc2 = tcg_opc_i29(TCG_REG_P0, OPC_SXT4_I29, TCG_REG_R2, arg1);
> > +    } else {
> > +        opc2 = tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R2, arg1);
> > +    }
> > +    if (const_arg2 && arg2 != 0) {
> > +        opc3 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
> > +                          TCG_REG_R3, arg2, TCG_REG_R0);
> > +    } else if (is_signed) {
> > +        opc3 = tcg_opc_i29(TCG_REG_P0, OPC_SXT4_I29, TCG_REG_R3, arg2);
> > +    } else {
> > +        opc3 = tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R3, arg2);
> > +    }
> > +
> 
> Why the check form arg[12] != 0?  Seems like a wasted test, especially if
> the optimizer is enabled and we ought never see it.

It comes from copy and paste from other code, dating from before the
optimizer pass. It looks like some code can be removed.

> I would have said you shouldn't bother implementing the _i32 versions for
> ia64, because the generic fallback would produce the same code.  But in
> this case it's more about doing the job with fewer bundles.

Yes, the code would be basically the same, but with more bundles, so I
still think it make sense to implement it.

> Otherwise, the code looks correct,
> 
> Reviewed-by: Richard Henderson <rth@twiddle.net>

-- 
Aurelien Jarno	                        GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Qemu-devel] [PATCH 2/2] tcg-ia64: implement add2_i32/i64 and sub2_i32/i64
  2013-03-30 15:54   ` Richard Henderson
@ 2013-03-30 22:05     ` Aurelien Jarno
  0 siblings, 0 replies; 6+ messages in thread
From: Aurelien Jarno @ 2013-03-30 22:05 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel

On Sat, Mar 30, 2013 at 08:54:03AM -0700, Richard Henderson wrote:
> On 03/30/2013 08:33 AM, Aurelien Jarno wrote:
> > +static inline void tcg_out_add2(TCGContext *s, TCGArg retl, TCGArg reth,
> > +                                TCGArg arg1l, TCGArg arg1h,
> > +                                TCGArg arg2l, TCGArg arg2h,
> > +                                int cmp4)
> > +{
> > +    tcg_out_bundle(s, MmI,
> > +                   tcg_opc_a1(TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, arg1l, arg2l),
> > +                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> > +                   tcg_opc_cmp_a(TCG_REG_P0, TCG_COND_LTU,
> > +                                 TCG_REG_R2, arg1l, cmp4));
> 
> I seem to recall a 1-cycle cross-unit delay, going between M to I units?
> Maybe that was just the itanic1, it's been so long...?
> 
> Anyway, in this case it's easy to avoid by putting the nop first and using
> an mII bundle.

I'll do that in the next version, thanks for the review.

> That said, the code looks correct.
> 
> Reviewed-by: Richard Henderson <rth@twiddle.net>
> 
> 
> r~
> 
> 
> 
> 

-- 
Aurelien Jarno	                        GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2013-03-30 22:05 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-03-30 15:33 [Qemu-devel] [PATCH 1/2] tcg-ia64: implement mulu2_i32/i64 and muls2_i32/i64 Aurelien Jarno
2013-03-30 15:33 ` [Qemu-devel] [PATCH 2/2] tcg-ia64: implement add2_i32/i64 and sub2_i32/i64 Aurelien Jarno
2013-03-30 15:54   ` Richard Henderson
2013-03-30 22:05     ` Aurelien Jarno
2013-03-30 16:00 ` [Qemu-devel] [PATCH 1/2] tcg-ia64: implement mulu2_i32/i64 and muls2_i32/i64 Richard Henderson
2013-03-30 22:04   ` Aurelien Jarno

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).