[Qemu-devel] [PATCH 0/4] tcg/ia64: misc improvements

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

* [Qemu-devel] [PATCH 0/4] tcg/ia64: misc improvements
@ 2012-10-09 20:32 Aurelien Jarno
  2012-10-09 20:32 ` [Qemu-devel] [PATCH 1/4] tcg/ia64: use stack for TCG temps Aurelien Jarno
                   ` (3 more replies)
  0 siblings, 4 replies; 8+ messages in thread
From: Aurelien Jarno @ 2012-10-09 20:32 UTC (permalink / raw)
  To: qemu-devel; +Cc: Aurelien Jarno

This patch series improve the IA64 backend by adding the movcond
instruction and optimizing the qemu_ld/st ops.

It also includes a patch from Blue Swirl posted more than a year
ago to use stack for TCG temps.

Aurelien Jarno (3):
  tcg/ia64: implement movcond_i32/64
  tcg/ia64: remove suboptimal register shifting in qemu_ld/st ops
  tcg/ia64: slightly optimize TLB access code

Blue Swirl (1):
  tcg/ia64: use stack for TCG temps

 tcg/ia64/tcg-target.c |  141 +++++++++++++++++++++++++++++++++----------------
 tcg/ia64/tcg-target.h |    4 +-
 2 files changed, 97 insertions(+), 48 deletions(-)

-- 
1.7.10.4

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [Qemu-devel] [PATCH 1/4] tcg/ia64: use stack for TCG temps
  2012-10-09 20:32 [Qemu-devel] [PATCH 0/4] tcg/ia64: misc improvements Aurelien Jarno
@ 2012-10-09 20:32 ` Aurelien Jarno
  2012-10-09 20:32 ` [Qemu-devel] [PATCH 2/4] tcg/ia64: implement movcond_i32/64 Aurelien Jarno
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 8+ messages in thread
From: Aurelien Jarno @ 2012-10-09 20:32 UTC (permalink / raw)
  To: qemu-devel; +Cc: Blue Swirl, Aurelien Jarno

From: Blue Swirl <blauwirbel@gmail.com>

Use stack instead of temp_buf array in CPUState for TCG temps.

Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/ia64/tcg-target.c |    7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index 705712f..4cba344 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -2269,9 +2269,12 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     int frame_size;
 
     /* reserve some stack space */
-    frame_size = TCG_STATIC_CALL_ARGS_SIZE;
+    frame_size = TCG_STATIC_CALL_ARGS_SIZE +
+                 CPU_TEMP_BUF_NLONGS * sizeof(long);
     frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
                  ~(TCG_TARGET_STACK_ALIGN - 1);
+    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
+                  CPU_TEMP_BUF_NLONGS * sizeof(long));
 
     /* First emit adhoc function descriptor */
     *(uint64_t *)(s->code_ptr) = (uint64_t)s->code_ptr + 16; /* entry point */
@@ -2378,6 +2381,4 @@ static void tcg_target_init(TCGContext *s)
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R6);
 
     tcg_add_target_add_op_defs(ia64_op_defs);
-    tcg_set_frame(s, TCG_AREG0, offsetof(CPUArchState, temp_buf),
-                  CPU_TEMP_BUF_NLONGS * sizeof(long));
 }
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [Qemu-devel] [PATCH 2/4] tcg/ia64: implement movcond_i32/64
  2012-10-09 20:32 [Qemu-devel] [PATCH 0/4] tcg/ia64: misc improvements Aurelien Jarno
  2012-10-09 20:32 ` [Qemu-devel] [PATCH 1/4] tcg/ia64: use stack for TCG temps Aurelien Jarno
@ 2012-10-09 20:32 ` Aurelien Jarno
  2012-10-10 20:50   ` Richard Henderson
  2012-10-09 20:32 ` [Qemu-devel] [PATCH 3/4] tcg/ia64: remove suboptimal register shifting in qemu_ld/st ops Aurelien Jarno
  2012-10-09 20:32 ` [Qemu-devel] [PATCH 4/4] tcg/ia64: slightly optimize TLB access code Aurelien Jarno
  3 siblings, 1 reply; 8+ messages in thread
From: Aurelien Jarno @ 2012-10-09 20:32 UTC (permalink / raw)
  To: qemu-devel; +Cc: Aurelien Jarno

Implement movcond_i32/64 on ia64 hosts. It is not possible to have
immediate compare arguments without adding a new bundle, but it is
possible to have 22-bit immediate value arguments.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/ia64/tcg-target.c |   38 ++++++++++++++++++++++++++++++++++++++
 tcg/ia64/tcg-target.h |    4 ++--
 2 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index 4cba344..d4d350f 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -1404,6 +1404,34 @@ static inline void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGArg ret,
                    tcg_opc_a5(TCG_REG_P7, OPC_ADDL_A5, ret, 0, TCG_REG_R0));
 }
 
+static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret,
+                                   TCGArg c1, TCGArg c2,
+                                   TCGArg v1, int const_v1,
+                                   TCGArg v2, int const_v2, int cmp4)
+{
+    uint64_t opc1, opc2;
+
+    if (const_v1) {
+        opc1 = tcg_opc_a5(TCG_REG_P6, OPC_ADDL_A5, ret, v1, TCG_REG_R0);
+    } else if (ret == v1) {
+        opc1 = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0);
+    } else {
+        opc1 = tcg_opc_a4(TCG_REG_P6, OPC_ADDS_A4, ret, 0, v1);
+    }
+    if (const_v2) {
+        opc2 = tcg_opc_a5(TCG_REG_P7, OPC_ADDL_A5, ret, v2, TCG_REG_R0);
+    } else if (ret == v2) {
+        opc2 = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0);
+    } else {
+        opc2 = tcg_opc_a4(TCG_REG_P7, OPC_ADDS_A4, ret, 0, v2);
+    }
+
+    tcg_out_bundle(s, MmI,
+                   tcg_opc_cmp_a(TCG_REG_P0, cond, c1, c2, cmp4),
+                   opc1,
+                   opc2);
+}
+
 #if defined(CONFIG_SOFTMMU)
 
 #include "../../softmmu_defs.h"
@@ -2106,6 +2134,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_setcond_i64:
         tcg_out_setcond(s, args[3], args[0], args[1], args[2], 0);
         break;
+    case INDEX_op_movcond_i32:
+        tcg_out_movcond(s, args[5], args[0], args[1], args[2],
+                        args[3], const_args[3], args[4], const_args[4], 1);
+        break;
+    case INDEX_op_movcond_i64:
+        tcg_out_movcond(s, args[5], args[0], args[1], args[2],
+                        args[3], const_args[3], args[4], const_args[4], 0);
+        break;
 
     case INDEX_op_qemu_ld8u:
         tcg_out_qemu_ld(s, args, 0);
@@ -2196,6 +2232,7 @@ static const TCGTargetOpDef ia64_op_defs[] = {
 
     { INDEX_op_brcond_i32, { "rI", "rI" } },
     { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } },
+    { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rI", "rI" } },
 
     { INDEX_op_mov_i64, { "r", "r" } },
     { INDEX_op_movi_i64, { "r" } },
@@ -2245,6 +2282,7 @@ static const TCGTargetOpDef ia64_op_defs[] = {
 
     { INDEX_op_brcond_i64, { "rI", "rI" } },
     { INDEX_op_setcond_i64, { "r", "rZ", "rZ" } },
+    { INDEX_op_movcond_i64, { "r", "rZ", "rZ", "rI", "rI" } },
 
     { INDEX_op_qemu_ld8u, { "r", "r" } },
     { INDEX_op_qemu_ld8s, { "r", "r" } },
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index 368aee4..5e7d970 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -131,10 +131,10 @@ typedef enum {
 #define TCG_TARGET_HAS_orc_i64          1
 #define TCG_TARGET_HAS_rot_i32          1
 #define TCG_TARGET_HAS_rot_i64          1
+#define TCG_TARGET_HAS_movcond_i32      1
+#define TCG_TARGET_HAS_movcond_i64      1
 #define TCG_TARGET_HAS_deposit_i32      0
 #define TCG_TARGET_HAS_deposit_i64      0
-#define TCG_TARGET_HAS_movcond_i32      0
-#define TCG_TARGET_HAS_movcond_i64      0
 
 /* optional instructions automatically implemented */
 #define TCG_TARGET_HAS_neg_i32          0 /* sub r1, r0, r3 */
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [Qemu-devel] [PATCH 2/4] tcg/ia64: implement movcond_i32/64
  2012-10-09 20:32 ` [Qemu-devel] [PATCH 2/4] tcg/ia64: implement movcond_i32/64 Aurelien Jarno
@ 2012-10-10 20:50   ` Richard Henderson
  0 siblings, 0 replies; 8+ messages in thread
From: Richard Henderson @ 2012-10-10 20:50 UTC (permalink / raw)
  To: Aurelien Jarno; +Cc: qemu-devel

On 10/09/2012 01:32 PM, Aurelien Jarno wrote:
> Implement movcond_i32/64 on ia64 hosts. It is not possible to have
> immediate compare arguments without adding a new bundle, but it is
> possible to have 22-bit immediate value arguments.
> 
> Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>

Reviewed-by: Richard Henderson <rth@twiddle.net>


r~

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [Qemu-devel] [PATCH 3/4] tcg/ia64: remove suboptimal register shifting in qemu_ld/st ops
  2012-10-09 20:32 [Qemu-devel] [PATCH 0/4] tcg/ia64: misc improvements Aurelien Jarno
  2012-10-09 20:32 ` [Qemu-devel] [PATCH 1/4] tcg/ia64: use stack for TCG temps Aurelien Jarno
  2012-10-09 20:32 ` [Qemu-devel] [PATCH 2/4] tcg/ia64: implement movcond_i32/64 Aurelien Jarno
@ 2012-10-09 20:32 ` Aurelien Jarno
  2012-10-10 20:56   ` Richard Henderson
  2012-10-09 20:32 ` [Qemu-devel] [PATCH 4/4] tcg/ia64: slightly optimize TLB access code Aurelien Jarno
  3 siblings, 1 reply; 8+ messages in thread
From: Aurelien Jarno @ 2012-10-09 20:32 UTC (permalink / raw)
  To: qemu-devel; +Cc: Aurelien Jarno

Remove suboptimal register shifting in qemu_ld/st ops, introduced at the
CONFIG_TCG_PASS_AREG0 time.

As mem_idx is now loaded in register R58/R59 for the slow path, we have
to make sure to do it last, to not add additional register constraints.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/ia64/tcg-target.c |   76 ++++++++++++++++++++++++-------------------------
 1 file changed, 37 insertions(+), 39 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index d4d350f..16edc1b 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -1438,7 +1438,7 @@ static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret,
 
 /* Load and compare a TLB entry, and return the result in (p6, p7).
    R2 is loaded with the address of the addend TLB entry.
-   R56 is loaded with the address, zero extented on 32-bit targets. */
+   R57 is loaded with the address, zero extented on 32-bit targets. */
 static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg,
                                     int s_bits, uint64_t offset_rw,
                                     uint64_t offset_addend)
@@ -1456,9 +1456,9 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg,
                    tcg_opc_a5 (TCG_REG_P0, OPC_ADDL_A5, TCG_REG_R2,
                                offset_rw, TCG_REG_R2),
 #if TARGET_LONG_BITS == 32
-                   tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R56, addr_reg),
+                   tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R57, addr_reg),
 #else
-                   tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, TCG_REG_R56,
+                   tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, TCG_REG_R57,
                               0, addr_reg),
 #endif
                    tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
@@ -1466,12 +1466,12 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg,
     tcg_out_bundle(s, mII,
                    tcg_opc_m3 (TCG_REG_P0,
                                (TARGET_LONG_BITS == 32
-                                ? OPC_LD4_M3 : OPC_LD8_M3), TCG_REG_R57,
+                                ? OPC_LD4_M3 : OPC_LD8_M3), TCG_REG_R56,
                                TCG_REG_R2, offset_addend - offset_rw),
                    tcg_opc_a1 (TCG_REG_P0, OPC_AND_A1, TCG_REG_R3,
-                               TCG_REG_R3, TCG_REG_R56),
+                               TCG_REG_R3, TCG_REG_R57),
                    tcg_opc_a6 (TCG_REG_P0, OPC_CMP_EQ_A6, TCG_REG_P6,
-                               TCG_REG_P7, TCG_REG_R3, TCG_REG_R57));
+                               TCG_REG_P7, TCG_REG_R3, TCG_REG_R56));
 }
 
 /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
@@ -1508,8 +1508,8 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
 
     /* P6 is the fast path, and P7 the slow path */
     tcg_out_bundle(s, mLX,
-                   tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R57,
-                               mem_index, TCG_REG_R0),
+                   tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
+                               TCG_REG_R56, 0, TCG_AREG0),
                    tcg_opc_l2 ((tcg_target_long) qemu_ld_helpers[s_bits]),
                    tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2,
                                (tcg_target_long) qemu_ld_helpers[s_bits]));
@@ -1517,7 +1517,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
                    tcg_opc_m3 (TCG_REG_P0, OPC_LD8_M3, TCG_REG_R3,
                                TCG_REG_R2, 8),
                    tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R3,
-                               TCG_REG_R3, TCG_REG_R56),
+                               TCG_REG_R3, TCG_REG_R57),
                    tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6,
                                TCG_REG_R3, 0));
     if (bswap && s_bits == 1) {
@@ -1541,23 +1541,17 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
     }
-    /* XXX/FIXME: suboptimal */
-    tcg_out_bundle(s, mII,
-                   tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58,
-                               mem_index, TCG_REG_R0),
-                   tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
-                               TCG_REG_R57, 0, TCG_REG_R56),
-                   tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
-                               TCG_REG_R56, 0, TCG_AREG0));
     if (!bswap || s_bits == 0) {
         tcg_out_bundle(s, miB,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58,
+                                   mem_index, TCG_REG_R0),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
                        tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
                                    TCG_REG_B0, TCG_REG_B6));
     } else {
         tcg_out_bundle(s, miB,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58,
+                                   mem_index, TCG_REG_R0),
                        tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
                                    TCG_REG_R8, TCG_REG_R8, 0xb),
                        tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
@@ -1609,8 +1603,8 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
 
     /* P6 is the fast path, and P7 the slow path */
     tcg_out_bundle(s, mLX,
-                   tcg_opc_a4(TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R57,
-                              0, data_reg),
+                   tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
+                               TCG_REG_R56, 0, TCG_AREG0),
                    tcg_opc_l2 ((tcg_target_long) qemu_st_helpers[opc]),
                    tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2,
                                (tcg_target_long) qemu_st_helpers[opc]));
@@ -1618,31 +1612,42 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
                    tcg_opc_m3 (TCG_REG_P0, OPC_LD8_M3, TCG_REG_R3,
                                TCG_REG_R2, 8),
                    tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R3,
-                               TCG_REG_R3, TCG_REG_R56),
+                               TCG_REG_R3, TCG_REG_R57),
                    tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6,
                                TCG_REG_R3, 0));
 
     if (!bswap || opc == 0) {
-        tcg_out_bundle(s, mII,
+        tcg_out_bundle(s, mii,
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
                                    TCG_REG_R1, TCG_REG_R2),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
+                                   0, data_reg),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
     } else if (opc == 1) {
-        tcg_out_bundle(s, mII,
+        tcg_out_bundle(s, miI,
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
                                    TCG_REG_R1, TCG_REG_R2),
+                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
                        tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
-                                   TCG_REG_R2, data_reg, 15, 15),
+                                   TCG_REG_R2, data_reg, 15, 15));
+        tcg_out_bundle(s, miI,
+                       tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
+                                   0, data_reg),
+                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
                        tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
                                    TCG_REG_R2, TCG_REG_R2, 0xb));
         data_reg = TCG_REG_R2;
     } else if (opc == 2) {
-        tcg_out_bundle(s, mII,
+        tcg_out_bundle(s, miI,
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
                                    TCG_REG_R1, TCG_REG_R2),
+                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
                        tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
-                                   TCG_REG_R2, data_reg, 31, 31),
+                                   TCG_REG_R2, data_reg, 31, 31));
+        tcg_out_bundle(s, miI,
+                       tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
+                                   0, data_reg),
+                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
                        tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
                                    TCG_REG_R2, TCG_REG_R2, 0xb));
         data_reg = TCG_REG_R2;
@@ -1650,25 +1655,18 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
         tcg_out_bundle(s, miI,
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
                                    TCG_REG_R1, TCG_REG_R2),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
+                                   0, data_reg),
                        tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
                                    TCG_REG_R2, data_reg, 0xb));
         data_reg = TCG_REG_R2;
     }
 
-    /* XXX/FIXME: suboptimal */
-    tcg_out_bundle(s, mII,
-                   tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R59,
-                               mem_index, TCG_REG_R0),
-                   tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
-                               TCG_REG_R58, 0, TCG_REG_R57),
-                   tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
-                               TCG_REG_R57, 0, TCG_REG_R56));
     tcg_out_bundle(s, miB,
                    tcg_opc_m4 (TCG_REG_P6, opc_st_m4[opc],
                                data_reg, TCG_REG_R3),
-                   tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
-                               TCG_REG_R56, 0, TCG_AREG0),
+                   tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R59,
+                               mem_index, TCG_REG_R0),
                    tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
                                TCG_REG_B0, TCG_REG_B6));
 }
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [Qemu-devel] [PATCH 3/4] tcg/ia64: remove suboptimal register shifting in qemu_ld/st ops
  2012-10-09 20:32 ` [Qemu-devel] [PATCH 3/4] tcg/ia64: remove suboptimal register shifting in qemu_ld/st ops Aurelien Jarno
@ 2012-10-10 20:56   ` Richard Henderson
  0 siblings, 0 replies; 8+ messages in thread
From: Richard Henderson @ 2012-10-10 20:56 UTC (permalink / raw)
  To: Aurelien Jarno; +Cc: qemu-devel

On 10/09/2012 01:32 PM, Aurelien Jarno wrote:
> Remove suboptimal register shifting in qemu_ld/st ops, introduced at the
> CONFIG_TCG_PASS_AREG0 time.
> 
> As mem_idx is now loaded in register R58/R59 for the slow path, we have
> to make sure to do it last, to not add additional register constraints.
> 
> Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>

Reviewed-by: Richard Henderson <rth@twiddle.net>


r~

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [Qemu-devel] [PATCH 4/4] tcg/ia64: slightly optimize TLB access code
  2012-10-09 20:32 [Qemu-devel] [PATCH 0/4] tcg/ia64: misc improvements Aurelien Jarno
                   ` (2 preceding siblings ...)
  2012-10-09 20:32 ` [Qemu-devel] [PATCH 3/4] tcg/ia64: remove suboptimal register shifting in qemu_ld/st ops Aurelien Jarno
@ 2012-10-09 20:32 ` Aurelien Jarno
  2012-10-10 20:58   ` Richard Henderson
  3 siblings, 1 reply; 8+ messages in thread
From: Aurelien Jarno @ 2012-10-09 20:32 UTC (permalink / raw)
  To: qemu-devel; +Cc: Aurelien Jarno

It is possible to slightly optimize the TLB access code, by replacing
the movi + and instructions by a deposit instruction.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/ia64/tcg-target.c |   22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index 16edc1b..6f018f4 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -230,6 +230,7 @@ enum {
     OPC_CMP4_LT_A6            = 0x18400000000ull,
     OPC_CMP4_LTU_A6           = 0x1a400000000ull,
     OPC_CMP4_EQ_A6            = 0x1c400000000ull,
+    OPC_DEP_I14               = 0x0ae00000000ull,
     OPC_DEP_Z_I12             = 0x0a600000000ull,
     OPC_EXTR_I11              = 0x0a400002000ull,
     OPC_EXTR_U_I11            = 0x0a400000000ull,
@@ -501,6 +502,18 @@ static inline uint64_t tcg_opc_i12(int qp, uint64_t opc, int r1,
            | (qp & 0x3f);
 }
 
+static inline uint64_t tcg_opc_i14(int qp, uint64_t opc, int r1, uint64_t imm,
+                                   int r3, uint64_t pos, uint64_t len)
+{
+    return opc
+           | ((imm & 0x01) << 36)
+           | ((len & 0x3f) << 27)
+           | ((r3 & 0x7f) << 20)
+           | ((pos & 0x3f) << 14)
+           | ((r1 & 0x7f) << 6)
+           | (qp & 0x3f);
+}
+
 static inline uint64_t tcg_opc_i18(int qp, uint64_t opc, uint64_t imm)
 {
     return opc
@@ -1444,9 +1457,7 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg,
                                     uint64_t offset_addend)
 {
     tcg_out_bundle(s, mII,
-                   tcg_opc_a5 (TCG_REG_P0, OPC_ADDL_A5, TCG_REG_R3,
-                               TARGET_PAGE_MASK | ((1 << s_bits) - 1),
-                               TCG_REG_R0),
+                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
                    tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, TCG_REG_R2,
                                addr_reg, TARGET_PAGE_BITS, CPU_TLB_BITS - 1),
                    tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, TCG_REG_R2,
@@ -1468,8 +1479,9 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg,
                                (TARGET_LONG_BITS == 32
                                 ? OPC_LD4_M3 : OPC_LD8_M3), TCG_REG_R56,
                                TCG_REG_R2, offset_addend - offset_rw),
-                   tcg_opc_a1 (TCG_REG_P0, OPC_AND_A1, TCG_REG_R3,
-                               TCG_REG_R3, TCG_REG_R57),
+                   tcg_opc_i14(TCG_REG_P0, OPC_DEP_I14, TCG_REG_R3, 0,
+                               TCG_REG_R57, 63 - s_bits,
+                               TARGET_PAGE_BITS - s_bits - 1),
                    tcg_opc_a6 (TCG_REG_P0, OPC_CMP_EQ_A6, TCG_REG_P6,
                                TCG_REG_P7, TCG_REG_R3, TCG_REG_R56));
 }
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [Qemu-devel] [PATCH 4/4] tcg/ia64: slightly optimize TLB access code
  2012-10-09 20:32 ` [Qemu-devel] [PATCH 4/4] tcg/ia64: slightly optimize TLB access code Aurelien Jarno
@ 2012-10-10 20:58   ` Richard Henderson
  0 siblings, 0 replies; 8+ messages in thread
From: Richard Henderson @ 2012-10-10 20:58 UTC (permalink / raw)
  To: Aurelien Jarno; +Cc: qemu-devel

On 10/09/2012 01:32 PM, Aurelien Jarno wrote:
> It is possible to slightly optimize the TLB access code, by replacing
> the movi + and instructions by a deposit instruction.
> 
> Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>

Reviewed-by: Richard Henderson <rth@twiddle.net>


r~

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2012-10-10 20:58 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-10-09 20:32 [Qemu-devel] [PATCH 0/4] tcg/ia64: misc improvements Aurelien Jarno
2012-10-09 20:32 ` [Qemu-devel] [PATCH 1/4] tcg/ia64: use stack for TCG temps Aurelien Jarno
2012-10-09 20:32 ` [Qemu-devel] [PATCH 2/4] tcg/ia64: implement movcond_i32/64 Aurelien Jarno
2012-10-10 20:50   ` Richard Henderson
2012-10-09 20:32 ` [Qemu-devel] [PATCH 3/4] tcg/ia64: remove suboptimal register shifting in qemu_ld/st ops Aurelien Jarno
2012-10-10 20:56   ` Richard Henderson
2012-10-09 20:32 ` [Qemu-devel] [PATCH 4/4] tcg/ia64: slightly optimize TLB access code Aurelien Jarno
2012-10-10 20:58   ` Richard Henderson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).