qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH 00/19] tcg-ia64 fixes and improvements
@ 2013-09-06  6:50 Richard Henderson
  2013-09-06  6:50 ` [Qemu-devel] [PATCH 01/19] tcg-ia64: Use TCGMemOp within qemu_ldst routines Richard Henderson
                   ` (18 more replies)
  0 siblings, 19 replies; 20+ messages in thread
From: Richard Henderson @ 2013-09-06  6:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien

The AREG0 to R32 patch fixes a real bug, and if anyone
cared should be applied to stable.

Most of the patches are pure cleanup and improvementm
rearranging generated code to use fewer stop bits, and
therefore reduce execute cycles.

Finally, convert to out-of-line ldst handlers.  All of
this is on top of my current patch sets.  Full tree at

  git://github.com/rth7680/qemu.git tcg-ldst


r~



Richard Henderson (19):
  tcg-ia64: Use TCGMemOp within qemu_ldst routines
  tcg-ia64: Use shortcuts for nop insns
  tcg-ia64: Handle constant calls
  tcg-ia64: Simplify brcond
  tcg-ia64: Move AREG0 to R32
  tcg-ia64: Avoid unnecessary stop bit in tcg_out_alu
  tcg-ia64: Use ADDS for small addition
  tcg-ia64: Use SUB_A3 and ADDS_A4 for subtraction
  tcg-ia64: Use A3 form of logical operations
  tcg-ia64 Introduce tcg_opc_mov_a
  tcg-ia64 Introduce tcg_opc_movi_a
  tcg-ia64 Introduce tcg_opc_ext_i
  tcg-ia64 Introduce tcg_opc_bswap64_i
  tcg-ia64: Re-bundle the tlb load
  tcg-ia64: Move bswap for store into tlb load
  tcg-ia64: Move tlb addend load into tlb read
  tcg-i64: Reduce code duplication in tcg_out_qemu_ld
  tcg-ia64: Convert to new ldst helpers
  tcg-ia64: Move part of softmmu slow path out of line

 tcg/ia64/tcg-target.c | 1022 ++++++++++++++++++++++++++-----------------------
 tcg/ia64/tcg-target.h |    6 +-
 2 files changed, 555 insertions(+), 473 deletions(-)

-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [Qemu-devel] [PATCH 01/19] tcg-ia64: Use TCGMemOp within qemu_ldst routines
  2013-09-06  6:50 [Qemu-devel] [PATCH 00/19] tcg-ia64 fixes and improvements Richard Henderson
@ 2013-09-06  6:50 ` Richard Henderson
  2013-09-06  6:50 ` [Qemu-devel] [PATCH 02/19] tcg-ia64: Use shortcuts for nop insns Richard Henderson
                   ` (17 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: Richard Henderson @ 2013-09-06  6:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien, Richard Henderson

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 171 ++++++++++++++++++++++++++------------------------
 1 file changed, 90 insertions(+), 81 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index cd4f1ae..dcf4dd3 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -1494,7 +1494,7 @@ static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret,
    R2 is loaded with the address of the addend TLB entry.
    R57 is loaded with the address, zero extented on 32-bit targets. */
 static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg,
-                                    int s_bits, uint64_t offset_rw,
+                                    TCGMemOp s_bits, uint64_t offset_rw,
                                     uint64_t offset_addend)
 {
     tcg_out_bundle(s, mII,
@@ -1536,23 +1536,24 @@ static const void * const qemu_ld_helpers[4] = {
     helper_ldq_mmu,
 };
 
-static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
+static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
+                                   TCGMemOp opc)
 {
-    int addr_reg, data_reg, mem_index, s_bits, bswap;
-    uint64_t opc_ld_m1[4] = { OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1 };
-    uint64_t opc_ext_i29[8] = { OPC_ZXT1_I29, OPC_ZXT2_I29, OPC_ZXT4_I29, 0,
-                                OPC_SXT1_I29, OPC_SXT2_I29, OPC_SXT4_I29, 0 };
+    static const uint64_t opc_ld_m1[4] = {
+        OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1
+    };
+    static const uint64_t opc_ext_i29[8] = {
+        OPC_ZXT1_I29, OPC_ZXT2_I29, OPC_ZXT4_I29, 0,
+        OPC_SXT1_I29, OPC_SXT2_I29, OPC_SXT4_I29, 0
+    };
+    int addr_reg, data_reg, mem_index;
+    TCGMemOp s_bits, bswap;
 
     data_reg = *args++;
     addr_reg = *args++;
     mem_index = *args;
-    s_bits = opc & 3;
-
-#ifdef TARGET_WORDS_BIGENDIAN
-    bswap = 1;
-#else
-    bswap = 0;
-#endif
+    s_bits = opc & MO_SIZE;
+    bswap = opc & MO_BSWAP;
 
     /* Read the TLB entry */
     tcg_out_qemu_tlb(s, addr_reg, s_bits,
@@ -1573,14 +1574,14 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
                                TCG_REG_R3, TCG_REG_R57),
                    tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6,
                                TCG_REG_R3, 0));
-    if (bswap && s_bits == 1) {
+    if (bswap && s_bits == MO_16) {
         tcg_out_bundle(s, MmI,
                        tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits],
                                    TCG_REG_R8, TCG_REG_R3),
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
                        tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
                                    TCG_REG_R8, TCG_REG_R8, 15, 15));
-    } else if (bswap && s_bits == 2) {
+    } else if (bswap && s_bits == MO_32) {
         tcg_out_bundle(s, MmI,
                        tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits],
                                    TCG_REG_R8, TCG_REG_R3),
@@ -1594,7 +1595,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
     }
-    if (!bswap || s_bits == 0) {
+    if (!bswap) {
         tcg_out_bundle(s, miB,
                        tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58,
                                    mem_index, TCG_REG_R0),
@@ -1611,7 +1612,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
                                    TCG_REG_B0, TCG_REG_B6));
     }
 
-    if (opc == 3) {
+    if (s_bits == MO_64) {
         tcg_out_bundle(s, miI,
                        tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
@@ -1621,7 +1622,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
         tcg_out_bundle(s, miI,
                        tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
-                       tcg_opc_i29(TCG_REG_P0, opc_ext_i29[opc],
+                       tcg_opc_i29(TCG_REG_P0, opc_ext_i29[opc & MO_SSIZE],
                                    data_reg, TCG_REG_R8));
     }
 }
@@ -1635,22 +1636,21 @@ static const void * const qemu_st_helpers[4] = {
     helper_stq_mmu,
 };
 
-static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
+static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
+                                   TCGMemOp opc)
 {
-    int addr_reg, data_reg, mem_index, bswap;
-    uint64_t opc_st_m4[4] = { OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4 };
+    static const uint64_t opc_st_m4[4] = {
+        OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4
+    };
+    int addr_reg, data_reg, mem_index;
+    TCGMemOp s_bits;
 
     data_reg = *args++;
     addr_reg = *args++;
     mem_index = *args;
+    s_bits = opc & MO_SIZE;
 
-#ifdef TARGET_WORDS_BIGENDIAN
-    bswap = 1;
-#else
-    bswap = 0;
-#endif
-
-    tcg_out_qemu_tlb(s, addr_reg, opc,
+    tcg_out_qemu_tlb(s, addr_reg, s_bits,
                      offsetof(CPUArchState, tlb_table[mem_index][0].addr_write),
                      offsetof(CPUArchState, tlb_table[mem_index][0].addend));
 
@@ -1658,9 +1658,9 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
     tcg_out_bundle(s, mLX,
                    tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
                                TCG_REG_R56, 0, TCG_AREG0),
-                   tcg_opc_l2 ((tcg_target_long) qemu_st_helpers[opc]),
+                   tcg_opc_l2 ((tcg_target_long) qemu_st_helpers[s_bits]),
                    tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2,
-                               (tcg_target_long) qemu_st_helpers[opc]));
+                               (tcg_target_long) qemu_st_helpers[s_bits]));
     tcg_out_bundle(s, MmI,
                    tcg_opc_m3 (TCG_REG_P0, OPC_LD8_M3, TCG_REG_R3,
                                TCG_REG_R2, 8),
@@ -1669,14 +1669,20 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
                    tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6,
                                TCG_REG_R3, 0));
 
-    if (!bswap || opc == 0) {
+    switch (opc) {
+    case MO_8:
+    case MO_16:
+    case MO_32:
+    case MO_64:
         tcg_out_bundle(s, mii,
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
                                    TCG_REG_R1, TCG_REG_R2),
                        tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
                                    0, data_reg),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
-    } else if (opc == 1) {
+        break;
+
+    case MO_16 | MO_BSWAP:
         tcg_out_bundle(s, miI,
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
                                    TCG_REG_R1, TCG_REG_R2),
@@ -1690,7 +1696,9 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
                        tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
                                    TCG_REG_R2, TCG_REG_R2, 0xb));
         data_reg = TCG_REG_R2;
-    } else if (opc == 2) {
+        break;
+
+    case MO_32 | MO_BSWAP:
         tcg_out_bundle(s, miI,
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
                                    TCG_REG_R1, TCG_REG_R2),
@@ -1704,7 +1712,9 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
                        tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
                                    TCG_REG_R2, TCG_REG_R2, 0xb));
         data_reg = TCG_REG_R2;
-    } else if (opc == 3) {
+        break;
+
+    case MO_64 | MO_BSWAP:
         tcg_out_bundle(s, miI,
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
                                    TCG_REG_R1, TCG_REG_R2),
@@ -1713,6 +1723,10 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
                        tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
                                    TCG_REG_R2, data_reg, 0xb));
         data_reg = TCG_REG_R2;
+        break;
+
+    default:
+        tcg_abort();
     }
 
     tcg_out_bundle(s, miB,
@@ -1726,7 +1740,8 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
 
 #else /* !CONFIG_SOFTMMU */
 
-static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
+static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
+                                   TCGMemOp opc)
 {
     static uint64_t const opc_ld_m1[4] = {
         OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1
@@ -1734,17 +1749,13 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
     static uint64_t const opc_sxt_i29[4] = {
         OPC_SXT1_I29, OPC_SXT2_I29, OPC_SXT4_I29, 0
     };
-    int addr_reg, data_reg, s_bits, bswap;
+    int addr_reg, data_reg;
+    TCGMemOp s_bits, bswap;
 
     data_reg = *args++;
     addr_reg = *args++;
-    s_bits = opc & 3;
-
-#ifdef TARGET_WORDS_BIGENDIAN
-    bswap = 1;
-#else
-    bswap = 0;
-#endif
+    s_bits = opc & MO_SIZE;
+    bswap = opc & MO_BSWAP;
 
 #if TARGET_LONG_BITS == 32
     if (GUEST_BASE != 0) {
@@ -1762,8 +1773,8 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
     }
 
-    if (!bswap || s_bits == 0) {
-        if (s_bits == opc) {
+    if (!bswap) {
+        if (!(opc & MO_SIGN)) {
             tcg_out_bundle(s, miI,
                            tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
                                        data_reg, TCG_REG_R2),
@@ -1777,7 +1788,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
                            tcg_opc_i29(TCG_REG_P0, opc_sxt_i29[s_bits],
                                        data_reg, data_reg));
         }
-    } else if (s_bits == 3) {
+    } else if (s_bits == MO_64) {
             tcg_out_bundle(s, mII,
                            tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
                                        data_reg, TCG_REG_R2),
@@ -1785,7 +1796,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
                            tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
                                        data_reg, data_reg, 0xb));
     } else {
-        if (s_bits == 1) {
+        if (s_bits == MO_16) {
             tcg_out_bundle(s, mII,
                            tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
                                        data_reg, TCG_REG_R2),
@@ -1800,7 +1811,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
                            tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
                                       data_reg, data_reg, 31, 31));
         }
-        if (opc == s_bits) {
+        if (!(opc & MO_SIGN)) {
             tcg_out_bundle(s, miI,
                            tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
                            tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
@@ -1831,28 +1842,28 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
     }
 
-    if (bswap && s_bits == 1) {
+    if (bswap && s_bits == MO_16) {
         tcg_out_bundle(s, mII,
                        tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
                        tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
                                    data_reg, data_reg, 15, 15),
                        tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
                                    data_reg, data_reg, 0xb));
-    } else if (bswap && s_bits == 2) {
+    } else if (bswap && s_bits == MO_32) {
         tcg_out_bundle(s, mII,
                        tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
                        tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
                                    data_reg, data_reg, 31, 31),
                        tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
                                    data_reg, data_reg, 0xb));
-    } else if (bswap && s_bits == 3) {
+    } else if (bswap && s_bits == MO_64) {
         tcg_out_bundle(s, miI,
                        tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
                        tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
                                    data_reg, data_reg, 0xb));
     }
-    if (s_bits != opc) {
+    if (opc & MO_SIGN) {
         tcg_out_bundle(s, miI,
                        tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
@@ -1862,24 +1873,22 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
 #endif
 }
 
-static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
+static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
+                                   TCGMemOp opc)
 {
     static uint64_t const opc_st_m4[4] = {
         OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4
     };
-    int addr_reg, data_reg, bswap;
+    int addr_reg, data_reg;
 #if TARGET_LONG_BITS == 64
     uint64_t add_guest_base;
 #endif
+    TCGMemOp s_bits, bswap;
 
     data_reg = *args++;
     addr_reg = *args++;
-
-#ifdef TARGET_WORDS_BIGENDIAN
-    bswap = 1;
-#else
-    bswap = 0;
-#endif
+    s_bits = opc & MO_SIZE;
+    bswap = opc & MO_BSWAP;
 
 #if TARGET_LONG_BITS == 32
     if (GUEST_BASE != 0) {
@@ -1898,7 +1907,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
     }
 
     if (bswap) {
-        if (opc == 1) {
+        if (s_bits == MO_16) {
             tcg_out_bundle(s, mII,
                            tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
                            tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
@@ -1906,7 +1915,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
                            tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
                                        TCG_REG_R3, TCG_REG_R3, 0xb));
             data_reg = TCG_REG_R3;
-        } else if (opc == 2) {
+        } else if (s_bits == MO_32) {
             tcg_out_bundle(s, mII,
                            tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
                            tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
@@ -1914,7 +1923,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
                            tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
                                        TCG_REG_R3, TCG_REG_R3, 0xb));
             data_reg = TCG_REG_R3;
-        } else if (opc == 3) {
+        } else if (s_bits == MO_64) {
             tcg_out_bundle(s, miI,
                            tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
                            tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
@@ -1924,7 +1933,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
         }
     }
     tcg_out_bundle(s, mmI,
-                   tcg_opc_m4 (TCG_REG_P0, opc_st_m4[opc],
+                   tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits],
                                data_reg, TCG_REG_R2),
                    tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
                    tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
@@ -1937,14 +1946,14 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
         add_guest_base = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0);
     }
 
-    if (!bswap || opc == 0) {
+    if (!bswap) {
         tcg_out_bundle(s, (GUEST_BASE ? MmI : mmI),
                        add_guest_base,
-                       tcg_opc_m4 (TCG_REG_P0, opc_st_m4[opc],
+                       tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits],
                                    data_reg, addr_reg),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
     } else {
-        if (opc == 1) {
+        if (s_bits == MO_16) {
             tcg_out_bundle(s, mII,
                            add_guest_base,
                            tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
@@ -1952,7 +1961,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
                            tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
                                        TCG_REG_R3, TCG_REG_R3, 0xb));
             data_reg = TCG_REG_R3;
-        } else if (opc == 2) {
+        } else if (s_bits == MO_32) {
             tcg_out_bundle(s, mII,
                            add_guest_base,
                            tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
@@ -1960,7 +1969,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
                            tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
                                        TCG_REG_R3, TCG_REG_R3, 0xb));
             data_reg = TCG_REG_R3;
-        } else if (opc == 3) {
+        } else if (s_bits == MO_64) {
             tcg_out_bundle(s, miI,
                            add_guest_base,
                            tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
@@ -1969,7 +1978,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
             data_reg = TCG_REG_R3;
         }
         tcg_out_bundle(s, miI,
-                       tcg_opc_m4 (TCG_REG_P0, opc_st_m4[opc],
+                       tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits],
                                    data_reg, addr_reg),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
@@ -2201,39 +2210,39 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
 
     case INDEX_op_qemu_ld8u:
-        tcg_out_qemu_ld(s, args, 0);
+        tcg_out_qemu_ld(s, args, MO_UB);
         break;
     case INDEX_op_qemu_ld8s:
-        tcg_out_qemu_ld(s, args, 0 | 4);
+        tcg_out_qemu_ld(s, args, MO_SB);
         break;
     case INDEX_op_qemu_ld16u:
-        tcg_out_qemu_ld(s, args, 1);
+        tcg_out_qemu_ld(s, args, MO_TEUW);
         break;
     case INDEX_op_qemu_ld16s:
-        tcg_out_qemu_ld(s, args, 1 | 4);
+        tcg_out_qemu_ld(s, args, MO_TESW);
         break;
     case INDEX_op_qemu_ld32:
     case INDEX_op_qemu_ld32u:
-        tcg_out_qemu_ld(s, args, 2);
+        tcg_out_qemu_ld(s, args, MO_TEUL);
         break;
     case INDEX_op_qemu_ld32s:
-        tcg_out_qemu_ld(s, args, 2 | 4);
+        tcg_out_qemu_ld(s, args, MO_TESL);
         break;
     case INDEX_op_qemu_ld64:
-        tcg_out_qemu_ld(s, args, 3);
+        tcg_out_qemu_ld(s, args, MO_TEQ);
         break;
 
     case INDEX_op_qemu_st8:
-        tcg_out_qemu_st(s, args, 0);
+        tcg_out_qemu_st(s, args, MO_UB);
         break;
     case INDEX_op_qemu_st16:
-        tcg_out_qemu_st(s, args, 1);
+        tcg_out_qemu_st(s, args, MO_TEUW);
         break;
     case INDEX_op_qemu_st32:
-        tcg_out_qemu_st(s, args, 2);
+        tcg_out_qemu_st(s, args, MO_TEUL);
         break;
     case INDEX_op_qemu_st64:
-        tcg_out_qemu_st(s, args, 3);
+        tcg_out_qemu_st(s, args, MO_TEQ);
         break;
 
     default:
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [Qemu-devel] [PATCH 02/19] tcg-ia64: Use shortcuts for nop insns
  2013-09-06  6:50 [Qemu-devel] [PATCH 00/19] tcg-ia64 fixes and improvements Richard Henderson
  2013-09-06  6:50 ` [Qemu-devel] [PATCH 01/19] tcg-ia64: Use TCGMemOp within qemu_ldst routines Richard Henderson
@ 2013-09-06  6:50 ` Richard Henderson
  2013-09-06  6:50 ` [Qemu-devel] [PATCH 03/19] tcg-ia64: Handle constant calls Richard Henderson
                   ` (16 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: Richard Henderson @ 2013-09-06  6:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien, Richard Henderson

There's no need to go through the full opcode-to-insn function call
to generate nops.  This makes the source a bit more readable.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 251 +++++++++++++++++++++++++-------------------------
 1 file changed, 127 insertions(+), 124 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index dcf4dd3..1db8745 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -282,6 +282,9 @@ enum {
     OPC_ZXT1_I29              = 0x00080000000ull,
     OPC_ZXT2_I29              = 0x00088000000ull,
     OPC_ZXT4_I29              = 0x00090000000ull,
+
+    INSN_NOP_M                = OPC_NOP_M48,  /* nop.m 0 */
+    INSN_NOP_I                = OPC_NOP_I18,  /* nop.i 0 */
 };
 
 static inline uint64_t tcg_opc_a1(int qp, uint64_t opc, int r1,
@@ -853,8 +856,8 @@ static inline void tcg_out_mov(TCGContext *s, TCGType type,
                                TCGReg ret, TCGReg arg)
 {
     tcg_out_bundle(s, mmI,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   INSN_NOP_M,
+                   INSN_NOP_M,
                    tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, ret, 0, arg));
 }
 
@@ -862,7 +865,7 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type,
                                 TCGReg reg, tcg_target_long arg)
 {
     tcg_out_bundle(s, mLX,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   INSN_NOP_M,
                    tcg_opc_l2 (arg),
                    tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, reg, arg));
 }
@@ -875,8 +878,8 @@ static void tcg_out_br(TCGContext *s, int label_index)
        the existing value and using it again. This ensure that caches and
        memory are kept coherent during retranslation. */
     tcg_out_bundle(s, mmB,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   INSN_NOP_M,
+                   INSN_NOP_M,
                    tcg_opc_b1 (TCG_REG_P0, OPC_BR_SPTK_MANY_B1,
                                get_reloc_pcrel21b(s->code_ptr + 2)));
 
@@ -897,7 +900,7 @@ static inline void tcg_out_call(TCGContext *s, TCGArg addr)
                                TCG_REG_B6, TCG_REG_R2, 0));
     tcg_out_bundle(s, mmB,
                    tcg_opc_m1 (TCG_REG_P0, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R3),
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   INSN_NOP_M,
                    tcg_opc_b5 (TCG_REG_P0, OPC_BR_CALL_SPTK_MANY_B5,
                                TCG_REG_B0, TCG_REG_B6));
 }
@@ -913,7 +916,7 @@ static void tcg_out_exit_tb(TCGContext *s, tcg_target_long arg)
     imm = (uint64_t)disp >> 4;
 
     tcg_out_bundle(s, mLX,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   INSN_NOP_M,
                    tcg_opc_l3 (imm),
                    tcg_opc_x3 (TCG_REG_P0, OPC_BRL_SPTK_MANY_X3, imm));
 }
@@ -930,12 +933,12 @@ static inline void tcg_out_goto_tb(TCGContext *s, TCGArg arg)
         tcg_out_bundle(s, MmI,
                        tcg_opc_m1 (TCG_REG_P0, OPC_LD8_M1,
                                    TCG_REG_R2, TCG_REG_R2),
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       INSN_NOP_M,
                        tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, TCG_REG_B6,
                                    TCG_REG_R2, 0));
         tcg_out_bundle(s, mmB,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       INSN_NOP_M,
+                       INSN_NOP_M,
                        tcg_opc_b4 (TCG_REG_P0, OPC_BR_SPTK_MANY_B4,
                                    TCG_REG_B6));
     }
@@ -945,12 +948,12 @@ static inline void tcg_out_goto_tb(TCGContext *s, TCGArg arg)
 static inline void tcg_out_jmp(TCGContext *s, TCGArg addr)
 {
     tcg_out_bundle(s, mmI,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   INSN_NOP_M,
+                   INSN_NOP_M,
                    tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, TCG_REG_B6, addr, 0));
     tcg_out_bundle(s, mmB,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   INSN_NOP_M,
+                   INSN_NOP_M,
                    tcg_opc_b4(TCG_REG_P0, OPC_BR_SPTK_MANY_B4, TCG_REG_B6));
 }
 
@@ -962,14 +965,14 @@ static inline void tcg_out_ld_rel(TCGContext *s, uint64_t opc_m4, TCGArg arg,
                        tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4,
                                   TCG_REG_R2, arg2, arg1),
                        tcg_opc_m1 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+                       INSN_NOP_I);
     } else {
         tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, arg2);
         tcg_out_bundle(s, MmI,
                        tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1,
                                    TCG_REG_R2, TCG_REG_R2, arg1),
                        tcg_opc_m1 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+                       INSN_NOP_I);
     }
 }
 
@@ -981,14 +984,14 @@ static inline void tcg_out_st_rel(TCGContext *s, uint64_t opc_m4, TCGArg arg,
                        tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4,
                                   TCG_REG_R2, arg2, arg1),
                        tcg_opc_m4 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+                       INSN_NOP_I);
     } else {
         tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, arg2);
         tcg_out_bundle(s, MmI,
                        tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1,
                                    TCG_REG_R2, TCG_REG_R2, arg1),
                        tcg_opc_m4 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+                       INSN_NOP_I);
     }
 }
 
@@ -1023,7 +1026,7 @@ static inline void tcg_out_alu(TCGContext *s, uint64_t opc_a1, TCGArg ret,
                           TCG_REG_R2, arg1, TCG_REG_R0);
         arg1 = TCG_REG_R2;
     } else {
-        opc1 = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0);
+        opc1 = INSN_NOP_M;
     }
 
     if (const_arg2 && arg2 != 0) {
@@ -1031,7 +1034,7 @@ static inline void tcg_out_alu(TCGContext *s, uint64_t opc_a1, TCGArg ret,
                           TCG_REG_R3, arg2, TCG_REG_R0);
         arg2 = TCG_REG_R3;
     } else {
-        opc2 = tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0);
+        opc2 = INSN_NOP_I;
     }
 
     tcg_out_bundle(s, mII,
@@ -1045,7 +1048,7 @@ static inline void tcg_out_eqv(TCGContext *s, TCGArg ret,
                                TCGArg arg2, int const_arg2)
 {
     tcg_out_bundle(s, mII,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   INSN_NOP_M,
                    tcg_opc_a1 (TCG_REG_P0, OPC_XOR_A1, ret, arg1, arg2),
                    tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, ret, -1, ret));
 }
@@ -1055,7 +1058,7 @@ static inline void tcg_out_nand(TCGContext *s, TCGArg ret,
                                 TCGArg arg2, int const_arg2)
 {
     tcg_out_bundle(s, mII,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   INSN_NOP_M,
                    tcg_opc_a1 (TCG_REG_P0, OPC_AND_A1, ret, arg1, arg2),
                    tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, ret, -1, ret));
 }
@@ -1065,7 +1068,7 @@ static inline void tcg_out_nor(TCGContext *s, TCGArg ret,
                                TCGArg arg2, int const_arg2)
 {
     tcg_out_bundle(s, mII,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   INSN_NOP_M,
                    tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret, arg1, arg2),
                    tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, ret, -1, ret));
 }
@@ -1075,7 +1078,7 @@ static inline void tcg_out_orc(TCGContext *s, TCGArg ret,
                                TCGArg arg2, int const_arg2)
 {
     tcg_out_bundle(s, mII,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   INSN_NOP_M,
                    tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, TCG_REG_R2, -1, arg2),
                    tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret, arg1, TCG_REG_R2));
 }
@@ -1086,16 +1089,16 @@ static inline void tcg_out_mul(TCGContext *s, TCGArg ret,
     tcg_out_bundle(s, mmI,
                    tcg_opc_m18(TCG_REG_P0, OPC_SETF_SIG_M18, TCG_REG_F6, arg1),
                    tcg_opc_m18(TCG_REG_P0, OPC_SETF_SIG_M18, TCG_REG_F7, arg2),
-                   tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+                   INSN_NOP_I);
     tcg_out_bundle(s, mmF,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   INSN_NOP_M,
+                   INSN_NOP_M,
                    tcg_opc_f2 (TCG_REG_P0, OPC_XMA_L_F2, TCG_REG_F6, TCG_REG_F6,
                                TCG_REG_F7, TCG_REG_F0));
     tcg_out_bundle(s, miI,
                    tcg_opc_m19(TCG_REG_P0, OPC_GETF_SIG_M19, ret, TCG_REG_F6),
-                   tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
-                   tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+                   INSN_NOP_I,
+                   INSN_NOP_I);
 }
 
 static inline void tcg_out_sar_i32(TCGContext *s, TCGArg ret, TCGArg arg1,
@@ -1103,8 +1106,8 @@ static inline void tcg_out_sar_i32(TCGContext *s, TCGArg ret, TCGArg arg1,
 {
     if (const_arg2) {
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_M,
+                       INSN_NOP_I,
                        tcg_opc_i11(TCG_REG_P0, OPC_EXTR_I11,
                                    ret, arg1, arg2, 31 - arg2));
     } else {
@@ -1122,14 +1125,14 @@ static inline void tcg_out_sar_i64(TCGContext *s, TCGArg ret, TCGArg arg1,
 {
     if (const_arg2) {
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_M,
+                       INSN_NOP_I,
                        tcg_opc_i11(TCG_REG_P0, OPC_EXTR_I11,
                                    ret, arg1, arg2, 63 - arg2));
     } else {
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_M,
+                       INSN_NOP_I,
                        tcg_opc_i5 (TCG_REG_P0, OPC_SHR_I5, ret, arg1, arg2));
     }
 }
@@ -1139,13 +1142,13 @@ static inline void tcg_out_shl_i32(TCGContext *s, TCGArg ret, TCGArg arg1,
 {
     if (const_arg2) {
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_M,
+                       INSN_NOP_I,
                        tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret,
                                    arg1, 63 - arg2, 31 - arg2));
     } else {
         tcg_out_bundle(s, mII,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       INSN_NOP_M,
                        tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, TCG_REG_R2,
                                    0x1f, arg2),
                        tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, ret,
@@ -1158,14 +1161,14 @@ static inline void tcg_out_shl_i64(TCGContext *s, TCGArg ret, TCGArg arg1,
 {
     if (const_arg2) {
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_M,
+                       INSN_NOP_I,
                        tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret,
                                    arg1, 63 - arg2, 63 - arg2));
     } else {
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_M,
+                       INSN_NOP_I,
                        tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, ret,
                                    arg1, arg2));
     }
@@ -1176,8 +1179,8 @@ static inline void tcg_out_shr_i32(TCGContext *s, TCGArg ret, TCGArg arg1,
 {
     if (const_arg2) {
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_M,
+                       INSN_NOP_I,
                        tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret,
                                    arg1, arg2, 31 - arg2));
     } else {
@@ -1195,14 +1198,14 @@ static inline void tcg_out_shr_i64(TCGContext *s, TCGArg ret, TCGArg arg1,
 {
     if (const_arg2) {
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_M,
+                       INSN_NOP_I,
                        tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret,
                                    arg1, arg2, 63 - arg2));
     } else {
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_M,
+                       INSN_NOP_I,
                        tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, ret,
                                    arg1, arg2));
     }
@@ -1213,20 +1216,20 @@ static inline void tcg_out_rotl_i32(TCGContext *s, TCGArg ret, TCGArg arg1,
 {
     if (const_arg2) {
         tcg_out_bundle(s, mII,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       INSN_NOP_M,
                        tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2,
                                    TCG_REG_R2, arg1, arg1),
                        tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret,
                                    TCG_REG_R2, 32 - arg2, 31));
     } else {
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       INSN_NOP_M,
                        tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2,
                                    TCG_REG_R2, arg1, arg1),
                        tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, TCG_REG_R3,
                                    0x1f, arg2));
         tcg_out_bundle(s, mII,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       INSN_NOP_M,
                        tcg_opc_a3 (TCG_REG_P0, OPC_SUB_A3, TCG_REG_R3,
                                    0x20, TCG_REG_R3),
                        tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, ret,
@@ -1239,8 +1242,8 @@ static inline void tcg_out_rotl_i64(TCGContext *s, TCGArg ret, TCGArg arg1,
 {
     if (const_arg2) {
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_M,
+                       INSN_NOP_I,
                        tcg_opc_i10(TCG_REG_P0, OPC_SHRP_I10, ret, arg1,
                                    arg1, 0x40 - arg2));
     } else {
@@ -1252,8 +1255,8 @@ static inline void tcg_out_rotl_i64(TCGContext *s, TCGArg ret, TCGArg arg1,
                        tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, TCG_REG_R2,
                                    arg1, TCG_REG_R2));
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_M,
+                       INSN_NOP_I,
                        tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret,
                                    TCG_REG_R2, TCG_REG_R3));
     }
@@ -1264,7 +1267,7 @@ static inline void tcg_out_rotr_i32(TCGContext *s, TCGArg ret, TCGArg arg1,
 {
     if (const_arg2) {
         tcg_out_bundle(s, mII,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       INSN_NOP_M,
                        tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2,
                                    TCG_REG_R2, arg1, arg1),
                        tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret,
@@ -1285,8 +1288,8 @@ static inline void tcg_out_rotr_i64(TCGContext *s, TCGArg ret, TCGArg arg1,
 {
     if (const_arg2) {
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_M,
+                       INSN_NOP_I,
                        tcg_opc_i10(TCG_REG_P0, OPC_SHRP_I10, ret, arg1,
                                    arg1, arg2));
     } else {
@@ -1298,8 +1301,8 @@ static inline void tcg_out_rotr_i64(TCGContext *s, TCGArg ret, TCGArg arg1,
                        tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, TCG_REG_R2,
                                    arg1, TCG_REG_R2));
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_M,
+                       INSN_NOP_I,
                        tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret,
                                    TCG_REG_R2, TCG_REG_R3));
     }
@@ -1309,15 +1312,15 @@ static inline void tcg_out_ext(TCGContext *s, uint64_t opc_i29,
                                TCGArg ret, TCGArg arg)
 {
     tcg_out_bundle(s, miI,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                   tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                   INSN_NOP_M,
+                   INSN_NOP_I,
                    tcg_opc_i29(TCG_REG_P0, opc_i29, ret, arg));
 }
 
 static inline void tcg_out_bswap16(TCGContext *s, TCGArg ret, TCGArg arg)
 {
     tcg_out_bundle(s, mII,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   INSN_NOP_M,
                    tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, arg, 15, 15),
                    tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, ret, ret, 0xb));
 }
@@ -1325,7 +1328,7 @@ static inline void tcg_out_bswap16(TCGContext *s, TCGArg ret, TCGArg arg)
 static inline void tcg_out_bswap32(TCGContext *s, TCGArg ret, TCGArg arg)
 {
     tcg_out_bundle(s, mII,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   INSN_NOP_M,
                    tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, arg, 31, 31),
                    tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, ret, ret, 0xb));
 }
@@ -1333,8 +1336,8 @@ static inline void tcg_out_bswap32(TCGContext *s, TCGArg ret, TCGArg arg)
 static inline void tcg_out_bswap64(TCGContext *s, TCGArg ret, TCGArg arg)
 {
     tcg_out_bundle(s, miI,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                   tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                   INSN_NOP_M,
+                   INSN_NOP_I,
                    tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, ret, arg, 0xb));
 }
 
@@ -1364,8 +1367,8 @@ static inline void tcg_out_deposit(TCGContext *s, TCGArg ret, TCGArg a1,
         i2 = tcg_opc_i15(TCG_REG_P0, OPC_DEP_I15, ret, a2, a1, cpos, lm1);
     }
     tcg_out_bundle(s, (i1 ? mII : miI),
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                   i1 ? i1 : tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                   INSN_NOP_M,
+                   i1 ? i1 : INSN_NOP_I,
                    i2);
 }
 
@@ -1423,7 +1426,7 @@ static inline void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGArg arg1,
                           arg1, TCG_REG_R0);
         arg1 = TCG_REG_R2;
     } else {
-        opc1 = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0);
+        opc1 = INSN_NOP_M;
     }
 
     if (const_arg2 && arg2 != 0) {
@@ -1431,7 +1434,7 @@ static inline void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGArg arg1,
                           arg2, TCG_REG_R0);
         arg2 = TCG_REG_R3;
     } else {
-        opc2 = tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0);
+        opc2 = INSN_NOP_I;
     }
 
     tcg_out_bundle(s, mII,
@@ -1439,8 +1442,8 @@ static inline void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGArg arg1,
                    opc2,
                    tcg_opc_cmp_a(TCG_REG_P0, cond, arg1, arg2, cmp4));
     tcg_out_bundle(s, mmB,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   INSN_NOP_M,
+                   INSN_NOP_M,
                    tcg_opc_b1 (TCG_REG_P6, OPC_BR_DPTK_FEW_B1,
                                get_reloc_pcrel21b(s->code_ptr + 2)));
 
@@ -1471,14 +1474,14 @@ static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret,
     if (const_v1) {
         opc1 = tcg_opc_a5(TCG_REG_P6, OPC_ADDL_A5, ret, v1, TCG_REG_R0);
     } else if (ret == v1) {
-        opc1 = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0);
+        opc1 = INSN_NOP_M;
     } else {
         opc1 = tcg_opc_a4(TCG_REG_P6, OPC_ADDS_A4, ret, 0, v1);
     }
     if (const_v2) {
         opc2 = tcg_opc_a5(TCG_REG_P7, OPC_ADDL_A5, ret, v2, TCG_REG_R0);
     } else if (ret == v2) {
-        opc2 = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0);
+        opc2 = INSN_NOP_I;
     } else {
         opc2 = tcg_opc_a4(TCG_REG_P7, OPC_ADDS_A4, ret, 0, v2);
     }
@@ -1498,7 +1501,7 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg,
                                     uint64_t offset_addend)
 {
     tcg_out_bundle(s, mII,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   INSN_NOP_M,
                    tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, TCG_REG_R2,
                                addr_reg, TARGET_PAGE_BITS, CPU_TLB_BITS - 1),
                    tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, TCG_REG_R2,
@@ -1593,13 +1596,13 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                        tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits],
                                    TCG_REG_R8, TCG_REG_R3),
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+                       INSN_NOP_I);
     }
     if (!bswap) {
         tcg_out_bundle(s, miB,
                        tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58,
                                    mem_index, TCG_REG_R0),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_I,
                        tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
                                    TCG_REG_B0, TCG_REG_B6));
     } else {
@@ -1614,14 +1617,14 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
 
     if (s_bits == MO_64) {
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_M,
+                       INSN_NOP_I,
                        tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4,
                                    data_reg, 0, TCG_REG_R8));
     } else {
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_M,
+                       INSN_NOP_I,
                        tcg_opc_i29(TCG_REG_P0, opc_ext_i29[opc & MO_SSIZE],
                                    data_reg, TCG_REG_R8));
     }
@@ -1679,20 +1682,20 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                                    TCG_REG_R1, TCG_REG_R2),
                        tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
                                    0, data_reg),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+                       INSN_NOP_I);
         break;
 
     case MO_16 | MO_BSWAP:
         tcg_out_bundle(s, miI,
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
                                    TCG_REG_R1, TCG_REG_R2),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_I,
                        tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
                                    TCG_REG_R2, data_reg, 15, 15));
         tcg_out_bundle(s, miI,
                        tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
                                    0, data_reg),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_I,
                        tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
                                    TCG_REG_R2, TCG_REG_R2, 0xb));
         data_reg = TCG_REG_R2;
@@ -1702,13 +1705,13 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
         tcg_out_bundle(s, miI,
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
                                    TCG_REG_R1, TCG_REG_R2),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_I,
                        tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
                                    TCG_REG_R2, data_reg, 31, 31));
         tcg_out_bundle(s, miI,
                        tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
                                    0, data_reg),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_I,
                        tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
                                    TCG_REG_R2, TCG_REG_R2, 0xb));
         data_reg = TCG_REG_R2;
@@ -1760,17 +1763,17 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
 #if TARGET_LONG_BITS == 32
     if (GUEST_BASE != 0) {
         tcg_out_bundle(s, mII,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       INSN_NOP_M,
                        tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29,
                                    TCG_REG_R3, addr_reg),
                        tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
                                    TCG_GUEST_BASE_REG, TCG_REG_R3));
     } else {
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       INSN_NOP_M,
                        tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29,
                                    TCG_REG_R2, addr_reg),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+                       INSN_NOP_I);
     }
 
     if (!bswap) {
@@ -1778,13 +1781,13 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
             tcg_out_bundle(s, miI,
                            tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
                                        data_reg, TCG_REG_R2),
-                           tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
-                           tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+                           INSN_NOP_I,
+                           INSN_NOP_I);
         } else {
             tcg_out_bundle(s, mII,
                            tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
                                        data_reg, TCG_REG_R2),
-                           tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                           INSN_NOP_I,
                            tcg_opc_i29(TCG_REG_P0, opc_sxt_i29[s_bits],
                                        data_reg, data_reg));
         }
@@ -1792,7 +1795,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
             tcg_out_bundle(s, mII,
                            tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
                                        data_reg, TCG_REG_R2),
-                           tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                           INSN_NOP_I,
                            tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
                                        data_reg, data_reg, 0xb));
     } else {
@@ -1800,26 +1803,26 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
             tcg_out_bundle(s, mII,
                            tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
                                        data_reg, TCG_REG_R2),
-                           tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                           INSN_NOP_I,
                            tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
                                       data_reg, data_reg, 15, 15));
         } else {
             tcg_out_bundle(s, mII,
                            tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
                                        data_reg, TCG_REG_R2),
-                           tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                           INSN_NOP_I,
                            tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
                                       data_reg, data_reg, 31, 31));
         }
         if (!(opc & MO_SIGN)) {
             tcg_out_bundle(s, miI,
-                           tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                           tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                           INSN_NOP_M,
+                           INSN_NOP_I,
                            tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
                                        data_reg, data_reg, 0xb));
         } else {
             tcg_out_bundle(s, mII,
-                           tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                           INSN_NOP_M,
                            tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
                                        data_reg, data_reg, 0xb),
                            tcg_opc_i29(TCG_REG_P0, opc_sxt_i29[s_bits],
@@ -1833,40 +1836,40 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                                    TCG_GUEST_BASE_REG, addr_reg),
                        tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
                                    data_reg, TCG_REG_R2),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+                       INSN_NOP_I);
     } else {
         tcg_out_bundle(s, mmI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       INSN_NOP_M,
                        tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
                                    data_reg, addr_reg),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+                       INSN_NOP_I);
     }
 
     if (bswap && s_bits == MO_16) {
         tcg_out_bundle(s, mII,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       INSN_NOP_M,
                        tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
                                    data_reg, data_reg, 15, 15),
                        tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
                                    data_reg, data_reg, 0xb));
     } else if (bswap && s_bits == MO_32) {
         tcg_out_bundle(s, mII,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       INSN_NOP_M,
                        tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
                                    data_reg, data_reg, 31, 31),
                        tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
                                    data_reg, data_reg, 0xb));
     } else if (bswap && s_bits == MO_64) {
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_M,
+                       INSN_NOP_I,
                        tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
                                    data_reg, data_reg, 0xb));
     }
     if (opc & MO_SIGN) {
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_M,
+                       INSN_NOP_I,
                        tcg_opc_i29(TCG_REG_P0, opc_sxt_i29[s_bits],
                                    data_reg, data_reg));
     }
@@ -1893,23 +1896,23 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
 #if TARGET_LONG_BITS == 32
     if (GUEST_BASE != 0) {
         tcg_out_bundle(s, mII,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       INSN_NOP_M,
                        tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29,
                                    TCG_REG_R3, addr_reg),
                        tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
                                    TCG_GUEST_BASE_REG, TCG_REG_R3));
     } else {
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       INSN_NOP_M,
                        tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29,
                                    TCG_REG_R2, addr_reg),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+                       INSN_NOP_I);
     }
 
     if (bswap) {
         if (s_bits == MO_16) {
             tcg_out_bundle(s, mII,
-                           tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                           INSN_NOP_M,
                            tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
                                        TCG_REG_R3, data_reg, 15, 15),
                            tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
@@ -1917,7 +1920,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
             data_reg = TCG_REG_R3;
         } else if (s_bits == MO_32) {
             tcg_out_bundle(s, mII,
-                           tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                           INSN_NOP_M,
                            tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
                                        TCG_REG_R3, data_reg, 31, 31),
                            tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
@@ -1925,8 +1928,8 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
             data_reg = TCG_REG_R3;
         } else if (s_bits == MO_64) {
             tcg_out_bundle(s, miI,
-                           tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                           tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                           INSN_NOP_M,
+                           INSN_NOP_I,
                            tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
                                        TCG_REG_R3, data_reg, 0xb));
             data_reg = TCG_REG_R3;
@@ -1935,15 +1938,15 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     tcg_out_bundle(s, mmI,
                    tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits],
                                data_reg, TCG_REG_R2),
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                   tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+                   INSN_NOP_M,
+                   INSN_NOP_I);
 #else
     if (GUEST_BASE != 0) {
         add_guest_base = tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
                                      TCG_GUEST_BASE_REG, addr_reg);
         addr_reg = TCG_REG_R2;
     } else {
-        add_guest_base = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0);
+        add_guest_base = INSN_NOP_M;
     }
 
     if (!bswap) {
@@ -1951,7 +1954,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                        add_guest_base,
                        tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits],
                                    data_reg, addr_reg),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+                       INSN_NOP_I);
     } else {
         if (s_bits == MO_16) {
             tcg_out_bundle(s, mII,
@@ -1972,7 +1975,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
         } else if (s_bits == MO_64) {
             tcg_out_bundle(s, miI,
                            add_guest_base,
-                           tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                           INSN_NOP_I,
                            tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
                                        TCG_REG_R3, data_reg, 0xb));
             data_reg = TCG_REG_R3;
@@ -1980,8 +1983,8 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
         tcg_out_bundle(s, miI,
                        tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits],
                                    data_reg, addr_reg),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+                       INSN_NOP_I,
+                       INSN_NOP_I);
     }
 #endif
 }
@@ -2400,7 +2403,7 @@ static void tcg_target_qemu_prologue(TCGContext *s)
        an ADDL in the M slot of the next bundle.  */
     if (GUEST_BASE != 0) {
         tcg_out_bundle(s, mlx,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       INSN_NOP_M,
                        tcg_opc_l2 (GUEST_BASE),
                        tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2,
                                    TCG_GUEST_BASE_REG, GUEST_BASE));
@@ -2417,13 +2420,13 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     /* epilogue */
     tb_ret_addr = s->code_ptr;
     tcg_out_bundle(s, miI,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   INSN_NOP_M,
                    tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21,
                                TCG_REG_B0, TCG_REG_R32, 0),
                    tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4,
                                TCG_REG_R12, frame_size, TCG_REG_R12));
     tcg_out_bundle(s, miB,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   INSN_NOP_M,
                    tcg_opc_i26(TCG_REG_P0, OPC_MOV_I_I26,
                                TCG_REG_PFS, TCG_REG_R34),
                    tcg_opc_b4 (TCG_REG_P0, OPC_BR_RET_SPTK_MANY_B4,
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [Qemu-devel] [PATCH 03/19] tcg-ia64: Handle constant calls
  2013-09-06  6:50 [Qemu-devel] [PATCH 00/19] tcg-ia64 fixes and improvements Richard Henderson
  2013-09-06  6:50 ` [Qemu-devel] [PATCH 01/19] tcg-ia64: Use TCGMemOp within qemu_ldst routines Richard Henderson
  2013-09-06  6:50 ` [Qemu-devel] [PATCH 02/19] tcg-ia64: Use shortcuts for nop insns Richard Henderson
@ 2013-09-06  6:50 ` Richard Henderson
  2013-09-06  6:50 ` [Qemu-devel] [PATCH 04/19] tcg-ia64: Simplify brcond Richard Henderson
                   ` (15 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: Richard Henderson @ 2013-09-06  6:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien, Richard Henderson

Using only indirect calls results in 3 bundles (one to load the
descriptor address), and 4 stop bits.  By looking through the
descriptor to the constants, we can perform the call with 2
bundles and only 1 stop bit.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 38 +++++++++++++++++++++++++++++++++++---
 1 file changed, 35 insertions(+), 3 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index 1db8745..6708844 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -224,6 +224,7 @@ enum {
     OPC_BR_CALL_SPTK_MANY_B5  = 0x02100001000ull,
     OPC_BR_RET_SPTK_MANY_B4   = 0x00108001100ull,
     OPC_BRL_SPTK_MANY_X3      = 0x18000001000ull,
+    OPC_BRL_CALL_SPTK_MANY_X4 = 0x1a000001000ull,
     OPC_CMP_LT_A6             = 0x18000000000ull,
     OPC_CMP_LTU_A6            = 0x1a000000000ull,
     OPC_CMP_EQ_A6             = 0x1c000000000ull,
@@ -582,6 +583,8 @@ static inline uint64_t tcg_opc_l3(uint64_t imm)
     return (imm & 0x07fffffffff00000ull) >> 18;
 }
 
+#define tcg_opc_l4  tcg_opc_l3
+
 static inline uint64_t tcg_opc_m1(int qp, uint64_t opc, int r1, int r3)
 {
     return opc
@@ -666,6 +669,15 @@ static inline uint64_t tcg_opc_x3(int qp, uint64_t opc, uint64_t imm)
            | (qp & 0x3f);
 }
 
+static inline uint64_t tcg_opc_x4(int qp, uint64_t opc, int b1, uint64_t imm)
+{
+    return opc
+           | ((imm & 0x0800000000000000ull) >> 23) /* i */
+           | ((imm & 0x00000000000fffffull) << 13) /* imm20b */
+           | ((b1 & 0x7) << 6)
+           | (qp & 0x3f);
+}
+
 
 /*
  * Relocations
@@ -891,7 +903,23 @@ static void tcg_out_br(TCGContext *s, int label_index)
     }
 }
 
-static inline void tcg_out_call(TCGContext *s, TCGArg addr)
+static inline void tcg_out_calli(TCGContext *s, uintptr_t addr)
+{
+    /* Look through the function descriptor.  */
+    uintptr_t disp, *desc = (uintptr_t *)addr;
+    tcg_out_bundle(s, mlx,
+                   INSN_NOP_M,
+                   tcg_opc_l2 (desc[1]),
+                   tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, TCG_REG_R1, desc[1]));
+    disp = (desc[0] - (uintptr_t)s->code_ptr) >> 4;
+    tcg_out_bundle(s, mLX,
+                   INSN_NOP_M,
+                   tcg_opc_l4 (disp),
+                   tcg_opc_x4 (TCG_REG_P0, OPC_BRL_CALL_SPTK_MANY_X4,
+                               TCG_REG_B0, disp));
+}
+
+static inline void tcg_out_callr(TCGContext *s, TCGReg addr)
 {
     tcg_out_bundle(s, MmI,
                    tcg_opc_m1 (TCG_REG_P0, OPC_LD8_M1, TCG_REG_R2, addr),
@@ -2002,7 +2030,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_br(s, args[0]);
         break;
     case INDEX_op_call:
-        tcg_out_call(s, args[0]);
+        if (likely(const_args[0])) {
+            tcg_out_calli(s, args[0]);
+        } else {
+            tcg_out_callr(s, args[0]);
+        }
         break;
     case INDEX_op_goto_tb:
         tcg_out_goto_tb(s, args[0]);
@@ -2255,7 +2287,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
 
 static const TCGTargetOpDef ia64_op_defs[] = {
     { INDEX_op_br, { } },
-    { INDEX_op_call, { "r" } },
+    { INDEX_op_call, { "ri" } },
     { INDEX_op_exit_tb, { } },
     { INDEX_op_goto_tb, { } },
 
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [Qemu-devel] [PATCH 04/19] tcg-ia64: Simplify brcond
  2013-09-06  6:50 [Qemu-devel] [PATCH 00/19] tcg-ia64 fixes and improvements Richard Henderson
                   ` (2 preceding siblings ...)
  2013-09-06  6:50 ` [Qemu-devel] [PATCH 03/19] tcg-ia64: Handle constant calls Richard Henderson
@ 2013-09-06  6:50 ` Richard Henderson
  2013-09-06  6:50 ` [Qemu-devel] [PATCH 05/19] tcg-ia64: Move AREG0 to R32 Richard Henderson
                   ` (14 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: Richard Henderson @ 2013-09-06  6:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien, Richard Henderson

There was a misconception that a stop bit is required between a compare
and the branch that uses the predicate set by the compare.  This lead to
the usage of an extra bundle in which to perform the compare.  The extra
bundle left room for constants to be loaded for use with the compare insn.

If we pack the compare and the branch together in the same bundle, then
there's no longer any room for non-zero constants.  At which point we
can eliminate half the function by not handling them.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 42 +++++++++---------------------------------
 1 file changed, 9 insertions(+), 33 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index 6708844..413e00f 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -1442,38 +1442,16 @@ static inline uint64_t tcg_opc_cmp_a(int qp, TCGCond cond, TCGArg arg1,
     }
 }
 
-static inline void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGArg arg1,
-                                  int const_arg1, TCGArg arg2, int const_arg2,
-                                  int label_index, int cmp4)
+static inline void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
+                                  TCGReg arg2, int label_index, int cmp4)
 {
     TCGLabel *l = &s->labels[label_index];
-    uint64_t opc1, opc2;
 
-    if (const_arg1 && arg1 != 0) {
-        opc1 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5, TCG_REG_R2,
-                          arg1, TCG_REG_R0);
-        arg1 = TCG_REG_R2;
-    } else {
-        opc1 = INSN_NOP_M;
-    }
-
-    if (const_arg2 && arg2 != 0) {
-        opc2 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5, TCG_REG_R3,
-                          arg2, TCG_REG_R0);
-        arg2 = TCG_REG_R3;
-    } else {
-        opc2 = INSN_NOP_I;
-    }
-
-    tcg_out_bundle(s, mII,
-                   opc1,
-                   opc2,
-                   tcg_opc_cmp_a(TCG_REG_P0, cond, arg1, arg2, cmp4));
-    tcg_out_bundle(s, mmB,
-                   INSN_NOP_M,
+    tcg_out_bundle(s, miB,
                    INSN_NOP_M,
-                   tcg_opc_b1 (TCG_REG_P6, OPC_BR_DPTK_FEW_B1,
-                               get_reloc_pcrel21b(s->code_ptr + 2)));
+                   tcg_opc_cmp_a(TCG_REG_P0, cond, arg1, arg2, cmp4),
+                   tcg_opc_b1(TCG_REG_P6, OPC_BR_DPTK_FEW_B1,
+                              get_reloc_pcrel21b(s->code_ptr + 2)));
 
     if (l->has_value) {
         reloc_pcrel21b((s->code_ptr - 16) + 2, l->u.value);
@@ -2222,12 +2200,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
 
     case INDEX_op_brcond_i32:
-        tcg_out_brcond(s, args[2], args[0], const_args[0],
-                       args[1], const_args[1], args[3], 1);
+        tcg_out_brcond(s, args[2], args[0], args[1], args[3], 1);
         break;
     case INDEX_op_brcond_i64:
-        tcg_out_brcond(s, args[2], args[0], const_args[0],
-                       args[1], const_args[1], args[3], 0);
+        tcg_out_brcond(s, args[2], args[0], args[1], args[3], 0);
         break;
     case INDEX_op_setcond_i32:
         tcg_out_setcond(s, args[3], args[0], args[1], args[2], 1);
@@ -2331,7 +2307,7 @@ static const TCGTargetOpDef ia64_op_defs[] = {
     { INDEX_op_bswap16_i32, { "r", "rZ" } },
     { INDEX_op_bswap32_i32, { "r", "rZ" } },
 
-    { INDEX_op_brcond_i32, { "rI", "rI" } },
+    { INDEX_op_brcond_i32, { "rZ", "rZ" } },
     { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } },
     { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rI", "rI" } },
 
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [Qemu-devel] [PATCH 05/19] tcg-ia64: Move AREG0 to R32
  2013-09-06  6:50 [Qemu-devel] [PATCH 00/19] tcg-ia64 fixes and improvements Richard Henderson
                   ` (3 preceding siblings ...)
  2013-09-06  6:50 ` [Qemu-devel] [PATCH 04/19] tcg-ia64: Simplify brcond Richard Henderson
@ 2013-09-06  6:50 ` Richard Henderson
  2013-09-06  6:50 ` [Qemu-devel] [PATCH 06/19] tcg-ia64: Avoid unnecessary stop bit in tcg_out_alu Richard Henderson
                   ` (13 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: Richard Henderson @ 2013-09-06  6:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien, Richard Henderson

Since the move away from the global areg0, we're no longer globally
reserving areg0.  Which means our use of R7 clobbers a call-saved
register.  Shift areg0 into the windowed registers.  Indeed, choose
the incoming parameter register that it comes to us by.

This requires moving the register holding the return address elsewhere.
Choose R33 for tidiness.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 13 ++++++-------
 tcg/ia64/tcg-target.h |  4 ++--
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index 413e00f..ad5a6b9 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -107,7 +107,6 @@ enum {
 };
 
 static const int tcg_target_reg_alloc_order[] = {
-    TCG_REG_R33,
     TCG_REG_R35,
     TCG_REG_R36,
     TCG_REG_R37,
@@ -2402,8 +2401,7 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     tcg_out_bundle(s, miI,
                    tcg_opc_m34(TCG_REG_P0, OPC_ALLOC_M34,
                                TCG_REG_R34, 32, 24, 0),
-                   tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4,
-                               TCG_AREG0, 0, TCG_REG_R32),
+                   INSN_NOP_I,
                    tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21,
                                TCG_REG_B6, TCG_REG_R33, 0));
 
@@ -2422,7 +2420,7 @@ static void tcg_target_qemu_prologue(TCGContext *s)
                    tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4,
                                TCG_REG_R12, -frame_size, TCG_REG_R12),
                    tcg_opc_i22(TCG_REG_P0, OPC_MOV_I22,
-                               TCG_REG_R32, TCG_REG_B0),
+                               TCG_REG_R33, TCG_REG_B0),
                    tcg_opc_b4 (TCG_REG_P0, OPC_BR_SPTK_MANY_B4, TCG_REG_B6));
 
     /* epilogue */
@@ -2430,7 +2428,7 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     tcg_out_bundle(s, miI,
                    INSN_NOP_M,
                    tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21,
-                               TCG_REG_B0, TCG_REG_R32, 0),
+                               TCG_REG_B0, TCG_REG_R33, 0),
                    tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4,
                                TCG_REG_R12, frame_size, TCG_REG_R12));
     tcg_out_bundle(s, miB,
@@ -2487,16 +2485,17 @@ static void tcg_target_init(TCGContext *s)
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R3);   /* internal use */
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R12);  /* stack pointer */
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13);  /* thread pointer */
-    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R32);  /* return address */
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R33);  /* return address */
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R34);  /* PFS */
 
-    /* The following 3 are not in use, are call-saved, but *not* saved
+    /* The following 4 are not in use, are call-saved, but *not* saved
        by the prologue.  Therefore we cannot use them without modifying
        the prologue.  There doesn't seem to be any good reason to use
        these as opposed to the windowed registers.  */
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R4);
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R5);
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R6);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R7);
 
     tcg_add_target_add_op_defs(ia64_op_defs);
 }
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index 9fe3bf6..65897f2 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -94,6 +94,8 @@ typedef enum {
     TCG_REG_R61,
     TCG_REG_R62,
     TCG_REG_R63,
+
+    TCG_AREG0 = TCG_REG_R32,
 } TCGReg;
 
 #define TCG_CT_CONST_ZERO 0x100
@@ -164,8 +166,6 @@ typedef enum {
 #define TCG_TARGET_HAS_not_i32          0 /* xor r1, -1, r3 */
 #define TCG_TARGET_HAS_not_i64          0 /* xor r1, -1, r3 */
 
-#define TCG_AREG0 TCG_REG_R7
-
 static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
 {
     start = start & ~(32UL - 1UL);
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [Qemu-devel] [PATCH 06/19] tcg-ia64: Avoid unnecessary stop bit in tcg_out_alu
  2013-09-06  6:50 [Qemu-devel] [PATCH 00/19] tcg-ia64 fixes and improvements Richard Henderson
                   ` (4 preceding siblings ...)
  2013-09-06  6:50 ` [Qemu-devel] [PATCH 05/19] tcg-ia64: Move AREG0 to R32 Richard Henderson
@ 2013-09-06  6:50 ` Richard Henderson
  2013-09-06  6:50 ` [Qemu-devel] [PATCH 07/19] tcg-ia64: Use ADDS for small addition Richard Henderson
                   ` (12 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: Richard Henderson @ 2013-09-06  6:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien, Richard Henderson

When performing an operation with two input registers, we'd leave
the stop bit (and thus an extra cycle) that's only needed when one
or the other input is a constant.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index ad5a6b9..f0c5e0b 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -1042,31 +1042,26 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
     }
 }
 
-static inline void tcg_out_alu(TCGContext *s, uint64_t opc_a1, TCGArg ret,
-                               TCGArg arg1, int const_arg1,
-                               TCGArg arg2, int const_arg2)
+static void tcg_out_alu(TCGContext *s, uint64_t opc_a1, TCGReg ret, TCGArg arg1,
+                        int const_arg1, TCGArg arg2, int const_arg2)
 {
-    uint64_t opc1, opc2;
+    uint64_t opc1 = 0, opc2 = 0;
 
     if (const_arg1 && arg1 != 0) {
         opc1 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
                           TCG_REG_R2, arg1, TCG_REG_R0);
         arg1 = TCG_REG_R2;
-    } else {
-        opc1 = INSN_NOP_M;
     }
 
     if (const_arg2 && arg2 != 0) {
         opc2 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
                           TCG_REG_R3, arg2, TCG_REG_R0);
         arg2 = TCG_REG_R3;
-    } else {
-        opc2 = INSN_NOP_I;
     }
 
-    tcg_out_bundle(s, mII,
-                   opc1,
-                   opc2,
+    tcg_out_bundle(s, (opc1 || opc2 ? mII : miI),
+                   opc1 ? opc1 : INSN_NOP_M,
+                   opc2 ? opc2 : INSN_NOP_I,
                    tcg_opc_a1(TCG_REG_P0, opc_a1, ret, arg1, arg2));
 }
 
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [Qemu-devel] [PATCH 07/19] tcg-ia64: Use ADDS for small addition
  2013-09-06  6:50 [Qemu-devel] [PATCH 00/19] tcg-ia64 fixes and improvements Richard Henderson
                   ` (5 preceding siblings ...)
  2013-09-06  6:50 ` [Qemu-devel] [PATCH 06/19] tcg-ia64: Avoid unnecessary stop bit in tcg_out_alu Richard Henderson
@ 2013-09-06  6:50 ` Richard Henderson
  2013-09-06  6:50 ` [Qemu-devel] [PATCH 08/19] tcg-ia64: Use SUB_A3 and ADDS_A4 for subtraction Richard Henderson
                   ` (11 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: Richard Henderson @ 2013-09-06  6:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien, Richard Henderson

Avoids a wasted cycle loading up small constants.

Simplify the code assuming the tcg optimizer is going to work
and don't expect the first operand of the add to be constant.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index f0c5e0b..1659421 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -1065,6 +1065,19 @@ static void tcg_out_alu(TCGContext *s, uint64_t opc_a1, TCGReg ret, TCGArg arg1,
                    tcg_opc_a1(TCG_REG_P0, opc_a1, ret, arg1, arg2));
 }
 
+static inline void tcg_out_add(TCGContext *s, TCGReg ret, TCGReg arg1,
+                               TCGArg arg2, int const_arg2)
+{
+    if (const_arg2 && arg2 == sextract64(arg2, 0, 14)) {
+        tcg_out_bundle(s, mmI,
+                       INSN_NOP_M,
+                       INSN_NOP_M,
+                       tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, ret, arg2, arg1));
+    } else {
+        tcg_out_alu(s, OPC_ADD_A1, ret, arg1, 0, arg2, const_arg2);
+    }
+}
+
 static inline void tcg_out_eqv(TCGContext *s, TCGArg ret,
                                TCGArg arg1, int const_arg1,
                                TCGArg arg2, int const_arg2)
@@ -2066,8 +2079,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
 
     case INDEX_op_add_i32:
     case INDEX_op_add_i64:
-        tcg_out_alu(s, OPC_ADD_A1, args[0], args[1], const_args[1],
-                    args[2], const_args[2]);
+        tcg_out_add(s, args[0], args[1], args[2], const_args[2]);
         break;
     case INDEX_op_sub_i32:
     case INDEX_op_sub_i64:
@@ -2273,7 +2285,7 @@ static const TCGTargetOpDef ia64_op_defs[] = {
     { INDEX_op_st16_i32, { "rZ", "r" } },
     { INDEX_op_st_i32, { "rZ", "r" } },
 
-    { INDEX_op_add_i32, { "r", "rI", "rI" } },
+    { INDEX_op_add_i32, { "r", "rZ", "rI" } },
     { INDEX_op_sub_i32, { "r", "rI", "rI" } },
 
     { INDEX_op_and_i32, { "r", "rI", "rI" } },
@@ -2320,7 +2332,7 @@ static const TCGTargetOpDef ia64_op_defs[] = {
     { INDEX_op_st32_i64, { "rZ", "r" } },
     { INDEX_op_st_i64, { "rZ", "r" } },
 
-    { INDEX_op_add_i64, { "r", "rI", "rI" } },
+    { INDEX_op_add_i64, { "r", "rZ", "rI" } },
     { INDEX_op_sub_i64, { "r", "rI", "rI" } },
 
     { INDEX_op_and_i64, { "r", "rI", "rI" } },
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [Qemu-devel] [PATCH 08/19] tcg-ia64: Use SUB_A3 and ADDS_A4 for subtraction
  2013-09-06  6:50 [Qemu-devel] [PATCH 00/19] tcg-ia64 fixes and improvements Richard Henderson
                   ` (6 preceding siblings ...)
  2013-09-06  6:50 ` [Qemu-devel] [PATCH 07/19] tcg-ia64: Use ADDS for small addition Richard Henderson
@ 2013-09-06  6:50 ` Richard Henderson
  2013-09-06  6:50 ` [Qemu-devel] [PATCH 09/19] tcg-ia64: Use A3 form of logical operations Richard Henderson
                   ` (10 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: Richard Henderson @ 2013-09-06  6:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien, Richard Henderson

We can subtract from more small constants that just 0 with one insn,
and we can add the negative for most small constants.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index 1659421..cc8c7a9 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -1078,6 +1078,28 @@ static inline void tcg_out_add(TCGContext *s, TCGReg ret, TCGReg arg1,
     }
 }
 
+static inline void tcg_out_sub(TCGContext *s, TCGReg ret, TCGArg arg1,
+                               int const_arg1, TCGArg arg2, int const_arg2)
+{
+    if (const_arg1 && arg1 == (int8_t)arg1) {
+        if (const_arg2) {
+            tcg_out_movi(s, TCG_TYPE_I64, ret, arg1 - arg2);
+            return;
+        }
+        tcg_out_bundle(s, mmI,
+                       INSN_NOP_M,
+                       INSN_NOP_M,
+                       tcg_opc_a3(TCG_REG_P0, OPC_SUB_A3, ret, arg1, arg2));
+    } else if (const_arg2 && -arg2 == sextract64(-arg2, 0, 14)) {
+        tcg_out_bundle(s, mmI,
+                       INSN_NOP_M,
+                       INSN_NOP_M,
+                       tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, ret, -arg2, arg1));
+    } else {
+        tcg_out_alu(s, OPC_SUB_A1, ret, arg1, 0, arg2, 0);
+    }
+}
+
 static inline void tcg_out_eqv(TCGContext *s, TCGArg ret,
                                TCGArg arg1, int const_arg1,
                                TCGArg arg2, int const_arg2)
@@ -2083,8 +2105,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
     case INDEX_op_sub_i32:
     case INDEX_op_sub_i64:
-        tcg_out_alu(s, OPC_SUB_A1, args[0], args[1], const_args[1],
-                    args[2], const_args[2]);
+        tcg_out_sub(s, args[0], args[1], const_args[1], args[2], const_args[2]);
         break;
 
     case INDEX_op_and_i32:
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [Qemu-devel] [PATCH 09/19] tcg-ia64: Use A3 form of logical operations
  2013-09-06  6:50 [Qemu-devel] [PATCH 00/19] tcg-ia64 fixes and improvements Richard Henderson
                   ` (7 preceding siblings ...)
  2013-09-06  6:50 ` [Qemu-devel] [PATCH 08/19] tcg-ia64: Use SUB_A3 and ADDS_A4 for subtraction Richard Henderson
@ 2013-09-06  6:50 ` Richard Henderson
  2013-09-06  6:50 ` [Qemu-devel] [PATCH 10/19] tcg-ia64 Introduce tcg_opc_mov_a Richard Henderson
                   ` (9 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: Richard Henderson @ 2013-09-06  6:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien, Richard Henderson

We can and/or/xor/andcm small constants, saving one cycle.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 52 +++++++++++++++++++++++++++++++--------------------
 1 file changed, 32 insertions(+), 20 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index cc8c7a9..de81593 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -261,6 +261,7 @@ enum {
     OPC_MOV_I_I26             = 0x00150000000ull,
     OPC_MOVL_X2               = 0x0c000000000ull,
     OPC_OR_A1                 = 0x10070000000ull,
+    OPC_OR_A3                 = 0x10170000000ull,
     OPC_SETF_EXP_M18          = 0x0c748000000ull,
     OPC_SETF_SIG_M18          = 0x0c708000000ull,
     OPC_SHL_I7                = 0x0f240000000ull,
@@ -279,6 +280,7 @@ enum {
     OPC_UNPACK4_L_I2          = 0x0f860000000ull,
     OPC_XMA_L_F2              = 0x1d000000000ull,
     OPC_XOR_A1                = 0x10078000000ull,
+    OPC_XOR_A3                = 0x10178000000ull,
     OPC_ZXT1_I29              = 0x00080000000ull,
     OPC_ZXT2_I29              = 0x00088000000ull,
     OPC_ZXT4_I29              = 0x00090000000ull,
@@ -1042,27 +1044,34 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
     }
 }
 
-static void tcg_out_alu(TCGContext *s, uint64_t opc_a1, TCGReg ret, TCGArg arg1,
-                        int const_arg1, TCGArg arg2, int const_arg2)
+static inline void tcg_out_alu(TCGContext *s, uint64_t opc_a1, uint64_t opc_a3,
+                               TCGReg ret, TCGArg arg1, int const_arg1,
+                               TCGArg arg2, int const_arg2)
 {
-    uint64_t opc1 = 0, opc2 = 0;
-
-    if (const_arg1 && arg1 != 0) {
-        opc1 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
-                          TCG_REG_R2, arg1, TCG_REG_R0);
-        arg1 = TCG_REG_R2;
-    }
+    uint64_t opc1 = 0, opc2 = 0, opc3 = 0;
 
     if (const_arg2 && arg2 != 0) {
         opc2 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
                           TCG_REG_R3, arg2, TCG_REG_R0);
         arg2 = TCG_REG_R3;
     }
+    if (const_arg1 && arg1 != 0) {
+        if (opc_a3 && arg1 == (int8_t)arg1) {
+            opc3 = tcg_opc_a3(TCG_REG_P0, opc_a3, ret, arg1, arg2);
+        } else {
+            opc1 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
+                              TCG_REG_R2, arg1, TCG_REG_R0);
+            arg1 = TCG_REG_R2;
+        }
+    }
+    if (opc3 == 0) {
+        opc3 = tcg_opc_a1(TCG_REG_P0, opc_a1, ret, arg1, arg2);
+    }
 
     tcg_out_bundle(s, (opc1 || opc2 ? mII : miI),
                    opc1 ? opc1 : INSN_NOP_M,
                    opc2 ? opc2 : INSN_NOP_I,
-                   tcg_opc_a1(TCG_REG_P0, opc_a1, ret, arg1, arg2));
+                   opc3);
 }
 
 static inline void tcg_out_add(TCGContext *s, TCGReg ret, TCGReg arg1,
@@ -1074,7 +1083,7 @@ static inline void tcg_out_add(TCGContext *s, TCGReg ret, TCGReg arg1,
                        INSN_NOP_M,
                        tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, ret, arg2, arg1));
     } else {
-        tcg_out_alu(s, OPC_ADD_A1, ret, arg1, 0, arg2, const_arg2);
+        tcg_out_alu(s, OPC_ADD_A1, 0, ret, arg1, 0, arg2, const_arg2);
     }
 }
 
@@ -1096,7 +1105,7 @@ static inline void tcg_out_sub(TCGContext *s, TCGReg ret, TCGArg arg1,
                        INSN_NOP_M,
                        tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, ret, -arg2, arg1));
     } else {
-        tcg_out_alu(s, OPC_SUB_A1, ret, arg1, 0, arg2, 0);
+        tcg_out_alu(s, OPC_SUB_A1, 0, ret, arg1, 0, arg2, 0);
     }
 }
 
@@ -2110,13 +2119,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
 
     case INDEX_op_and_i32:
     case INDEX_op_and_i64:
-        tcg_out_alu(s, OPC_AND_A1, args[0], args[1], const_args[1],
-                    args[2], const_args[2]);
+        /* TCG expects arg2 constant; A3 expects arg1 constant.  Swap.  */
+        tcg_out_alu(s, OPC_AND_A1, OPC_AND_A3, args[0],
+                    args[2], const_args[2], args[1], const_args[1]);
         break;
     case INDEX_op_andc_i32:
     case INDEX_op_andc_i64:
-        tcg_out_alu(s, OPC_ANDCM_A1, args[0], args[1], const_args[1],
-                    args[2], const_args[2]);
+        tcg_out_alu(s, OPC_ANDCM_A1, OPC_ANDCM_A3, args[0],
+                    args[1], const_args[1], args[2], const_args[2]);
         break;
     case INDEX_op_eqv_i32:
     case INDEX_op_eqv_i64:
@@ -2135,8 +2145,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
     case INDEX_op_or_i32:
     case INDEX_op_or_i64:
-        tcg_out_alu(s, OPC_OR_A1, args[0], args[1], const_args[1],
-                    args[2], const_args[2]);
+        /* TCG expects arg2 constant; A3 expects arg1 constant.  Swap.  */
+        tcg_out_alu(s, OPC_OR_A1, OPC_OR_A3, args[0],
+                    args[2], const_args[2], args[1], const_args[1]);
         break;
     case INDEX_op_orc_i32:
     case INDEX_op_orc_i64:
@@ -2145,8 +2156,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
     case INDEX_op_xor_i32:
     case INDEX_op_xor_i64:
-        tcg_out_alu(s, OPC_XOR_A1, args[0], args[1], const_args[1],
-                    args[2], const_args[2]);
+        /* TCG expects arg2 constant; A3 expects arg1 constant.  Swap.  */
+        tcg_out_alu(s, OPC_XOR_A1, OPC_XOR_A3, args[0],
+                    args[2], const_args[2], args[1], const_args[1]);
         break;
 
     case INDEX_op_mul_i32:
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [Qemu-devel] [PATCH 10/19] tcg-ia64 Introduce tcg_opc_mov_a
  2013-09-06  6:50 [Qemu-devel] [PATCH 00/19] tcg-ia64 fixes and improvements Richard Henderson
                   ` (8 preceding siblings ...)
  2013-09-06  6:50 ` [Qemu-devel] [PATCH 09/19] tcg-ia64: Use A3 form of logical operations Richard Henderson
@ 2013-09-06  6:50 ` Richard Henderson
  2013-09-06  6:50 ` [Qemu-devel] [PATCH 11/19] tcg-ia64 Introduce tcg_opc_movi_a Richard Henderson
                   ` (8 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: Richard Henderson @ 2013-09-06  6:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien, Richard Henderson

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 35 ++++++++++++++++-------------------
 1 file changed, 16 insertions(+), 19 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index de81593..3e96af9 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -865,13 +865,18 @@ static inline void tcg_out_bundle(TCGContext *s, int template,
     s->code_ptr += 16;
 }
 
+static inline uint64_t tcg_opc_mov_a(int qp, TCGReg dst, TCGReg src)
+{
+    return tcg_opc_a4(qp, OPC_ADDS_A4, dst, 0, src);
+}
+
 static inline void tcg_out_mov(TCGContext *s, TCGType type,
                                TCGReg ret, TCGReg arg)
 {
     tcg_out_bundle(s, mmI,
                    INSN_NOP_M,
                    INSN_NOP_M,
-                   tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, ret, 0, arg));
+                   tcg_opc_mov_a(TCG_REG_P0, ret, arg));
 }
 
 static inline void tcg_out_movi(TCGContext *s, TCGType type,
@@ -1520,14 +1525,14 @@ static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret,
     } else if (ret == v1) {
         opc1 = INSN_NOP_M;
     } else {
-        opc1 = tcg_opc_a4(TCG_REG_P6, OPC_ADDS_A4, ret, 0, v1);
+        opc1 = tcg_opc_mov_a(TCG_REG_P6, ret, v1);
     }
     if (const_v2) {
         opc2 = tcg_opc_a5(TCG_REG_P7, OPC_ADDL_A5, ret, v2, TCG_REG_R0);
     } else if (ret == v2) {
         opc2 = INSN_NOP_I;
     } else {
-        opc2 = tcg_opc_a4(TCG_REG_P7, OPC_ADDS_A4, ret, 0, v2);
+        opc2 = tcg_opc_mov_a(TCG_REG_P7, ret, v2);
     }
 
     tcg_out_bundle(s, MmI,
@@ -1557,8 +1562,7 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg,
 #if TARGET_LONG_BITS == 32
                    tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R57, addr_reg),
 #else
-                   tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, TCG_REG_R57,
-                              0, addr_reg),
+                   tcg_opc_mov_a(TCG_REG_P0, TCG_REG_R57, addr_reg),
 #endif
                    tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
                                TCG_REG_R2, TCG_AREG0));
@@ -1609,8 +1613,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
 
     /* P6 is the fast path, and P7 the slow path */
     tcg_out_bundle(s, mLX,
-                   tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
-                               TCG_REG_R56, 0, TCG_AREG0),
+                   tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0),
                    tcg_opc_l2 ((tcg_target_long) qemu_ld_helpers[s_bits]),
                    tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2,
                                (tcg_target_long) qemu_ld_helpers[s_bits]));
@@ -1663,8 +1666,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
         tcg_out_bundle(s, miI,
                        INSN_NOP_M,
                        INSN_NOP_I,
-                       tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4,
-                                   data_reg, 0, TCG_REG_R8));
+                       tcg_opc_mov_a(TCG_REG_P0, data_reg, TCG_REG_R8));
     } else {
         tcg_out_bundle(s, miI,
                        INSN_NOP_M,
@@ -1703,8 +1705,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
 
     /* P6 is the fast path, and P7 the slow path */
     tcg_out_bundle(s, mLX,
-                   tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
-                               TCG_REG_R56, 0, TCG_AREG0),
+                   tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0),
                    tcg_opc_l2 ((tcg_target_long) qemu_st_helpers[s_bits]),
                    tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2,
                                (tcg_target_long) qemu_st_helpers[s_bits]));
@@ -1724,8 +1725,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
         tcg_out_bundle(s, mii,
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
                                    TCG_REG_R1, TCG_REG_R2),
-                       tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
-                                   0, data_reg),
+                       tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg),
                        INSN_NOP_I);
         break;
 
@@ -1737,8 +1737,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                        tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
                                    TCG_REG_R2, data_reg, 15, 15));
         tcg_out_bundle(s, miI,
-                       tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
-                                   0, data_reg),
+                       tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg),
                        INSN_NOP_I,
                        tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
                                    TCG_REG_R2, TCG_REG_R2, 0xb));
@@ -1753,8 +1752,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                        tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
                                    TCG_REG_R2, data_reg, 31, 31));
         tcg_out_bundle(s, miI,
-                       tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
-                                   0, data_reg),
+                       tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg),
                        INSN_NOP_I,
                        tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
                                    TCG_REG_R2, TCG_REG_R2, 0xb));
@@ -1765,8 +1763,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
         tcg_out_bundle(s, miI,
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
                                    TCG_REG_R1, TCG_REG_R2),
-                       tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
-                                   0, data_reg),
+                       tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg),
                        tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
                                    TCG_REG_R2, data_reg, 0xb));
         data_reg = TCG_REG_R2;
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [Qemu-devel] [PATCH 11/19] tcg-ia64 Introduce tcg_opc_movi_a
  2013-09-06  6:50 [Qemu-devel] [PATCH 00/19] tcg-ia64 fixes and improvements Richard Henderson
                   ` (9 preceding siblings ...)
  2013-09-06  6:50 ` [Qemu-devel] [PATCH 10/19] tcg-ia64 Introduce tcg_opc_mov_a Richard Henderson
@ 2013-09-06  6:50 ` Richard Henderson
  2013-09-06  6:50 ` [Qemu-devel] [PATCH 12/19] tcg-ia64 Introduce tcg_opc_ext_i Richard Henderson
                   ` (7 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: Richard Henderson @ 2013-09-06  6:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien, Richard Henderson

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index 3e96af9..8057f40 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -879,6 +879,12 @@ static inline void tcg_out_mov(TCGContext *s, TCGType type,
                    tcg_opc_mov_a(TCG_REG_P0, ret, arg));
 }
 
+static inline uint64_t tcg_opc_movi_a(int qp, TCGReg dst, int64_t src)
+{
+    assert(src == sextract64(src, 0, 22));
+    return tcg_opc_a5(qp, OPC_ADDL_A5, dst, src, TCG_REG_R0);
+}
+
 static inline void tcg_out_movi(TCGContext *s, TCGType type,
                                 TCGReg reg, tcg_target_long arg)
 {
@@ -1056,16 +1062,14 @@ static inline void tcg_out_alu(TCGContext *s, uint64_t opc_a1, uint64_t opc_a3,
     uint64_t opc1 = 0, opc2 = 0, opc3 = 0;
 
     if (const_arg2 && arg2 != 0) {
-        opc2 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
-                          TCG_REG_R3, arg2, TCG_REG_R0);
+        opc2 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R3, arg2);
         arg2 = TCG_REG_R3;
     }
     if (const_arg1 && arg1 != 0) {
         if (opc_a3 && arg1 == (int8_t)arg1) {
             opc3 = tcg_opc_a3(TCG_REG_P0, opc_a3, ret, arg1, arg2);
         } else {
-            opc1 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
-                              TCG_REG_R2, arg1, TCG_REG_R0);
+            opc1 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R2, arg1);
             arg1 = TCG_REG_R2;
         }
     }
@@ -1429,8 +1433,7 @@ static inline void tcg_out_deposit(TCGContext *s, TCGArg ret, TCGArg a1,
         } else {
             /* Otherwise, load any constant into a temporary.  Do this into
                the first I slot to help out with cross-unit delays.  */
-            i1 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
-                            TCG_REG_R2, a2, TCG_REG_R0);
+            i1 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R2, a2);
             a2 = TCG_REG_R2;
         }
     }
@@ -1509,8 +1512,8 @@ static inline void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGArg ret,
 {
     tcg_out_bundle(s, MmI,
                    tcg_opc_cmp_a(TCG_REG_P0, cond, arg1, arg2, cmp4),
-                   tcg_opc_a5(TCG_REG_P6, OPC_ADDL_A5, ret, 1, TCG_REG_R0),
-                   tcg_opc_a5(TCG_REG_P7, OPC_ADDL_A5, ret, 0, TCG_REG_R0));
+                   tcg_opc_movi_a(TCG_REG_P6, ret, 1),
+                   tcg_opc_movi_a(TCG_REG_P7, ret, 0));
 }
 
 static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret,
@@ -1521,14 +1524,14 @@ static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret,
     uint64_t opc1, opc2;
 
     if (const_v1) {
-        opc1 = tcg_opc_a5(TCG_REG_P6, OPC_ADDL_A5, ret, v1, TCG_REG_R0);
+        opc1 = tcg_opc_movi_a(TCG_REG_P6, ret, v1);
     } else if (ret == v1) {
         opc1 = INSN_NOP_M;
     } else {
         opc1 = tcg_opc_mov_a(TCG_REG_P6, ret, v1);
     }
     if (const_v2) {
-        opc2 = tcg_opc_a5(TCG_REG_P7, OPC_ADDL_A5, ret, v2, TCG_REG_R0);
+        opc2 = tcg_opc_movi_a(TCG_REG_P7, ret, v2);
     } else if (ret == v2) {
         opc2 = INSN_NOP_I;
     } else {
@@ -1647,15 +1650,13 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     }
     if (!bswap) {
         tcg_out_bundle(s, miB,
-                       tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58,
-                                   mem_index, TCG_REG_R0),
+                       tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index),
                        INSN_NOP_I,
                        tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
                                    TCG_REG_B0, TCG_REG_B6));
     } else {
         tcg_out_bundle(s, miB,
-                       tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58,
-                                   mem_index, TCG_REG_R0),
+                       tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index),
                        tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
                                    TCG_REG_R8, TCG_REG_R8, 0xb),
                        tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
@@ -1776,8 +1777,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     tcg_out_bundle(s, miB,
                    tcg_opc_m4 (TCG_REG_P6, opc_st_m4[opc],
                                data_reg, TCG_REG_R3),
-                   tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R59,
-                               mem_index, TCG_REG_R0),
+                   tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R59, mem_index),
                    tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
                                TCG_REG_B0, TCG_REG_B6));
 }
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [Qemu-devel] [PATCH 12/19] tcg-ia64 Introduce tcg_opc_ext_i
  2013-09-06  6:50 [Qemu-devel] [PATCH 00/19] tcg-ia64 fixes and improvements Richard Henderson
                   ` (10 preceding siblings ...)
  2013-09-06  6:50 ` [Qemu-devel] [PATCH 11/19] tcg-ia64 Introduce tcg_opc_movi_a Richard Henderson
@ 2013-09-06  6:50 ` Richard Henderson
  2013-09-06  6:50 ` [Qemu-devel] [PATCH 13/19] tcg-ia64 Introduce tcg_opc_bswap64_i Richard Henderson
                   ` (6 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: Richard Henderson @ 2013-09-06  6:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien, Richard Henderson

Being able to "extend" from 64-bits (with a mov) simplifies
a few places where the conditional breaks the train of thought.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 54 +++++++++++++++++++++++----------------------------
 1 file changed, 24 insertions(+), 30 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index 8057f40..6c95920 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -1383,6 +1383,20 @@ static inline void tcg_out_rotr_i64(TCGContext *s, TCGArg ret, TCGArg arg1,
     }
 }
 
+static const uint64_t opc_ext_i29[8] = {
+    OPC_ZXT1_I29, OPC_ZXT2_I29, OPC_ZXT4_I29, 0,
+    OPC_SXT1_I29, OPC_SXT2_I29, OPC_SXT4_I29, 0
+};
+
+static inline uint64_t tcg_opc_ext_i(int qp, TCGMemOp opc, TCGReg d, TCGReg s)
+{
+    if ((opc & MO_SIZE) == MO_64) {
+        return tcg_opc_mov_a(qp, d, s);
+    } else {
+        return tcg_opc_i29(qp, opc_ext_i29[opc & MO_SSIZE], d, s);
+    }
+}
+
 static inline void tcg_out_ext(TCGContext *s, uint64_t opc_i29,
                                TCGArg ret, TCGArg arg)
 {
@@ -1562,11 +1576,9 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg,
     tcg_out_bundle(s, mII,
                    tcg_opc_a5 (TCG_REG_P0, OPC_ADDL_A5, TCG_REG_R2,
                                offset_rw, TCG_REG_R2),
-#if TARGET_LONG_BITS == 32
-                   tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R57, addr_reg),
-#else
-                   tcg_opc_mov_a(TCG_REG_P0, TCG_REG_R57, addr_reg),
-#endif
+                   tcg_opc_ext_i(TCG_REG_P0,
+                                 TARGET_LONG_BITS == 32 ? MO_UL : MO_Q,
+                                 TCG_REG_R57, addr_reg),
                    tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
                                TCG_REG_R2, TCG_AREG0));
     tcg_out_bundle(s, mII,
@@ -1596,10 +1608,6 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     static const uint64_t opc_ld_m1[4] = {
         OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1
     };
-    static const uint64_t opc_ext_i29[8] = {
-        OPC_ZXT1_I29, OPC_ZXT2_I29, OPC_ZXT4_I29, 0,
-        OPC_SXT1_I29, OPC_SXT2_I29, OPC_SXT4_I29, 0
-    };
     int addr_reg, data_reg, mem_index;
     TCGMemOp s_bits, bswap;
 
@@ -1663,18 +1671,10 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                                    TCG_REG_B0, TCG_REG_B6));
     }
 
-    if (s_bits == MO_64) {
-        tcg_out_bundle(s, miI,
-                       INSN_NOP_M,
-                       INSN_NOP_I,
-                       tcg_opc_mov_a(TCG_REG_P0, data_reg, TCG_REG_R8));
-    } else {
-        tcg_out_bundle(s, miI,
-                       INSN_NOP_M,
-                       INSN_NOP_I,
-                       tcg_opc_i29(TCG_REG_P0, opc_ext_i29[opc & MO_SSIZE],
-                                   data_reg, TCG_REG_R8));
-    }
+    tcg_out_bundle(s, miI,
+                   INSN_NOP_M,
+                   INSN_NOP_I,
+                   tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, TCG_REG_R8));
 }
 
 /* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
@@ -1790,9 +1790,6 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     static uint64_t const opc_ld_m1[4] = {
         OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1
     };
-    static uint64_t const opc_sxt_i29[4] = {
-        OPC_SXT1_I29, OPC_SXT2_I29, OPC_SXT4_I29, 0
-    };
     int addr_reg, data_reg;
     TCGMemOp s_bits, bswap;
 
@@ -1829,8 +1826,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                            tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
                                        data_reg, TCG_REG_R2),
                            INSN_NOP_I,
-                           tcg_opc_i29(TCG_REG_P0, opc_sxt_i29[s_bits],
-                                       data_reg, data_reg));
+                           tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, data_reg));
         }
     } else if (s_bits == MO_64) {
             tcg_out_bundle(s, mII,
@@ -1866,8 +1862,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                            INSN_NOP_M,
                            tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
                                        data_reg, data_reg, 0xb),
-                           tcg_opc_i29(TCG_REG_P0, opc_sxt_i29[s_bits],
-                                       data_reg, data_reg));
+                           tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, data_reg));
         }
     }
 #else
@@ -1911,8 +1906,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
         tcg_out_bundle(s, miI,
                        INSN_NOP_M,
                        INSN_NOP_I,
-                       tcg_opc_i29(TCG_REG_P0, opc_sxt_i29[s_bits],
-                                   data_reg, data_reg));
+                       tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, data_reg));
     }
 #endif
 }
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [Qemu-devel] [PATCH 13/19] tcg-ia64 Introduce tcg_opc_bswap64_i
  2013-09-06  6:50 [Qemu-devel] [PATCH 00/19] tcg-ia64 fixes and improvements Richard Henderson
                   ` (11 preceding siblings ...)
  2013-09-06  6:50 ` [Qemu-devel] [PATCH 12/19] tcg-ia64 Introduce tcg_opc_ext_i Richard Henderson
@ 2013-09-06  6:50 ` Richard Henderson
  2013-09-06  6:50 ` [Qemu-devel] [PATCH 14/19] tcg-ia64: Re-bundle the tlb load Richard Henderson
                   ` (5 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: Richard Henderson @ 2013-09-06  6:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien, Richard Henderson

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 63 +++++++++++++++++++++++----------------------------
 1 file changed, 28 insertions(+), 35 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index 6c95920..3c177c6 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -1406,12 +1406,17 @@ static inline void tcg_out_ext(TCGContext *s, uint64_t opc_i29,
                    tcg_opc_i29(TCG_REG_P0, opc_i29, ret, arg));
 }
 
+static inline uint64_t tcg_opc_bswap64_i(int qp, TCGReg d, TCGReg s)
+{
+    return tcg_opc_i3(qp, OPC_MUX1_I3, d, s, 0xb);
+}
+
 static inline void tcg_out_bswap16(TCGContext *s, TCGArg ret, TCGArg arg)
 {
     tcg_out_bundle(s, mII,
                    INSN_NOP_M,
                    tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, arg, 15, 15),
-                   tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, ret, ret, 0xb));
+                   tcg_opc_bswap64_i(TCG_REG_P0, ret, ret));
 }
 
 static inline void tcg_out_bswap32(TCGContext *s, TCGArg ret, TCGArg arg)
@@ -1419,7 +1424,7 @@ static inline void tcg_out_bswap32(TCGContext *s, TCGArg ret, TCGArg arg)
     tcg_out_bundle(s, mII,
                    INSN_NOP_M,
                    tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, arg, 31, 31),
-                   tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, ret, ret, 0xb));
+                   tcg_opc_bswap64_i(TCG_REG_P0, ret, ret));
 }
 
 static inline void tcg_out_bswap64(TCGContext *s, TCGArg ret, TCGArg arg)
@@ -1427,7 +1432,7 @@ static inline void tcg_out_bswap64(TCGContext *s, TCGArg ret, TCGArg arg)
     tcg_out_bundle(s, miI,
                    INSN_NOP_M,
                    INSN_NOP_I,
-                   tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, ret, arg, 0xb));
+                   tcg_opc_bswap64_i(TCG_REG_P0, ret, arg));
 }
 
 static inline void tcg_out_deposit(TCGContext *s, TCGArg ret, TCGArg a1,
@@ -1665,8 +1670,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     } else {
         tcg_out_bundle(s, miB,
                        tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index),
-                       tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
-                                   TCG_REG_R8, TCG_REG_R8, 0xb),
+                       tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R8, TCG_REG_R8),
                        tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
                                    TCG_REG_B0, TCG_REG_B6));
     }
@@ -1740,8 +1744,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
         tcg_out_bundle(s, miI,
                        tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg),
                        INSN_NOP_I,
-                       tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
-                                   TCG_REG_R2, TCG_REG_R2, 0xb));
+                       tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R2, TCG_REG_R2));
         data_reg = TCG_REG_R2;
         break;
 
@@ -1755,8 +1758,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
         tcg_out_bundle(s, miI,
                        tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg),
                        INSN_NOP_I,
-                       tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
-                                   TCG_REG_R2, TCG_REG_R2, 0xb));
+                       tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R2, TCG_REG_R2));
         data_reg = TCG_REG_R2;
         break;
 
@@ -1765,8 +1767,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
                                    TCG_REG_R1, TCG_REG_R2),
                        tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg),
-                       tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
-                                   TCG_REG_R2, data_reg, 0xb));
+                       tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R2, data_reg));
         data_reg = TCG_REG_R2;
         break;
 
@@ -1833,8 +1834,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                            tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
                                        data_reg, TCG_REG_R2),
                            INSN_NOP_I,
-                           tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
-                                       data_reg, data_reg, 0xb));
+                           tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg));
     } else {
         if (s_bits == MO_16) {
             tcg_out_bundle(s, mII,
@@ -1855,13 +1855,11 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
             tcg_out_bundle(s, miI,
                            INSN_NOP_M,
                            INSN_NOP_I,
-                           tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
-                                       data_reg, data_reg, 0xb));
+                           tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg));
         } else {
             tcg_out_bundle(s, mII,
                            INSN_NOP_M,
-                           tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
-                                       data_reg, data_reg, 0xb),
+                           tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg),
                            tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, data_reg));
         }
     }
@@ -1886,21 +1884,18 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                        INSN_NOP_M,
                        tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
                                    data_reg, data_reg, 15, 15),
-                       tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
-                                   data_reg, data_reg, 0xb));
+                       tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg));
     } else if (bswap && s_bits == MO_32) {
         tcg_out_bundle(s, mII,
                        INSN_NOP_M,
                        tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
                                    data_reg, data_reg, 31, 31),
-                       tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
-                                   data_reg, data_reg, 0xb));
+                       tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg));
     } else if (bswap && s_bits == MO_64) {
         tcg_out_bundle(s, miI,
                        INSN_NOP_M,
                        INSN_NOP_I,
-                       tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
-                                   data_reg, data_reg, 0xb));
+                       tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg));
     }
     if (opc & MO_SIGN) {
         tcg_out_bundle(s, miI,
@@ -1950,23 +1945,22 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                            INSN_NOP_M,
                            tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
                                        TCG_REG_R3, data_reg, 15, 15),
-                           tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
-                                       TCG_REG_R3, TCG_REG_R3, 0xb));
+                           tcg_opc_bswap64_i(TCG_REG_P0,
+                                             TCG_REG_R3, TCG_REG_R3));
             data_reg = TCG_REG_R3;
         } else if (s_bits == MO_32) {
             tcg_out_bundle(s, mII,
                            INSN_NOP_M,
                            tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
                                        TCG_REG_R3, data_reg, 31, 31),
-                           tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
-                                       TCG_REG_R3, TCG_REG_R3, 0xb));
+                           tcg_opc_bswap64_i(TCG_REG_P0,
+                                             TCG_REG_R3, TCG_REG_R3));
             data_reg = TCG_REG_R3;
         } else if (s_bits == MO_64) {
             tcg_out_bundle(s, miI,
                            INSN_NOP_M,
                            INSN_NOP_I,
-                           tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
-                                       TCG_REG_R3, data_reg, 0xb));
+                           tcg_opc_bswap64_i(TCG_REG_P0, TCG_REG_R3, data_reg));
             data_reg = TCG_REG_R3;
         }
     }
@@ -1996,23 +1990,22 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                            add_guest_base,
                            tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
                                        TCG_REG_R3, data_reg, 15, 15),
-                           tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
-                                       TCG_REG_R3, TCG_REG_R3, 0xb));
+                           tcg_opc_bswap64_i(TCG_REG_P0,
+                                             TCG_REG_R3, TCG_REG_R3));
             data_reg = TCG_REG_R3;
         } else if (s_bits == MO_32) {
             tcg_out_bundle(s, mII,
                            add_guest_base,
                            tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
                                        TCG_REG_R3, data_reg, 31, 31),
-                           tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
-                                       TCG_REG_R3, TCG_REG_R3, 0xb));
+                           tcg_opc_bswap64_i(TCG_REG_P0,
+                                             TCG_REG_R3, TCG_REG_R3));
             data_reg = TCG_REG_R3;
         } else if (s_bits == MO_64) {
             tcg_out_bundle(s, miI,
                            add_guest_base,
                            INSN_NOP_I,
-                           tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
-                                       TCG_REG_R3, data_reg, 0xb));
+                           tcg_opc_bswap64_i(TCG_REG_P0, TCG_REG_R3, data_reg));
             data_reg = TCG_REG_R3;
         }
         tcg_out_bundle(s, miI,
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [Qemu-devel] [PATCH 14/19] tcg-ia64: Re-bundle the tlb load
  2013-09-06  6:50 [Qemu-devel] [PATCH 00/19] tcg-ia64 fixes and improvements Richard Henderson
                   ` (12 preceding siblings ...)
  2013-09-06  6:50 ` [Qemu-devel] [PATCH 13/19] tcg-ia64 Introduce tcg_opc_bswap64_i Richard Henderson
@ 2013-09-06  6:50 ` Richard Henderson
  2013-09-06  6:50 ` [Qemu-devel] [PATCH 15/19] tcg-ia64: Move bswap for store into " Richard Henderson
                   ` (4 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: Richard Henderson @ 2013-09-06  6:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien, Richard Henderson

This sequencing requires 5 stop bits instead of 6, and has room left
over to pre-load the tlb addend, and bswap data prior to being stored.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 77 ++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 54 insertions(+), 23 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index 3c177c6..2fa6dd5 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -1564,38 +1564,69 @@ static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret,
 }
 
 #if defined(CONFIG_SOFTMMU)
+/* We're expecting to use an signed 22-bit immediate add.  */
+QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
+                  > 0x1fffff)
+
 /* Load and compare a TLB entry, and return the result in (p6, p7).
    R2 is loaded with the address of the addend TLB entry.
-   R57 is loaded with the address, zero extented on 32-bit targets. */
-static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg,
-                                    TCGMemOp s_bits, uint64_t offset_rw,
-                                    uint64_t offset_addend)
-{
-    tcg_out_bundle(s, mII,
-                   INSN_NOP_M,
-                   tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, TCG_REG_R2,
+   R57 is loaded with the address, zero extented on 32-bit targets.
+   R1, R3 are clobbered. */
+static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg,
+                                    TCGMemOp s_bits, int off_rw, int off_add)
+{
+     /*
+        .mii
+        mov	r2 = off_rw
+        extr.u	r3 = addr_reg, ...		# extract tlb page
+        zxt4	r57 = addr_reg                  # or mov for 64-bit guest
+        ;;
+        .mii
+        addl	r2 = r2, areg0
+        shl	r3 = r3, cteb                   # via dep.z
+        dep	r1 = 0, r57, ...                # zero page ofs, keep align
+        ;;
+        .mmi
+        add	r2 = r2, r3
+        ;;
+        ld4	r3 = [r2], off_add-off_rw	# or ld8 for 64-bit guest
+        nop
+        ;;
+        .mmi
+        nop
+        cmp.eq	p6, p7 = r3, r58
+        nop
+        ;;
+      */
+    tcg_out_bundle(s, miI,
+                   tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R2, off_rw),
+                   tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, TCG_REG_R3,
                                addr_reg, TARGET_PAGE_BITS, CPU_TLB_BITS - 1),
-                   tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, TCG_REG_R2,
-                               TCG_REG_R2, 63 - CPU_TLB_ENTRY_BITS,
-                               63 - CPU_TLB_ENTRY_BITS));
-    tcg_out_bundle(s, mII,
-                   tcg_opc_a5 (TCG_REG_P0, OPC_ADDL_A5, TCG_REG_R2,
-                               offset_rw, TCG_REG_R2),
                    tcg_opc_ext_i(TCG_REG_P0,
                                  TARGET_LONG_BITS == 32 ? MO_UL : MO_Q,
-                                 TCG_REG_R57, addr_reg),
+                                 TCG_REG_R57, addr_reg));
+    tcg_out_bundle(s, miI,
                    tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
-                               TCG_REG_R2, TCG_AREG0));
-    tcg_out_bundle(s, mII,
+                               TCG_REG_R2, TCG_AREG0),
+                   tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, TCG_REG_R3,
+                               TCG_REG_R3, 63 - CPU_TLB_ENTRY_BITS,
+                               63 - CPU_TLB_ENTRY_BITS),
+                   tcg_opc_i14(TCG_REG_P0, OPC_DEP_I14, TCG_REG_R1, 0,
+                               TCG_REG_R57, 63 - s_bits,
+                               TARGET_PAGE_BITS - s_bits - 1));
+    tcg_out_bundle(s, MmI,
+                   tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1,
+                               TCG_REG_R2, TCG_REG_R2, TCG_REG_R3),
                    tcg_opc_m3 (TCG_REG_P0,
                                (TARGET_LONG_BITS == 32
-                                ? OPC_LD4_M3 : OPC_LD8_M3), TCG_REG_R56,
-                               TCG_REG_R2, offset_addend - offset_rw),
-                   tcg_opc_i14(TCG_REG_P0, OPC_DEP_I14, TCG_REG_R3, 0,
-                               TCG_REG_R57, 63 - s_bits,
-                               TARGET_PAGE_BITS - s_bits - 1),
+                                ? OPC_LD4_M3 : OPC_LD8_M3), TCG_REG_R3,
+                               TCG_REG_R2, off_add - off_rw),
+                   INSN_NOP_I);
+    tcg_out_bundle(s, mmI,
+                   INSN_NOP_M,
                    tcg_opc_a6 (TCG_REG_P0, OPC_CMP_EQ_A6, TCG_REG_P6,
-                               TCG_REG_P7, TCG_REG_R3, TCG_REG_R56));
+                               TCG_REG_P7, TCG_REG_R1, TCG_REG_R3),
+                   INSN_NOP_I);
 }
 
 /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [Qemu-devel] [PATCH 15/19] tcg-ia64: Move bswap for store into tlb load
  2013-09-06  6:50 [Qemu-devel] [PATCH 00/19] tcg-ia64 fixes and improvements Richard Henderson
                   ` (13 preceding siblings ...)
  2013-09-06  6:50 ` [Qemu-devel] [PATCH 14/19] tcg-ia64: Re-bundle the tlb load Richard Henderson
@ 2013-09-06  6:50 ` Richard Henderson
  2013-09-06  6:50 ` [Qemu-devel] [PATCH 16/19] tcg-ia64: Move tlb addend load into tlb read Richard Henderson
                   ` (3 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: Richard Henderson @ 2013-09-06  6:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien, Richard Henderson

Saving at least two cycles per store, and cleaning up the code.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 96 +++++++++++++++++----------------------------------
 1 file changed, 32 insertions(+), 64 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index 2fa6dd5..23be46c 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -1571,9 +1571,11 @@ QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
 /* Load and compare a TLB entry, and return the result in (p6, p7).
    R2 is loaded with the address of the addend TLB entry.
    R57 is loaded with the address, zero extented on 32-bit targets.
-   R1, R3 are clobbered. */
+   R1, R3 are clobbered, leaving R56 free for...
+   BSWAP_1, BSWAP_2 and I-slot insns for swapping data for store.  */
 static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg,
-                                    TCGMemOp s_bits, int off_rw, int off_add)
+                                    TCGMemOp s_bits, int off_rw, int off_add,
+                                    uint64_t bswap1, uint64_t bswap2)
 {
      /*
         .mii
@@ -1621,12 +1623,12 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg,
                                (TARGET_LONG_BITS == 32
                                 ? OPC_LD4_M3 : OPC_LD8_M3), TCG_REG_R3,
                                TCG_REG_R2, off_add - off_rw),
-                   INSN_NOP_I);
+                   bswap1);
     tcg_out_bundle(s, mmI,
                    INSN_NOP_M,
                    tcg_opc_a6 (TCG_REG_P0, OPC_CMP_EQ_A6, TCG_REG_P6,
                                TCG_REG_P7, TCG_REG_R1, TCG_REG_R3),
-                   INSN_NOP_I);
+                   bswap2);
 }
 
 /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
@@ -1656,7 +1658,8 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     /* Read the TLB entry */
     tcg_out_qemu_tlb(s, addr_reg, s_bits,
                      offsetof(CPUArchState, tlb_table[mem_index][0].addr_read),
-                     offsetof(CPUArchState, tlb_table[mem_index][0].addend));
+                     offsetof(CPUArchState, tlb_table[mem_index][0].addend),
+                     INSN_NOP_I, INSN_NOP_I);
 
     /* P6 is the fast path, and P7 the slow path */
     tcg_out_bundle(s, mLX,
@@ -1727,17 +1730,31 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     static const uint64_t opc_st_m4[4] = {
         OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4
     };
-    int addr_reg, data_reg, mem_index;
+    TCGReg addr_reg, data_reg, store_reg;
+    int mem_index;
+    uint64_t bswap1, bswap2;
     TCGMemOp s_bits;
 
-    data_reg = *args++;
+    store_reg = data_reg = *args++;
     addr_reg = *args++;
     mem_index = *args;
     s_bits = opc & MO_SIZE;
 
+    bswap1 = bswap2 = INSN_NOP_I;
+    if (opc & MO_BSWAP) {
+        store_reg = TCG_REG_R56;
+        bswap1 = tcg_opc_bswap64_i(TCG_REG_P0, store_reg, data_reg);
+        if (s_bits < MO_64) {
+            int shift = 64 - (8 << s_bits);
+            bswap2 = tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11,
+                                 store_reg, store_reg, shift, 63 - shift);
+        }
+    }
+
     tcg_out_qemu_tlb(s, addr_reg, s_bits,
                      offsetof(CPUArchState, tlb_table[mem_index][0].addr_write),
-                     offsetof(CPUArchState, tlb_table[mem_index][0].addend));
+                     offsetof(CPUArchState, tlb_table[mem_index][0].addend),
+                     bswap1, bswap2);
 
     /* P6 is the fast path, and P7 the slow path */
     tcg_out_bundle(s, mLX,
@@ -1752,63 +1769,14 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                                TCG_REG_R3, TCG_REG_R57),
                    tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6,
                                TCG_REG_R3, 0));
-
-    switch (opc) {
-    case MO_8:
-    case MO_16:
-    case MO_32:
-    case MO_64:
-        tcg_out_bundle(s, mii,
-                       tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
-                                   TCG_REG_R1, TCG_REG_R2),
-                       tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg),
-                       INSN_NOP_I);
-        break;
-
-    case MO_16 | MO_BSWAP:
-        tcg_out_bundle(s, miI,
-                       tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
-                                   TCG_REG_R1, TCG_REG_R2),
-                       INSN_NOP_I,
-                       tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
-                                   TCG_REG_R2, data_reg, 15, 15));
-        tcg_out_bundle(s, miI,
-                       tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg),
-                       INSN_NOP_I,
-                       tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R2, TCG_REG_R2));
-        data_reg = TCG_REG_R2;
-        break;
-
-    case MO_32 | MO_BSWAP:
-        tcg_out_bundle(s, miI,
-                       tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
-                                   TCG_REG_R1, TCG_REG_R2),
-                       INSN_NOP_I,
-                       tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
-                                   TCG_REG_R2, data_reg, 31, 31));
-        tcg_out_bundle(s, miI,
-                       tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg),
-                       INSN_NOP_I,
-                       tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R2, TCG_REG_R2));
-        data_reg = TCG_REG_R2;
-        break;
-
-    case MO_64 | MO_BSWAP:
-        tcg_out_bundle(s, miI,
-                       tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
-                                   TCG_REG_R1, TCG_REG_R2),
-                       tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg),
-                       tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R2, data_reg));
-        data_reg = TCG_REG_R2;
-        break;
-
-    default:
-        tcg_abort();
-    }
-
+    tcg_out_bundle(s, mii,
+                   tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
+                               TCG_REG_R1, TCG_REG_R2),
+                   tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg),
+                   INSN_NOP_I);
     tcg_out_bundle(s, miB,
-                   tcg_opc_m4 (TCG_REG_P6, opc_st_m4[opc],
-                               data_reg, TCG_REG_R3),
+                   tcg_opc_m4 (TCG_REG_P6, opc_st_m4[s_bits],
+                               store_reg, TCG_REG_R3),
                    tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R59, mem_index),
                    tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
                                TCG_REG_B0, TCG_REG_B6));
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [Qemu-devel] [PATCH 16/19] tcg-ia64: Move tlb addend load into tlb read
  2013-09-06  6:50 [Qemu-devel] [PATCH 00/19] tcg-ia64 fixes and improvements Richard Henderson
                   ` (14 preceding siblings ...)
  2013-09-06  6:50 ` [Qemu-devel] [PATCH 15/19] tcg-ia64: Move bswap for store into " Richard Henderson
@ 2013-09-06  6:50 ` Richard Henderson
  2013-09-06  6:50 ` [Qemu-devel] [PATCH 17/19] tcg-i64: Reduce code duplication in tcg_out_qemu_ld Richard Henderson
                   ` (2 subsequent siblings)
  18 siblings, 0 replies; 20+ messages in thread
From: Richard Henderson @ 2013-09-06  6:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien, Richard Henderson

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index 23be46c..819bca3 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -1569,7 +1569,7 @@ QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
                   > 0x1fffff)
 
 /* Load and compare a TLB entry, and return the result in (p6, p7).
-   R2 is loaded with the address of the addend TLB entry.
+   R2 is loaded with the addend TLB entry.
    R57 is loaded with the address, zero extented on 32-bit targets.
    R1, R3 are clobbered, leaving R56 free for...
    BSWAP_1, BSWAP_2 and I-slot insns for swapping data for store.  */
@@ -1625,7 +1625,7 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg,
                                TCG_REG_R2, off_add - off_rw),
                    bswap1);
     tcg_out_bundle(s, mmI,
-                   INSN_NOP_M,
+                   tcg_opc_m1 (TCG_REG_P0, OPC_LD8_M1, TCG_REG_R2, TCG_REG_R2),
                    tcg_opc_a6 (TCG_REG_P0, OPC_CMP_EQ_A6, TCG_REG_P6,
                                TCG_REG_P7, TCG_REG_R1, TCG_REG_R3),
                    bswap2);
@@ -1668,30 +1668,30 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                    tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2,
                                (tcg_target_long) qemu_ld_helpers[s_bits]));
     tcg_out_bundle(s, MmI,
-                   tcg_opc_m3 (TCG_REG_P0, OPC_LD8_M3, TCG_REG_R3,
+                   tcg_opc_m3 (TCG_REG_P7, OPC_LD8_M3, TCG_REG_R3,
                                TCG_REG_R2, 8),
-                   tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R3,
-                               TCG_REG_R3, TCG_REG_R57),
+                   tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2,
+                               TCG_REG_R2, TCG_REG_R57),
                    tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6,
                                TCG_REG_R3, 0));
     if (bswap && s_bits == MO_16) {
         tcg_out_bundle(s, MmI,
                        tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits],
-                                   TCG_REG_R8, TCG_REG_R3),
+                                   TCG_REG_R8, TCG_REG_R2),
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
                        tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
                                    TCG_REG_R8, TCG_REG_R8, 15, 15));
     } else if (bswap && s_bits == MO_32) {
         tcg_out_bundle(s, MmI,
                        tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits],
-                                   TCG_REG_R8, TCG_REG_R3),
+                                   TCG_REG_R8, TCG_REG_R2),
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
                        tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
                                    TCG_REG_R8, TCG_REG_R8, 31, 31));
     } else {
         tcg_out_bundle(s, mmI,
                        tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits],
-                                   TCG_REG_R8, TCG_REG_R3),
+                                   TCG_REG_R8, TCG_REG_R2),
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
                        INSN_NOP_I);
     }
@@ -1763,10 +1763,10 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                    tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2,
                                (tcg_target_long) qemu_st_helpers[s_bits]));
     tcg_out_bundle(s, MmI,
-                   tcg_opc_m3 (TCG_REG_P0, OPC_LD8_M3, TCG_REG_R3,
+                   tcg_opc_m3 (TCG_REG_P7, OPC_LD8_M3, TCG_REG_R3,
                                TCG_REG_R2, 8),
-                   tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R3,
-                               TCG_REG_R3, TCG_REG_R57),
+                   tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2,
+                               TCG_REG_R2, TCG_REG_R57),
                    tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6,
                                TCG_REG_R3, 0));
     tcg_out_bundle(s, mii,
@@ -1776,7 +1776,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                    INSN_NOP_I);
     tcg_out_bundle(s, miB,
                    tcg_opc_m4 (TCG_REG_P6, opc_st_m4[s_bits],
-                               store_reg, TCG_REG_R3),
+                               store_reg, TCG_REG_R2),
                    tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R59, mem_index),
                    tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
                                TCG_REG_B0, TCG_REG_B6));
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [Qemu-devel] [PATCH 17/19] tcg-i64: Reduce code duplication in tcg_out_qemu_ld
  2013-09-06  6:50 [Qemu-devel] [PATCH 00/19] tcg-ia64 fixes and improvements Richard Henderson
                   ` (15 preceding siblings ...)
  2013-09-06  6:50 ` [Qemu-devel] [PATCH 16/19] tcg-ia64: Move tlb addend load into tlb read Richard Henderson
@ 2013-09-06  6:50 ` Richard Henderson
  2013-09-06  6:50 ` [Qemu-devel] [PATCH 18/19] tcg-ia64: Convert to new ldst helpers Richard Henderson
  2013-09-06  6:50 ` [Qemu-devel] [PATCH 19/19] tcg-ia64: Move part of softmmu slow path out of line Richard Henderson
  18 siblings, 0 replies; 20+ messages in thread
From: Richard Henderson @ 2013-09-06  6:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien, Richard Henderson

The only differences were in the bswap insns emitted.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 60 ++++++++++++++++++++-------------------------------
 1 file changed, 23 insertions(+), 37 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index 819bca3..6355f32 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -1647,13 +1647,13 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
         OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1
     };
     int addr_reg, data_reg, mem_index;
-    TCGMemOp s_bits, bswap;
+    TCGMemOp s_bits;
+    uint64_t bswap1, bswap2;
 
     data_reg = *args++;
     addr_reg = *args++;
     mem_index = *args;
     s_bits = opc & MO_SIZE;
-    bswap = opc & MO_BSWAP;
 
     /* Read the TLB entry */
     tcg_out_qemu_tlb(s, addr_reg, s_bits,
@@ -1662,6 +1662,17 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                      INSN_NOP_I, INSN_NOP_I);
 
     /* P6 is the fast path, and P7 the slow path */
+
+    bswap1 = bswap2 = INSN_NOP_I;
+    if (opc & MO_BSWAP) {
+        bswap1 = tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R8, TCG_REG_R8);
+        if (s_bits < MO_64) {
+            int shift = 64 - (8 << s_bits);
+            bswap2 = tcg_opc_i11(TCG_REG_P6, OPC_EXTR_U_I11,
+                                 TCG_REG_R8, TCG_REG_R8, shift, 63 - shift);
+        }
+    }
+
     tcg_out_bundle(s, mLX,
                    tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0),
                    tcg_opc_l2 ((tcg_target_long) qemu_ld_helpers[s_bits]),
@@ -1674,41 +1685,16 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                                TCG_REG_R2, TCG_REG_R57),
                    tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6,
                                TCG_REG_R3, 0));
-    if (bswap && s_bits == MO_16) {
-        tcg_out_bundle(s, MmI,
-                       tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits],
-                                   TCG_REG_R8, TCG_REG_R2),
-                       tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
-                       tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
-                                   TCG_REG_R8, TCG_REG_R8, 15, 15));
-    } else if (bswap && s_bits == MO_32) {
-        tcg_out_bundle(s, MmI,
-                       tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits],
-                                   TCG_REG_R8, TCG_REG_R2),
-                       tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
-                       tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
-                                   TCG_REG_R8, TCG_REG_R8, 31, 31));
-    } else {
-        tcg_out_bundle(s, mmI,
-                       tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits],
-                                   TCG_REG_R8, TCG_REG_R2),
-                       tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
-                       INSN_NOP_I);
-    }
-    if (!bswap) {
-        tcg_out_bundle(s, miB,
-                       tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index),
-                       INSN_NOP_I,
-                       tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
-                                   TCG_REG_B0, TCG_REG_B6));
-    } else {
-        tcg_out_bundle(s, miB,
-                       tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index),
-                       tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R8, TCG_REG_R8),
-                       tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
-                                   TCG_REG_B0, TCG_REG_B6));
-    }
-
+    tcg_out_bundle(s, MmI,
+                   tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits],
+                               TCG_REG_R8, TCG_REG_R2),
+                   tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
+                   bswap1);
+    tcg_out_bundle(s, miB,
+                   tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index),
+                   bswap2,
+                   tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
+                               TCG_REG_B0, TCG_REG_B6));
     tcg_out_bundle(s, miI,
                    INSN_NOP_M,
                    INSN_NOP_I,
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [Qemu-devel] [PATCH 18/19] tcg-ia64: Convert to new ldst helpers
  2013-09-06  6:50 [Qemu-devel] [PATCH 00/19] tcg-ia64 fixes and improvements Richard Henderson
                   ` (16 preceding siblings ...)
  2013-09-06  6:50 ` [Qemu-devel] [PATCH 17/19] tcg-i64: Reduce code duplication in tcg_out_qemu_ld Richard Henderson
@ 2013-09-06  6:50 ` Richard Henderson
  2013-09-06  6:50 ` [Qemu-devel] [PATCH 19/19] tcg-ia64: Move part of softmmu slow path out of line Richard Henderson
  18 siblings, 0 replies; 20+ messages in thread
From: Richard Henderson @ 2013-09-06  6:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien, Richard Henderson

Still inline, but updated to the new routines.  Always use the LE
helpers, reusing the bswap between the fast and slot paths.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 140 ++++++++++++++++++++++++++++----------------------
 1 file changed, 79 insertions(+), 61 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index 6355f32..ea24e83 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -223,6 +223,7 @@ enum {
     OPC_BR_CALL_SPTK_MANY_B5  = 0x02100001000ull,
     OPC_BR_RET_SPTK_MANY_B4   = 0x00108001100ull,
     OPC_BRL_SPTK_MANY_X3      = 0x18000001000ull,
+    OPC_BRL_CALL_SPNT_MANY_X4 = 0x1a200001000ull,
     OPC_BRL_CALL_SPTK_MANY_X4 = 0x1a000001000ull,
     OPC_CMP_LT_A6             = 0x18000000000ull,
     OPC_CMP_LTU_A6            = 0x1a000000000ull,
@@ -813,6 +814,7 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
 #if defined(CONFIG_SOFTMMU)
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R56);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R57);
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R58);
 #endif
         break;
     case 'Z':
@@ -1632,12 +1634,12 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg,
 }
 
 /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
-   int mmu_idx) */
+   int mmu_idx, uintptr_t retaddr) */
 static const void * const qemu_ld_helpers[4] = {
-    helper_ldb_mmu,
-    helper_ldw_mmu,
-    helper_ldl_mmu,
-    helper_ldq_mmu,
+    helper_ret_ldub_mmu,
+    helper_le_lduw_mmu,
+    helper_le_ldul_mmu,
+    helper_le_ldq_mmu,
 };
 
 static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
@@ -1648,7 +1650,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     };
     int addr_reg, data_reg, mem_index;
     TCGMemOp s_bits;
-    uint64_t bswap1, bswap2;
+    uint64_t fin1, fin2, *desc, func, gp, here;
 
     data_reg = *args++;
     addr_reg = *args++;
@@ -1663,51 +1665,59 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
 
     /* P6 is the fast path, and P7 the slow path */
 
-    bswap1 = bswap2 = INSN_NOP_I;
+    fin2 = 0;
     if (opc & MO_BSWAP) {
-        bswap1 = tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R8, TCG_REG_R8);
+        fin1 = tcg_opc_bswap64_i(TCG_REG_P0, data_reg, TCG_REG_R8);
         if (s_bits < MO_64) {
             int shift = 64 - (8 << s_bits);
-            bswap2 = tcg_opc_i11(TCG_REG_P6, OPC_EXTR_U_I11,
-                                 TCG_REG_R8, TCG_REG_R8, shift, 63 - shift);
+            fin2 = tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11,
+                               data_reg, data_reg, shift, 63 - shift);
         }
+    } else {
+        fin1 = tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, TCG_REG_R8);
     }
 
-    tcg_out_bundle(s, mLX,
+    desc = (uintptr_t *)qemu_ld_helpers[s_bits];
+    func = desc[0];
+    gp = desc[1];
+    here = (uintptr_t)s->code_ptr;
+
+    tcg_out_bundle(s, mlx,
                    tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0),
-                   tcg_opc_l2 ((tcg_target_long) qemu_ld_helpers[s_bits]),
-                   tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2,
-                               (tcg_target_long) qemu_ld_helpers[s_bits]));
-    tcg_out_bundle(s, MmI,
-                   tcg_opc_m3 (TCG_REG_P7, OPC_LD8_M3, TCG_REG_R3,
-                               TCG_REG_R2, 8),
+                   tcg_opc_l2 (here),
+                   tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R59, here));
+    tcg_out_bundle(s, mLX,
                    tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2,
                                TCG_REG_R2, TCG_REG_R57),
-                   tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6,
-                               TCG_REG_R3, 0));
-    tcg_out_bundle(s, MmI,
+                   tcg_opc_l2 (gp),
+                   tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R1, gp));
+    tcg_out_bundle(s, mmi,
                    tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits],
                                TCG_REG_R8, TCG_REG_R2),
-                   tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
-                   bswap1);
-    tcg_out_bundle(s, miB,
                    tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index),
-                   bswap2,
-                   tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
-                               TCG_REG_B0, TCG_REG_B6));
-    tcg_out_bundle(s, miI,
+                   INSN_NOP_I);
+    func -= (uintptr_t)s->code_ptr;
+    tcg_out_bundle(s, mLX,
                    INSN_NOP_M,
-                   INSN_NOP_I,
-                   tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, TCG_REG_R8));
+                   tcg_opc_l4 (func >> 4),
+                   tcg_opc_x4 (TCG_REG_P7, OPC_BRL_CALL_SPNT_MANY_X4,
+                               TCG_REG_B0, func >> 4));
+
+    /* Note that we always use LE helper functions, so the bswap insns
+       here for the fast path also apply to the slow path.  */
+    tcg_out_bundle(s, (fin2 ? mII : miI),
+                   INSN_NOP_M,
+                   fin1,
+                   fin2 ? fin2 : INSN_NOP_I);
 }
 
 /* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
-   uintxx_t val, int mmu_idx) */
+   uintxx_t val, int mmu_idx, uintptr_t retaddr) */
 static const void * const qemu_st_helpers[4] = {
-    helper_stb_mmu,
-    helper_stw_mmu,
-    helper_stl_mmu,
-    helper_stq_mmu,
+    helper_ret_stb_mmu,
+    helper_le_stw_mmu,
+    helper_le_stl_mmu,
+    helper_le_stq_mmu,
 };
 
 static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
@@ -1716,56 +1726,64 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     static const uint64_t opc_st_m4[4] = {
         OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4
     };
-    TCGReg addr_reg, data_reg, store_reg;
+    TCGReg addr_reg, data_reg;
     int mem_index;
-    uint64_t bswap1, bswap2;
+    uint64_t pre1, pre2, *desc, func, gp, here;
     TCGMemOp s_bits;
 
-    store_reg = data_reg = *args++;
+    data_reg = *args++;
     addr_reg = *args++;
     mem_index = *args;
     s_bits = opc & MO_SIZE;
 
-    bswap1 = bswap2 = INSN_NOP_I;
+    /* Note that we always use LE helper functions, so the bswap insns
+       that are here for the fast path also apply to the slow path,
+       and move the data into the argument register.  */
+    pre2 = INSN_NOP_I;
     if (opc & MO_BSWAP) {
-        store_reg = TCG_REG_R56;
-        bswap1 = tcg_opc_bswap64_i(TCG_REG_P0, store_reg, data_reg);
+        pre1 = tcg_opc_bswap64_i(TCG_REG_P0, TCG_REG_R58, data_reg);
         if (s_bits < MO_64) {
             int shift = 64 - (8 << s_bits);
-            bswap2 = tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11,
-                                 store_reg, store_reg, shift, 63 - shift);
+            pre2 = tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11,
+                               TCG_REG_R58, TCG_REG_R58, shift, 63 - shift);
         }
+    } else {
+        /* Just move the data into place for the slow path.  */
+        pre1 = tcg_opc_ext_i(TCG_REG_P0, opc, TCG_REG_R58, data_reg);
     }
 
     tcg_out_qemu_tlb(s, addr_reg, s_bits,
                      offsetof(CPUArchState, tlb_table[mem_index][0].addr_write),
                      offsetof(CPUArchState, tlb_table[mem_index][0].addend),
-                     bswap1, bswap2);
+                     pre1, pre2);
 
     /* P6 is the fast path, and P7 the slow path */
-    tcg_out_bundle(s, mLX,
+
+    desc = (uintptr_t *)qemu_st_helpers[s_bits];
+    func = desc[0];
+    gp = desc[1];
+    here = (uintptr_t)s->code_ptr;
+
+    tcg_out_bundle(s, mlx,
                    tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0),
-                   tcg_opc_l2 ((tcg_target_long) qemu_st_helpers[s_bits]),
-                   tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2,
-                               (tcg_target_long) qemu_st_helpers[s_bits]));
-    tcg_out_bundle(s, MmI,
-                   tcg_opc_m3 (TCG_REG_P7, OPC_LD8_M3, TCG_REG_R3,
-                               TCG_REG_R2, 8),
+                   tcg_opc_l2 (here),
+                   tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R60, here));
+    tcg_out_bundle(s, mLX,
                    tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2,
                                TCG_REG_R2, TCG_REG_R57),
-                   tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6,
-                               TCG_REG_R3, 0));
-    tcg_out_bundle(s, mii,
-                   tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
-                               TCG_REG_R1, TCG_REG_R2),
-                   tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg),
-                   INSN_NOP_I);
-    tcg_out_bundle(s, miB,
+                   tcg_opc_l2 (gp),
+                   tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R1, gp));
+    tcg_out_bundle(s, mmi,
                    tcg_opc_m4 (TCG_REG_P6, opc_st_m4[s_bits],
-                               store_reg, TCG_REG_R2),
+                               TCG_REG_R58, TCG_REG_R2),
                    tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R59, mem_index),
-                   tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
-                               TCG_REG_B0, TCG_REG_B6));
+                   INSN_NOP_I);
+    func -= (uintptr_t)s->code_ptr;
+    tcg_out_bundle(s, mLX,
+                   INSN_NOP_M,
+                   tcg_opc_l4 (func >> 4),
+                   tcg_opc_x4 (TCG_REG_P7, OPC_BRL_CALL_SPNT_MANY_X4,
+                               TCG_REG_B0, func >> 4));
 }
 
 #else /* !CONFIG_SOFTMMU */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [Qemu-devel] [PATCH 19/19] tcg-ia64: Move part of softmmu slow path out of line
  2013-09-06  6:50 [Qemu-devel] [PATCH 00/19] tcg-ia64 fixes and improvements Richard Henderson
                   ` (17 preceding siblings ...)
  2013-09-06  6:50 ` [Qemu-devel] [PATCH 18/19] tcg-ia64: Convert to new ldst helpers Richard Henderson
@ 2013-09-06  6:50 ` Richard Henderson
  18 siblings, 0 replies; 20+ messages in thread
From: Richard Henderson @ 2013-09-06  6:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien, Richard Henderson

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 156 +++++++++++++++++++++++++++++++-------------------
 tcg/ia64/tcg-target.h |   2 +-
 2 files changed, 97 insertions(+), 61 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index ea24e83..9fd176d 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -219,6 +219,7 @@ enum {
     OPC_ALLOC_M34             = 0x02c00000000ull,
     OPC_BR_DPTK_FEW_B1        = 0x08400000000ull,
     OPC_BR_SPTK_MANY_B1       = 0x08000001000ull,
+    OPC_BR_CALL_SPNT_FEW_B3   = 0x0a200000000ull,
     OPC_BR_SPTK_MANY_B4       = 0x00100001000ull,
     OPC_BR_CALL_SPTK_MANY_B5  = 0x02100001000ull,
     OPC_BR_RET_SPTK_MANY_B4   = 0x00108001100ull,
@@ -355,6 +356,15 @@ static inline uint64_t tcg_opc_b1(int qp, uint64_t opc, uint64_t imm)
            | (qp & 0x3f);
 }
 
+static inline uint64_t tcg_opc_b3(int qp, uint64_t opc, int b1, uint64_t imm)
+{
+    return opc
+           | ((imm & 0x100000) << 16) /* s */
+           | ((imm & 0x0fffff) << 13) /* imm20b */
+           | ((b1 & 0x7) << 6)
+           | (qp & 0x3f);
+}
+
 static inline uint64_t tcg_opc_b4(int qp, uint64_t opc, int b2)
 {
     return opc
@@ -1633,14 +1643,70 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg,
                    bswap2);
 }
 
-/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
-   int mmu_idx, uintptr_t retaddr) */
-static const void * const qemu_ld_helpers[4] = {
-    helper_ret_ldub_mmu,
-    helper_le_lduw_mmu,
-    helper_le_ldul_mmu,
-    helper_le_ldq_mmu,
-};
+static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOp opc,
+                                uint8_t *label_ptr)
+{
+    TCGLabelQemuLdst *l = &s->qemu_ldst_labels[s->nb_qemu_ldst_labels++];
+
+    assert(s->nb_qemu_ldst_labels <= TCG_MAX_QEMU_LDST);
+
+    /* We don't need most of the items in the generic structure.  */
+    memset(l, 0, sizeof(*l));
+    l->is_ld = is_ld;
+    l->opc = opc & MO_SIZE;
+    l->label_ptr[0] = label_ptr;
+}
+
+void tcg_out_tb_finalize(TCGContext *s)
+{
+    static const void * const helpers[8] = {
+        helper_ret_stb_mmu,
+        helper_le_stw_mmu,
+        helper_le_stl_mmu,
+        helper_le_stq_mmu,
+        helper_ret_ldub_mmu,
+        helper_le_lduw_mmu,
+        helper_le_ldul_mmu,
+        helper_le_ldq_mmu,
+    };
+    uintptr_t thunks[8] = { };
+    size_t i, n = s->nb_qemu_ldst_labels;
+
+    for (i = 0; i < n; i++) {
+        TCGLabelQemuLdst *l = &s->qemu_ldst_labels[i];
+        long x = l->is_ld * 4 + l->opc;
+        uintptr_t dest = thunks[x];
+
+        /* The out-of-line thunks are all the same; load the return address
+           from B0, load the GP, and branch to the code.  Note that we are
+           always post-call, so the register window has rolled, so we're
+           using incomming parameter register numbers, not outgoing.  */
+        if (dest == 0) {
+            uintptr_t disp, *desc = (uintptr_t *)helpers[x];
+
+            thunks[x] = dest = (uintptr_t)s->code_ptr;
+
+            tcg_out_bundle(s, mlx,
+                           INSN_NOP_M,
+                           tcg_opc_l2 (desc[1]),
+                           tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2,
+                                       TCG_REG_R1, desc[1]));
+            tcg_out_bundle(s, mii,
+                           INSN_NOP_M,
+                           INSN_NOP_I,
+                           tcg_opc_i22(TCG_REG_P0, OPC_MOV_I22,
+                                       l->is_ld ? TCG_REG_R35 : TCG_REG_R36,
+                                       TCG_REG_B0));
+            disp = (desc[0] - (uintptr_t)s->code_ptr) >> 4;
+            tcg_out_bundle(s, mLX,
+                           INSN_NOP_M,
+                           tcg_opc_l3 (disp),
+                           tcg_opc_x3 (TCG_REG_P0, OPC_BRL_SPTK_MANY_X3, disp));
+        }
+
+        reloc_pcrel21b(l->label_ptr[0], dest);
+    }
+}
 
 static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                                    TCGMemOp opc)
@@ -1650,7 +1716,8 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     };
     int addr_reg, data_reg, mem_index;
     TCGMemOp s_bits;
-    uint64_t fin1, fin2, *desc, func, gp, here;
+    uint64_t fin1, fin2;
+    uint8_t *label_ptr;
 
     data_reg = *args++;
     addr_reg = *args++;
@@ -1677,31 +1744,20 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
         fin1 = tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, TCG_REG_R8);
     }
 
-    desc = (uintptr_t *)qemu_ld_helpers[s_bits];
-    func = desc[0];
-    gp = desc[1];
-    here = (uintptr_t)s->code_ptr;
-
-    tcg_out_bundle(s, mlx,
+    tcg_out_bundle(s, mmI,
                    tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0),
-                   tcg_opc_l2 (here),
-                   tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R59, here));
-    tcg_out_bundle(s, mLX,
                    tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2,
                                TCG_REG_R2, TCG_REG_R57),
-                   tcg_opc_l2 (gp),
-                   tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R1, gp));
-    tcg_out_bundle(s, mmi,
+                   tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index));
+    label_ptr = s->code_ptr + 2;
+    tcg_out_bundle(s, miB,
                    tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits],
                                TCG_REG_R8, TCG_REG_R2),
-                   tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index),
-                   INSN_NOP_I);
-    func -= (uintptr_t)s->code_ptr;
-    tcg_out_bundle(s, mLX,
-                   INSN_NOP_M,
-                   tcg_opc_l4 (func >> 4),
-                   tcg_opc_x4 (TCG_REG_P7, OPC_BRL_CALL_SPNT_MANY_X4,
-                               TCG_REG_B0, func >> 4));
+                   INSN_NOP_I,
+                   tcg_opc_b3 (TCG_REG_P7, OPC_BR_CALL_SPNT_FEW_B3, TCG_REG_B0,
+                               get_reloc_pcrel21b(label_ptr)));
+
+    add_qemu_ldst_label(s, 1, opc, label_ptr);
 
     /* Note that we always use LE helper functions, so the bswap insns
        here for the fast path also apply to the slow path.  */
@@ -1711,15 +1767,6 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                    fin2 ? fin2 : INSN_NOP_I);
 }
 
-/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
-   uintxx_t val, int mmu_idx, uintptr_t retaddr) */
-static const void * const qemu_st_helpers[4] = {
-    helper_ret_stb_mmu,
-    helper_le_stw_mmu,
-    helper_le_stl_mmu,
-    helper_le_stq_mmu,
-};
-
 static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                                    TCGMemOp opc)
 {
@@ -1728,8 +1775,9 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     };
     TCGReg addr_reg, data_reg;
     int mem_index;
-    uint64_t pre1, pre2, *desc, func, gp, here;
+    uint64_t pre1, pre2;
     TCGMemOp s_bits;
+    uint8_t *label_ptr;
 
     data_reg = *args++;
     addr_reg = *args++;
@@ -1758,32 +1806,20 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                      pre1, pre2);
 
     /* P6 is the fast path, and P7 the slow path */
-
-    desc = (uintptr_t *)qemu_st_helpers[s_bits];
-    func = desc[0];
-    gp = desc[1];
-    here = (uintptr_t)s->code_ptr;
-
-    tcg_out_bundle(s, mlx,
+    tcg_out_bundle(s, mmI,
                    tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0),
-                   tcg_opc_l2 (here),
-                   tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R60, here));
-    tcg_out_bundle(s, mLX,
                    tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2,
                                TCG_REG_R2, TCG_REG_R57),
-                   tcg_opc_l2 (gp),
-                   tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R1, gp));
-    tcg_out_bundle(s, mmi,
+                   tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R59, mem_index));
+    label_ptr = s->code_ptr + 2;
+    tcg_out_bundle(s, miB,
                    tcg_opc_m4 (TCG_REG_P6, opc_st_m4[s_bits],
                                TCG_REG_R58, TCG_REG_R2),
-                   tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R59, mem_index),
-                   INSN_NOP_I);
-    func -= (uintptr_t)s->code_ptr;
-    tcg_out_bundle(s, mLX,
-                   INSN_NOP_M,
-                   tcg_opc_l4 (func >> 4),
-                   tcg_opc_x4 (TCG_REG_P7, OPC_BRL_CALL_SPNT_MANY_X4,
-                               TCG_REG_B0, func >> 4));
+                   INSN_NOP_I,
+                   tcg_opc_b3 (TCG_REG_P7, OPC_BR_CALL_SPNT_FEW_B3, TCG_REG_B0,
+                               get_reloc_pcrel21b(label_ptr)));
+
+    add_qemu_ldst_label(s, 0, opc, label_ptr);
 }
 
 #else /* !CONFIG_SOFTMMU */
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index 65897f2..9e1e8ba 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -25,7 +25,7 @@
 #ifndef TCG_TARGET_IA64 
 #define TCG_TARGET_IA64 1
 
-#undef TCG_QEMU_LDST_OPTIMIZATION
+#define TCG_QEMU_LDST_OPTIMIZATION
 
 /* We only map the first 64 registers */
 #define TCG_TARGET_NB_REGS 64
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2013-09-06  6:52 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-09-06  6:50 [Qemu-devel] [PATCH 00/19] tcg-ia64 fixes and improvements Richard Henderson
2013-09-06  6:50 ` [Qemu-devel] [PATCH 01/19] tcg-ia64: Use TCGMemOp within qemu_ldst routines Richard Henderson
2013-09-06  6:50 ` [Qemu-devel] [PATCH 02/19] tcg-ia64: Use shortcuts for nop insns Richard Henderson
2013-09-06  6:50 ` [Qemu-devel] [PATCH 03/19] tcg-ia64: Handle constant calls Richard Henderson
2013-09-06  6:50 ` [Qemu-devel] [PATCH 04/19] tcg-ia64: Simplify brcond Richard Henderson
2013-09-06  6:50 ` [Qemu-devel] [PATCH 05/19] tcg-ia64: Move AREG0 to R32 Richard Henderson
2013-09-06  6:50 ` [Qemu-devel] [PATCH 06/19] tcg-ia64: Avoid unnecessary stop bit in tcg_out_alu Richard Henderson
2013-09-06  6:50 ` [Qemu-devel] [PATCH 07/19] tcg-ia64: Use ADDS for small addition Richard Henderson
2013-09-06  6:50 ` [Qemu-devel] [PATCH 08/19] tcg-ia64: Use SUB_A3 and ADDS_A4 for subtraction Richard Henderson
2013-09-06  6:50 ` [Qemu-devel] [PATCH 09/19] tcg-ia64: Use A3 form of logical operations Richard Henderson
2013-09-06  6:50 ` [Qemu-devel] [PATCH 10/19] tcg-ia64 Introduce tcg_opc_mov_a Richard Henderson
2013-09-06  6:50 ` [Qemu-devel] [PATCH 11/19] tcg-ia64 Introduce tcg_opc_movi_a Richard Henderson
2013-09-06  6:50 ` [Qemu-devel] [PATCH 12/19] tcg-ia64 Introduce tcg_opc_ext_i Richard Henderson
2013-09-06  6:50 ` [Qemu-devel] [PATCH 13/19] tcg-ia64 Introduce tcg_opc_bswap64_i Richard Henderson
2013-09-06  6:50 ` [Qemu-devel] [PATCH 14/19] tcg-ia64: Re-bundle the tlb load Richard Henderson
2013-09-06  6:50 ` [Qemu-devel] [PATCH 15/19] tcg-ia64: Move bswap for store into " Richard Henderson
2013-09-06  6:50 ` [Qemu-devel] [PATCH 16/19] tcg-ia64: Move tlb addend load into tlb read Richard Henderson
2013-09-06  6:50 ` [Qemu-devel] [PATCH 17/19] tcg-i64: Reduce code duplication in tcg_out_qemu_ld Richard Henderson
2013-09-06  6:50 ` [Qemu-devel] [PATCH 18/19] tcg-ia64: Convert to new ldst helpers Richard Henderson
2013-09-06  6:50 ` [Qemu-devel] [PATCH 19/19] tcg-ia64: Move part of softmmu slow path out of line Richard Henderson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).