qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Richard Henderson <rth@twiddle.net>
To: qemu-devel@nongnu.org
Cc: aliguori@amazon.com, aurelien@aurel32.net
Subject: [Qemu-devel] [PATCH 19/20] tcg-ia64: Move part of softmmu slow path out of line
Date: Thu, 31 Oct 2013 13:22:08 -0700	[thread overview]
Message-ID: <1383250929-10288-20-git-send-email-rth@twiddle.net> (raw)
In-Reply-To: <1383250929-10288-1-git-send-email-rth@twiddle.net>

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 176 ++++++++++++++++++++++++++++++++------------------
 1 file changed, 114 insertions(+), 62 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index c881a67..a9dd153 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -23,8 +23,6 @@
  * THE SOFTWARE.
  */
 
-#include "tcg-be-null.h"
-
 /*
  * Register definitions
  */
@@ -221,6 +219,7 @@ enum {
     OPC_ALLOC_M34             = 0x02c00000000ull,
     OPC_BR_DPTK_FEW_B1        = 0x08400000000ull,
     OPC_BR_SPTK_MANY_B1       = 0x08000001000ull,
+    OPC_BR_CALL_SPNT_FEW_B3   = 0x0a200000000ull,
     OPC_BR_SPTK_MANY_B4       = 0x00100001000ull,
     OPC_BR_CALL_SPTK_MANY_B5  = 0x02100001000ull,
     OPC_BR_RET_SPTK_MANY_B4   = 0x00108001100ull,
@@ -357,6 +356,15 @@ static inline uint64_t tcg_opc_b1(int qp, uint64_t opc, uint64_t imm)
            | (qp & 0x3f);
 }
 
+static inline uint64_t tcg_opc_b3(int qp, uint64_t opc, int b1, uint64_t imm)
+{
+    return opc                        /* base opcode bits, OR-ed with fields */
+           | ((imm & 0x100000) << 16) /* s: imm bit 20 -> bit 36 */
+           | ((imm & 0x0fffff) << 13) /* imm20b: imm bits 0..19 -> bit 13 up */
+           | ((b1 & 0x7) << 6)        /* b1: low 3 bits, placed at bit 6 */
+           | (qp & 0x3f);             /* qp: low 6 bits, placed at bit 0 */
+}
+
 static inline uint64_t tcg_opc_b4(int qp, uint64_t opc, int b2)
 {
     return opc
@@ -1627,14 +1635,87 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg,
                    bswap2);
 }
 
-/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
-   int mmu_idx, uintptr_t retaddr) */
-static const void * const qemu_ld_helpers[4] = {
-    helper_ret_ldub_mmu,
-    helper_le_lduw_mmu,
-    helper_le_ldul_mmu,
-    helper_le_ldq_mmu,
-};
+#define TCG_MAX_QEMU_LDST       640   /* capacity of ldst_labels[] */
+
+typedef struct TCGLabelQemuLdst {
+    bool is_ld;             /* true for a load, false for a store */
+    TCGMemOp size;          /* opc & MO_SIZE; with is_ld picks the helper */
+    uint8_t *label_ptr;     /* branch to be relocated in tb_finalize */
+} TCGLabelQemuLdst;
+
+typedef struct TCGBackendData {
+    int nb_ldst_labels;     /* number of ldst_labels[] entries in use */
+    TCGLabelQemuLdst ldst_labels[TCG_MAX_QEMU_LDST];
+} TCGBackendData;
+
+static inline void tcg_out_tb_init(TCGContext *s)
+{
+    s->be->nb_ldst_labels = 0;  /* drop all previously recorded labels */
+}
+
+static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc,
+                                uint8_t *label_ptr)
+{
+    TCGBackendData *be = s->be;
+    TCGLabelQemuLdst *l = &be->ldst_labels[be->nb_ldst_labels++];
+
+    assert(be->nb_ldst_labels <= TCG_MAX_QEMU_LDST); /* before writing *l */
+    l->is_ld = is_ld;
+    l->size = opc & MO_SIZE;    /* with is_ld, indexes helpers[] later */
+    l->label_ptr = label_ptr;   /* patched by tcg_out_tb_finalize */
+}
+
+static void tcg_out_tb_finalize(TCGContext *s)
+{
+    static const void * const helpers[8] = {    /* index: is_ld * 4 + size */
+        helper_ret_stb_mmu,
+        helper_le_stw_mmu,
+        helper_le_stl_mmu,
+        helper_le_stq_mmu,
+        helper_ret_ldub_mmu,
+        helper_le_lduw_mmu,
+        helper_le_ldul_mmu,
+        helper_le_ldq_mmu,
+    };
+    uintptr_t thunks[8] = { };  /* thunk address per helper; 0 = not emitted */
+    TCGBackendData *be = s->be;
+    size_t i, n = be->nb_ldst_labels;
+
+    for (i = 0; i < n; i++) {
+        TCGLabelQemuLdst *l = &be->ldst_labels[i];
+        long x = l->is_ld * 4 + l->size;
+        uintptr_t dest = thunks[x];
+
+        /* The out-of-line thunks are all the same; load the return address
+           from B0, load the GP, and branch to the code.  Note that we are
+           always post-call, so the register window has rolled, so we're
+           using incoming parameter register numbers, not outgoing.  */
+        if (dest == 0) {
+            uintptr_t disp, *desc = (uintptr_t *)helpers[x];
+
+            thunks[x] = dest = (uintptr_t)s->code_ptr;
+
+            tcg_out_bundle(s, mlx,
+                           INSN_NOP_M,
+                           tcg_opc_l2 (desc[1]),    /* desc[1]: helper's GP */
+                           tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2,
+                                       TCG_REG_R1, desc[1]));
+            tcg_out_bundle(s, mii,
+                           INSN_NOP_M,
+                           INSN_NOP_I,
+                           tcg_opc_i22(TCG_REG_P0, OPC_MOV_I22,
+                                       l->is_ld ? TCG_REG_R35 : TCG_REG_R36,
+                                       TCG_REG_B0));
+            disp = (desc[0] - (uintptr_t)s->code_ptr) >> 4; /* desc[0]: entry */
+            tcg_out_bundle(s, mLX,
+                           INSN_NOP_M,
+                           tcg_opc_l3 (disp),
+                           tcg_opc_x3 (TCG_REG_P0, OPC_BRL_SPTK_MANY_X3, disp));
+        }
+
+        reloc_pcrel21b(l->label_ptr, dest);  /* aim recorded branch at thunk */
+    }
+}
 
 static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                                    TCGMemOp opc)
@@ -1644,7 +1725,8 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     };
     int addr_reg, data_reg, mem_index;
     TCGMemOp s_bits;
-    uint64_t fin1, fin2, *desc, func, gp, here;
+    uint64_t fin1, fin2;
+    uint8_t *label_ptr;
 
     data_reg = *args++;
     addr_reg = *args++;
@@ -1671,31 +1753,20 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
         fin1 = tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, TCG_REG_R8);
     }
 
-    desc = (uintptr_t *)qemu_ld_helpers[s_bits];
-    func = desc[0];
-    gp = desc[1];
-    here = (uintptr_t)s->code_ptr;
-
-    tcg_out_bundle(s, mlx,
+    tcg_out_bundle(s, mmI,
                    tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0),
-                   tcg_opc_l2 (here),
-                   tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R59, here));
-    tcg_out_bundle(s, mLX,
                    tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2,
                                TCG_REG_R2, TCG_REG_R57),
-                   tcg_opc_l2 (gp),
-                   tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R1, gp));
-    tcg_out_bundle(s, mmi,
+                   tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index));
+    label_ptr = s->code_ptr + 2;
+    tcg_out_bundle(s, miB,
                    tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits],
                                TCG_REG_R8, TCG_REG_R2),
-                   tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index),
-                   INSN_NOP_I);
-    func -= (uintptr_t)s->code_ptr;
-    tcg_out_bundle(s, mLX,
-                   INSN_NOP_M,
-                   tcg_opc_l4 (func >> 4),
-                   tcg_opc_x4 (TCG_REG_P7, OPC_BRL_CALL_SPNT_MANY_X4,
-                               TCG_REG_B0, func >> 4));
+                   INSN_NOP_I,
+                   tcg_opc_b3 (TCG_REG_P7, OPC_BR_CALL_SPNT_FEW_B3, TCG_REG_B0,
+                               get_reloc_pcrel21b(label_ptr)));
+
+    add_qemu_ldst_label(s, 1, opc, label_ptr);
 
     /* Note that we always use LE helper functions, so the bswap insns
        here for the fast path also apply to the slow path.  */
@@ -1705,15 +1776,6 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                    fin2 ? fin2 : INSN_NOP_I);
 }
 
-/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
-   uintxx_t val, int mmu_idx, uintptr_t retaddr) */
-static const void * const qemu_st_helpers[4] = {
-    helper_ret_stb_mmu,
-    helper_le_stw_mmu,
-    helper_le_stl_mmu,
-    helper_le_stq_mmu,
-};
-
 static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                                    TCGMemOp opc)
 {
@@ -1722,8 +1784,9 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     };
     TCGReg addr_reg, data_reg;
     int mem_index;
-    uint64_t pre1, pre2, *desc, func, gp, here;
+    uint64_t pre1, pre2;
     TCGMemOp s_bits;
+    uint8_t *label_ptr;
 
     data_reg = *args++;
     addr_reg = *args++;
@@ -1752,35 +1815,24 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                      pre1, pre2);
 
     /* P6 is the fast path, and P7 the slow path */
-
-    desc = (uintptr_t *)qemu_st_helpers[s_bits];
-    func = desc[0];
-    gp = desc[1];
-    here = (uintptr_t)s->code_ptr;
-
-    tcg_out_bundle(s, mlx,
+    tcg_out_bundle(s, mmI,
                    tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0),
-                   tcg_opc_l2 (here),
-                   tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R60, here));
-    tcg_out_bundle(s, mLX,
                    tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2,
                                TCG_REG_R2, TCG_REG_R57),
-                   tcg_opc_l2 (gp),
-                   tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R1, gp));
-    tcg_out_bundle(s, mmi,
+                   tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R59, mem_index));
+    label_ptr = s->code_ptr + 2;
+    tcg_out_bundle(s, miB,
                    tcg_opc_m4 (TCG_REG_P6, opc_st_m4[s_bits],
                                TCG_REG_R58, TCG_REG_R2),
-                   tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R59, mem_index),
-                   INSN_NOP_I);
-    func -= (uintptr_t)s->code_ptr;
-    tcg_out_bundle(s, mLX,
-                   INSN_NOP_M,
-                   tcg_opc_l4 (func >> 4),
-                   tcg_opc_x4 (TCG_REG_P7, OPC_BRL_CALL_SPNT_MANY_X4,
-                               TCG_REG_B0, func >> 4));
+                   INSN_NOP_I,
+                   tcg_opc_b3 (TCG_REG_P7, OPC_BR_CALL_SPNT_FEW_B3, TCG_REG_B0,
+                               get_reloc_pcrel21b(label_ptr)));
+
+    add_qemu_ldst_label(s, 0, opc, label_ptr);
 }
 
 #else /* !CONFIG_SOFTMMU */
+# include "tcg-be-null.h"
 
 static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                                    TCGMemOp opc)
-- 
1.8.3.1

  parent reply	other threads:[~2013-10-31 20:22 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-10-31 20:21 [Qemu-devel] [PATCH 00/20] tcg improvements for ia64 Richard Henderson
2013-10-31 20:21 ` [Qemu-devel] [PATCH 01/20] tcg-ia64: Use TCGMemOp within qemu_ldst routines Richard Henderson
2013-11-03 22:55   ` Aurelien Jarno
2013-11-06 22:05     ` Aurelien Jarno
2013-10-31 20:21 ` [Qemu-devel] [PATCH 02/20] tcg-ia64: Use shortcuts for nop insns Richard Henderson
2013-11-03 22:55   ` Aurelien Jarno
2013-10-31 20:21 ` [Qemu-devel] [PATCH 03/20] tcg-ia64: Handle constant calls Richard Henderson
2013-11-03 22:56   ` Aurelien Jarno
2013-10-31 20:21 ` [Qemu-devel] [PATCH 04/20] tcg-ia64: Simplify brcond Richard Henderson
2013-11-03 22:56   ` Aurelien Jarno
2013-11-03 23:34     ` Richard Henderson
2013-11-04  4:24       ` Richard Henderson
2013-11-06 22:05         ` Aurelien Jarno
2013-11-06 22:04       ` Aurelien Jarno
2013-10-31 20:21 ` [Qemu-devel] [PATCH 05/20] tcg-ia64: Move AREG0 to R32 Richard Henderson
2013-11-06 22:33   ` Aurelien Jarno
2013-10-31 20:21 ` [Qemu-devel] [PATCH 06/20] tcg-ia64: Avoid unnecessary stop bit in tcg_out_alu Richard Henderson
2013-11-06 22:33   ` Aurelien Jarno
2013-10-31 20:21 ` [Qemu-devel] [PATCH 07/20] tcg-ia64: Use ADDS for small addition Richard Henderson
2013-11-06 22:34   ` Aurelien Jarno
2013-10-31 20:21 ` [Qemu-devel] [PATCH 08/20] tcg-ia64: Use SUB_A3 and ADDS_A4 for subtraction Richard Henderson
2013-11-06 22:34   ` Aurelien Jarno
2013-10-31 20:21 ` [Qemu-devel] [PATCH 09/20] tcg-ia64: Use A3 form of logical operations Richard Henderson
2013-11-06 22:34   ` Aurelien Jarno
2013-10-31 20:21 ` [Qemu-devel] [PATCH 10/20] tcg-ia64: Introduce tcg_opc_mov_a Richard Henderson
2013-11-06 22:34   ` Aurelien Jarno
2013-10-31 20:22 ` [Qemu-devel] [PATCH 11/20] tcg-ia64: Introduce tcg_opc_movi_a Richard Henderson
2013-11-06 22:34   ` Aurelien Jarno
2013-10-31 20:22 ` [Qemu-devel] [PATCH 12/20] tcg-ia64: Introduce tcg_opc_ext_i Richard Henderson
2013-11-06 22:34   ` Aurelien Jarno
2013-10-31 20:22 ` [Qemu-devel] [PATCH 13/20] tcg-ia64: Introduce tcg_opc_bswap64_i Richard Henderson
2013-11-06 22:34   ` Aurelien Jarno
2013-10-31 20:22 ` [Qemu-devel] [PATCH 14/20] tcg-ia64: Re-bundle the tlb load Richard Henderson
2013-10-31 20:22 ` [Qemu-devel] [PATCH 15/20] tcg-ia64: Move bswap for store into " Richard Henderson
2013-10-31 20:22 ` [Qemu-devel] [PATCH 16/20] tcg-ia64: Move tlb addend load into tlb read Richard Henderson
2013-10-31 20:22 ` [Qemu-devel] [PATCH 17/20] tcg-ia64: Reduce code duplication in tcg_out_qemu_ld Richard Henderson
2013-10-31 20:22 ` [Qemu-devel] [PATCH 18/20] tcg-ia64: Convert to new ldst helpers Richard Henderson
2013-10-31 20:22 ` Richard Henderson [this message]
2013-10-31 20:22 ` [Qemu-devel] [PATCH 20/20] tcg-ia64: Optimize small arguments to exit_tb Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1383250929-10288-20-git-send-email-rth@twiddle.net \
    --to=rth@twiddle.net \
    --cc=aliguori@amazon.com \
    --cc=aurelien@aurel32.net \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).