qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Richard Henderson <rth@twiddle.net>
To: qemu-devel@nongnu.org
Cc: av1474@comtv.ru
Subject: [Qemu-devel] [PATCH 25/26] tcg-ppc64: Use the return address as a base pointer
Date: Thu,  1 May 2014 08:44:46 -0700	[thread overview]
Message-ID: <1398959087-23590-26-git-send-email-rth@twiddle.net> (raw)
In-Reply-To: <1398959087-23590-1-git-send-email-rth@twiddle.net>

This can significantly reduce code size for generation of (some)
64-bit constants.  As a side effect, we know for a fact that
exit_tb can use the register to good effect.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ppc/tcg-target.c | 105 +++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 93 insertions(+), 12 deletions(-)

diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index 8570c6f..7d4fd3e 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -41,6 +41,30 @@
 # define TCG_REG_TMP1   TCG_REG_R12
 #endif
 
+/* For the 64-bit target, we don't like the 5 insn sequence needed to build
+   full 64-bit addresses.  Better to have a base register to which we can
+   apply a 32-bit displacement.
+
+   There are generally three items of interest:
+   (1) helper functions in the main executable,
+   (2) TranslationBlock data structures,
+   (3) the return address in the epilogue.
+
+   For user-only, we USE_STATIC_CODE_GEN_BUFFER, so the code_gen_buffer
+   will be inside the main executable, and thus near enough to make a
+   pointer to the epilogue be within 2GB of all helper functions.
+
+   For softmmu, we'll let the kernel choose the address of code_gen_buffer,
+   and odds are it'll be somewhere close to the main malloc arena, and so
+   a pointer to the epilogue will be within 2GB of the TranslationBlocks.
+
+   For --enable-pie, everything will be kinda near everything else,
+   somewhere in high memory.
+
+   Thus we choose to keep the return address in a call-saved register.  */
+#define TCG_REG_RA     TCG_REG_R31
+#define USE_REG_RA     (TCG_TARGET_REG_BITS == 64)
+
 /* Shorthand for size of a pointer.  Avoid promotion to unsigned.  */
 #define SZP  ((int)sizeof(void *))
 
@@ -467,6 +491,8 @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
 #define TW     XO31( 4)
 #define TRAP   (TW | TO(31))
 
+#define NOP    ORI  /* ori 0,0,0 */
+
 #define RT(r) ((r)<<21)
 #define RS(r) ((r)<<21)
 #define RA(r) ((r)<<16)
@@ -531,6 +557,9 @@ static const uint32_t tcg_to_isel[] = {
     [TCG_COND_GTU] = ISEL | BC_(7, CR_GT),
 };
 
+static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
+                             TCGReg base, tcg_target_long offset);
+
 static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
 {
     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
@@ -601,7 +630,17 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
         tcg_out32(s, ADDI | TAI(ret, 0, arg));
         tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
     } else {
-        int32_t high = arg >> 31 >> 1;
+        int32_t high;
+
+        if (USE_REG_RA) {
+            intptr_t diff = arg - (intptr_t)tb_ret_addr;
+            if (diff == (int32_t)diff) {
+                tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_RA, diff);
+                return;
+            }
+        }
+
+        high = arg >> 31 >> 1;
         tcg_out_movi32(s, ret, high);
         if (high) {
             tcg_out_shli64(s, ret, ret, 32);
@@ -1711,18 +1750,16 @@ static void tcg_target_qemu_prologue(TCGContext *s)
 {
     int i;
 
-    tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE,
-                  CPU_TEMP_BUF_SIZE);
-
 #ifdef _CALL_AIX
-    {
-      void **desc = (void **)s->code_ptr;
-      desc[0] = desc + 2;                   /* entry point */
-      desc[1] = 0;                          /* environment pointer */
-      s->code_ptr = (void *)(desc + 2);     /* skip over descriptor */
-    }
+    void **desc = (void **)s->code_ptr;
+    desc[0] = desc + 2;                   /* entry point */
+    desc[1] = 0;                          /* environment pointer */
+    s->code_ptr = (void *)(desc + 2);     /* skip over descriptor */
 #endif
 
+    tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE,
+                  CPU_TEMP_BUF_SIZE);
+
     /* Prologue */
     tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR);
     tcg_out32(s, (SZR == 8 ? STDU : STWU)
@@ -1743,10 +1780,36 @@ static void tcg_target_qemu_prologue(TCGContext *s)
 
     tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
     tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
-    tcg_out32(s, BCCTR | BO_ALWAYS);
+
+    if (USE_REG_RA) {
+#ifdef _CALL_AIX
+        /* Make the caller load the value as the TOC into R2.  */
+        tb_ret_addr = s->code_ptr + 2;
+        desc[1] = tb_ret_addr;
+        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_RA, TCG_REG_R2);
+        tcg_out32(s, BCCTR | BO_ALWAYS);
+#elif defined(_CALL_ELF) && _CALL_ELF == 2
+        /* Compute from the incoming R12 value.  */
+        tb_ret_addr = s->code_ptr + 2;
+        tcg_out32(s, ADDI | TAI(TCG_REG_RA, TCG_REG_R12,
+                                tcg_ptr_byte_diff(tb_ret_addr, s->code_buf)));
+        tcg_out32(s, BCCTR | BO_ALWAYS);
+#else
+        /* Reserve max 5 insns for the constant load, plus the bcctr.  */
+        tb_ret_addr = s->code_ptr + 6;
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (intptr_t)tb_ret_addr);
+        tcg_out32(s, BCCTR | BO_ALWAYS);
+        while (s->code_ptr < tb_ret_addr) {
+            tcg_out32(s, NOP);
+        }
+#endif
+    } else {
+        tcg_out32(s, BCCTR | BO_ALWAYS);
+        tb_ret_addr = s->code_ptr;
+    }
 
     /* Epilogue */
-    tb_ret_addr = s->code_ptr;
+    assert(tb_ret_addr == s->code_ptr);
 
     tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
     for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
@@ -1766,6 +1829,21 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
 
     switch (opc) {
     case INDEX_op_exit_tb:
+        if (USE_REG_RA) {
+            ptrdiff_t disp = tcg_pcrel_diff(s, tb_ret_addr);
+
+            /* Use a direct branch if we can; otherwise use the value in RA.
+               Note that the direct branch is always forward.  If it's in
+               range now, it'll still be in range after the movi.  Don't
+               bother about the 20 bytes where the test here fails but it
+               would succeed below.  */
+            if (!in_range_b(disp)) {
+                tcg_out32(s, MTSPR | RS(TCG_REG_RA) | CTR);
+                tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]);
+                tcg_out32(s, BCCTR | BO_ALWAYS);
+                break;
+            }
+        }
         tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]);
         tcg_out_b(s, 0, tb_ret_addr);
         break;
@@ -2476,6 +2554,9 @@ static void tcg_target_init(TCGContext *s)
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
 #endif
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */
+    if (USE_REG_RA) {
+        tcg_regset_set_reg(s->reserved_regs, TCG_REG_RA);  /* return addr */
+    }
 
     tcg_add_target_add_op_defs(ppc_op_defs);
 }
-- 
1.9.0

  parent reply	other threads:[~2014-05-01 15:46 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-05-01 15:44 [Qemu-devel] [PATCH 00/26] Merge ppc32/ppc64 tcg backends Richard Henderson
2014-05-01 15:44 ` [Qemu-devel] [PATCH 01/26] tcg-ppc: Use uintptr_t in ppc_tb_set_jmp_target Richard Henderson
2014-05-01 15:44 ` [Qemu-devel] [PATCH 02/26] tcg-ppc64: Avoid some hard-codings of TCG_TYPE_I64 Richard Henderson
2014-05-01 15:44 ` [Qemu-devel] [PATCH 03/26] tcg-ppc64: Move functions around Richard Henderson
2014-05-01 15:44 ` [Qemu-devel] [PATCH 04/26] tcg-ppc64: Relax register restrictions in tcg_out_mem_long Richard Henderson
2014-05-01 15:44 ` [Qemu-devel] [PATCH 05/26] tcg-ppc64: Use tcg_out_{ld, st, cmp} internally Richard Henderson
2014-05-01 15:44 ` [Qemu-devel] [PATCH 06/26] tcg-ppc64: Make TCG_AREG0 and TCG_REG_CALL_STACK enum constants Richard Henderson
2014-05-01 15:44 ` [Qemu-devel] [PATCH 07/26] tcg-ppc64: Move call macros out of tcg-target.h Richard Henderson
2014-05-01 15:44 ` [Qemu-devel] [PATCH 08/26] tcg-ppc64: Fix TCG_TARGET_CALL_STACK_OFFSET Richard Henderson
2014-05-01 15:44 ` [Qemu-devel] [PATCH 09/26] tcg-ppc64: Better parameterize the stack frame Richard Henderson
2014-05-01 15:44 ` [Qemu-devel] [PATCH 10/26] tcg-ppc64: Use the correct test in tcg_out_call Richard Henderson
2014-05-01 15:44 ` [Qemu-devel] [PATCH 11/26] tcg-ppc64: Support the ppc64 elfv2 ABI Richard Henderson
2014-05-01 15:44 ` [Qemu-devel] [PATCH 12/26] tcg-ppc64: Adjust tcg_out_call for ELFv2 Richard Henderson
2014-05-01 15:44 ` [Qemu-devel] [PATCH 13/26] tcg-ppc64: Merge 32-bit ABIs into the prologue / frame code Richard Henderson
2014-05-01 15:44 ` [Qemu-devel] [PATCH 14/26] tcg-ppc64: Fix sub2 implementation Richard Henderson
2014-05-01 15:44 ` [Qemu-devel] [PATCH 15/26] tcg-ppc64: Begin merging ppc32 with ppc64 Richard Henderson
2014-05-01 15:44 ` [Qemu-devel] [PATCH 16/26] tcg-ppc64: Merge ppc32 brcond2, setcond2, muluh Richard Henderson
2014-05-01 15:44 ` [Qemu-devel] [PATCH 17/26] tcg-ppc64: Merge ppc32 qemu_ld/st Richard Henderson
2014-05-01 15:44 ` [Qemu-devel] [PATCH 18/26] tcg-ppc64: Merge ppc32 register usage Richard Henderson
2014-05-01 15:44 ` [Qemu-devel] [PATCH 19/26] tcg-ppc64: Support mulsh_i32 Richard Henderson
2014-05-01 15:44 ` [Qemu-devel] [PATCH 20/26] tcg-ppc64: Merge ppc32 shifts Richard Henderson
2014-05-01 15:44 ` [Qemu-devel] [PATCH 21/26] tcg-ppc: Remove the backend Richard Henderson
2014-05-01 15:44 ` [Qemu-devel] [PATCH 22/26] tcg-ppc: Rename the tcg/ppc64 backend Richard Henderson
2014-05-01 15:44 ` [Qemu-devel] [PATCH 23/26] qemu/osdep: Remove the need for qemu_init_auxval Richard Henderson
2014-05-01 15:44 ` [Qemu-devel] [PATCH 24/26] tcg-ppc: Merge cache-utils into the backend Richard Henderson
2014-05-01 15:44 ` Richard Henderson [this message]
2014-05-01 15:44 ` [Qemu-devel] [PATCH 26/26] tcg-ppc: Streamline USE_DIRECT_JUMP Richard Henderson
2014-05-02 14:56 ` [Qemu-devel] [PATCH 00/26] Merge ppc32/ppc64 tcg backends Tom Musta
2014-05-02 16:30 ` Ulrich Weigand
2014-05-02 16:43   ` Richard Henderson
2014-05-05 20:32     ` Tom Musta

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1398959087-23590-26-git-send-email-rth@twiddle.net \
    --to=rth@twiddle.net \
    --cc=av1474@comtv.ru \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).