[Qemu-devel] [PATCH v2 12/16] tcg-mips: Improve tcg_out_movi for mips64

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

From: Richard Henderson <rth@twiddle.net>
To: qemu-devel@nongnu.org
Cc: mark.cave-ayland@ilande.co.uk, aurelien@aurel32.net
Subject: [Qemu-devel] [PATCH v2 12/16] tcg-mips: Improve tcg_out_movi for mips64
Date: Mon, 15 Feb 2016 14:42:30 +1100	[thread overview]
Message-ID: <1455507754-8978-13-git-send-email-rth@twiddle.net> (raw)
In-Reply-To: <1455507754-8978-1-git-send-email-rth@twiddle.net>

For r6, the ADDIUPC, ALUIPC, DAHI and DATI instructions can shave 2-4
insns off the pre-r6 full 64-bit immediate load sequence.

For pre-r6, NAL can do the same for the special, but common, case of the
return address for the qemu_ld/st slow paths.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/mips/tcg-target.c | 117 ++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 109 insertions(+), 8 deletions(-)

diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
index 8fb2ab0..b4239cb 100644
--- a/tcg/mips/tcg-target.c
+++ b/tcg/mips/tcg-target.c
@@ -282,7 +282,9 @@ typedef enum {
     OPC_ORI      = 015 << 26,
     OPC_XORI     = 016 << 26,
     OPC_LUI      = 017 << 26,
+    OPC_AUI      = OPC_LUI,
     OPC_DADDIU   = 031 << 26,
+    OPC_DAUI     = 035 << 26,
     OPC_LB       = 040 << 26,
     OPC_LH       = 041 << 26,
     OPC_LW       = 043 << 26,
@@ -362,6 +364,9 @@ typedef enum {
     OPC_REGIMM   = 001 << 26,
     OPC_BLTZ     = OPC_REGIMM | (000 << 16),
     OPC_BGEZ     = OPC_REGIMM | (001 << 16),
+    OPC_DAHI     = OPC_REGIMM | (006 << 16),
+    OPC_NAL      = OPC_REGIMM | (020 << 16) | 1,  /* bltzal zero, .+8 */
+    OPC_DATI     = OPC_REGIMM | (036 << 16),
 
     OPC_SPECIAL2 = 034 << 26,
     OPC_MUL_R5   = OPC_SPECIAL2 | 002,
@@ -381,6 +386,10 @@ typedef enum {
     OPC_SEB      = OPC_SPECIAL3 | 02040,
     OPC_SEH      = OPC_SPECIAL3 | 03040,
 
+    OPC_PCREL    = 073 << 26,
+    OPC_ADDIUPC  = OPC_PCREL | (0 << 19),
+    OPC_ALUIPC   = OPC_PCREL | (3 << 19) | (7 << 16),
+
     /* MIPS r6 doesn't have JR, JALR should be used instead */
     OPC_JR       = use_mips32r6_instructions ? OPC_JALR : OPC_JR_R5,
 
@@ -427,6 +436,17 @@ static inline void tcg_out_opc_imm(TCGContext *s, MIPSInsn opc,
     tcg_out32(s, inst);
 }
 
+static inline void tcg_out_opc_pc19(TCGContext *s, MIPSInsn opc,
+                                    TCGReg rs, TCGArg imm)
+{
+    int32_t inst;
+
+    inst = opc;
+    inst |= (rs & 0x1F) << 21;
+    inst |= (imm & 0x7ffff);
+    tcg_out32(s, inst);
+}
+
 /*
  * Type bitfield
  */
@@ -555,6 +575,9 @@ static inline void tcg_out_mov(TCGContext *s, TCGType type,
 static void tcg_out_movi(TCGContext *s, TCGType type,
                          TCGReg ret, tcg_target_long arg)
 {
+    uintptr_t pc = (uintptr_t)s->code_ptr;
+    intptr_t disp;
+
     if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
         arg = (int32_t)arg;
     }
@@ -566,18 +589,96 @@ static void tcg_out_movi(TCGContext *s, TCGType type,
         tcg_out_opc_imm(s, OPC_ORI, ret, TCG_REG_ZERO, arg);
         return;
     }
+
+    /* PC-relative address load, part 1.  The out-of-line slow paths
+       of qemu_ld/st compute the "return address" of the in-line fast
+       path, so that we can find the guest instruction that triggered
+       the memory fault.  Here, we can do that in one instruction.  */
+    if (use_mips32r6_instructions && (arg & 3) == 0) {
+        disp = arg - pc;
+        if (disp == sextract32(disp, 0, 21)) {
+            tcg_out_opc_pc19(s, OPC_ADDIUPC, ret, disp >> 2);
+            return;
+        }
+    }
+
     if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
         tcg_out_opc_imm(s, OPC_LUI, ret, TCG_REG_ZERO, arg >> 16);
-    } else {
-        tcg_out_movi(s, TCG_TYPE_I32, ret, arg >> 31 >> 1);
-        if (arg & 0xffff0000ull) {
-            tcg_out_dsll(s, ret, ret, 16);
-            tcg_out_opc_imm(s, OPC_ORI, ret, ret, arg >> 16);
-            tcg_out_dsll(s, ret, ret, 16);
-        } else {
-            tcg_out_dsll(s, ret, ret, 32);
+        goto do_lo16;
+    }
+
+    if (use_mips32r6_instructions) {
+        tcg_target_long tmp;
+        TCGReg in;
+
+        /* PC-relative address load, part 2.  Here we are able to compute
+           a +- 2GB relative address in two instructions.  This is not likely
+           to be within code_gen_buffer, but a helper function address or
+           other host memory that just happens to be in range.  */
+        disp = sextract64(arg, 16, 48) - sextract64(pc, 16, 48);
+        if (disp == (int16_t)disp) {
+            tcg_out_opc_imm(s, OPC_ALUIPC, 0, ret, disp);
+            goto do_lo16;
+        }
+
+        /* The R6 manual recommends construction of immediates in
+           order of low to high (ADDI, AUI, DAHI, DATI) in order
+           to simplify hardware recognizing these sequences.  */
+
+        in = TCG_REG_ZERO;
+        tmp = (int16_t)arg;
+        if (tmp) {
+            tcg_out_opc_imm(s, OPC_ADDIU, ret, in, tmp);
+            in = ret;
         }
+        arg = (arg - tmp) >> 16;
+        tmp = (int16_t)arg;
+
+        /* Note that DAHI and DATI only have one register operand,
+           and thus we must put a zero low part in place.  Also
+           note that we already eliminated simple 32-bit constants
+           so we know this must happen.  */
+        if (tmp || in != ret) {
+            tcg_out_opc_imm(s, OPC_AUI, ret, in, tmp);
+        }
+        arg = (arg - tmp) >> 16;
+        tmp = (int16_t)arg;
+
+        if (tmp) {
+            tcg_out_opc_imm(s, OPC_DAHI, 0, ret, tmp);
+        }
+        arg = (arg - tmp) >> 16;
+        tcg_debug_assert(arg == (int16_t)arg);
+
+        if (arg) {
+            tcg_out_opc_imm(s, OPC_DATI, 0, ret, arg);
+        }
+        return;
+    }
+
+    /* PC-relative address load, part 3.  For mips64 pre-r6, we can use
+       NAL (nop and link, aka BLTZAL with a false condition) to load the
+       return address register with pc+8.  This lets us compute the
+       return address for qemu_ld/st in only 2 insns.  */
+    disp = arg - (pc + 8);
+    if (disp == (int16_t)disp) {
+        tcg_out32(s, OPC_NAL);
+        tcg_out_opc_imm(s, OPC_DADDIU, ret, TCG_REG_RA, disp);
+        return;
+    }
+
+    /* Last resort: To build a full 64-bit constant, we load the high
+       32 bits, then shift and or in the low 32 bits.  This may take
+       up to 6 instructions.  */
+    tcg_out_movi(s, TCG_TYPE_I32, ret, arg >> 31 >> 1);
+    if (arg & 0xffff0000ull) {
+        tcg_out_dsll(s, ret, ret, 16);
+        tcg_out_opc_imm(s, OPC_ORI, ret, ret, arg >> 16);
+        tcg_out_dsll(s, ret, ret, 16);
+    } else {
+        tcg_out_dsll(s, ret, ret, 32);
     }
+ do_lo16:
     if (arg & 0xffff) {
         tcg_out_opc_imm(s, OPC_ORI, ret, ret, arg & 0xffff);
     }
-- 
2.5.0

next prev parent reply	other threads:[~2016-02-15  3:43 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-02-15  3:42 [Qemu-devel] [PATCH v2 00/16] tcg mips64 and mips r6 improvements Richard Henderson
2016-02-15  3:42 ` [Qemu-devel] [PATCH v2 01/16] tcg-mips: Always use tcg_debug_assert Richard Henderson
2016-02-28 23:51   ` Aurelien Jarno
2016-02-15  3:42 ` [Qemu-devel] [PATCH v2 02/16] tcg-mips: Move bswap code to a subroutine Richard Henderson
2016-02-15  3:42 ` [Qemu-devel] [PATCH v2 03/16] tcg-mips: Add mips64 opcodes Richard Henderson
2016-02-15  3:42 ` [Qemu-devel] [PATCH v2 04/16] tcg-mips: Support 64-bit opcodes Richard Henderson
2016-02-15  3:42 ` [Qemu-devel] [PATCH v2 05/16] tcg-mips: Add bswap32u and bswap64 Richard Henderson
2016-02-15  3:42 ` [Qemu-devel] [PATCH v2 06/16] tcg-mips: Adjust move functions for mips64 Richard Henderson
2016-02-15  3:42 ` [Qemu-devel] [PATCH v2 07/16] tcg-mips: Adjust load/store " Richard Henderson
2016-02-15  3:42 ` [Qemu-devel] [PATCH v2 08/16] tcg-mips: Adjust prologue " Richard Henderson
2016-02-15  3:42 ` [Qemu-devel] [PATCH v2 09/16] tcg-mips: Add tcg unwind info Richard Henderson
2016-02-15  3:42 ` [Qemu-devel] [PATCH v2 10/16] tcg-mips: Adjust qemu_ld/st for mips64 Richard Henderson
2016-02-15  3:42 ` [Qemu-devel] [PATCH v2 11/16] tcg-mips: Adjust calling conventions " Richard Henderson
2016-02-15  3:42 ` Richard Henderson [this message]
2016-02-15  3:42 ` [Qemu-devel] [PATCH v2 13/16] tcg-mips: Use mips64r6 instructions in tcg_out_ldst Richard Henderson
2016-02-15  3:42 ` [Qemu-devel] [PATCH v2 14/16] tcg-mips: Use mips64r6 instructions in constant addition Richard Henderson
2016-02-15  3:42 ` [Qemu-devel] [PATCH v2 15/16] tcg-mips: Use mipsr6 instructions in branches Richard Henderson
2016-02-15  3:42 ` [Qemu-devel] [PATCH v2 16/16] tcg-mips: Use mipsr6 instructions in calls Richard Henderson
2016-02-15  3:47 ` [Qemu-devel] [PATCH v2 00/16] tcg mips64 and mips r6 improvements Richard Henderson
2016-02-28 23:51 ` Aurelien Jarno

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:8fb2ab0 dfblob:b4239cb )
 OR (
bs:"[Qemu-devel] [PATCH v2 12/16] tcg-mips: Improve tcg_out_movi for mips64" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1455507754-8978-13-git-send-email-rth@twiddle.net \
    --to=rth@twiddle.net \
    --cc=aurelien@aurel32.net \
    --cc=mark.cave-ayland@ilande.co.uk \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).