qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Richard Henderson <rth@twiddle.net>
To: qemu-devel@nongnu.org
Cc: anthony@codemonkey.ws
Subject: [Qemu-devel] [PULL 17/21] tcg-ppc64: Handle long offsets better
Date: Wed, 25 Sep 2013 09:27:34 -0700	[thread overview]
Message-ID: <1380126458-3247-18-git-send-email-rth@twiddle.net> (raw)
In-Reply-To: <1380126458-3247-1-git-send-email-rth@twiddle.net>

Previously we'd only handle 16-bit offsets from memory operands without
falling back to the indexed form, but it's easy to use ADDIS to handle full
32-bit offsets.

This also lets us unify code that existed inline in tcg_out_op for handling
addition of large constants.

The new R2 temporary was marked reserved for the AIX calling convention, but
the register really is call-clobbered, and since TCG-generated code has no use
for a TOC, it's available for use.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ppc64/tcg-target.c | 147 +++++++++++++++++++++++++------------------------
 1 file changed, 74 insertions(+), 73 deletions(-)

diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index c01a8bb..51d2b06 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -119,7 +119,6 @@ static const int tcg_target_reg_alloc_order[] = {
     TCG_REG_R31,
     TCG_REG_R12,  /* call clobbered, non-arguments */
     TCG_REG_R11,
-    TCG_REG_R2,
     TCG_REG_R10,  /* call clobbered, arguments */
     TCG_REG_R9,
     TCG_REG_R8,
@@ -746,25 +745,55 @@ static void tcg_out_call(TCGContext *s, tcg_target_long arg, int const_arg)
 #endif
 }
 
-static void tcg_out_ldst(TCGContext *s, TCGReg ret, TCGReg addr,
-                         int offset, int op1, int op2)
+static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
+                             TCGReg base, tcg_target_long offset)
 {
-    if (offset == (int16_t) offset) {
-        tcg_out32(s, op1 | TAI(ret, addr, offset));
-    } else {
-        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, offset);
-        tcg_out32(s, op2 | TAB(ret, addr, TCG_REG_R0));
+    tcg_target_long orig = offset, l0, l1, extra = 0, align = 0;
+    TCGReg rs = TCG_REG_R2;
+
+    assert(rt != TCG_REG_R2 && base != TCG_REG_R2);
+
+    switch (opi) {
+    case LD: case LWA:
+        align = 3;
+        /* FALLTHRU */
+    default:
+        if (rt != TCG_REG_R0) {
+            rs = rt;
+        }
+        break;
+    case STD:
+        align = 3;
+        break;
+    case STB: case STH: case STW:
+        break;
     }
-}
 
-static void tcg_out_ldsta(TCGContext *s, TCGReg ret, TCGReg addr,
-                          int offset, int op1, int op2)
-{
-    if (offset == (int16_t)(offset & ~3)) {
-        tcg_out32(s, op1 | TAI(ret, addr, offset));
-    } else {
-        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, offset);
-        tcg_out32(s, op2 | TAB(ret, addr, TCG_REG_R0));
+    /* For unaligned, or very large offsets, use the indexed form.  */
+    if (offset & align || offset != (int32_t)offset) {
+        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R2, orig);
+        tcg_out32(s, opx | TAB(rt, base, TCG_REG_R2));
+        return;
+    }
+
+    l0 = (int16_t)offset;
+    offset = (offset - l0) >> 16;
+    l1 = (int16_t)offset;
+
+    if (l1 < 0 && orig >= 0) {
+        extra = 0x4000;
+        l1 = (int16_t)(offset - 0x4000);
+    }
+    if (l1) {
+        tcg_out32(s, ADDIS | TAI(rs, base, l1));
+        base = rs;
+    }
+    if (extra) {
+        tcg_out32(s, ADDIS | TAI(rs, base, extra));
+        base = rs;
+    }
+    if (opi != ADDI || base != rt || l0 != 0) {
+        tcg_out32(s, opi | TAI(rt, base, l0));
     }
 }
 
@@ -1074,24 +1103,30 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     tcg_out32(s, BCLR | BO_ALWAYS);
 }
 
-static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
-                       intptr_t arg2)
+static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
+                              TCGReg arg1, intptr_t arg2)
 {
+    int opi, opx;
+
     if (type == TCG_TYPE_I32) {
-        tcg_out_ldst(s, ret, arg1, arg2, LWZ, LWZX);
+        opi = LWZ, opx = LWZX;
     } else {
-        tcg_out_ldsta(s, ret, arg1, arg2, LD, LDX);
+        opi = LD, opx = LDX;
     }
+    tcg_out_mem_long(s, opi, opx, ret, arg1, arg2);
 }
 
-static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
-                       intptr_t arg2)
+static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
+                              TCGReg arg1, intptr_t arg2)
 {
+    int opi, opx;
+
     if (type == TCG_TYPE_I32) {
-        tcg_out_ldst(s, arg, arg1, arg2, STW, STWX);
+        opi = STW, opx = STWX;
     } else {
-        tcg_out_ldsta(s, arg, arg1, arg2, STD, STDX);
+        opi = STD, opx = STDX;
     }
+    tcg_out_mem_long(s, opi, opx, arg, arg1, arg2);
 }
 
 static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
@@ -1449,61 +1484,52 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         break;
     case INDEX_op_ld8u_i32:
     case INDEX_op_ld8u_i64:
-        tcg_out_ldst(s, args[0], args[1], args[2], LBZ, LBZX);
+        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
         break;
     case INDEX_op_ld8s_i32:
     case INDEX_op_ld8s_i64:
-        tcg_out_ldst(s, args[0], args[1], args[2], LBZ, LBZX);
+        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
         tcg_out32(s, EXTSB | RS(args[0]) | RA(args[0]));
         break;
     case INDEX_op_ld16u_i32:
     case INDEX_op_ld16u_i64:
-        tcg_out_ldst(s, args[0], args[1], args[2], LHZ, LHZX);
+        tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]);
         break;
     case INDEX_op_ld16s_i32:
     case INDEX_op_ld16s_i64:
-        tcg_out_ldst(s, args[0], args[1], args[2], LHA, LHAX);
+        tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]);
         break;
     case INDEX_op_ld_i32:
     case INDEX_op_ld32u_i64:
-        tcg_out_ldst(s, args[0], args[1], args[2], LWZ, LWZX);
+        tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]);
         break;
     case INDEX_op_ld32s_i64:
-        tcg_out_ldsta(s, args[0], args[1], args[2], LWA, LWAX);
+        tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]);
         break;
     case INDEX_op_ld_i64:
-        tcg_out_ldsta(s, args[0], args[1], args[2], LD, LDX);
+        tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]);
         break;
     case INDEX_op_st8_i32:
     case INDEX_op_st8_i64:
-        tcg_out_ldst(s, args[0], args[1], args[2], STB, STBX);
+        tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]);
         break;
     case INDEX_op_st16_i32:
     case INDEX_op_st16_i64:
-        tcg_out_ldst(s, args[0], args[1], args[2], STH, STHX);
+        tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]);
         break;
     case INDEX_op_st_i32:
     case INDEX_op_st32_i64:
-        tcg_out_ldst(s, args[0], args[1], args[2], STW, STWX);
+        tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]);
         break;
     case INDEX_op_st_i64:
-        tcg_out_ldsta(s, args[0], args[1], args[2], STD, STDX);
+        tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]);
         break;
 
     case INDEX_op_add_i32:
         a0 = args[0], a1 = args[1], a2 = args[2];
         if (const_args[2]) {
-            int32_t l, h;
         do_addi_32:
-            l = (int16_t)a2;
-            h = a2 - l;
-            if (h) {
-                tcg_out32(s, ADDIS | TAI(a0, a1, h >> 16));
-                a1 = a0;
-            }
-            if (l || a0 != a1) {
-                tcg_out32(s, ADDI | TAI(a0, a1, l));
-            }
+            tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2);
         } else {
             tcg_out32(s, ADD | TAB(a0, a1, a2));
         }
@@ -1680,32 +1706,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
     case INDEX_op_add_i64:
         a0 = args[0], a1 = args[1], a2 = args[2];
         if (const_args[2]) {
-            int32_t l0, h1, h2;
         do_addi_64:
-            /* We can always split any 32-bit signed constant into 3 pieces.
-               Note the positive 0x80000000 coming from the sub_i64 path,
-               handled with the same code we need for eg 0x7fff8000.  */
-            assert(a2 == (int32_t)a2 || a2 == 0x80000000);
-            l0 = (int16_t)a2;
-            h1 = a2 - l0;
-            h2 = 0;
-            if (h1 < 0 && (int64_t)a2 > 0) {
-                h2 = 0x40000000;
-                h1 = a2 - h2 - l0;
-            }
-            assert((TCGArg)h2 + h1 + l0 == a2);
-
-            if (h2) {
-                tcg_out32(s, ADDIS | TAI(a0, a1, h2 >> 16));
-                a1 = a0;
-            }
-            if (h1) {
-                tcg_out32(s, ADDIS | TAI(a0, a1, h1 >> 16));
-                a1 = a0;
-            }
-            if (l0 || a0 != a1) {
-                tcg_out32(s, ADDI | TAI(a0, a1, l0));
-            }
+            tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2);
         } else {
             tcg_out32(s, ADD | TAB(a0, a1, a2));
         }
@@ -2144,10 +2146,9 @@ static void tcg_target_init(TCGContext *s)
     tcg_regset_clear(s->reserved_regs);
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* mem temp */
 #ifdef __APPLE__
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R11); /* ??? */
-#else
-    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc */
 #endif
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
 
-- 
1.8.1.4

  parent reply	other threads:[~2013-09-25 16:28 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-09-25 16:27 [Qemu-devel] [PULL 00/21] TCG ppc and ppc64 updates Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 01/21] configure: Allow command-line configure for ppc32 Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 02/21] tcg-ppc: fix qemu_ld/qemu_st for AIX ABI Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 03/21] tcg-ppc: use new return-argument ld/st helpers Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 04/21] tcg-ppc: Avoid code for nop move Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 05/21] tcg-ppc: Cleanup tcg_out_qemu_ld/st_slow_path Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 06/21] tcg-ppc: Use conditional branch and link to slow path Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 07/21] tcg-ppc: Fix and cleanup tcg_out_tlb_check Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 08/21] tcg-ppc64: Reformat tcg-target.c Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 09/21] tcg-ppc64: More use of TAI and SAI helper macros Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 10/21] tcg-ppc64: Use TCG_REG_Rn constants Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 11/21] tcg-ppc64: Use tcg_out64 Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 12/21] tcg-ppc64: Avoid code for nop move Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 13/21] tcg-ppc64: Don't load the static chain from TCG Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 14/21] tcg-ppc64: Fold constant call address into descriptor load Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 15/21] tcg-ppc64: Look through a constant function descriptor Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 16/21] tcg-ppc64: Tidy register allocation order Richard Henderson
2013-09-25 16:27 ` Richard Henderson [this message]
2013-09-25 16:27 ` [Qemu-devel] [PULL 18/21] tcg-ppc64: Implement tcg_register_jit Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 19/21] tcg-ppc64: Streamline tcg_out_tlb_read Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 20/21] tcg-ppc64: Add _noaddr functions for emitting forward branches Richard Henderson
2013-09-25 16:27 ` [Qemu-devel] [PULL 21/21] tcg-ppc64: Implement CONFIG_QEMU_LDST_OPTIMIZATION Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1380126458-3247-18-git-send-email-rth@twiddle.net \
    --to=rth@twiddle.net \
    --cc=anthony@codemonkey.ws \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).