qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [4715] Emit trampolines manually in prologue
@ 2008-06-09 19:57 malc
  2008-06-09 20:24 ` Fabrice Bellard
  0 siblings, 1 reply; 3+ messages in thread
From: malc @ 2008-06-09 19:57 UTC (permalink / raw)
  To: qemu-devel

Revision: 4715
          http://svn.sv.gnu.org/viewvc/?view=rev&root=qemu&revision=4715
Author:   malc
Date:     2008-06-09 19:57:36 +0000 (Mon, 09 Jun 2008)

Log Message:
-----------
Emit trampolines manually in prologue

Modified Paths:
--------------
    trunk/tcg/ppc/tcg-target.c

Modified: trunk/tcg/ppc/tcg-target.c
===================================================================
--- trunk/tcg/ppc/tcg-target.c	2008-06-09 19:57:27 UTC (rev 4714)
+++ trunk/tcg/ppc/tcg-target.c	2008-06-09 19:57:36 UTC (rev 4715)
@@ -23,6 +23,8 @@
  */
 
 static uint8_t *tb_ret_addr;
+static uint8_t *udiv_addr;
+static uint8_t *div_addr;
 
 #define FAST_PATH
 #if TARGET_PHYS_ADDR_BITS <= 32
@@ -118,7 +120,7 @@
 };
 
 static const int tcg_target_callee_save_regs[] = {
-    TCG_REG_R13,                /* sould r13 be saved? */
+    TCG_REG_R13,                /* should r13 be saved? */
     TCG_REG_R14,
     TCG_REG_R15,
     TCG_REG_R16,
@@ -135,6 +137,22 @@
     TCG_REG_R31
 };
 
+static const int div_save_regs[] = {
+    TCG_REG_R4,
+    TCG_REG_R5,
+    TCG_REG_R7,
+    TCG_REG_R8,
+    TCG_REG_R9,
+    TCG_REG_R10,
+    TCG_REG_R11,
+    TCG_REG_R12,
+    TCG_REG_R13,                /* should r13 be saved? */
+    TCG_REG_R24,
+    TCG_REG_R25,
+    TCG_REG_R26,
+    TCG_REG_R27,
+};
+
 static uint32_t reloc_pc24_val (void *pc, tcg_target_long target)
 {
     tcg_target_long disp;
@@ -799,9 +817,25 @@
 #endif
 }
 
+static uint64_t ppc_udiv_helper (uint64_t a, uint32_t b)
+{
+    uint64_t rem, quo;
+    quo = a / b;
+    rem = a % b;
+    return (rem << 32) | (uint32_t) quo;
+}
+
+static uint64_t ppc_div_helper (int64_t a, int32_t b)
+{
+    int64_t rem, quo;
+    quo = a / b;
+    rem = a % b;
+    return (rem << 32) | (uint32_t) quo;
+}
+
 void tcg_target_qemu_prologue (TCGContext *s)
 {
-    int i, frame_size;
+    int i, j, frame_size;
 
     frame_size = 0
         + 4                     /* back chain */
@@ -837,6 +871,49 @@
     tcg_out32 (s, MTSPR | RS (0) | LR);
     tcg_out32 (s, ADDI | RT (1) | RA (1) | frame_size);
     tcg_out32 (s, BCLR | BO_ALWAYS);
+
+    /* div trampolines */
+    for (j = 0; j < 2; ++j) {
+        tcg_target_long target;
+
+        frame_size = 8 + ARRAY_SIZE (div_save_regs) * 4;
+        frame_size = (frame_size + 15) & ~15;
+
+        if (j == 0) {
+            target = (tcg_target_long) ppc_udiv_helper;
+            udiv_addr = s->code_ptr;
+        }
+        else {
+            target = (tcg_target_long) ppc_div_helper;
+            div_addr = s->code_ptr;
+        }
+
+        tcg_out32 (s, MFSPR | RT (0) | LR);
+        tcg_out32 (s, STWU | RS (1) | RA (1) | (-frame_size & 0xffff));
+        for (i = 0; i < ARRAY_SIZE (div_save_regs); ++i)
+            tcg_out32 (s, (STW
+                           | RS (div_save_regs[i])
+                           | RA (1)
+                           | (i * 4 + 8)
+                           )
+                );
+        tcg_out32 (s, STW | RS (0) | RA (1) | (frame_size - 4));
+        tcg_out_mov (s, 4, 6);
+        tcg_out_b (s, LK, target);
+        tcg_out_mov (s, 6, 4);
+
+        for (i = 0; i < ARRAY_SIZE (div_save_regs); ++i)
+            tcg_out32 (s, (LWZ
+                           | RT (div_save_regs[i])
+                           | RA (1)
+                           | (i * 4 + 8)
+                           )
+                );
+        tcg_out32 (s, LWZ | RT (0) | RA (1) | (frame_size - 4));
+        tcg_out32 (s, MTSPR | RS (0) | LR);
+        tcg_out32 (s, ADDI | RT (1) | RA (1) | frame_size);
+        tcg_out32 (s, BCLR | BO_ALWAYS);
+    }
 }
 
 static void tcg_out_ld (TCGContext *s, TCGType type, int ret, int arg1,
@@ -1018,41 +1095,6 @@
     tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
 }
 
-static uint64_t __attribute ((used)) ppc_udiv_helper (uint64_t a, uint32_t b)
-{
-    uint64_t rem, quo;
-    quo = a / b;
-    rem = a % b;
-    return (rem << 32) | (uint32_t) quo;
-}
-
-static uint64_t __attribute ((used)) ppc_div_helper (int64_t a, int32_t b)
-{
-    int64_t rem, quo;
-    quo = a / b;
-    rem = a % b;
-    return (rem << 32) | (uint32_t) quo;
-}
-
-#define MAKE_TRAMPOLINE(name)                   \
-extern void name##_trampoline (void);           \
-asm (#name "_trampoline:\n"                     \
-     " mflr 0\n"                                \
-     " addi 1,1,-112\n"                         \
-     " mr   4,6\n"                              \
-     " stmw 7,0(1)\n"                           \
-     " stw  0,108(0)\n"                         \
-     " bl   ppc_" #name "_helper\n"             \
-     " lmw  7,0(1)\n"                           \
-     " lwz  0,108(0)\n"                         \
-     " addi 1,1,112\n"                          \
-     " mtlr 0\n"                                \
-     " blr\n"                                   \
-    )
-
-MAKE_TRAMPOLINE (div);
-MAKE_TRAMPOLINE (udiv);
-
 static void tcg_out_div2 (TCGContext *s, int uns)
 {
     void *label1_ptr, *label2_ptr;
@@ -1067,7 +1109,7 @@
     label1_ptr = s->code_ptr;
     tcg_out32 (s, BC | BI (7, CR_EQ) | BO_COND_TRUE);
 
-    tcg_out_b (s, LK, (tcg_target_long) (uns ? udiv_trampoline : div_trampoline));
+    tcg_out_b (s, LK, (tcg_target_long) (uns ? udiv_addr : div_addr));
 
     label2_ptr = s->code_ptr;
     tcg_out32 (s, B);

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [Qemu-devel] [4715] Emit trampolines manually in prologue
  2008-06-09 19:57 [Qemu-devel] [4715] Emit trampolines manually in prologue malc
@ 2008-06-09 20:24 ` Fabrice Bellard
  2008-06-09 23:50   ` malc
  0 siblings, 1 reply; 3+ messages in thread
From: Fabrice Bellard @ 2008-06-09 20:24 UTC (permalink / raw)
  To: av1474; +Cc: qemu-devel

This code is too complicated. Please remove it and just call a helper
to do the division if you really need it. The division is a slow
operation so no optimization is needed at this point.

Moreover, as I said previously there is no point in implementing divu[2]
as no QEMU target directly needs it.

Fabrice.

malc wrote:
> Revision: 4715
>           http://svn.sv.gnu.org/viewvc/?view=rev&root=qemu&revision=4715
> Author:   malc
> Date:     2008-06-09 19:57:36 +0000 (Mon, 09 Jun 2008)
> 
> Log Message:
> -----------
> Emit trampolines manually in prologue
> 
> Modified Paths:
> --------------
>     trunk/tcg/ppc/tcg-target.c
> 
> Modified: trunk/tcg/ppc/tcg-target.c
> ===================================================================
> --- trunk/tcg/ppc/tcg-target.c	2008-06-09 19:57:27 UTC (rev 4714)
> +++ trunk/tcg/ppc/tcg-target.c	2008-06-09 19:57:36 UTC (rev 4715)
> @@ -23,6 +23,8 @@
>   */
>  
>  static uint8_t *tb_ret_addr;
> +static uint8_t *udiv_addr;
> +static uint8_t *div_addr;
>  
>  #define FAST_PATH
>  #if TARGET_PHYS_ADDR_BITS <= 32
> @@ -118,7 +120,7 @@
>  };
>  
>  static const int tcg_target_callee_save_regs[] = {
> -    TCG_REG_R13,                /* sould r13 be saved? */
> +    TCG_REG_R13,                /* should r13 be saved? */
>      TCG_REG_R14,
>      TCG_REG_R15,
>      TCG_REG_R16,
> @@ -135,6 +137,22 @@
>      TCG_REG_R31
>  };
>  
> +static const int div_save_regs[] = {
> +    TCG_REG_R4,
> +    TCG_REG_R5,
> +    TCG_REG_R7,
> +    TCG_REG_R8,
> +    TCG_REG_R9,
> +    TCG_REG_R10,
> +    TCG_REG_R11,
> +    TCG_REG_R12,
> +    TCG_REG_R13,                /* should r13 be saved? */
> +    TCG_REG_R24,
> +    TCG_REG_R25,
> +    TCG_REG_R26,
> +    TCG_REG_R27,
> +};
> +
>  static uint32_t reloc_pc24_val (void *pc, tcg_target_long target)
>  {
>      tcg_target_long disp;
> @@ -799,9 +817,25 @@
>  #endif
>  }
>  
> +static uint64_t ppc_udiv_helper (uint64_t a, uint32_t b)
> +{
> +    uint64_t rem, quo;
> +    quo = a / b;
> +    rem = a % b;
> +    return (rem << 32) | (uint32_t) quo;
> +}
> +
> +static uint64_t ppc_div_helper (int64_t a, int32_t b)
> +{
> +    int64_t rem, quo;
> +    quo = a / b;
> +    rem = a % b;
> +    return (rem << 32) | (uint32_t) quo;
> +}
> +
>  void tcg_target_qemu_prologue (TCGContext *s)
>  {
> -    int i, frame_size;
> +    int i, j, frame_size;
>  
>      frame_size = 0
>          + 4                     /* back chain */
> @@ -837,6 +871,49 @@
>      tcg_out32 (s, MTSPR | RS (0) | LR);
>      tcg_out32 (s, ADDI | RT (1) | RA (1) | frame_size);
>      tcg_out32 (s, BCLR | BO_ALWAYS);
> +
> +    /* div trampolines */
> +    for (j = 0; j < 2; ++j) {
> +        tcg_target_long target;
> +
> +        frame_size = 8 + ARRAY_SIZE (div_save_regs) * 4;
> +        frame_size = (frame_size + 15) & ~15;
> +
> +        if (j == 0) {
> +            target = (tcg_target_long) ppc_udiv_helper;
> +            udiv_addr = s->code_ptr;
> +        }
> +        else {
> +            target = (tcg_target_long) ppc_div_helper;
> +            div_addr = s->code_ptr;
> +        }
> +
> +        tcg_out32 (s, MFSPR | RT (0) | LR);
> +        tcg_out32 (s, STWU | RS (1) | RA (1) | (-frame_size & 0xffff));
> +        for (i = 0; i < ARRAY_SIZE (div_save_regs); ++i)
> +            tcg_out32 (s, (STW
> +                           | RS (div_save_regs[i])
> +                           | RA (1)
> +                           | (i * 4 + 8)
> +                           )
> +                );
> +        tcg_out32 (s, STW | RS (0) | RA (1) | (frame_size - 4));
> +        tcg_out_mov (s, 4, 6);
> +        tcg_out_b (s, LK, target);
> +        tcg_out_mov (s, 6, 4);
> +
> +        for (i = 0; i < ARRAY_SIZE (div_save_regs); ++i)
> +            tcg_out32 (s, (LWZ
> +                           | RT (div_save_regs[i])
> +                           | RA (1)
> +                           | (i * 4 + 8)
> +                           )
> +                );
> +        tcg_out32 (s, LWZ | RT (0) | RA (1) | (frame_size - 4));
> +        tcg_out32 (s, MTSPR | RS (0) | LR);
> +        tcg_out32 (s, ADDI | RT (1) | RA (1) | frame_size);
> +        tcg_out32 (s, BCLR | BO_ALWAYS);
> +    }
>  }
>  
>  static void tcg_out_ld (TCGContext *s, TCGType type, int ret, int arg1,
> @@ -1018,41 +1095,6 @@
>      tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
>  }
>  
> -static uint64_t __attribute ((used)) ppc_udiv_helper (uint64_t a, uint32_t b)
> -{
> -    uint64_t rem, quo;
> -    quo = a / b;
> -    rem = a % b;
> -    return (rem << 32) | (uint32_t) quo;
> -}
> -
> -static uint64_t __attribute ((used)) ppc_div_helper (int64_t a, int32_t b)
> -{
> -    int64_t rem, quo;
> -    quo = a / b;
> -    rem = a % b;
> -    return (rem << 32) | (uint32_t) quo;
> -}
> -
> -#define MAKE_TRAMPOLINE(name)                   \
> -extern void name##_trampoline (void);           \
> -asm (#name "_trampoline:\n"                     \
> -     " mflr 0\n"                                \
> -     " addi 1,1,-112\n"                         \
> -     " mr   4,6\n"                              \
> -     " stmw 7,0(1)\n"                           \
> -     " stw  0,108(0)\n"                         \
> -     " bl   ppc_" #name "_helper\n"             \
> -     " lmw  7,0(1)\n"                           \
> -     " lwz  0,108(0)\n"                         \
> -     " addi 1,1,112\n"                          \
> -     " mtlr 0\n"                                \
> -     " blr\n"                                   \
> -    )
> -
> -MAKE_TRAMPOLINE (div);
> -MAKE_TRAMPOLINE (udiv);
> -
>  static void tcg_out_div2 (TCGContext *s, int uns)
>  {
>      void *label1_ptr, *label2_ptr;
> @@ -1067,7 +1109,7 @@
>      label1_ptr = s->code_ptr;
>      tcg_out32 (s, BC | BI (7, CR_EQ) | BO_COND_TRUE);
>  
> -    tcg_out_b (s, LK, (tcg_target_long) (uns ? udiv_trampoline : div_trampoline));
> +    tcg_out_b (s, LK, (tcg_target_long) (uns ? udiv_addr : div_addr));
>  
>      label2_ptr = s->code_ptr;
>      tcg_out32 (s, B);
> 
> 
> 
> 
> 

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [Qemu-devel] [4715] Emit trampolines manually in prologue
  2008-06-09 20:24 ` Fabrice Bellard
@ 2008-06-09 23:50   ` malc
  0 siblings, 0 replies; 3+ messages in thread
From: malc @ 2008-06-09 23:50 UTC (permalink / raw)
  To: Fabrice Bellard; +Cc: qemu-devel

On Mon, 9 Jun 2008, Fabrice Bellard wrote:

> This code is too complicated. Please remove it and just call a helper
> to do the division if you really need it. The division is a slow
> operation so no optimization is needed at this point.

The only optimization was a check to see whether the dividend fits in
32 bits, with a branch to simple division; everything else was there
to actually call the helper and save callee-clobbered registers (since
main TCG proper is oblivious of the call and can't do it for me).

Perhaps there's some other mechanism to easily call helpers from within
the TCG target?

> Moreover, as I said previously there is no point in implementing divu[2]
> as no QEMU target directly needs it.

Yes, it's gone now.

-- 
mailto:av1474@comtv.ru

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2008-06-09 23:50 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-06-09 19:57 [Qemu-devel] [4715] Emit trampolines manually in prologue malc
2008-06-09 20:24 ` Fabrice Bellard
2008-06-09 23:50   ` malc

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).