From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mailman by lists.gnu.org with tmda-scanned (Exim 4.43) id 1K5nwB-0008GK-KQ for qemu-devel@nongnu.org; Mon, 09 Jun 2008 16:25:55 -0400 Received: from exim by lists.gnu.org with spam-scanned (Exim 4.43) id 1K5nw9-0008Fe-Qb for qemu-devel@nongnu.org; Mon, 09 Jun 2008 16:25:55 -0400 Received: from [199.232.76.173] (port=36207 helo=monty-python.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1K5nw9-0008Fb-LN for qemu-devel@nongnu.org; Mon, 09 Jun 2008 16:25:53 -0400 Received: from relay1-v.mail.gandi.net ([217.70.178.75]:51411) by monty-python.gnu.org with esmtp (Exim 4.60) (envelope-from ) id 1K5nw9-0007oI-88 for qemu-devel@nongnu.org; Mon, 09 Jun 2008 16:25:53 -0400 Message-ID: <484D9188.10500@bellard.org> Date: Mon, 09 Jun 2008 22:24:40 +0200 From: Fabrice Bellard MIME-Version: 1.0 Subject: Re: [Qemu-devel] [4715] Emit trampolines manually in prologue References: In-Reply-To: Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 7bit Reply-To: qemu-devel@nongnu.org List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: av1474@comtv.ru Cc: qemu-devel@nongnu.org This code is too complicated. Please remove it and just call a helper to do the division if you really need it. Division is a slow operation, so no optimization is needed at this point. Moreover, as I said previously, there is no point in implementing divu[2] as no QEMU target directly needs it. Fabrice. 
malc wrote: > Revision: 4715 > http://svn.sv.gnu.org/viewvc/?view=rev&root=qemu&revision=4715 > Author: malc > Date: 2008-06-09 19:57:36 +0000 (Mon, 09 Jun 2008) > > Log Message: > ----------- > Emit trampolines manually in prologue > > Modified Paths: > -------------- > trunk/tcg/ppc/tcg-target.c > > Modified: trunk/tcg/ppc/tcg-target.c > =================================================================== > --- trunk/tcg/ppc/tcg-target.c 2008-06-09 19:57:27 UTC (rev 4714) > +++ trunk/tcg/ppc/tcg-target.c 2008-06-09 19:57:36 UTC (rev 4715) > @@ -23,6 +23,8 @@ > */ > > static uint8_t *tb_ret_addr; > +static uint8_t *udiv_addr; > +static uint8_t *div_addr; > > #define FAST_PATH > #if TARGET_PHYS_ADDR_BITS <= 32 > @@ -118,7 +120,7 @@ > }; > > static const int tcg_target_callee_save_regs[] = { > - TCG_REG_R13, /* sould r13 be saved? */ > + TCG_REG_R13, /* should r13 be saved? */ > TCG_REG_R14, > TCG_REG_R15, > TCG_REG_R16, > @@ -135,6 +137,22 @@ > TCG_REG_R31 > }; > > +static const int div_save_regs[] = { > + TCG_REG_R4, > + TCG_REG_R5, > + TCG_REG_R7, > + TCG_REG_R8, > + TCG_REG_R9, > + TCG_REG_R10, > + TCG_REG_R11, > + TCG_REG_R12, > + TCG_REG_R13, /* should r13 be saved? 
*/ > + TCG_REG_R24, > + TCG_REG_R25, > + TCG_REG_R26, > + TCG_REG_R27, > +}; > + > static uint32_t reloc_pc24_val (void *pc, tcg_target_long target) > { > tcg_target_long disp; > @@ -799,9 +817,25 @@ > #endif > } > > +static uint64_t ppc_udiv_helper (uint64_t a, uint32_t b) > +{ > + uint64_t rem, quo; > + quo = a / b; > + rem = a % b; > + return (rem << 32) | (uint32_t) quo; > +} > + > +static uint64_t ppc_div_helper (int64_t a, int32_t b) > +{ > + int64_t rem, quo; > + quo = a / b; > + rem = a % b; > + return (rem << 32) | (uint32_t) quo; > +} > + > void tcg_target_qemu_prologue (TCGContext *s) > { > - int i, frame_size; > + int i, j, frame_size; > > frame_size = 0 > + 4 /* back chain */ > @@ -837,6 +871,49 @@ > tcg_out32 (s, MTSPR | RS (0) | LR); > tcg_out32 (s, ADDI | RT (1) | RA (1) | frame_size); > tcg_out32 (s, BCLR | BO_ALWAYS); > + > + /* div trampolines */ > + for (j = 0; j < 2; ++j) { > + tcg_target_long target; > + > + frame_size = 8 + ARRAY_SIZE (div_save_regs) * 4; > + frame_size = (frame_size + 15) & ~15; > + > + if (j == 0) { > + target = (tcg_target_long) ppc_udiv_helper; > + udiv_addr = s->code_ptr; > + } > + else { > + target = (tcg_target_long) ppc_div_helper; > + div_addr = s->code_ptr; > + } > + > + tcg_out32 (s, MFSPR | RT (0) | LR); > + tcg_out32 (s, STWU | RS (1) | RA (1) | (-frame_size & 0xffff)); > + for (i = 0; i < ARRAY_SIZE (div_save_regs); ++i) > + tcg_out32 (s, (STW > + | RS (div_save_regs[i]) > + | RA (1) > + | (i * 4 + 8) > + ) > + ); > + tcg_out32 (s, STW | RS (0) | RA (1) | (frame_size - 4)); > + tcg_out_mov (s, 4, 6); > + tcg_out_b (s, LK, target); > + tcg_out_mov (s, 6, 4); > + > + for (i = 0; i < ARRAY_SIZE (div_save_regs); ++i) > + tcg_out32 (s, (LWZ > + | RT (div_save_regs[i]) > + | RA (1) > + | (i * 4 + 8) > + ) > + ); > + tcg_out32 (s, LWZ | RT (0) | RA (1) | (frame_size - 4)); > + tcg_out32 (s, MTSPR | RS (0) | LR); > + tcg_out32 (s, ADDI | RT (1) | RA (1) | frame_size); > + tcg_out32 (s, BCLR | BO_ALWAYS); > + } > } > > 
static void tcg_out_ld (TCGContext *s, TCGType type, int ret, int arg1, > @@ -1018,41 +1095,6 @@ > tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr); > } > > -static uint64_t __attribute ((used)) ppc_udiv_helper (uint64_t a, uint32_t b) > -{ > - uint64_t rem, quo; > - quo = a / b; > - rem = a % b; > - return (rem << 32) | (uint32_t) quo; > -} > - > -static uint64_t __attribute ((used)) ppc_div_helper (int64_t a, int32_t b) > -{ > - int64_t rem, quo; > - quo = a / b; > - rem = a % b; > - return (rem << 32) | (uint32_t) quo; > -} > - > -#define MAKE_TRAMPOLINE(name) \ > -extern void name##_trampoline (void); \ > -asm (#name "_trampoline:\n" \ > - " mflr 0\n" \ > - " addi 1,1,-112\n" \ > - " mr 4,6\n" \ > - " stmw 7,0(1)\n" \ > - " stw 0,108(0)\n" \ > - " bl ppc_" #name "_helper\n" \ > - " lmw 7,0(1)\n" \ > - " lwz 0,108(0)\n" \ > - " addi 1,1,112\n" \ > - " mtlr 0\n" \ > - " blr\n" \ > - ) > - > -MAKE_TRAMPOLINE (div); > -MAKE_TRAMPOLINE (udiv); > - > static void tcg_out_div2 (TCGContext *s, int uns) > { > void *label1_ptr, *label2_ptr; > @@ -1067,7 +1109,7 @@ > label1_ptr = s->code_ptr; > tcg_out32 (s, BC | BI (7, CR_EQ) | BO_COND_TRUE); > > - tcg_out_b (s, LK, (tcg_target_long) (uns ? udiv_trampoline : div_trampoline)); > + tcg_out_b (s, LK, (tcg_target_long) (uns ? udiv_addr : div_addr)); > > label2_ptr = s->code_ptr; > tcg_out32 (s, B); > > > > >