From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([140.186.70.92]:39959) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1QMQZ7-000817-00 for qemu-devel@nongnu.org; Tue, 17 May 2011 16:08:26 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1QMQZ5-00078z-UE for qemu-devel@nongnu.org; Tue, 17 May 2011 16:08:24 -0400 Received: from mail-qw0-f45.google.com ([209.85.216.45]:45812) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1QMQZ5-00078t-Px for qemu-devel@nongnu.org; Tue, 17 May 2011 16:08:23 -0400 Received: by qwj8 with SMTP id 8so525656qwj.4 for ; Tue, 17 May 2011 13:08:23 -0700 (PDT) MIME-Version: 1.0 In-Reply-To: <20110517184636.GW30615@hall.aurel32.net> References: <20110517184636.GW30615@hall.aurel32.net> From: Blue Swirl Date: Tue, 17 May 2011 23:08:03 +0300 Message-ID: Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: quoted-printable Subject: Re: [Qemu-devel] [PATCH 06/11] TCG/x86: use stack for TCG temps List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Aurelien Jarno Cc: qemu-devel On Tue, May 17, 2011 at 9:46 PM, Aurelien Jarno wrot= e: > On Sat, May 14, 2011 at 10:38:40PM +0300, Blue Swirl wrote: >> Use stack instead of temp_buf array in CPUState for TCG >> temps. >> >> Signed-off-by: Blue Swirl >> --- >> =C2=A0tcg/i386/tcg-target.c | =C2=A0 19 ++++++++++--------- >> =C2=A01 files changed, 10 insertions(+), 9 deletions(-) >> >> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c >> index 01747f3..0e168ea 100644 >> --- a/tcg/i386/tcg-target.c >> +++ b/tcg/i386/tcg-target.c >> @@ -1918,19 +1918,22 @@ static void tcg_target_qemu_prologue(TCGContext = *s) >> >> =C2=A0 =C2=A0 =C2=A0/* TB prologue */ >> >> - =C2=A0 =C2=A0/* Save all callee saved registers. =C2=A0*/ >> - =C2=A0 =C2=A0for (i =3D 0; i < ARRAY_SIZE(tcg_target_callee_save_regs)= ; i++) { >> - =C2=A0 =C2=A0 =C2=A0 =C2=A0tcg_out_push(s, tcg_target_callee_save_regs= [i]); >> - =C2=A0 =C2=A0} >> - >> - =C2=A0 =C2=A0/* Reserve some stack space. =C2=A0*/ >> + =C2=A0 =C2=A0/* Reserve some stack space, also for TCG temps. =C2=A0*/ >> =C2=A0 =C2=A0 =C2=A0push_size =3D 1 + ARRAY_SIZE(tcg_target_callee_save_= regs); >> =C2=A0 =C2=A0 =C2=A0push_size *=3D TCG_TARGET_REG_BITS / 8; >> >> - =C2=A0 =C2=A0frame_size =3D push_size + TCG_STATIC_CALL_ARGS_SIZE; >> + =C2=A0 =C2=A0frame_size =3D push_size + TCG_STATIC_CALL_ARGS_SIZE + >> + =C2=A0 =C2=A0 =C2=A0 =C2=A0CPU_TEMP_BUF_NLONGS * sizeof(long); >> =C2=A0 =C2=A0 =C2=A0frame_size =3D (frame_size + TCG_TARGET_STACK_ALIGN = - 1) & >> =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0~(TCG_TARGET_STACK_ALIGN - 1); >> =C2=A0 =C2=A0 =C2=A0stack_addend =3D frame_size - push_size; >> + =C2=A0 =C2=A0tcg_set_frame(s, TCG_REG_ESP, 0, CPU_TEMP_BUF_NLONGS * si= zeof(long)); > > You should probably use TCG_REG_CALL_STACK instead of hardcoading the > register. OK. >> + >> + =C2=A0 =C2=A0/* Save all callee saved registers. =C2=A0*/ >> + =C2=A0 =C2=A0for (i =3D 0; i < ARRAY_SIZE(tcg_target_callee_save_regs)= ; i++) { >> + =C2=A0 =C2=A0 =C2=A0 =C2=A0tcg_out_push(s, tcg_target_callee_save_regs= [i]); >> + =C2=A0 =C2=A0} >> + >> =C2=A0 =C2=A0 =C2=A0tcg_out_addi(s, TCG_REG_ESP, -stack_addend); >> >> =C2=A0 =C2=A0 =C2=A0/* jmp *tb. =C2=A0*/ >> @@ -1979,6 +1982,4 @@ static void tcg_target_init(TCGContext *s) >> =C2=A0 =C2=A0 =C2=A0tcg_regset_set_reg(s->reserved_regs, TCG_REG_ESP); >> >> =C2=A0 =C2=A0 =C2=A0tcg_add_target_add_op_defs(x86_op_defs); >> - =C2=A0 =C2=A0tcg_set_frame(s, TCG_AREG0, offsetof(CPUState, temp_buf), >> - =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0CPU_TEMP= _BUF_NLONGS * sizeof(long)); >> =C2=A0} > > Note that this patch is likely to break calls to helpers which need > parameters on the stack, by judging at the current code (I haven't > tested it in practice): > > | =C2=A0 =C2=A0 if (allocate_args) { > | =C2=A0 =C2=A0 =C2=A0 =C2=A0 tcg_out_addi(s, TCG_REG_CALL_STACK, -STACK_= DIR(call_stack_size)); > | =C2=A0 =C2=A0 } > > The stack register (esp) is decreased. > > | =C2=A0 =C2=A0 stack_offset =3D TCG_TARGET_CALL_STACK_OFFSET; > | =C2=A0 =C2=A0 for(i =3D nb_regs; i < nb_params; i++) { > | =C2=A0 =C2=A0 =C2=A0 =C2=A0 arg =3D args[nb_oargs + i]; > | #ifdef TCG_TARGET_STACK_GROWSUP > | =C2=A0 =C2=A0 =C2=A0 =C2=A0 stack_offset -=3D sizeof(tcg_target_long); > | #endif > | =C2=A0 =C2=A0 =C2=A0 =C2=A0 if (arg !=3D TCG_CALL_DUMMY_ARG) { > | =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 ts =3D &s->temps[arg]; > | =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 if (ts->val_type =3D=3D TEMP_= VAL_REG) { > | =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 tcg_out_st(s, t= s->type, ts->reg, TCG_REG_CALL_STACK, stack_offset); > | =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 } else if (ts->val_type =3D= =3D TEMP_VAL_MEM) { > | =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 reg =3D tcg_reg= _alloc(s, tcg_target_available_regs[ts->type], > | =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 = =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 s->reserved_regs); > > tcg_reg_alloc() may spill some register, and save them in the allocated > frame. If it is referenced by the stack pointer, given it has been > changed, it won't be save at the write place. > > > | =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 /* XXX: not cor= rect if reading values from the stack */ > | =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 tcg_out_ld(s, t= s->type, reg, ts->mem_reg, ts->mem_offset); > > As the comment said, if ts->mem_reg is the stack register (like with > this patch), given it has been increased above, the wrong value will be > read. > > | =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 tcg_out_st(s, t= s->type, reg, TCG_REG_CALL_STACK, stack_offset); > | =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 } else if (ts->val_type =3D= =3D TEMP_VAL_CONST) { > | =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 reg =3D tcg_reg= _alloc(s, tcg_target_available_regs[ts->type], > | =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 = =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 s->reserved_regs); > | =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 /* XXX: sign ex= tend may be needed on some targets */ > | =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 tcg_out_movi(s,= ts->type, reg, ts->val); > | =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 tcg_out_st(s, t= s->type, reg, TCG_REG_CALL_STACK, stack_offset); > | =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 } else { > | =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 tcg_abort(); > | =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 } Maybe the frame pointer register could be set up and used instead, but that would cost one extra register. Alternatively it may be possible to avoid changing stack pointer. We know in advance all possible helpers and the number of their parameters, so the stack could be preallocated as suggested by a comment.