From: Aurelien Jarno <aurelien@aurel32.net>
To: Richard Henderson <rth@twiddle.net>
Cc: qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH for-next 3/8] tcg-i386: Tidy qemu_ld/st slow path
Date: Thu, 15 Aug 2013 17:54:38 +0200 [thread overview]
Message-ID: <20130815155438.GA19231@ohm.aurel32.net> (raw)
In-Reply-To: <1375726045-20797-4-git-send-email-rth@twiddle.net>
On Mon, Aug 05, 2013 at 08:07:20AM -1000, Richard Henderson wrote:
> Use existing stack space for arguments; don't push/pop.
> Use less ifdefs and more C ifs.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
> tcg/i386/tcg-target.c | 159 +++++++++++++++++++++-----------------------------
> 1 file changed, 68 insertions(+), 91 deletions(-)
>
> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
> index 456bd9e..8addfa1 100644
> --- a/tcg/i386/tcg-target.c
> +++ b/tcg/i386/tcg-target.c
> @@ -1461,22 +1461,12 @@ static void add_qemu_ldst_label(TCGContext *s,
> /*
> * Generate code for the slow path for a load at the end of block
> */
> -static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *label)
> +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
> {
> - int s_bits;
> - int opc = label->opc;
> - int mem_index = label->mem_index;
> -#if TCG_TARGET_REG_BITS == 32
> - int stack_adjust;
> - int addrlo_reg = label->addrlo_reg;
> - int addrhi_reg = label->addrhi_reg;
> -#endif
> - int data_reg = label->datalo_reg;
> - int data_reg2 = label->datahi_reg;
> - uint8_t *raddr = label->raddr;
> - uint8_t **label_ptr = &label->label_ptr[0];
> -
> - s_bits = opc & 3;
> + int opc = l->opc;
> + int s_bits = opc & 3;
> + TCGReg data_reg;
> + uint8_t **label_ptr = &l->label_ptr[0];
>
> /* resolve label address */
> *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
> @@ -1484,22 +1474,28 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *label)
> *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4);
> }
>
> -#if TCG_TARGET_REG_BITS == 32
> - tcg_out_pushi(s, mem_index);
> - stack_adjust = 4;
> - if (TARGET_LONG_BITS == 64) {
> - tcg_out_push(s, addrhi_reg);
> - stack_adjust += 4;
> + if (TCG_TARGET_REG_BITS == 32) {
> + int ofs = 0;
> +
> + tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
> + ofs += 4;
> +
> + tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
> + ofs += 4;
> +
> + if (TARGET_LONG_BITS == 64) {
> + tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
> + ofs += 4;
> + }
> +
> + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_EAX, l->mem_index);
> + tcg_out_st(s, TCG_TYPE_I32, TCG_REG_EAX, TCG_REG_ESP, ofs);
Just as the previous code was able to push an immediate, it should
be possible to store an immediate value directly here:
tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, TCG_REG_ESP, ofs);
tcg_out32(s, l->mem_index);
> + } else {
> + tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0);
> + /* The second argument is already loaded with addrlo. */
> + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
> + l->mem_index);
> }
> - tcg_out_push(s, addrlo_reg);
> - stack_adjust += 4;
> - tcg_out_push(s, TCG_AREG0);
> - stack_adjust += 4;
> -#else
> - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0);
> - /* The second argument is already loaded with addrlo. */
> - tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], mem_index);
> -#endif
>
> /* Code generation of qemu_ld/st's slow path calling MMU helper
>
> @@ -1518,18 +1514,10 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *label)
> tcg_out8(s, 5);
> /* Dummy backward jump having information of fast path'pc for MMU helpers */
> tcg_out8(s, OPC_JMP_long);
> - *(int32_t *)s->code_ptr = (int32_t)(raddr - s->code_ptr - 4);
> + *(int32_t *)s->code_ptr = (int32_t)(l->raddr - s->code_ptr - 4);
> s->code_ptr += 4;
>
> -#if TCG_TARGET_REG_BITS == 32
> - if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
> - /* Pop and discard. This is 2 bytes smaller than the add. */
> - tcg_out_pop(s, TCG_REG_ECX);
> - } else if (stack_adjust != 0) {
> - tcg_out_addi(s, TCG_REG_CALL_STACK, stack_adjust);
> - }
> -#endif
> -
> + data_reg = l->datalo_reg;
> switch(opc) {
> case 0 | 4:
> tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
> @@ -1557,10 +1545,10 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *label)
> } else if (data_reg == TCG_REG_EDX) {
> /* xchg %edx, %eax */
> tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
> - tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EAX);
> + tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
> } else {
> tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
> - tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EDX);
> + tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
> }
> break;
> default:
> @@ -1568,28 +1556,17 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *label)
> }
>
> /* Jump to the code corresponding to next IR of qemu_st */
> - tcg_out_jmp(s, (tcg_target_long)raddr);
> + tcg_out_jmp(s, (tcg_target_long)l->raddr);
> }
>
> /*
> * Generate code for the slow path for a store at the end of block
> */
> -static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *label)
> +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
> {
> - int s_bits;
> - int stack_adjust;
> - int opc = label->opc;
> - int mem_index = label->mem_index;
> - int data_reg = label->datalo_reg;
> -#if TCG_TARGET_REG_BITS == 32
> - int data_reg2 = label->datahi_reg;
> - int addrlo_reg = label->addrlo_reg;
> - int addrhi_reg = label->addrhi_reg;
> -#endif
> - uint8_t *raddr = label->raddr;
> - uint8_t **label_ptr = &label->label_ptr[0];
> -
> - s_bits = opc & 3;
> + int opc = l->opc;
> + int s_bits = opc & 3;
> + uint8_t **label_ptr = &l->label_ptr[0];
>
> /* resolve label address */
> *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
> @@ -1597,31 +1574,38 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *label)
> *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4);
> }
>
> -#if TCG_TARGET_REG_BITS == 32
> - tcg_out_pushi(s, mem_index);
> - stack_adjust = 4;
> - if (opc == 3) {
> - tcg_out_push(s, data_reg2);
> - stack_adjust += 4;
> - }
> - tcg_out_push(s, data_reg);
> - stack_adjust += 4;
> - if (TARGET_LONG_BITS == 64) {
> - tcg_out_push(s, addrhi_reg);
> - stack_adjust += 4;
> + if (TCG_TARGET_REG_BITS == 32) {
> + int ofs = 0;
> +
> + tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
> + ofs += 4;
> +
> + tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
> + ofs += 4;
> +
> + if (TARGET_LONG_BITS == 64) {
> + tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
> + ofs += 4;
> + }
> +
> + tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs);
> + ofs += 4;
> +
> + if (opc == 3) {
> + tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs);
> + ofs += 4;
> + }
> +
> + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_EAX, l->mem_index);
> + tcg_out_st(s, TCG_TYPE_I32, TCG_REG_EAX, TCG_REG_ESP, ofs);
Same here.
> + } else {
> + tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0);
> + /* The second argument is already loaded with addrlo. */
> + tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
> + tcg_target_call_iarg_regs[2], l->datalo_reg);
> + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
> + l->mem_index);
> }
> - tcg_out_push(s, addrlo_reg);
> - stack_adjust += 4;
> - tcg_out_push(s, TCG_AREG0);
> - stack_adjust += 4;
> -#else
> - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0);
> - /* The second argument is already loaded with addrlo. */
> - tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
> - tcg_target_call_iarg_regs[2], data_reg);
> - tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], mem_index);
> - stack_adjust = 0;
> -#endif
>
> /* Code generation of qemu_ld/st's slow path calling MMU helper
>
> @@ -1640,18 +1624,11 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *label)
> tcg_out8(s, 5);
> /* Dummy backward jump having information of fast path'pc for MMU helpers */
> tcg_out8(s, OPC_JMP_long);
> - *(int32_t *)s->code_ptr = (int32_t)(raddr - s->code_ptr - 4);
> + *(int32_t *)s->code_ptr = (int32_t)(l->raddr - s->code_ptr - 4);
> s->code_ptr += 4;
>
> - if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
> - /* Pop and discard. This is 2 bytes smaller than the add. */
> - tcg_out_pop(s, TCG_REG_ECX);
> - } else if (stack_adjust != 0) {
> - tcg_out_addi(s, TCG_REG_CALL_STACK, stack_adjust);
> - }
> -
> /* Jump to the code corresponding to next IR of qemu_st */
> - tcg_out_jmp(s, (tcg_target_long)raddr);
> + tcg_out_jmp(s, (tcg_target_long)l->raddr);
> }
>
The rest looks fine to me.
--
Aurelien Jarno GPG: 1024D/F1BCDB73
aurelien@aurel32.net http://www.aurel32.net
next prev parent reply other threads:[~2013-08-15 15:54 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-08-05 18:07 [Qemu-devel] [PATCH for-next 0/8] Improve tcg ldst optimization Richard Henderson
2013-08-05 18:07 ` [Qemu-devel] [PATCH for-next 1/8] tcg-i386: Add and use tcg_out64 Richard Henderson
2013-08-15 15:54 ` Aurelien Jarno
2013-08-05 18:07 ` [Qemu-devel] [PATCH for-next 2/8] tcg-i386: Try pc-relative lea for constant formation Richard Henderson
2013-08-15 15:54 ` Aurelien Jarno
2013-08-05 18:07 ` [Qemu-devel] [PATCH for-next 3/8] tcg-i386: Tidy qemu_ld/st slow path Richard Henderson
2013-08-15 15:54 ` Aurelien Jarno [this message]
2013-08-05 18:07 ` [Qemu-devel] [PATCH for-next 4/8] tcg: Add mmu helpers that take a return address argument Richard Henderson
2013-08-15 15:54 ` Aurelien Jarno
2013-08-15 20:45 ` Richard Henderson
2013-08-16 8:35 ` Aurelien Jarno
2013-08-05 18:07 ` [Qemu-devel] [PATCH for-next 5/8] tcg: Tidy softmmu_template.h Richard Henderson
2013-08-15 15:54 ` Aurelien Jarno
2013-08-05 18:07 ` [Qemu-devel] [PATCH for-next 6/8] tcg-i386: Use new return-argument ld/st helpers Richard Henderson
2013-08-15 15:54 ` Aurelien Jarno
2013-08-15 20:44 ` Richard Henderson
2013-08-16 8:35 ` Aurelien Jarno
2013-08-05 18:07 ` [Qemu-devel] [PATCH for-next 7/8] tcg-arm: Use ldrd/strd for appropriate qemu_ld/st64 Richard Henderson
2013-08-05 18:07 ` [Qemu-devel] [PATCH for-next 8/8] tcg-arm: Rearrange slow-path qemu_ld/st Richard Henderson
2013-08-16 8:36 ` Aurelien Jarno
2013-08-16 8:55 ` Andreas Färber
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20130815155438.GA19231@ohm.aurel32.net \
--to=aurelien@aurel32.net \
--cc=qemu-devel@nongnu.org \
--cc=rth@twiddle.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.