All of lore.kernel.org
 help / color / mirror / Atom feed
From: Aurelien Jarno <aurelien@aurel32.net>
To: Richard Henderson <rth@twiddle.net>
Cc: qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH for-next 6/8] tcg-i386: Use new return-argument ld/st helpers
Date: Thu, 15 Aug 2013 17:54:46 +0200	[thread overview]
Message-ID: <20130815155446.GA29142@ohm.aurel32.net> (raw)
In-Reply-To: <1375726045-20797-7-git-send-email-rth@twiddle.net>

On Mon, Aug 05, 2013 at 08:07:23AM -1000, Richard Henderson wrote:
> Discontinue the jump-around-jump-to-jump scheme, trading it for a single
> immediate move instruction.  The two extra jumps always consume 7 bytes,
> whereas the immediate move is either 5 or 7 bytes depending on where the
> code_gen_buffer gets located.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  include/exec/exec-all.h |  13 +------
>  tcg/i386/tcg-target.c   | 100 +++++++++++++++++++++---------------------------
>  2 files changed, 46 insertions(+), 67 deletions(-)
> 
> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> index 5920f73..b70028a 100644
> --- a/include/exec/exec-all.h
> +++ b/include/exec/exec-all.h
> @@ -326,18 +326,9 @@ extern uintptr_t tci_tb_ptr;
>     (6) jump to corresponding code of the next of fast path
>   */
>  # if defined(__i386__) || defined(__x86_64__)
> -/* To avoid broken disassembling, long jmp is used for embedding fast path pc,
> -   so that the destination is the next code of fast path, though this jmp is
> -   never executed.
> -
> -   call MMU helper
> -   jmp POST_PROC (2byte)    <- GETRA()
> -   jmp NEXT_CODE (5byte)
> -   POST_PROCESS ...         <- GETRA() + 7
> - */
>  #  define GETRA() ((uintptr_t)__builtin_return_address(0))
> -#  define GETPC_LDST() ((uintptr_t)(GETRA() + 7 + \
> -                                    *(int32_t *)((void *)GETRA() + 3) - 1))
> +/* The return address argument for ldst is passed directly.  */
> +#  define GETPC_LDST()  (abort(), 0)

Why an abort here, while in the ARM version you add support for
not defining GETPC_LDST?

>  # elif defined (_ARCH_PPC) && !defined (_ARCH_PPC64)
>  #  define GETRA() ((uintptr_t)__builtin_return_address(0))
>  #  define GETPC_LDST() ((uintptr_t) ((*(int32_t *)(GETRA() - 4)) - 1))
> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
> index 8addfa1..c7a02a3 100644
> --- a/tcg/i386/tcg-target.c
> +++ b/tcg/i386/tcg-target.c
> @@ -190,11 +190,11 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
>          /* qemu_ld/st address constraint */
>      case 'L':
>          ct->ct |= TCG_CT_REG;
> -#if TCG_TARGET_REG_BITS == 64
> +        if (TCG_TARGET_REG_BITS == 64) {
>              tcg_regset_set32(ct->u.regs, 0, 0xffff);
> -#else
> +        } else {
>              tcg_regset_set32(ct->u.regs, 0, 0xff);
> -#endif
> +        }
>          tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
>          tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
>          break;
> @@ -1015,22 +1015,24 @@ static void tcg_out_jmp(TCGContext *s, tcg_target_long dest)
>  
>  #include "exec/softmmu_defs.h"
>  
> -/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
> -   int mmu_idx) */
> -static const void *qemu_ld_helpers[4] = {
> -    helper_ldb_mmu,
> -    helper_ldw_mmu,
> -    helper_ldl_mmu,
> -    helper_ldq_mmu,
> +/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
> + *                                     int mmu_idx, uintptr_t ra)
> + */
> +static const void * const qemu_ld_helpers[4] = {
> +    helper_ret_ldb_mmu,
> +    helper_ret_ldw_mmu,
> +    helper_ret_ldl_mmu,
> +    helper_ret_ldq_mmu,
>  };
>  
> -/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
> -   uintxx_t val, int mmu_idx) */
> -static const void *qemu_st_helpers[4] = {
> -    helper_stb_mmu,
> -    helper_stw_mmu,
> -    helper_stl_mmu,
> -    helper_stq_mmu,
> +/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
> + *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
> + */
> +static const void * const qemu_st_helpers[4] = {
> +    helper_ret_stb_mmu,
> +    helper_ret_stw_mmu,
> +    helper_ret_stl_mmu,
> +    helper_ret_stq_mmu,
>  };
>  
>  static void add_qemu_ldst_label(TCGContext *s,
> @@ -1458,6 +1460,12 @@ static void add_qemu_ldst_label(TCGContext *s,
>      }
>  }
>  
> +/* See the GETPC definition in include/exec/exec-all.h.  */
> +static inline uintptr_t do_getpc(uint8_t *raddr)
> +{
> +    return (uintptr_t)raddr - 1;
> +}
> +
>  /*
>   * Generate code for the slow path for a load at the end of block
>   */
> @@ -1490,33 +1498,21 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
>  
>          tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_EAX, l->mem_index);
>          tcg_out_st(s, TCG_TYPE_I32, TCG_REG_EAX, TCG_REG_ESP, ofs);
> +        ofs += 4;
> +
> +        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, do_getpc(l->raddr));
> +        tcg_out_st(s, TCG_TYPE_I32, TCG_REG_EAX, TCG_REG_ESP, ofs);

Same as the other patch, this can be done in one instruction.

>      } else {
> -        tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0);
> +        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
>          /* The second argument is already loaded with addrlo.  */
>          tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
>                       l->mem_index);
> +        tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
> +                     do_getpc(l->raddr));
>      }
>  
> -    /* Code generation of qemu_ld/st's slow path calling MMU helper
> -
> -       PRE_PROC ...
> -       call MMU helper
> -       jmp POST_PROC (2b) : short forward jump <- GETRA()
> -       jmp next_code (5b) : dummy long backward jump which is never executed
> -       POST_PROC ... : do post-processing <- GETRA() + 7
> -       jmp next_code : jump to the code corresponding to next IR of qemu_ld/st
> -    */
> -
>      tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);
>  
> -    /* Jump to post-processing code */
> -    tcg_out8(s, OPC_JMP_short);
> -    tcg_out8(s, 5);
> -    /* Dummy backward jump having information of fast path'pc for MMU helpers */
> -    tcg_out8(s, OPC_JMP_long);
> -    *(int32_t *)s->code_ptr = (int32_t)(l->raddr - s->code_ptr - 4);
> -    s->code_ptr += 4;
> -
>      data_reg = l->datalo_reg;
>      switch(opc) {
>      case 0 | 4:
> @@ -1598,36 +1594,28 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
>  
>          tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_EAX, l->mem_index);
>          tcg_out_st(s, TCG_TYPE_I32, TCG_REG_EAX, TCG_REG_ESP, ofs);
> +        ofs += 4;
> +
> +        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, do_getpc(l->raddr));
> +        tcg_out_st(s, TCG_TYPE_I32, TCG_REG_EAX, TCG_REG_ESP, ofs);
>      } else {
> -        tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0);
> +        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
>          /* The second argument is already loaded with addrlo.  */
>          tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
>                      tcg_target_call_iarg_regs[2], l->datalo_reg);
>          tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
>                       l->mem_index);
> +        if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) {
> +            tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[4],
> +                         do_getpc(l->raddr));
> +        } else {
> +            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RAX, do_getpc(l->raddr));
> +            tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_RAX, TCG_REG_ESP, 0);
> +        }
>      }
>  
> -    /* Code generation of qemu_ld/st's slow path calling MMU helper
> -
> -       PRE_PROC ...
> -       call MMU helper
> -       jmp POST_PROC (2b) : short forward jump <- GETRA()
> -       jmp next_code (5b) : dummy long backward jump which is never executed
> -       POST_PROC ... : do post-processing <- GETRA() + 7
> -       jmp next_code : jump to the code corresponding to next IR of qemu_ld/st
> -    */
> -
>      tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);
>  
> -    /* Jump to post-processing code */
> -    tcg_out8(s, OPC_JMP_short);
> -    tcg_out8(s, 5);
> -    /* Dummy backward jump having information of fast path'pc for MMU helpers */
> -    tcg_out8(s, OPC_JMP_long);
> -    *(int32_t *)s->code_ptr = (int32_t)(l->raddr - s->code_ptr - 4);
> -    s->code_ptr += 4;
> -
> -    /* Jump to the code corresponding to next IR of qemu_st */
>      tcg_out_jmp(s, (tcg_target_long)l->raddr);
>  }
>  

Besides the small nitpicks above, it looks fine to me.

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

  reply	other threads:[~2013-08-15 15:54 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-08-05 18:07 [Qemu-devel] [PATCH for-next 0/8] Improve tcg ldst optimization Richard Henderson
2013-08-05 18:07 ` [Qemu-devel] [PATCH for-next 1/8] tcg-i386: Add and use tcg_out64 Richard Henderson
2013-08-15 15:54   ` Aurelien Jarno
2013-08-05 18:07 ` [Qemu-devel] [PATCH for-next 2/8] tcg-i386: Try pc-relative lea for constant formation Richard Henderson
2013-08-15 15:54   ` Aurelien Jarno
2013-08-05 18:07 ` [Qemu-devel] [PATCH for-next 3/8] tcg-i386: Tidy qemu_ld/st slow path Richard Henderson
2013-08-15 15:54   ` Aurelien Jarno
2013-08-05 18:07 ` [Qemu-devel] [PATCH for-next 4/8] tcg: Add mmu helpers that take a return address argument Richard Henderson
2013-08-15 15:54   ` Aurelien Jarno
2013-08-15 20:45     ` Richard Henderson
2013-08-16  8:35       ` Aurelien Jarno
2013-08-05 18:07 ` [Qemu-devel] [PATCH for-next 5/8] tcg: Tidy softmmu_template.h Richard Henderson
2013-08-15 15:54   ` Aurelien Jarno
2013-08-05 18:07 ` [Qemu-devel] [PATCH for-next 6/8] tcg-i386: Use new return-argument ld/st helpers Richard Henderson
2013-08-15 15:54   ` Aurelien Jarno [this message]
2013-08-15 20:44     ` Richard Henderson
2013-08-16  8:35       ` Aurelien Jarno
2013-08-05 18:07 ` [Qemu-devel] [PATCH for-next 7/8] tcg-arm: Use ldrd/strd for appropriate qemu_ld/st64 Richard Henderson
2013-08-05 18:07 ` [Qemu-devel] [PATCH for-next 8/8] tcg-arm: Rearrange slow-path qemu_ld/st Richard Henderson
2013-08-16  8:36   ` Aurelien Jarno
2013-08-16  8:55   ` Andreas Färber

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20130815155446.GA29142@ohm.aurel32.net \
    --to=aurelien@aurel32.net \
    --cc=qemu-devel@nongnu.org \
    --cc=rth@twiddle.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.