From: Claudio Fontana <claudio.fontana@huawei.com>
To: Richard Henderson <rth@twiddle.net>
Cc: peter.maydell@linaro.org, qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH v4 04/33] tcg-aarch64: Hoist common argument loads in tcg_out_op
Date: Mon, 16 Sep 2013 09:42:17 +0200 [thread overview]
Message-ID: <5236B659.5090608@huawei.com> (raw)
In-Reply-To: <1379195690-6509-5-git-send-email-rth@twiddle.net>
Hello Richard,
On 14.09.2013 23:54, Richard Henderson wrote:
> This reduces the code size of the function significantly.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
> tcg/aarch64/tcg-target.c | 95 +++++++++++++++++++++++++-----------------------
> 1 file changed, 50 insertions(+), 45 deletions(-)
>
> diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
> index 8f19b50..8f5814d 100644
> --- a/tcg/aarch64/tcg-target.c
> +++ b/tcg/aarch64/tcg-target.c
> @@ -1113,15 +1113,22 @@ static inline void tcg_out_load_pair(TCGContext *s, TCGReg addr,
> }
>
> static void tcg_out_op(TCGContext *s, TCGOpcode opc,
> - const TCGArg *args, const int *const_args)
> + const TCGArg args[TCG_MAX_OP_ARGS],
> + const int const_args[TCG_MAX_OP_ARGS])
> {
> /* 99% of the time, we can signal the use of extension registers
> by looking to see if the opcode handles 64-bit data. */
> TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
>
> + /* Hoist the loads of the most common arguments. */
> + TCGArg a0 = args[0];
> + TCGArg a1 = args[1];
> + TCGArg a2 = args[2];
> + int c2 = const_args[2];
> +
Either hoist all of them or none (i.e. also add c0 and c1); I would expect the compiler not to generate code for the paths that don't use c[n].
By the way, if the compiler generates bloated code without this manual hoisting, we should notify the people working on GCC for AArch64.
> switch (opc) {
> case INDEX_op_exit_tb:
> - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, args[0]);
> + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
> tcg_out_goto(s, (tcg_target_long)tb_ret_addr);
> break;
>
> @@ -1130,23 +1137,23 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
> #error "USE_DIRECT_JUMP required for aarch64"
> #endif
> assert(s->tb_jmp_offset != NULL); /* consistency for USE_DIRECT_JUMP */
> - s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
> + s->tb_jmp_offset[a0] = s->code_ptr - s->code_buf;
> /* actual branch destination will be patched by
> aarch64_tb_set_jmp_target later, beware retranslation. */
> tcg_out_goto_noaddr(s);
> - s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
> + s->tb_next_offset[a0] = s->code_ptr - s->code_buf;
> break;
>
> case INDEX_op_call:
> if (const_args[0]) {
> - tcg_out_call(s, args[0]);
> + tcg_out_call(s, a0);
> } else {
> - tcg_out_callr(s, args[0]);
> + tcg_out_callr(s, a0);
> }
> break;
>
> case INDEX_op_br:
> - tcg_out_goto_label(s, args[0]);
> + tcg_out_goto_label(s, a0);
> break;
>
> case INDEX_op_ld_i32:
> @@ -1169,97 +1176,95 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
> case INDEX_op_st16_i64:
> case INDEX_op_st32_i64:
> tcg_out_ldst(s, aarch64_ldst_get_data(opc), aarch64_ldst_get_type(opc),
> - args[0], args[1], args[2]);
> + a0, a1, a2);
> break;
>
> case INDEX_op_add_i64:
> case INDEX_op_add_i32:
> - tcg_out_arith(s, ARITH_ADD, ext, args[0], args[1], args[2], 0);
> + tcg_out_arith(s, ARITH_ADD, ext, a0, a1, a2, 0);
> break;
>
> case INDEX_op_sub_i64:
> case INDEX_op_sub_i32:
> - tcg_out_arith(s, ARITH_SUB, ext, args[0], args[1], args[2], 0);
> + tcg_out_arith(s, ARITH_SUB, ext, a0, a1, a2, 0);
> break;
>
> case INDEX_op_and_i64:
> case INDEX_op_and_i32:
> - tcg_out_arith(s, ARITH_AND, ext, args[0], args[1], args[2], 0);
> + tcg_out_arith(s, ARITH_AND, ext, a0, a1, a2, 0);
> break;
>
> case INDEX_op_or_i64:
> case INDEX_op_or_i32:
> - tcg_out_arith(s, ARITH_OR, ext, args[0], args[1], args[2], 0);
> + tcg_out_arith(s, ARITH_OR, ext, a0, a1, a2, 0);
> break;
>
> case INDEX_op_xor_i64:
> case INDEX_op_xor_i32:
> - tcg_out_arith(s, ARITH_XOR, ext, args[0], args[1], args[2], 0);
> + tcg_out_arith(s, ARITH_XOR, ext, a0, a1, a2, 0);
> break;
>
> case INDEX_op_mul_i64:
> case INDEX_op_mul_i32:
> - tcg_out_mul(s, ext, args[0], args[1], args[2]);
> + tcg_out_mul(s, ext, a0, a1, a2);
> break;
>
> case INDEX_op_shl_i64:
> case INDEX_op_shl_i32:
> - if (const_args[2]) { /* LSL / UBFM Wd, Wn, (32 - m) */
> - tcg_out_shl(s, ext, args[0], args[1], args[2]);
> + if (c2) { /* LSL / UBFM Wd, Wn, (32 - m) */
> + tcg_out_shl(s, ext, a0, a1, a2);
> } else { /* LSL / LSLV */
> - tcg_out_shiftrot_reg(s, SRR_SHL, ext, args[0], args[1], args[2]);
> + tcg_out_shiftrot_reg(s, SRR_SHL, ext, a0, a1, a2);
> }
> break;
>
> case INDEX_op_shr_i64:
> case INDEX_op_shr_i32:
> - if (const_args[2]) { /* LSR / UBFM Wd, Wn, m, 31 */
> - tcg_out_shr(s, ext, args[0], args[1], args[2]);
> + if (c2) { /* LSR / UBFM Wd, Wn, m, 31 */
> + tcg_out_shr(s, ext, a0, a1, a2);
> } else { /* LSR / LSRV */
> - tcg_out_shiftrot_reg(s, SRR_SHR, ext, args[0], args[1], args[2]);
> + tcg_out_shiftrot_reg(s, SRR_SHR, ext, a0, a1, a2);
> }
> break;
>
> case INDEX_op_sar_i64:
> case INDEX_op_sar_i32:
> - if (const_args[2]) { /* ASR / SBFM Wd, Wn, m, 31 */
> - tcg_out_sar(s, ext, args[0], args[1], args[2]);
> + if (c2) { /* ASR / SBFM Wd, Wn, m, 31 */
> + tcg_out_sar(s, ext, a0, a1, a2);
> } else { /* ASR / ASRV */
> - tcg_out_shiftrot_reg(s, SRR_SAR, ext, args[0], args[1], args[2]);
> + tcg_out_shiftrot_reg(s, SRR_SAR, ext, a0, a1, a2);
> }
> break;
>
> case INDEX_op_rotr_i64:
> case INDEX_op_rotr_i32:
> - if (const_args[2]) { /* ROR / EXTR Wd, Wm, Wm, m */
> - tcg_out_rotr(s, ext, args[0], args[1], args[2]);
> + if (c2) { /* ROR / EXTR Wd, Wm, Wm, m */
> + tcg_out_rotr(s, ext, a0, a1, a2);
> } else { /* ROR / RORV */
> - tcg_out_shiftrot_reg(s, SRR_ROR, ext, args[0], args[1], args[2]);
> + tcg_out_shiftrot_reg(s, SRR_ROR, ext, a0, a1, a2);
> }
> break;
>
> case INDEX_op_rotl_i64:
> case INDEX_op_rotl_i32: /* same as rotate right by (32 - m) */
> - if (const_args[2]) { /* ROR / EXTR Wd, Wm, Wm, 32 - m */
> - tcg_out_rotl(s, ext, args[0], args[1], args[2]);
> + if (c2) { /* ROR / EXTR Wd, Wm, Wm, 32 - m */
> + tcg_out_rotl(s, ext, a0, a1, a2);
> } else {
> - tcg_out_arith(s, ARITH_SUB, 0,
> - TCG_REG_TMP, TCG_REG_XZR, args[2], 0);
> - tcg_out_shiftrot_reg(s, SRR_ROR, ext,
> - args[0], args[1], TCG_REG_TMP);
> + tcg_out_arith(s, ARITH_SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2, 0);
> + tcg_out_shiftrot_reg(s, SRR_ROR, ext, a0, a1, TCG_REG_TMP);
> }
> break;
>
> case INDEX_op_brcond_i64:
> - case INDEX_op_brcond_i32: /* CMP 0, 1, cond(2), label 3 */
> - tcg_out_cmp(s, ext, args[0], args[1], 0);
> - tcg_out_goto_label_cond(s, args[2], args[3]);
> + case INDEX_op_brcond_i32:
> + tcg_out_cmp(s, ext, a0, a1, 0);
> + tcg_out_goto_label_cond(s, a2, args[3]);
> break;
>
> case INDEX_op_setcond_i64:
> case INDEX_op_setcond_i32:
> - tcg_out_cmp(s, ext, args[1], args[2], 0);
> - tcg_out_cset(s, 0, args[0], args[3]);
> + tcg_out_cmp(s, ext, a1, a2, 0);
> + tcg_out_cset(s, 0, a0, args[3]);
> break;
>
> case INDEX_op_qemu_ld8u:
> @@ -1305,34 +1310,34 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
> /* FALLTHRU */
> case INDEX_op_bswap64_i64:
> case INDEX_op_bswap32_i32:
> - tcg_out_rev(s, ext, args[0], args[1]);
> + tcg_out_rev(s, ext, a0, a1);
> break;
> case INDEX_op_bswap16_i64:
> case INDEX_op_bswap16_i32:
> - tcg_out_rev16(s, 0, args[0], args[1]);
> + tcg_out_rev16(s, 0, a0, a1);
> break;
>
> case INDEX_op_ext8s_i64:
> case INDEX_op_ext8s_i32:
> - tcg_out_sxt(s, ext, 0, args[0], args[1]);
> + tcg_out_sxt(s, ext, 0, a0, a1);
> break;
> case INDEX_op_ext16s_i64:
> case INDEX_op_ext16s_i32:
> - tcg_out_sxt(s, ext, 1, args[0], args[1]);
> + tcg_out_sxt(s, ext, 1, a0, a1);
> break;
> case INDEX_op_ext32s_i64:
> - tcg_out_sxt(s, 1, 2, args[0], args[1]);
> + tcg_out_sxt(s, 1, 2, a0, a1);
> break;
> case INDEX_op_ext8u_i64:
> case INDEX_op_ext8u_i32:
> - tcg_out_uxt(s, 0, args[0], args[1]);
> + tcg_out_uxt(s, 0, a0, a1);
> break;
> case INDEX_op_ext16u_i64:
> case INDEX_op_ext16u_i32:
> - tcg_out_uxt(s, 1, args[0], args[1]);
> + tcg_out_uxt(s, 1, a0, a1);
> break;
> case INDEX_op_ext32u_i64:
> - tcg_out_movr(s, 0, args[0], args[1]);
> + tcg_out_movr(s, 0, a0, a1);
> break;
>
> case INDEX_op_mov_i64:
>
Claudio
next prev parent reply other threads:[~2013-09-16 7:42 UTC|newest]
Thread overview: 66+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-09-14 21:54 [Qemu-devel] [PATCH v4 00/33] tcg-aarch64 improvements Richard Henderson
2013-09-14 21:54 ` [Qemu-devel] [PATCH v4 01/33] tcg-aarch64: Change all ext variables to TCGType Richard Henderson
2013-09-14 21:54 ` [Qemu-devel] [PATCH v4 02/33] tcg-aarch64: Set ext based on TCG_OPF_64BIT Richard Henderson
2013-09-14 21:54 ` [Qemu-devel] [PATCH v4 03/33] tcg-aarch64: Don't handle mov/movi in tcg_out_op Richard Henderson
2013-09-16 7:45 ` Claudio Fontana
2013-09-16 15:07 ` Richard Henderson
2013-09-17 8:05 ` Claudio Fontana
2013-09-14 21:54 ` [Qemu-devel] [PATCH v4 04/33] tcg-aarch64: Hoist common argument loads " Richard Henderson
2013-09-16 7:42 ` Claudio Fontana [this message]
2013-09-16 16:20 ` Richard Henderson
2013-09-17 8:01 ` Claudio Fontana
2013-09-17 14:27 ` Richard Henderson
2013-09-18 8:10 ` Claudio Fontana
2013-09-18 14:00 ` Richard Henderson
2013-09-18 14:18 ` Claudio Fontana
2013-09-14 21:54 ` [Qemu-devel] [PATCH v4 05/33] tcg-aarch64: Change enum aarch64_arith_opc to AArch64Insn Richard Henderson
2013-09-14 21:54 ` [Qemu-devel] [PATCH v4 06/33] tcg-aarch64: Merge enum aarch64_srr_opc with AArch64Insn Richard Henderson
2013-09-16 7:56 ` Claudio Fontana
2013-09-16 15:06 ` Richard Henderson
2013-09-17 8:51 ` Claudio Fontana
2013-09-14 21:54 ` [Qemu-devel] [PATCH v4 07/33] tcg-aarch64: Remove the shift_imm parameter from tcg_out_cmp Richard Henderson
2013-09-16 8:02 ` Claudio Fontana
2013-09-14 21:54 ` [Qemu-devel] [PATCH v4 08/33] tcg-aarch64: Introduce tcg_fmt_Rdnm and tcg_fmt_Rdnm_lsl Richard Henderson
2013-09-16 8:41 ` Claudio Fontana
2013-09-16 15:32 ` Richard Henderson
2013-09-16 19:11 ` Richard Henderson
2013-09-17 8:23 ` Claudio Fontana
2013-09-17 14:54 ` Richard Henderson
2013-09-18 8:24 ` Claudio Fontana
2013-09-18 14:54 ` Richard Henderson
2013-09-18 15:01 ` Claudio Fontana
2013-09-14 21:54 ` [Qemu-devel] [PATCH v4 09/33] tcg-aarch64: Introduce tcg_fmt_Rdn_aimm Richard Henderson
2013-09-16 8:47 ` Claudio Fontana
2013-09-14 21:54 ` [Qemu-devel] [PATCH v4 10/33] tcg-aarch64: Implement mov with tcg_fmt_* functions Richard Henderson
2013-09-16 8:50 ` Claudio Fontana
2013-09-14 21:54 ` [Qemu-devel] [PATCH v4 11/33] tcg-aarch64: Handle constant operands to add, sub, and compare Richard Henderson
2013-09-16 9:02 ` Claudio Fontana
2013-09-16 15:45 ` Richard Henderson
2013-09-17 8:49 ` Claudio Fontana
2013-09-14 21:54 ` [Qemu-devel] [PATCH v4 12/33] tcg-aarch64: Handle constant operands to and, or, xor Richard Henderson
2013-09-14 21:54 ` [Qemu-devel] [PATCH v4 13/33] tcg-aarch64: Support andc, orc, eqv, not Richard Henderson
2013-09-14 21:54 ` [Qemu-devel] [PATCH v4 14/33] tcg-aarch64: Handle zero as first argument to sub Richard Henderson
2013-09-14 21:54 ` [Qemu-devel] [PATCH v4 15/33] tcg-aarch64: Support movcond Richard Henderson
2013-09-14 21:54 ` [Qemu-devel] [PATCH v4 16/33] tcg-aarch64: Use tcg_fmt_Rdnm_cond for setcond Richard Henderson
2013-09-14 21:54 ` [Qemu-devel] [PATCH v4 17/33] tcg-aarch64: Support deposit Richard Henderson
2013-09-14 21:54 ` [Qemu-devel] [PATCH v4 18/33] tcg-aarch64: Support add2, sub2 Richard Henderson
2013-09-14 21:54 ` [Qemu-devel] [PATCH v4 19/33] tcg-aarch64: Support muluh, mulsh Richard Henderson
2013-09-14 21:54 ` [Qemu-devel] [PATCH v4 20/33] tcg-aarch64: Support div, rem Richard Henderson
2013-09-14 21:54 ` [Qemu-devel] [PATCH v4 21/33] tcg-aarch64: Introduce tcg_fmt_Rd_uimm Richard Henderson
2013-09-14 21:54 ` [Qemu-devel] [PATCH v4 22/33] tcg-aarch64: Use MOVN in tcg_out_movi Richard Henderson
2013-09-16 9:16 ` Claudio Fontana
2013-09-16 15:50 ` Richard Henderson
2013-09-17 7:55 ` Claudio Fontana
2013-09-17 15:56 ` Richard Henderson
2013-09-14 21:54 ` [Qemu-devel] [PATCH v4 23/33] tcg-aarch64: Use ORRI " Richard Henderson
2013-09-14 21:54 ` [Qemu-devel] [PATCH v4 24/33] tcg-aarch64: Special case small constants " Richard Henderson
2013-09-14 21:54 ` [Qemu-devel] [PATCH v4 25/33] tcg-aarch64: Use adrp " Richard Henderson
2013-09-14 21:54 ` [Qemu-devel] [PATCH v4 26/33] tcg-aarch64: Avoid add with zero in tlb load Richard Henderson
2013-09-14 21:54 ` [Qemu-devel] [PATCH v4 27/33] tcg-aarch64: Pass return address to load/store helpers directly Richard Henderson
2013-09-14 21:54 ` [Qemu-devel] [PATCH v4 28/33] tcg-aarch64: Use tcg_out_call for qemu_ld/st Richard Henderson
2013-09-14 21:54 ` [Qemu-devel] [PATCH v4 29/33] tcg-aarch64: Use symbolic names for branches Richard Henderson
2013-09-14 21:54 ` [Qemu-devel] [PATCH v4 30/33] tcg-aarch64: Implement tcg_register_jit Richard Henderson
2013-09-14 21:54 ` [Qemu-devel] [PATCH v4 31/33] tcg-aarch64: Reuse FP and LR in translated code Richard Henderson
2013-09-14 21:54 ` [Qemu-devel] [PATCH v4 32/33] tcg-aarch64: Introduce tcg_out_ldst_pair Richard Henderson
2013-09-14 21:54 ` [Qemu-devel] [PATCH v4 33/33] tcg-aarch64: Remove redundant CPU_TLB_ENTRY_BITS check Richard Henderson
2013-09-16 9:05 ` Claudio Fontana
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=5236B659.5090608@huawei.com \
--to=claudio.fontana@huawei.com \
--cc=peter.maydell@linaro.org \
--cc=qemu-devel@nongnu.org \
--cc=rth@twiddle.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.