From: "Alex Bennée" <alex.bennee@linaro.org>
To: Richard Henderson <rth@twiddle.net>
Cc: Peter Maydell <peter.maydell@linaro.org>,
qemu-devel@nongnu.org,
Claudio Fontana <claudio.fontana@gmail.com>
Subject: Re: [Qemu-devel] [PATCH 06/10] tcg-aarch64: Hoist common argument loads in tcg_out_op
Date: Wed, 05 Mar 2014 12:14:37 +0000 [thread overview]
Message-ID: <87a9d4deqa.fsf@linaro.org> (raw)
In-Reply-To: <1393952650-16802-7-git-send-email-rth@twiddle.net>
Richard Henderson <rth@twiddle.net> writes:
> This reduces the code size of the function significantly.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
> tcg/aarch64/tcg-target.c | 95 +++++++++++++++++++++++++-----------------------
> 1 file changed, 50 insertions(+), 45 deletions(-)
>
> diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
> index 8f68450..dca7632 100644
> --- a/tcg/aarch64/tcg-target.c
> +++ b/tcg/aarch64/tcg-target.c
> @@ -1098,15 +1098,22 @@ static inline void tcg_out_load_pair(TCGContext *s, TCGReg addr,
> }
>
> static void tcg_out_op(TCGContext *s, TCGOpcode opc,
> - const TCGArg *args, const int *const_args)
> + const TCGArg args[TCG_MAX_OP_ARGS],
> + const int const_args[TCG_MAX_OP_ARGS])
> {
> /* 99% of the time, we can signal the use of extension registers
> by looking to see if the opcode handles 64-bit data. */
> TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
>
> + /* Hoist the loads of the most common arguments. */
> + TCGArg a0 = args[0];
> + TCGArg a1 = args[1];
> + TCGArg a2 = args[2];
> + int c2 = const_args[2];
> +
The code certainly looks a lot cleaner but I'm not sure what this gains
us. Surely the compiler should have done all this for us?
But an improvement so:
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
> switch (opc) {
> case INDEX_op_exit_tb:
> - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, args[0]);
> + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
> tcg_out_goto(s, (tcg_target_long)tb_ret_addr);
> break;
>
> @@ -1115,23 +1122,23 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
> #error "USE_DIRECT_JUMP required for aarch64"
> #endif
> assert(s->tb_jmp_offset != NULL); /* consistency for USE_DIRECT_JUMP */
> - s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
> + s->tb_jmp_offset[a0] = s->code_ptr - s->code_buf;
> /* actual branch destination will be patched by
> aarch64_tb_set_jmp_target later, beware retranslation. */
> tcg_out_goto_noaddr(s);
> - s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
> + s->tb_next_offset[a0] = s->code_ptr - s->code_buf;
> break;
>
> case INDEX_op_call:
> if (const_args[0]) {
> - tcg_out_call(s, args[0]);
> + tcg_out_call(s, a0);
> } else {
> - tcg_out_callr(s, args[0]);
> + tcg_out_callr(s, a0);
> }
> break;
>
> case INDEX_op_br:
> - tcg_out_goto_label(s, args[0]);
> + tcg_out_goto_label(s, a0);
> break;
>
> case INDEX_op_ld_i32:
> @@ -1154,97 +1161,95 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
> case INDEX_op_st16_i64:
> case INDEX_op_st32_i64:
> tcg_out_ldst(s, aarch64_ldst_get_data(opc), aarch64_ldst_get_type(opc),
> - args[0], args[1], args[2]);
> + a0, a1, a2);
> break;
>
> case INDEX_op_add_i64:
> case INDEX_op_add_i32:
> - tcg_out_arith(s, ARITH_ADD, ext, args[0], args[1], args[2], 0);
> + tcg_out_arith(s, ARITH_ADD, ext, a0, a1, a2, 0);
> break;
>
> case INDEX_op_sub_i64:
> case INDEX_op_sub_i32:
> - tcg_out_arith(s, ARITH_SUB, ext, args[0], args[1], args[2], 0);
> + tcg_out_arith(s, ARITH_SUB, ext, a0, a1, a2, 0);
> break;
>
> case INDEX_op_and_i64:
> case INDEX_op_and_i32:
> - tcg_out_arith(s, ARITH_AND, ext, args[0], args[1], args[2], 0);
> + tcg_out_arith(s, ARITH_AND, ext, a0, a1, a2, 0);
> break;
>
> case INDEX_op_or_i64:
> case INDEX_op_or_i32:
> - tcg_out_arith(s, ARITH_OR, ext, args[0], args[1], args[2], 0);
> + tcg_out_arith(s, ARITH_OR, ext, a0, a1, a2, 0);
> break;
>
> case INDEX_op_xor_i64:
> case INDEX_op_xor_i32:
> - tcg_out_arith(s, ARITH_XOR, ext, args[0], args[1], args[2], 0);
> + tcg_out_arith(s, ARITH_XOR, ext, a0, a1, a2, 0);
> break;
>
> case INDEX_op_mul_i64:
> case INDEX_op_mul_i32:
> - tcg_out_mul(s, ext, args[0], args[1], args[2]);
> + tcg_out_mul(s, ext, a0, a1, a2);
> break;
>
> case INDEX_op_shl_i64:
> case INDEX_op_shl_i32:
> - if (const_args[2]) { /* LSL / UBFM Wd, Wn, (32 - m) */
> - tcg_out_shl(s, ext, args[0], args[1], args[2]);
> + if (c2) { /* LSL / UBFM Wd, Wn, (32 - m) */
> + tcg_out_shl(s, ext, a0, a1, a2);
> } else { /* LSL / LSLV */
> - tcg_out_shiftrot_reg(s, SRR_SHL, ext, args[0], args[1], args[2]);
> + tcg_out_shiftrot_reg(s, SRR_SHL, ext, a0, a1, a2);
> }
> break;
>
> case INDEX_op_shr_i64:
> case INDEX_op_shr_i32:
> - if (const_args[2]) { /* LSR / UBFM Wd, Wn, m, 31 */
> - tcg_out_shr(s, ext, args[0], args[1], args[2]);
> + if (c2) { /* LSR / UBFM Wd, Wn, m, 31 */
> + tcg_out_shr(s, ext, a0, a1, a2);
> } else { /* LSR / LSRV */
> - tcg_out_shiftrot_reg(s, SRR_SHR, ext, args[0], args[1], args[2]);
> + tcg_out_shiftrot_reg(s, SRR_SHR, ext, a0, a1, a2);
> }
> break;
>
> case INDEX_op_sar_i64:
> case INDEX_op_sar_i32:
> - if (const_args[2]) { /* ASR / SBFM Wd, Wn, m, 31 */
> - tcg_out_sar(s, ext, args[0], args[1], args[2]);
> + if (c2) { /* ASR / SBFM Wd, Wn, m, 31 */
> + tcg_out_sar(s, ext, a0, a1, a2);
> } else { /* ASR / ASRV */
> - tcg_out_shiftrot_reg(s, SRR_SAR, ext, args[0], args[1], args[2]);
> + tcg_out_shiftrot_reg(s, SRR_SAR, ext, a0, a1, a2);
> }
> break;
>
> case INDEX_op_rotr_i64:
> case INDEX_op_rotr_i32:
> - if (const_args[2]) { /* ROR / EXTR Wd, Wm, Wm, m */
> - tcg_out_rotr(s, ext, args[0], args[1], args[2]);
> + if (c2) { /* ROR / EXTR Wd, Wm, Wm, m */
> + tcg_out_rotr(s, ext, a0, a1, a2);
> } else { /* ROR / RORV */
> - tcg_out_shiftrot_reg(s, SRR_ROR, ext, args[0], args[1], args[2]);
> + tcg_out_shiftrot_reg(s, SRR_ROR, ext, a0, a1, a2);
> }
> break;
>
> case INDEX_op_rotl_i64:
> case INDEX_op_rotl_i32: /* same as rotate right by (32 - m) */
> - if (const_args[2]) { /* ROR / EXTR Wd, Wm, Wm, 32 - m */
> - tcg_out_rotl(s, ext, args[0], args[1], args[2]);
> + if (c2) { /* ROR / EXTR Wd, Wm, Wm, 32 - m */
> + tcg_out_rotl(s, ext, a0, a1, a2);
> } else {
> - tcg_out_arith(s, ARITH_SUB, 0,
> - TCG_REG_TMP, TCG_REG_XZR, args[2], 0);
> - tcg_out_shiftrot_reg(s, SRR_ROR, ext,
> - args[0], args[1], TCG_REG_TMP);
> + tcg_out_arith(s, ARITH_SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2, 0);
> + tcg_out_shiftrot_reg(s, SRR_ROR, ext, a0, a1, TCG_REG_TMP);
> }
> break;
>
> case INDEX_op_brcond_i64:
> - case INDEX_op_brcond_i32: /* CMP 0, 1, cond(2), label 3 */
> - tcg_out_cmp(s, ext, args[0], args[1], 0);
> - tcg_out_goto_label_cond(s, args[2], args[3]);
> + case INDEX_op_brcond_i32:
> + tcg_out_cmp(s, ext, a0, a1, 0);
> + tcg_out_goto_label_cond(s, a2, args[3]);
> break;
>
> case INDEX_op_setcond_i64:
> case INDEX_op_setcond_i32:
> - tcg_out_cmp(s, ext, args[1], args[2], 0);
> - tcg_out_cset(s, 0, args[0], args[3]);
> + tcg_out_cmp(s, ext, a1, a2, 0);
> + tcg_out_cset(s, 0, a0, args[3]);
> break;
>
> case INDEX_op_qemu_ld8u:
> @@ -1290,34 +1295,34 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
> /* FALLTHRU */
> case INDEX_op_bswap64_i64:
> case INDEX_op_bswap32_i32:
> - tcg_out_rev(s, ext, args[0], args[1]);
> + tcg_out_rev(s, ext, a0, a1);
> break;
> case INDEX_op_bswap16_i64:
> case INDEX_op_bswap16_i32:
> - tcg_out_rev16(s, 0, args[0], args[1]);
> + tcg_out_rev16(s, 0, a0, a1);
> break;
>
> case INDEX_op_ext8s_i64:
> case INDEX_op_ext8s_i32:
> - tcg_out_sxt(s, ext, 0, args[0], args[1]);
> + tcg_out_sxt(s, ext, 0, a0, a1);
> break;
> case INDEX_op_ext16s_i64:
> case INDEX_op_ext16s_i32:
> - tcg_out_sxt(s, ext, 1, args[0], args[1]);
> + tcg_out_sxt(s, ext, 1, a0, a1);
> break;
> case INDEX_op_ext32s_i64:
> - tcg_out_sxt(s, 1, 2, args[0], args[1]);
> + tcg_out_sxt(s, 1, 2, a0, a1);
> break;
> case INDEX_op_ext8u_i64:
> case INDEX_op_ext8u_i32:
> - tcg_out_uxt(s, 0, args[0], args[1]);
> + tcg_out_uxt(s, 0, a0, a1);
> break;
> case INDEX_op_ext16u_i64:
> case INDEX_op_ext16u_i32:
> - tcg_out_uxt(s, 1, args[0], args[1]);
> + tcg_out_uxt(s, 1, a0, a1);
> break;
> case INDEX_op_ext32u_i64:
> - tcg_out_movr(s, 0, args[0], args[1]);
> + tcg_out_movr(s, 0, a0, a1);
> break;
>
> case INDEX_op_mov_i64:
--
Alex Bennée
next prev parent reply other threads:[~2014-03-05 12:14 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-03-04 17:04 [Qemu-devel] [PATCH 00/10] tcg/aarch64 cleanups Richard Henderson
2014-03-04 17:04 ` [Qemu-devel] [PATCH 01/10] tcg-aarch64: Enable builtin disassembler Richard Henderson
2014-03-05 12:03 ` Alex Bennée
2014-03-04 17:04 ` [Qemu-devel] [PATCH 02/10] tcg-aarch64: Remove redundant CPU_TLB_ENTRY_BITS check Richard Henderson
2014-03-05 12:03 ` [Qemu-devel] [PATCH 02/10] tcg-aarch64: Remove redundant CPU_TLB_ENTRY_BITS check Alex Bennée
2014-03-04 17:04 ` [Qemu-devel] [PATCH 03/10] tcg-aarch64: Change all ext variables to TCGType Richard Henderson
2014-03-04 17:04 ` [Qemu-devel] [PATCH 04/10] tcg-aarch64: Set ext based on TCG_OPF_64BIT Richard Henderson
2014-03-05 12:08 ` [Qemu-devel] [PATCH 04/10] tcg-aarch64: Set ext based on TCG_OPF_64BIT Alex Bennée
2014-03-05 17:17 ` [Qemu-devel] [PATCH 04/10] tcg-aarch64: Set ext based on TCG_OPF_64BIT Alex Bennée
2014-03-04 17:04 ` [Qemu-devel] [PATCH 05/10] tcg-aarch64: Don't handle mov/movi in tcg_out_op Richard Henderson
2014-03-05 12:11 ` [Qemu-devel] [PATCH 05/10] tcg-aarch64: Don't handle mov/movi in tcg_out_op Alex Bennée
2014-03-05 14:47 ` Richard Henderson
2014-03-04 17:04 ` [Qemu-devel] [PATCH 06/10] tcg-aarch64: Hoist common argument loads in tcg_out_op Richard Henderson
2014-03-05 12:14 ` Alex Bennée [this message]
2014-03-05 14:53 ` [Qemu-devel] [PATCH 06/10] tcg-aarch64: Hoist common argument loads in tcg_out_op Richard Henderson
2014-03-04 17:04 ` [Qemu-devel] [PATCH 07/10] tcg-aarch64: Remove the shift_imm parameter from tcg_out_cmp Richard Henderson
2014-03-05 12:15 ` [Qemu-devel] [PATCH 07/10] tcg-aarch64: Remove the shift_imm parameter from tcg_out_cmp Alex Bennée
2014-03-04 17:04 ` [Qemu-devel] [PATCH 08/10] tcg-aarch64: Use intptr_t apropriately Richard Henderson
2014-03-04 17:04 ` [Qemu-devel] [PATCH 09/10] tcg-aarch64: Simplify tcg_out_ldst_9 encoding Richard Henderson
2014-03-04 17:04 ` [Qemu-devel] [PATCH 10/10] tcg-aarch64: Remove nop from qemu_st slow path Richard Henderson
2014-03-06 10:06 ` [Qemu-devel] [PATCH 00/10] tcg/aarch64 cleanups Claudio Fontana
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=87a9d4deqa.fsf@linaro.org \
--to=alex.bennee@linaro.org \
--cc=claudio.fontana@gmail.com \
--cc=peter.maydell@linaro.org \
--cc=qemu-devel@nongnu.org \
--cc=rth@twiddle.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).