* [PATCH] tcg/arm: Expand epilogue inline
@ 2019-10-15 1:29 Richard Henderson
2019-10-15 10:06 ` Philippe Mathieu-Daudé
0 siblings, 1 reply; 2+ messages in thread
From: Richard Henderson @ 2019-10-15 1:29 UTC (permalink / raw)
To: qemu-devel
It is, after all, just two instructions.
Profiling on a cortex-a15, using -d nochain to increase the number
of exit_tb that are executed, shows a minor improvement of 0.5%.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/arm/tcg-target.inc.c | 32 +++++++++++++-------------------
1 file changed, 13 insertions(+), 19 deletions(-)
diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
index 94d80d79d1..2a9ebfe25a 100644
--- a/tcg/arm/tcg-target.inc.c
+++ b/tcg/arm/tcg-target.inc.c
@@ -1745,24 +1745,18 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
#endif
}
-static tcg_insn_unit *tb_ret_addr;
+static void tcg_out_epilogue(TCGContext *s);
-static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
- const TCGArg *args, const int *const_args)
+static void tcg_out_op(TCGContext *s, TCGOpcode opc,
+ const TCGArg *args, const int *const_args)
{
TCGArg a0, a1, a2, a3, a4, a5;
int c;
switch (opc) {
case INDEX_op_exit_tb:
- /* Reuse the zeroing that exists for goto_ptr. */
- a0 = args[0];
- if (a0 == 0) {
- tcg_out_goto(s, COND_AL, s->code_gen_epilogue);
- } else {
- tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]);
- tcg_out_goto(s, COND_AL, tb_ret_addr);
- }
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, args[0]);
+ tcg_out_epilogue(s);
break;
case INDEX_op_goto_tb:
{
@@ -2284,19 +2278,17 @@ static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
+ TCG_TARGET_STACK_ALIGN - 1) \
& -TCG_TARGET_STACK_ALIGN)
+#define STACK_ADDEND (FRAME_SIZE - PUSH_SIZE)
+
static void tcg_target_qemu_prologue(TCGContext *s)
{
- int stack_addend;
-
/* Calling convention requires us to save r4-r11 and lr. */
/* stmdb sp!, { r4 - r11, lr } */
tcg_out32(s, (COND_AL << 28) | 0x092d4ff0);
/* Reserve callee argument and tcg temp space. */
- stack_addend = FRAME_SIZE - PUSH_SIZE;
-
tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK,
- TCG_REG_CALL_STACK, stack_addend, 1);
+ TCG_REG_CALL_STACK, STACK_ADDEND, 1);
tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
CPU_TEMP_BUF_NLONGS * sizeof(long));
@@ -2310,11 +2302,13 @@ static void tcg_target_qemu_prologue(TCGContext *s)
*/
s->code_gen_epilogue = s->code_ptr;
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 0);
+ tcg_out_epilogue(s);
+}
- /* TB epilogue */
- tb_ret_addr = s->code_ptr;
+static void tcg_out_epilogue(TCGContext *s)
+{
tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK,
- TCG_REG_CALL_STACK, stack_addend, 1);
+ TCG_REG_CALL_STACK, STACK_ADDEND, 1);
/* ldmia sp!, { r4 - r11, pc } */
tcg_out32(s, (COND_AL << 28) | 0x08bd8ff0);
--
2.17.1
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH] tcg/arm: Expand epilogue inline
2019-10-15 1:29 [PATCH] tcg/arm: Expand epilogue inline Richard Henderson
@ 2019-10-15 10:06 ` Philippe Mathieu-Daudé
0 siblings, 0 replies; 2+ messages in thread
From: Philippe Mathieu-Daudé @ 2019-10-15 10:06 UTC (permalink / raw)
To: Richard Henderson, qemu-devel
Hi Richard,
On 10/15/19 3:29 AM, Richard Henderson wrote:
> It is, after all, just two instructions.
>
> Profiling on a cortex-a15, using -d nochain to increase the number
> of exit_tb that are executed, shows a minor improvement of 0.5%.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> tcg/arm/tcg-target.inc.c | 32 +++++++++++++-------------------
> 1 file changed, 13 insertions(+), 19 deletions(-)
>
> diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
> index 94d80d79d1..2a9ebfe25a 100644
> --- a/tcg/arm/tcg-target.inc.c
> +++ b/tcg/arm/tcg-target.inc.c
> @@ -1745,24 +1745,18 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
> #endif
> }
>
> -static tcg_insn_unit *tb_ret_addr;
> +static void tcg_out_epilogue(TCGContext *s);
>
> -static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
> - const TCGArg *args, const int *const_args)
> +static void tcg_out_op(TCGContext *s, TCGOpcode opc,
> + const TCGArg *args, const int *const_args)
> {
> TCGArg a0, a1, a2, a3, a4, a5;
> int c;
>
> switch (opc) {
> case INDEX_op_exit_tb:
> - /* Reuse the zeroing that exists for goto_ptr. */
> - a0 = args[0];
> - if (a0 == 0) {
> - tcg_out_goto(s, COND_AL, s->code_gen_epilogue);
> - } else {
> - tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]);
> - tcg_out_goto(s, COND_AL, tb_ret_addr);
> - }
> + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, args[0]);
> + tcg_out_epilogue(s);
> break;
> case INDEX_op_goto_tb:
> {
> @@ -2284,19 +2278,17 @@ static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
> + TCG_TARGET_STACK_ALIGN - 1) \
> & -TCG_TARGET_STACK_ALIGN)
>
> +#define STACK_ADDEND (FRAME_SIZE - PUSH_SIZE)
> +
> static void tcg_target_qemu_prologue(TCGContext *s)
> {
> - int stack_addend;
> -
> /* Calling convention requires us to save r4-r11 and lr. */
> /* stmdb sp!, { r4 - r11, lr } */
> tcg_out32(s, (COND_AL << 28) | 0x092d4ff0);
>
> /* Reserve callee argument and tcg temp space. */
> - stack_addend = FRAME_SIZE - PUSH_SIZE;
> -
> tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK,
> - TCG_REG_CALL_STACK, stack_addend, 1);
> + TCG_REG_CALL_STACK, STACK_ADDEND, 1);
> tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
> CPU_TEMP_BUF_NLONGS * sizeof(long));
>
> @@ -2310,11 +2302,13 @@ static void tcg_target_qemu_prologue(TCGContext *s)
> */
> s->code_gen_epilogue = s->code_ptr;
> tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 0);
> + tcg_out_epilogue(s);
> +}
>
> - /* TB epilogue */
> - tb_ret_addr = s->code_ptr;
> +static void tcg_out_epilogue(TCGContext *s)
Do you mind splitting this patch in 2?
First use tcg_out_epilogue(), then optimize tcg_out_op().
> +{
> tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK,
> - TCG_REG_CALL_STACK, stack_addend, 1);
> + TCG_REG_CALL_STACK, STACK_ADDEND, 1);
>
> /* ldmia sp!, { r4 - r11, pc } */
> tcg_out32(s, (COND_AL << 28) | 0x08bd8ff0);
>
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2019-10-15 10:07 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2019-10-15 1:29 [PATCH] tcg/arm: Expand epilogue inline Richard Henderson
2019-10-15 10:06 ` Philippe Mathieu-Daudé
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).