From: Filip Navara <filip.navara@gmail.com>
To: Laurent Desnogues <laurent.desnogues@gmail.com>
Cc: Blue Swirl <blauwirbel@gmail.com>,
Anthony Liguori <aliguori@us.ibm.com>,
qemu-devel@nongnu.org, Avi Kivity <avi@redhat.com>
Subject: Re: OT: TCG SSA, speed, misc (was Re: [Qemu-devel] Re: [PATCH 08/11] QMP: Port balloon command)
Date: Mon, 29 Jun 2009 01:19:49 +0200 [thread overview]
Message-ID: <5b31733c0906281619k6a4bbf54s46de7d07b0395b2e@mail.gmail.com> (raw)
In-Reply-To: <761ea48b0906281424p5966022erbcb20143c06fd6b3@mail.gmail.com>
[-- Attachment #1: Type: text/plain, Size: 1623 bytes --]
On Sun, Jun 28, 2009 at 11:24 PM, Laurent
Desnogues<laurent.desnogues@gmail.com> wrote:
> On Sun, Jun 28, 2009 at 8:19 PM, Filip Navara<filip.navara@gmail.com> wrote:
>> Doing a profiling run on several ARM demo programs showed that most of
>> the generated code was doing load/store operations to the machine
>> registers (in CPU_env). Sample run of FreeRTOS looked like this (OP
>> counts):
>>
>> movi_i32 1603
>> ld_i32 1305
>> st_i32 1174
>> add_i32 530
>> ...
>>
>> If there could be done something that would allow the guest registers
>> to be stored in host registers, even if for a temporary amount of time
>> it would certainly help the guests that I'm dealing with.
>
> TCG does a good job for register allocation.
>
> The problem you have here is that the ARM translator
> isn't using tcg_global_mem_new_i32 for ARM registers.
Interesting, thanks for the tip. I have been trying to achieve the
same effect using tcg_global_reg_new_i32; no wonder it felt so hard.
:)
> Here's an example of number of ops I see when using
> tcg_global_mem_new_i32:
>
> exit_tb 4991
> add_i32 7945
> st_i32 8257
> movi_i32 26812
> mov_i32 38369
>
> And with the trunk:
>
> exit_tb 4957
> add_i32 8165
> st_i32 20281
> ld_i32 21926
> movi_i32 25083
>
>
> Laurent
>
Attached is a proof-of-concept ARM patch that uses
tcg_global_mem_new_i32. I haven't had much time to test it yet, but on
a synthetic benchmark it improved performance by 13 DMIPS to a
total of 216 DMIPS, which is a 6% improvement. On an x86 host the
register allocation still looks very pathetic; I will post a follow-up
soon.
Best regards,
Filip Navara
[-- Attachment #2: 0001-First-try-at-using-tcg_global_mem_new_i32.patch.txt --]
[-- Type: text/plain, Size: 3869 bytes --]
From 4feddee0e7e02e1daab764dbbf9d694277b1e00a Mon Sep 17 00:00:00 2001
From: Filip Navara <filip.navara@gmail.com>
Date: Mon, 29 Jun 2009 01:13:42 +0200
Subject: [PATCH] First try at using tcg_global_mem_new_i32.
---
target-arm/translate.c | 40 +++++++++++++++++++++++-----------------
1 files changed, 23 insertions(+), 17 deletions(-)
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 62c9eff..9a39536 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -77,6 +77,7 @@ typedef struct DisasContext {
static TCGv_ptr cpu_env;
/* We reuse the same 64-bit temporaries for efficiency. */
static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
+static TCGv_i32 cpu_R[16];
/* FIXME: These should be removed. */
static TCGv cpu_T[2];
@@ -86,14 +87,26 @@ static TCGv_i64 cpu_F0d, cpu_F1d;
#define ICOUNT_TEMP cpu_T[0]
#include "gen-icount.h"
+static const char *regnames[] =
+ { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
+ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
+
/* initialize TCG globals. */
void arm_translate_init(void)
{
+ int i;
+
cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");
cpu_T[0] = tcg_global_reg_new_i32(TCG_AREG1, "T0");
cpu_T[1] = tcg_global_reg_new_i32(TCG_AREG2, "T1");
+ for (i = 0; i < 16; i++) {
+ cpu_R[i] = tcg_global_mem_new_i32(TCG_AREG0,
+ offsetof(CPUState, regs[i]),
+ regnames[i]);
+ }
+
#define GEN_HELPER 2
#include "helpers.h"
}
@@ -168,7 +181,7 @@ static void load_reg_var(DisasContext *s, TCGv var, int reg)
addr = (long)s->pc + 4;
tcg_gen_movi_i32(var, addr);
} else {
- tcg_gen_ld_i32(var, cpu_env, offsetof(CPUState, regs[reg]));
+ tcg_gen_mov_i32(var, cpu_R[reg]);
}
}
@@ -188,7 +201,7 @@ static void store_reg(DisasContext *s, int reg, TCGv var)
tcg_gen_andi_i32(var, var, ~1);
s->is_jmp = DISAS_JUMP;
}
- tcg_gen_st_i32(var, cpu_env, offsetof(CPUState, regs[reg]));
+ tcg_gen_mov_i32(cpu_R[reg], var);
dead_tmp(var);
}
@@ -790,27 +803,22 @@ static inline void gen_bx_im(DisasContext *s, uint32_t addr)
TCGv tmp;
s->is_jmp = DISAS_UPDATE;
- tmp = new_tmp();
if (s->thumb != (addr & 1)) {
+ tmp = new_tmp();
tcg_gen_movi_i32(tmp, addr & 1);
tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUState, thumb));
+ dead_tmp(tmp);
}
- tcg_gen_movi_i32(tmp, addr & ~1);
- tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUState, regs[15]));
- dead_tmp(tmp);
+ tcg_gen_mov_i32(cpu_R[15], addr & ~1);
}
/* Set PC and Thumb state from var. var is marked as dead. */
static inline void gen_bx(DisasContext *s, TCGv var)
{
- TCGv tmp;
-
s->is_jmp = DISAS_UPDATE;
- tmp = new_tmp();
- tcg_gen_andi_i32(tmp, var, 1);
- store_cpu_field(tmp, thumb);
- tcg_gen_andi_i32(var, var, ~1);
- store_cpu_field(var, regs[15]);
+ tcg_gen_andi_i32(cpu_R[15], var, ~1);
+ tcg_gen_andi_i32(var, var, 1);
+ store_cpu_field(var, thumb);
}
/* Variant of store_reg which uses branch&exchange logic when storing
@@ -889,9 +897,7 @@ static inline void gen_movl_T2_reg(DisasContext *s, int reg)
static inline void gen_set_pc_im(uint32_t val)
{
- TCGv tmp = new_tmp();
- tcg_gen_movi_i32(tmp, val);
- store_cpu_field(tmp, regs[15]);
+ tcg_gen_movi_i32(cpu_R[15], val);
}
static inline void gen_movl_reg_TN(DisasContext *s, int reg, int t)
@@ -903,7 +909,7 @@ static inline void gen_movl_reg_TN(DisasContext *s, int reg, int t)
} else {
tmp = cpu_T[t];
}
- tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUState, regs[reg]));
+ tcg_gen_mov_i32(cpu_R[reg], tmp);
if (reg == 15) {
dead_tmp(tmp);
s->is_jmp = DISAS_JUMP;
--
1.6.3.msysgit.0
next prev parent reply other threads:[~2009-06-28 23:19 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-06-28 18:19 OT: TCG SSA, speed, misc (was Re: [Qemu-devel] Re: [PATCH 08/11] QMP: Port balloon command) Filip Navara
2009-06-28 21:24 ` Laurent Desnogues
2009-06-28 23:19 ` Filip Navara [this message]
2009-06-28 23:35 ` Filip Navara
2009-06-29 6:39 ` Laurent Desnogues
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=5b31733c0906281619k6a4bbf54s46de7d07b0395b2e@mail.gmail.com \
--to=filip.navara@gmail.com \
--cc=aliguori@us.ibm.com \
--cc=avi@redhat.com \
--cc=blauwirbel@gmail.com \
--cc=laurent.desnogues@gmail.com \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).