* [Qemu-devel] [PATCH 1/4] target-sparc: Fix optimized %icc comparisons
2012-10-09 21:49 [Qemu-devel] [PATCH 0/4] target-sparc: More CC cleanups Richard Henderson
@ 2012-10-09 21:49 ` Richard Henderson
2012-10-09 21:49 ` [Qemu-devel] [PATCH 2/4] target-sparc: Optimize CC_OP_LOGIC conditions Richard Henderson
` (3 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Richard Henderson @ 2012-10-09 21:49 UTC (permalink / raw)
To: qemu-devel; +Cc: Blue Swirl
Signed-off-by: Richard Henderson <rth@twiddle.net>
Tested-by: Aurelien Jarno <aurelien@aurel32.net>
---
target-sparc/translate.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index 472eb51..71b9d65 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -1120,6 +1120,7 @@ static void gen_compare(DisasCompare *cmp, bool xcc, unsigned int cond,
cmp->c2 = tcg_temp_new();
tcg_gen_ext32s_tl(cmp->c1, cpu_cc_src);
tcg_gen_ext32s_tl(cmp->c2, cpu_cc_src2);
+ break;
}
#endif
cmp->g1 = cmp->g2 = true;
--
1.7.11.4
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [Qemu-devel] [PATCH 2/4] target-sparc: Optimize CC_OP_LOGIC conditions
2012-10-09 21:49 [Qemu-devel] [PATCH 0/4] target-sparc: More CC cleanups Richard Henderson
2012-10-09 21:49 ` [Qemu-devel] [PATCH 1/4] target-sparc: Fix optimized %icc comparisons Richard Henderson
@ 2012-10-09 21:49 ` Richard Henderson
2012-10-09 21:49 ` [Qemu-devel] [PATCH 3/4] target-sparc: Avoid unnecessary local temporaries Richard Henderson
` (2 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Richard Henderson @ 2012-10-09 21:49 UTC (permalink / raw)
To: qemu-devel; +Cc: Blue Swirl
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
target-sparc/translate.c | 58 +++++++++++++++++++++++++++++++++---------------
1 file changed, 40 insertions(+), 18 deletions(-)
diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index 71b9d65..4409f69 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -1050,7 +1050,7 @@ static void gen_compare(DisasCompare *cmp, bool xcc, unsigned int cond,
DisasContext *dc)
{
static int subcc_cond[16] = {
- -1, /* never */
+ TCG_COND_NEVER,
TCG_COND_EQ,
TCG_COND_LE,
TCG_COND_LT,
@@ -1058,7 +1058,7 @@ static void gen_compare(DisasCompare *cmp, bool xcc, unsigned int cond,
TCG_COND_LTU,
-1, /* neg */
-1, /* overflow */
- -1, /* always */
+ TCG_COND_ALWAYS,
TCG_COND_NE,
TCG_COND_GT,
TCG_COND_GE,
@@ -1068,6 +1068,25 @@ static void gen_compare(DisasCompare *cmp, bool xcc, unsigned int cond,
-1, /* no overflow */
};
+ static int logic_cond[16] = {
+ TCG_COND_NEVER,
+ TCG_COND_EQ, /* eq: Z */
+ TCG_COND_LE, /* le: Z | (N ^ V) -> Z | N */
+ TCG_COND_LT, /* lt: N ^ V -> N */
+ TCG_COND_EQ, /* leu: C | Z -> Z */
+ TCG_COND_NEVER, /* ltu: C -> 0 */
+ TCG_COND_LT, /* neg: N */
+ TCG_COND_NEVER, /* vs: V -> 0 */
+ TCG_COND_ALWAYS,
+ TCG_COND_NE, /* ne: !Z */
+ TCG_COND_GT, /* gt: !(Z | (N ^ V)) -> !(Z | N) */
+ TCG_COND_GE, /* ge: !(N ^ V) -> !N */
+ TCG_COND_NE, /* gtu: !(C | Z) -> !Z */
+ TCG_COND_ALWAYS, /* geu: !C -> 1 */
+ TCG_COND_GE, /* pos: !N */
+ TCG_COND_ALWAYS, /* vc: !V -> 1 */
+ };
+
TCGv_i32 r_src;
TCGv r_dst;
@@ -1082,28 +1101,31 @@ static void gen_compare(DisasCompare *cmp, bool xcc, unsigned int cond,
#endif
switch (dc->cc_op) {
+ case CC_OP_LOGIC:
+ cmp->cond = logic_cond[cond];
+ do_compare_dst_0:
+ cmp->is_bool = false;
+ cmp->g2 = false;
+ cmp->c2 = tcg_const_tl(0);
+#ifdef TARGET_SPARC64
+ if (!xcc) {
+ cmp->g1 = false;
+ cmp->c1 = tcg_temp_new();
+ tcg_gen_ext32s_tl(cmp->c1, cpu_cc_dst);
+ break;
+ }
+#endif
+ cmp->g1 = true;
+ cmp->c1 = cpu_cc_dst;
+ break;
+
case CC_OP_SUB:
switch (cond) {
case 6: /* neg */
case 14: /* pos */
cmp->cond = (cond == 6 ? TCG_COND_LT : TCG_COND_GE);
- cmp->is_bool = false;
- cmp->g2 = false;
- cmp->c2 = tcg_const_tl(0);
-#ifdef TARGET_SPARC64
- if (!xcc) {
- cmp->g1 = false;
- cmp->c1 = tcg_temp_new();
- tcg_gen_ext32s_tl(cmp->c1, cpu_cc_dst);
- break;
- }
-#endif
- cmp->g1 = true;
- cmp->c1 = cpu_cc_dst;
- break;
+ goto do_compare_dst_0;
- case 0: /* never */
- case 8: /* always */
case 7: /* overflow */
case 15: /* !overflow */
goto do_dynamic;
--
1.7.11.4
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [Qemu-devel] [PATCH 3/4] target-sparc: Avoid unnecessary local temporaries
2012-10-09 21:49 [Qemu-devel] [PATCH 0/4] target-sparc: More CC cleanups Richard Henderson
2012-10-09 21:49 ` [Qemu-devel] [PATCH 1/4] target-sparc: Fix optimized %icc comparisons Richard Henderson
2012-10-09 21:49 ` [Qemu-devel] [PATCH 2/4] target-sparc: Optimize CC_OP_LOGIC conditions Richard Henderson
@ 2012-10-09 21:49 ` Richard Henderson
2012-10-09 21:50 ` [Qemu-devel] [PATCH 4/4] target-sparc: Don't compute full flags value so often Richard Henderson
2012-10-13 14:18 ` [Qemu-devel] [PATCH 0/4] target-sparc: More CC cleanups Blue Swirl
4 siblings, 0 replies; 6+ messages in thread
From: Richard Henderson @ 2012-10-09 21:49 UTC (permalink / raw)
To: qemu-devel; +Cc: Blue Swirl
Now that save_state never ends a basic block (BB), we no longer need to
copy values into local temporaries around it.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
target-sparc/translate.c | 30 +++++++++---------------------
1 file changed, 9 insertions(+), 21 deletions(-)
diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index 4409f69..65e6f23 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -143,7 +143,7 @@ static TCGv_i32 gen_load_fpr_F(DisasContext *dc, unsigned int src)
if (src & 1) {
return MAKE_TCGV_I32(GET_TCGV_I64(cpu_fpr[src / 2]));
} else {
- TCGv_i32 ret = tcg_temp_local_new_i32();
+ TCGv_i32 ret = tcg_temp_new_i32();
TCGv_i64 t = tcg_temp_new_i64();
tcg_gen_shri_i64(t, cpu_fpr[src / 2], 32);
@@ -3885,28 +3885,16 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
tcg_gen_mov_tl(cpu_tbr, cpu_tmp0);
break;
case 6: // pstate
- {
- TCGv r_tmp = tcg_temp_local_new();
-
- tcg_gen_mov_tl(r_tmp, cpu_tmp0);
- save_state(dc);
- gen_helper_wrpstate(cpu_env, r_tmp);
- tcg_temp_free(r_tmp);
- dc->npc = DYNAMIC_PC;
- }
+ save_state(dc);
+ gen_helper_wrpstate(cpu_env, cpu_tmp0);
+ dc->npc = DYNAMIC_PC;
break;
case 7: // tl
- {
- TCGv r_tmp = tcg_temp_local_new();
-
- tcg_gen_mov_tl(r_tmp, cpu_tmp0);
- save_state(dc);
- tcg_gen_trunc_tl_i32(cpu_tmp32, r_tmp);
- tcg_temp_free(r_tmp);
- tcg_gen_st_i32(cpu_tmp32, cpu_env,
- offsetof(CPUSPARCState, tl));
- dc->npc = DYNAMIC_PC;
- }
+ save_state(dc);
+ tcg_gen_trunc_tl_i32(cpu_tmp32, cpu_tmp0);
+ tcg_gen_st_i32(cpu_tmp32, cpu_env,
+ offsetof(CPUSPARCState, tl));
+ dc->npc = DYNAMIC_PC;
break;
case 8: // pil
gen_helper_wrpil(cpu_env, cpu_tmp0);
--
1.7.11.4
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [Qemu-devel] [PATCH 4/4] target-sparc: Don't compute full flags value so often
2012-10-09 21:49 [Qemu-devel] [PATCH 0/4] target-sparc: More CC cleanups Richard Henderson
` (2 preceding siblings ...)
2012-10-09 21:49 ` [Qemu-devel] [PATCH 3/4] target-sparc: Avoid unnecessary local temporaries Richard Henderson
@ 2012-10-09 21:50 ` Richard Henderson
2012-10-13 14:18 ` [Qemu-devel] [PATCH 0/4] target-sparc: More CC cleanups Blue Swirl
4 siblings, 0 replies; 6+ messages in thread
From: Richard Henderson @ 2012-10-09 21:50 UTC (permalink / raw)
To: qemu-devel; +Cc: Blue Swirl
Avoid speculatively computing flags before every potentially trapping
operation and instead do the flags computation when a trap actually
occurs. This gives an approximately 30% speedup in emulation.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
linux-user/main.c | 5 +++++
target-sparc/int32_helper.c | 5 +++++
target-sparc/int64_helper.c | 5 +++++
target-sparc/translate.c | 30 +++++++++++-------------------
4 files changed, 26 insertions(+), 19 deletions(-)
diff --git a/linux-user/main.c b/linux-user/main.c
index 9f3476b..f4bbe69 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -1114,6 +1114,11 @@ void cpu_loop (CPUSPARCState *env)
while (1) {
trapnr = cpu_sparc_exec (env);
+ /* Compute PSR before exposing state. */
+ if (env->cc_op != CC_OP_FLAGS) {
+ cpu_get_psr(env);
+ }
+
switch (trapnr) {
#ifndef TARGET_SPARC64
case 0x88:
diff --git a/target-sparc/int32_helper.c b/target-sparc/int32_helper.c
index 9ac5aac..507c355 100644
--- a/target-sparc/int32_helper.c
+++ b/target-sparc/int32_helper.c
@@ -62,6 +62,11 @@ void do_interrupt(CPUSPARCState *env)
{
int cwp, intno = env->exception_index;
+ /* Compute PSR before exposing state. */
+ if (env->cc_op != CC_OP_FLAGS) {
+ cpu_get_psr(env);
+ }
+
#ifdef DEBUG_PCALL
if (qemu_loglevel_mask(CPU_LOG_INT)) {
static int count;
diff --git a/target-sparc/int64_helper.c b/target-sparc/int64_helper.c
index 5d0bc6c..df37aa1 100644
--- a/target-sparc/int64_helper.c
+++ b/target-sparc/int64_helper.c
@@ -64,6 +64,11 @@ void do_interrupt(CPUSPARCState *env)
int intno = env->exception_index;
trap_state *tsptr;
+ /* Compute PSR before exposing state. */
+ if (env->cc_op != CC_OP_FLAGS) {
+ cpu_get_psr(env);
+ }
+
#ifdef DEBUG_PCALL
if (qemu_loglevel_mask(CPU_LOG_INT)) {
static int count;
diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index 65e6f23..6cef96b 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -1005,14 +1005,17 @@ static inline void save_npc(DisasContext *dc)
}
}
-static inline void save_state(DisasContext *dc)
+static inline void update_psr(DisasContext *dc)
{
- tcg_gen_movi_tl(cpu_pc, dc->pc);
- /* flush pending conditional evaluations before exposing cpu state */
if (dc->cc_op != CC_OP_FLAGS) {
dc->cc_op = CC_OP_FLAGS;
gen_helper_compute_psr(cpu_env);
}
+}
+
+static inline void save_state(DisasContext *dc)
+{
+ tcg_gen_movi_tl(cpu_pc, dc->pc);
save_npc(dc);
}
@@ -2704,7 +2707,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
break;
#ifdef TARGET_SPARC64
case 0x2: /* V9 rdccr */
- gen_helper_compute_psr(cpu_env);
+ update_psr(dc);
gen_helper_rdccr(cpu_dst, cpu_env);
gen_movl_TN_reg(rd, cpu_dst);
break;
@@ -2783,10 +2786,10 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
#if !defined(CONFIG_USER_ONLY)
} else if (xop == 0x29) { /* rdpsr / UA2005 rdhpr */
#ifndef TARGET_SPARC64
- if (!supervisor(dc))
+ if (!supervisor(dc)) {
goto priv_insn;
- gen_helper_compute_psr(cpu_env);
- dc->cc_op = CC_OP_FLAGS;
+ }
+ update_psr(dc);
gen_helper_rdpsr(cpu_dst, cpu_env);
#else
CHECK_IU_FEATURE(dc, HYPV);
@@ -3612,7 +3615,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
dc->cc_op = CC_OP_TSUBTV;
break;
case 0x24: /* mulscc */
- gen_helper_compute_psr(cpu_env);
+ update_psr(dc);
gen_op_mulscc(cpu_dst, cpu_src1, cpu_src2);
gen_movl_TN_reg(rd, cpu_dst);
tcg_gen_movi_i32(cpu_cc_op, CC_OP_ADD);
@@ -4651,12 +4654,6 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
{
unsigned int xop = GET_FIELD(insn, 7, 12);
- /* flush pending conditional evaluations before exposing
- cpu state */
- if (dc->cc_op != CC_OP_FLAGS) {
- dc->cc_op = CC_OP_FLAGS;
- gen_helper_compute_psr(cpu_env);
- }
cpu_src1 = get_src1(insn, cpu_src1);
if (xop == 0x3c || xop == 0x3e) { // V9 casa/casxa
rs2 = GET_FIELD(insn, 27, 31);
@@ -5507,9 +5504,4 @@ void restore_state_to_opc(CPUSPARCState *env, TranslationBlock *tb, int pc_pos)
} else {
env->npc = npc;
}
-
- /* flush pending conditional evaluations before exposing cpu state */
- if (CC_OP != CC_OP_FLAGS) {
- helper_compute_psr(env);
- }
}
--
1.7.11.4
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [Qemu-devel] [PATCH 0/4] target-sparc: More CC cleanups
2012-10-09 21:49 [Qemu-devel] [PATCH 0/4] target-sparc: More CC cleanups Richard Henderson
` (3 preceding siblings ...)
2012-10-09 21:50 ` [Qemu-devel] [PATCH 4/4] target-sparc: Don't compute full flags value so often Richard Henderson
@ 2012-10-13 14:18 ` Blue Swirl
4 siblings, 0 replies; 6+ messages in thread
From: Blue Swirl @ 2012-10-13 14:18 UTC (permalink / raw)
To: Richard Henderson; +Cc: qemu-devel
Thanks, applied all.
On Tue, Oct 9, 2012 at 9:49 PM, Richard Henderson <rth@twiddle.net> wrote:
> The first patch is a bug fix for the previous series,
> already posted.
>
> The second patch improves the common cases of using
> andcc or orcc to test bits in a register. It also
> just so happens to show potential cleanup value in
> the ALWAYS and NEVER conditions. ;-)
>
> The third patch is one that I thought I'd included in
> the previous series, but which got lost during rebasing.
>
> The last patch is worth an incredible 30% speedup to
> generated code, as measured by sparc64 cc1plus run on
> a somewhat large input file.
>
>
> r~
>
>
> Richard Henderson (4):
> target-sparc: Fix optimized %icc comparisons
> target-sparc: Optimize CC_OP_LOGIC conditions
> target-sparc: Avoid unnecessary local temporaries
> target-sparc: Don't compute full flags value so often
>
> linux-user/main.c | 5 ++
> target-sparc/int32_helper.c | 5 ++
> target-sparc/int64_helper.c | 5 ++
> target-sparc/translate.c | 119 +++++++++++++++++++++++---------------------
> 4 files changed, 76 insertions(+), 58 deletions(-)
>
> --
> 1.7.11.4
>
^ permalink raw reply [flat|nested] 6+ messages in thread