qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v2 0/5] target-arm: misc optimizations
@ 2012-09-21 19:33 Aurelien Jarno
  2012-09-21 19:33 ` [Qemu-devel] [PATCH v2 1/5] target-arm: use globals for CC flags Aurelien Jarno
                   ` (5 more replies)
  0 siblings, 6 replies; 15+ messages in thread
From: Aurelien Jarno @ 2012-09-21 19:33 UTC (permalink / raw)
  To: qemu-devel; +Cc: Peter Maydell, Aurelien Jarno

This patch series optimizes the ARM target by:
 - using globals instead of ld/st function
 - using TCG code instead of helpers
 - marking some helpers const and pure

--
Changes v1 -> v2
 - updated patch 3 to use movcond instead of setcond. Also converted sar.
 - updated patch 4 following changes in patch 3
 - added patch 5


Aurelien Jarno (5):
  target-arm: use globals for CC flags
  target-arm: convert add_cc and sub_cc helpers to TCG
  target-arm: convert sar, shl and shr helpers to TCG
  target-arm: mark a few integer helpers const and pure
  target-arm: use deposit instead of hardcoded version

 target-arm/helper.h    |   24 ++---
 target-arm/op_helper.c |   44 --------
 target-arm/translate.c |  263 ++++++++++++++++++++++++++----------------------
 3 files changed, 155 insertions(+), 176 deletions(-)

-- 
1.7.10.4

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [Qemu-devel] [PATCH v2 1/5] target-arm: use globals for CC flags
  2012-09-21 19:33 [Qemu-devel] [PATCH v2 0/5] target-arm: misc optimizations Aurelien Jarno
@ 2012-09-21 19:33 ` Aurelien Jarno
  2012-09-25 16:40   ` Peter Maydell
  2012-09-21 19:33 ` [Qemu-devel] [PATCH v2 2/5] target-arm: convert add_cc and sub_cc helpers to TCG Aurelien Jarno
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 15+ messages in thread
From: Aurelien Jarno @ 2012-09-21 19:33 UTC (permalink / raw)
  To: qemu-devel; +Cc: Peter Maydell, Aurelien Jarno

Use globals for CC flags instead of loading/storing them each they are
accessed. This allows some optimizations to be performed by the TCG
optimization passes.

Cc: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 target-arm/translate.c |  127 ++++++++++++++++++------------------------------
 1 file changed, 46 insertions(+), 81 deletions(-)

diff --git a/target-arm/translate.c b/target-arm/translate.c
index f4b447a..3deb24d 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -85,6 +85,7 @@ static TCGv_ptr cpu_env;
 /* We reuse the same 64-bit temporaries for efficiency.  */
 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
 static TCGv_i32 cpu_R[16];
+static TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
 static TCGv_i32 cpu_exclusive_addr;
 static TCGv_i32 cpu_exclusive_val;
 static TCGv_i32 cpu_exclusive_high;
@@ -115,6 +116,11 @@ void arm_translate_init(void)
                                           offsetof(CPUARMState, regs[i]),
                                           regnames[i]);
     }
+    cpu_CF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, CF), "CF");
+    cpu_NF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, NF), "NF");
+    cpu_VF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, VF), "VF");
+    cpu_ZF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, ZF), "ZF");
+
     cpu_exclusive_addr = tcg_global_mem_new_i32(TCG_AREG0,
         offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
     cpu_exclusive_val = tcg_global_mem_new_i32(TCG_AREG0,
@@ -369,53 +375,39 @@ static void gen_add16(TCGv t0, TCGv t1)
     tcg_temp_free_i32(t1);
 }
 
-#define gen_set_CF(var) tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, CF))
-
 /* Set CF to the top bit of var.  */
 static void gen_set_CF_bit31(TCGv var)
 {
-    TCGv tmp = tcg_temp_new_i32();
-    tcg_gen_shri_i32(tmp, var, 31);
-    gen_set_CF(tmp);
-    tcg_temp_free_i32(tmp);
+    tcg_gen_shri_i32(cpu_CF, var, 31);
 }
 
 /* Set N and Z flags from var.  */
 static inline void gen_logic_CC(TCGv var)
 {
-    tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, NF));
-    tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, ZF));
+    tcg_gen_mov_i32(cpu_NF, var);
+    tcg_gen_mov_i32(cpu_ZF, var);
 }
 
 /* T0 += T1 + CF.  */
 static void gen_adc(TCGv t0, TCGv t1)
 {
-    TCGv tmp;
     tcg_gen_add_i32(t0, t0, t1);
-    tmp = load_cpu_field(CF);
-    tcg_gen_add_i32(t0, t0, tmp);
-    tcg_temp_free_i32(tmp);
+    tcg_gen_add_i32(t0, t0, cpu_CF);
 }
 
 /* dest = T0 + T1 + CF. */
 static void gen_add_carry(TCGv dest, TCGv t0, TCGv t1)
 {
-    TCGv tmp;
     tcg_gen_add_i32(dest, t0, t1);
-    tmp = load_cpu_field(CF);
-    tcg_gen_add_i32(dest, dest, tmp);
-    tcg_temp_free_i32(tmp);
+    tcg_gen_add_i32(dest, dest, cpu_CF);
 }
 
 /* dest = T0 - T1 + CF - 1.  */
 static void gen_sub_carry(TCGv dest, TCGv t0, TCGv t1)
 {
-    TCGv tmp;
     tcg_gen_sub_i32(dest, t0, t1);
-    tmp = load_cpu_field(CF);
-    tcg_gen_add_i32(dest, dest, tmp);
+    tcg_gen_add_i32(dest, dest, cpu_CF);
     tcg_gen_subi_i32(dest, dest, 1);
-    tcg_temp_free_i32(tmp);
 }
 
 /* FIXME:  Implement this natively.  */
@@ -423,16 +415,14 @@ static void gen_sub_carry(TCGv dest, TCGv t0, TCGv t1)
 
 static void shifter_out_im(TCGv var, int shift)
 {
-    TCGv tmp = tcg_temp_new_i32();
     if (shift == 0) {
-        tcg_gen_andi_i32(tmp, var, 1);
+        tcg_gen_andi_i32(cpu_CF, var, 1);
     } else {
-        tcg_gen_shri_i32(tmp, var, shift);
-        if (shift != 31)
-            tcg_gen_andi_i32(tmp, tmp, 1);
+        tcg_gen_shri_i32(cpu_CF, var, shift);
+        if (shift != 31) {
+            tcg_gen_andi_i32(cpu_CF, cpu_CF, 1);
+        }
     }
-    gen_set_CF(tmp);
-    tcg_temp_free_i32(tmp);
 }
 
 /* Shift by immediate.  Includes special handling for shift == 0.  */
@@ -449,8 +439,7 @@ static inline void gen_arm_shift_im(TCGv var, int shiftop, int shift, int flags)
     case 1: /* LSR */
         if (shift == 0) {
             if (flags) {
-                tcg_gen_shri_i32(var, var, 31);
-                gen_set_CF(var);
+                tcg_gen_shri_i32(cpu_CF, var, 31);
             }
             tcg_gen_movi_i32(var, 0);
         } else {
@@ -474,11 +463,11 @@ static inline void gen_arm_shift_im(TCGv var, int shiftop, int shift, int flags)
                 shifter_out_im(var, shift - 1);
             tcg_gen_rotri_i32(var, var, shift); break;
         } else {
-            TCGv tmp = load_cpu_field(CF);
+            TCGv tmp = tcg_temp_new_i32();
             if (flags)
                 shifter_out_im(var, 0);
             tcg_gen_shri_i32(var, var, 1);
-            tcg_gen_shli_i32(tmp, tmp, 31);
+            tcg_gen_shli_i32(tmp, cpu_CF, 31);
             tcg_gen_or_i32(var, var, tmp);
             tcg_temp_free_i32(tmp);
         }
@@ -603,99 +592,75 @@ static void gen_thumb2_parallel_addsub(int op1, int op2, TCGv a, TCGv b)
 static void gen_test_cc(int cc, int label)
 {
     TCGv tmp;
-    TCGv tmp2;
     int inv;
 
     switch (cc) {
     case 0: /* eq: Z */
-        tmp = load_cpu_field(ZF);
-        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
+        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, label);
         break;
     case 1: /* ne: !Z */
-        tmp = load_cpu_field(ZF);
-        tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, label);
+        tcg_gen_brcondi_i32(TCG_COND_NE, cpu_ZF, 0, label);
         break;
     case 2: /* cs: C */
-        tmp = load_cpu_field(CF);
-        tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, label);
+        tcg_gen_brcondi_i32(TCG_COND_NE, cpu_CF, 0, label);
         break;
     case 3: /* cc: !C */
-        tmp = load_cpu_field(CF);
-        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
+        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_CF, 0, label);
         break;
     case 4: /* mi: N */
-        tmp = load_cpu_field(NF);
-        tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
+        tcg_gen_brcondi_i32(TCG_COND_LT, cpu_NF, 0, label);
         break;
     case 5: /* pl: !N */
-        tmp = load_cpu_field(NF);
-        tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
+        tcg_gen_brcondi_i32(TCG_COND_GE, cpu_NF, 0, label);
         break;
     case 6: /* vs: V */
-        tmp = load_cpu_field(VF);
-        tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
+        tcg_gen_brcondi_i32(TCG_COND_LT, cpu_VF, 0, label);
         break;
     case 7: /* vc: !V */
-        tmp = load_cpu_field(VF);
-        tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
+        tcg_gen_brcondi_i32(TCG_COND_GE, cpu_VF, 0, label);
         break;
     case 8: /* hi: C && !Z */
         inv = gen_new_label();
-        tmp = load_cpu_field(CF);
-        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, inv);
-        tcg_temp_free_i32(tmp);
-        tmp = load_cpu_field(ZF);
-        tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, label);
+        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_CF, 0, inv);
+        tcg_gen_brcondi_i32(TCG_COND_NE, cpu_ZF, 0, label);
         gen_set_label(inv);
         break;
     case 9: /* ls: !C || Z */
-        tmp = load_cpu_field(CF);
-        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
-        tcg_temp_free_i32(tmp);
-        tmp = load_cpu_field(ZF);
-        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
+        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_CF, 0, label);
+        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, label);
         break;
     case 10: /* ge: N == V -> N ^ V == 0 */
-        tmp = load_cpu_field(VF);
-        tmp2 = load_cpu_field(NF);
-        tcg_gen_xor_i32(tmp, tmp, tmp2);
-        tcg_temp_free_i32(tmp2);
+        tmp = tcg_temp_new_i32();
+        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
         tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
+        tcg_temp_free_i32(tmp);
         break;
     case 11: /* lt: N != V -> N ^ V != 0 */
-        tmp = load_cpu_field(VF);
-        tmp2 = load_cpu_field(NF);
-        tcg_gen_xor_i32(tmp, tmp, tmp2);
-        tcg_temp_free_i32(tmp2);
+        tmp = tcg_temp_new_i32();
+        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
         tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
+        tcg_temp_free_i32(tmp);
         break;
     case 12: /* gt: !Z && N == V */
         inv = gen_new_label();
-        tmp = load_cpu_field(ZF);
-        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, inv);
-        tcg_temp_free_i32(tmp);
-        tmp = load_cpu_field(VF);
-        tmp2 = load_cpu_field(NF);
-        tcg_gen_xor_i32(tmp, tmp, tmp2);
-        tcg_temp_free_i32(tmp2);
+        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, inv);
+        tmp = tcg_temp_new_i32();
+        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
         tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
+        tcg_temp_free_i32(tmp);
         gen_set_label(inv);
         break;
     case 13: /* le: Z || N != V */
-        tmp = load_cpu_field(ZF);
-        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
-        tcg_temp_free_i32(tmp);
-        tmp = load_cpu_field(VF);
-        tmp2 = load_cpu_field(NF);
-        tcg_gen_xor_i32(tmp, tmp, tmp2);
-        tcg_temp_free_i32(tmp2);
+        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, label);
+        tmp = tcg_temp_new_i32();
+        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
         tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
+        tcg_temp_free_i32(tmp);
         break;
     default:
         fprintf(stderr, "Bad condition code 0x%x\n", cc);
         abort();
     }
-    tcg_temp_free_i32(tmp);
 }
 
 static const uint8_t table_logic_cc[16] = {
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [Qemu-devel] [PATCH v2 2/5] target-arm: convert add_cc and sub_cc helpers to TCG
  2012-09-21 19:33 [Qemu-devel] [PATCH v2 0/5] target-arm: misc optimizations Aurelien Jarno
  2012-09-21 19:33 ` [Qemu-devel] [PATCH v2 1/5] target-arm: use globals for CC flags Aurelien Jarno
@ 2012-09-21 19:33 ` Aurelien Jarno
  2012-09-25 16:40   ` Peter Maydell
  2012-09-21 19:33 ` [Qemu-devel] [PATCH v2 3/5] target-arm: convert sar, shl and shr " Aurelien Jarno
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 15+ messages in thread
From: Aurelien Jarno @ 2012-09-21 19:33 UTC (permalink / raw)
  To: qemu-devel; +Cc: Peter Maydell, Aurelien Jarno

Now that the setcond TCG op is available, it's possible to replace
add_cc and sub_cc helpers by TCG code. The code generated by TCG is
actually very close to the one generated by GCC for the helper, and
this avoid all the consequences of using an helper: globals saved back
to memory, no possible optimization, call overhead, etc.

Cc: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 target-arm/helper.h    |    2 --
 target-arm/op_helper.c |   20 ---------------
 target-arm/translate.c |   66 +++++++++++++++++++++++++++++++++++-------------
 3 files changed, 48 insertions(+), 40 deletions(-)

diff --git a/target-arm/helper.h b/target-arm/helper.h
index afdb2b5..7151e28 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -142,9 +142,7 @@ DEF_HELPER_2(recpe_u32, i32, i32, env)
 DEF_HELPER_2(rsqrte_u32, i32, i32, env)
 DEF_HELPER_5(neon_tbl, i32, env, i32, i32, i32, i32)
 
-DEF_HELPER_3(add_cc, i32, env, i32, i32)
 DEF_HELPER_3(adc_cc, i32, env, i32, i32)
-DEF_HELPER_3(sub_cc, i32, env, i32, i32)
 DEF_HELPER_3(sbc_cc, i32, env, i32, i32)
 
 DEF_HELPER_3(shl, i32, env, i32, i32)
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index f13fc3a..6095f24 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -323,16 +323,6 @@ uint64_t HELPER(get_cp_reg64)(CPUARMState *env, void *rip)
    The only way to do that in TCG is a conditional branch, which clobbers
    all our temporaries.  For now implement these as helper functions.  */
 
-uint32_t HELPER (add_cc)(CPUARMState *env, uint32_t a, uint32_t b)
-{
-    uint32_t result;
-    result = a + b;
-    env->NF = env->ZF = result;
-    env->CF = result < a;
-    env->VF = (a ^ b ^ -1) & (a ^ result);
-    return result;
-}
-
 uint32_t HELPER(adc_cc)(CPUARMState *env, uint32_t a, uint32_t b)
 {
     uint32_t result;
@@ -348,16 +338,6 @@ uint32_t HELPER(adc_cc)(CPUARMState *env, uint32_t a, uint32_t b)
     return result;
 }
 
-uint32_t HELPER(sub_cc)(CPUARMState *env, uint32_t a, uint32_t b)
-{
-    uint32_t result;
-    result = a - b;
-    env->NF = env->ZF = result;
-    env->CF = a >= b;
-    env->VF = (a ^ b) & (a ^ result);
-    return result;
-}
-
 uint32_t HELPER(sbc_cc)(CPUARMState *env, uint32_t a, uint32_t b)
 {
     uint32_t result;
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 3deb24d..19bb1e8 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -410,6 +410,36 @@ static void gen_sub_carry(TCGv dest, TCGv t0, TCGv t1)
     tcg_gen_subi_i32(dest, dest, 1);
 }
 
+/* dest = T0 + T1. Compute C, N, V and Z flags */
+static void gen_add_CC(TCGv dest, TCGv t0, TCGv t1)
+{
+    TCGv tmp;
+    tcg_gen_add_i32(cpu_NF, t0, t1);
+    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
+    tcg_gen_setcond_i32(TCG_COND_LTU, cpu_CF, cpu_NF, t0);
+    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
+    tmp = tcg_temp_new_i32();
+    tcg_gen_xor_i32(tmp, t0, t1);
+    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
+    tcg_temp_free_i32(tmp);
+    tcg_gen_mov_i32(dest, cpu_NF);
+}
+
+/* dest = T0 - T1. Compute C, N, V and Z flags */
+static void gen_sub_CC(TCGv dest, TCGv t0, TCGv t1)
+{
+    TCGv tmp;
+    tcg_gen_sub_i32(cpu_NF, t0, t1);
+    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
+    tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
+    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
+    tmp = tcg_temp_new_i32();
+    tcg_gen_xor_i32(tmp, t0, t1);
+    tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
+    tcg_temp_free_i32(tmp);
+    tcg_gen_mov_i32(dest, cpu_NF);
+}
+
 /* FIXME:  Implement this natively.  */
 #define tcg_gen_abs_i32(t0, t1) gen_helper_abs(t0, t1)
 
@@ -6970,11 +7000,11 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
                 if (IS_USER(s)) {
                     goto illegal_op;
                 }
-                gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
+                gen_sub_CC(tmp, tmp, tmp2);
                 gen_exception_return(s, tmp);
             } else {
                 if (set_cc) {
-                    gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
+                    gen_sub_CC(tmp, tmp, tmp2);
                 } else {
                     tcg_gen_sub_i32(tmp, tmp, tmp2);
                 }
@@ -6983,7 +7013,7 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
             break;
         case 0x03:
             if (set_cc) {
-                gen_helper_sub_cc(tmp, cpu_env, tmp2, tmp);
+                gen_sub_CC(tmp, tmp2, tmp);
             } else {
                 tcg_gen_sub_i32(tmp, tmp2, tmp);
             }
@@ -6991,7 +7021,7 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
             break;
         case 0x04:
             if (set_cc) {
-                gen_helper_add_cc(tmp, cpu_env, tmp, tmp2);
+                gen_add_CC(tmp, tmp, tmp2);
             } else {
                 tcg_gen_add_i32(tmp, tmp, tmp2);
             }
@@ -7037,13 +7067,13 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
             break;
         case 0x0a:
             if (set_cc) {
-                gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
+                gen_sub_CC(tmp, tmp, tmp2);
             }
             tcg_temp_free_i32(tmp);
             break;
         case 0x0b:
             if (set_cc) {
-                gen_helper_add_cc(tmp, cpu_env, tmp, tmp2);
+                gen_add_CC(tmp, tmp, tmp2);
             }
             tcg_temp_free_i32(tmp);
             break;
@@ -7830,7 +7860,7 @@ gen_thumb2_data_op(DisasContext *s, int op, int conds, uint32_t shifter_out, TCG
         break;
     case 8: /* add */
         if (conds)
-            gen_helper_add_cc(t0, cpu_env, t0, t1);
+            gen_add_CC(t0, t0, t1);
         else
             tcg_gen_add_i32(t0, t0, t1);
         break;
@@ -7848,13 +7878,13 @@ gen_thumb2_data_op(DisasContext *s, int op, int conds, uint32_t shifter_out, TCG
         break;
     case 13: /* sub */
         if (conds)
-            gen_helper_sub_cc(t0, cpu_env, t0, t1);
+            gen_sub_CC(t0, t0, t1);
         else
             tcg_gen_sub_i32(t0, t0, t1);
         break;
     case 14: /* rsb */
         if (conds)
-            gen_helper_sub_cc(t0, cpu_env, t1, t0);
+            gen_sub_CC(t0, t1, t0);
         else
             tcg_gen_sub_i32(t0, t1, t0);
         break;
@@ -8982,12 +9012,12 @@ static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
                 if (s->condexec_mask)
                     tcg_gen_sub_i32(tmp, tmp, tmp2);
                 else
-                    gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
+                    gen_sub_CC(tmp, tmp, tmp2);
             } else {
                 if (s->condexec_mask)
                     tcg_gen_add_i32(tmp, tmp, tmp2);
                 else
-                    gen_helper_add_cc(tmp, cpu_env, tmp, tmp2);
+                    gen_add_CC(tmp, tmp, tmp2);
             }
             tcg_temp_free_i32(tmp2);
             store_reg(s, rd, tmp);
@@ -9018,7 +9048,7 @@ static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
             tcg_gen_movi_i32(tmp2, insn & 0xff);
             switch (op) {
             case 1: /* cmp */
-                gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
+                gen_sub_CC(tmp, tmp, tmp2);
                 tcg_temp_free_i32(tmp);
                 tcg_temp_free_i32(tmp2);
                 break;
@@ -9026,7 +9056,7 @@ static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
                 if (s->condexec_mask)
                     tcg_gen_add_i32(tmp, tmp, tmp2);
                 else
-                    gen_helper_add_cc(tmp, cpu_env, tmp, tmp2);
+                    gen_add_CC(tmp, tmp, tmp2);
                 tcg_temp_free_i32(tmp2);
                 store_reg(s, rd, tmp);
                 break;
@@ -9034,7 +9064,7 @@ static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
                 if (s->condexec_mask)
                     tcg_gen_sub_i32(tmp, tmp, tmp2);
                 else
-                    gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
+                    gen_sub_CC(tmp, tmp, tmp2);
                 tcg_temp_free_i32(tmp2);
                 store_reg(s, rd, tmp);
                 break;
@@ -9070,7 +9100,7 @@ static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
             case 1: /* cmp */
                 tmp = load_reg(s, rd);
                 tmp2 = load_reg(s, rm);
-                gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
+                gen_sub_CC(tmp, tmp, tmp2);
                 tcg_temp_free_i32(tmp2);
                 tcg_temp_free_i32(tmp);
                 break;
@@ -9183,14 +9213,14 @@ static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
             if (s->condexec_mask)
                 tcg_gen_neg_i32(tmp, tmp2);
             else
-                gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
+                gen_sub_CC(tmp, tmp, tmp2);
             break;
         case 0xa: /* cmp */
-            gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
+            gen_sub_CC(tmp, tmp, tmp2);
             rd = 16;
             break;
         case 0xb: /* cmn */
-            gen_helper_add_cc(tmp, cpu_env, tmp, tmp2);
+            gen_add_CC(tmp, tmp, tmp2);
             rd = 16;
             break;
         case 0xc: /* orr */
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [Qemu-devel] [PATCH v2 3/5] target-arm: convert sar, shl and shr helpers to TCG
  2012-09-21 19:33 [Qemu-devel] [PATCH v2 0/5] target-arm: misc optimizations Aurelien Jarno
  2012-09-21 19:33 ` [Qemu-devel] [PATCH v2 1/5] target-arm: use globals for CC flags Aurelien Jarno
  2012-09-21 19:33 ` [Qemu-devel] [PATCH v2 2/5] target-arm: convert add_cc and sub_cc helpers to TCG Aurelien Jarno
@ 2012-09-21 19:33 ` Aurelien Jarno
  2012-09-21 23:14   ` Richard Henderson
  2012-09-25 13:57   ` Peter Maydell
  2012-09-21 19:33 ` [Qemu-devel] [PATCH v2 4/5] target-arm: mark a few integer helpers const and pure Aurelien Jarno
                   ` (2 subsequent siblings)
  5 siblings, 2 replies; 15+ messages in thread
From: Aurelien Jarno @ 2012-09-21 19:33 UTC (permalink / raw)
  To: qemu-devel; +Cc: Peter Maydell, Aurelien Jarno

Now that the movcond TCG op is available, it's possible to replace
shl and shr helpers by TCG code. The code generated by TCG is slightly
longer than the code generated by GCC for the helper but is still worth
it as this avoid all the consequences of using an helper: globals saved
back to memory, no possible optimization, call overhead, etc.

Cc: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 target-arm/helper.h    |    3 ---
 target-arm/op_helper.c |   24 ----------------------
 target-arm/translate.c |   52 ++++++++++++++++++++++++++++++++++++++++++------
 3 files changed, 46 insertions(+), 33 deletions(-)

diff --git a/target-arm/helper.h b/target-arm/helper.h
index 7151e28..794e2b1 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -145,9 +145,6 @@ DEF_HELPER_5(neon_tbl, i32, env, i32, i32, i32, i32)
 DEF_HELPER_3(adc_cc, i32, env, i32, i32)
 DEF_HELPER_3(sbc_cc, i32, env, i32, i32)
 
-DEF_HELPER_3(shl, i32, env, i32, i32)
-DEF_HELPER_3(shr, i32, env, i32, i32)
-DEF_HELPER_3(sar, i32, env, i32, i32)
 DEF_HELPER_3(shl_cc, i32, env, i32, i32)
 DEF_HELPER_3(shr_cc, i32, env, i32, i32)
 DEF_HELPER_3(sar_cc, i32, env, i32, i32)
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index 6095f24..aef592a 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -355,30 +355,6 @@ uint32_t HELPER(sbc_cc)(CPUARMState *env, uint32_t a, uint32_t b)
 
 /* Similarly for variable shift instructions.  */
 
-uint32_t HELPER(shl)(CPUARMState *env, uint32_t x, uint32_t i)
-{
-    int shift = i & 0xff;
-    if (shift >= 32)
-        return 0;
-    return x << shift;
-}
-
-uint32_t HELPER(shr)(CPUARMState *env, uint32_t x, uint32_t i)
-{
-    int shift = i & 0xff;
-    if (shift >= 32)
-        return 0;
-    return (uint32_t)x >> shift;
-}
-
-uint32_t HELPER(sar)(CPUARMState *env, uint32_t x, uint32_t i)
-{
-    int shift = i & 0xff;
-    if (shift >= 32)
-        shift = 31;
-    return (int32_t)x >> shift;
-}
-
 uint32_t HELPER(shl_cc)(CPUARMState *env, uint32_t x, uint32_t i)
 {
     int shift = i & 0xff;
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 19bb1e8..9abc115 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -440,6 +440,40 @@ static void gen_sub_CC(TCGv dest, TCGv t0, TCGv t1)
     tcg_gen_mov_i32(dest, cpu_NF);
 }
 
+#define GEN_SHIFT(name)                                               \
+static void gen_##name(TCGv dest, TCGv t0, TCGv t1)                   \
+{                                                                     \
+    TCGv tmp1, tmp2, tmp3;                                            \
+    tmp1 = tcg_temp_new_i32();                                        \
+    tcg_gen_andi_i32(tmp1, t1, 0xff);                                 \
+    tmp2 = tcg_const_i32(0);                                          \
+    tmp3 = tcg_const_i32(0x1f);                                       \
+    tcg_gen_movcond_i32(TCG_COND_GTU, tmp2, tmp1, tmp3, tmp2, t0);    \
+    tcg_temp_free_i32(tmp3);                                          \
+    tcg_gen_andi_i32(tmp1, tmp1, 0x1f);                               \
+    tcg_gen_##name##_i32(dest, tmp2, tmp1);                           \
+    tcg_temp_free_i32(tmp2);                                          \
+    tcg_temp_free_i32(tmp1);                                          \
+}
+GEN_SHIFT(shl)
+GEN_SHIFT(shr)
+#undef GEN_SHIFT
+
+static void gen_sar(TCGv dest, TCGv t0, TCGv t1)
+{
+    TCGv tmp1, tmp2, tmp3;
+    tmp1 = tcg_temp_new_i32();
+    tcg_gen_andi_i32(tmp1, t1, 0xff);
+    tmp2 = tcg_const_i32(0x1f);
+    tmp3 = tcg_const_i32(0);
+    tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
+    tcg_temp_free_i32(tmp3);
+    tcg_temp_free_i32(tmp2);
+    tcg_gen_andi_i32(tmp1, tmp1, 0x1f);
+    tcg_gen_sar_i32(dest, t0, tmp1);
+    tcg_temp_free_i32(tmp1);
+}
+
 /* FIXME:  Implement this natively.  */
 #define tcg_gen_abs_i32(t0, t1) gen_helper_abs(t0, t1)
 
@@ -516,9 +550,15 @@ static inline void gen_arm_shift_reg(TCGv var, int shiftop,
         }
     } else {
         switch (shiftop) {
-        case 0: gen_helper_shl(var, cpu_env, var, shift); break;
-        case 1: gen_helper_shr(var, cpu_env, var, shift); break;
-        case 2: gen_helper_sar(var, cpu_env, var, shift); break;
+        case 0:
+            gen_shl(var, var, shift);
+            break;
+        case 1:
+            gen_shr(var, var, shift);
+            break;
+        case 2:
+            gen_sar(var, var, shift);
+            break;
         case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
                 tcg_gen_rotr_i32(var, var, shift); break;
         }
@@ -9161,7 +9201,7 @@ static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
             break;
         case 0x2: /* lsl */
             if (s->condexec_mask) {
-                gen_helper_shl(tmp2, cpu_env, tmp2, tmp);
+                gen_shl(tmp2, tmp2, tmp);
             } else {
                 gen_helper_shl_cc(tmp2, cpu_env, tmp2, tmp);
                 gen_logic_CC(tmp2);
@@ -9169,7 +9209,7 @@ static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
             break;
         case 0x3: /* lsr */
             if (s->condexec_mask) {
-                gen_helper_shr(tmp2, cpu_env, tmp2, tmp);
+                gen_shr(tmp2, tmp2, tmp);
             } else {
                 gen_helper_shr_cc(tmp2, cpu_env, tmp2, tmp);
                 gen_logic_CC(tmp2);
@@ -9177,7 +9217,7 @@ static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
             break;
         case 0x4: /* asr */
             if (s->condexec_mask) {
-                gen_helper_sar(tmp2, cpu_env, tmp2, tmp);
+                gen_sar(tmp2, tmp2, tmp);
             } else {
                 gen_helper_sar_cc(tmp2, cpu_env, tmp2, tmp);
                 gen_logic_CC(tmp2);
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [Qemu-devel] [PATCH v2 4/5] target-arm: mark a few integer helpers const and pure
  2012-09-21 19:33 [Qemu-devel] [PATCH v2 0/5] target-arm: misc optimizations Aurelien Jarno
                   ` (2 preceding siblings ...)
  2012-09-21 19:33 ` [Qemu-devel] [PATCH v2 3/5] target-arm: convert sar, shl and shr " Aurelien Jarno
@ 2012-09-21 19:33 ` Aurelien Jarno
  2012-09-25 16:40   ` Peter Maydell
  2012-09-21 19:33 ` [Qemu-devel] [PATCH v2 5/5] target-arm: use deposit instead of hardcoded version Aurelien Jarno
  2012-09-25 16:39 ` [Qemu-devel] [PATCH v2 0/5] target-arm: misc optimizations Peter Maydell
  5 siblings, 1 reply; 15+ messages in thread
From: Aurelien Jarno @ 2012-09-21 19:33 UTC (permalink / raw)
  To: qemu-devel; +Cc: Peter Maydell, Aurelien Jarno

Cc: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 target-arm/helper.h |   19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/target-arm/helper.h b/target-arm/helper.h
index 794e2b1..8b9adf1 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -1,8 +1,8 @@
 #include "def-helper.h"
 
-DEF_HELPER_1(clz, i32, i32)
-DEF_HELPER_1(sxtb16, i32, i32)
-DEF_HELPER_1(uxtb16, i32, i32)
+DEF_HELPER_FLAGS_1(clz, TCG_CALL_CONST | TCG_CALL_PURE, i32, i32)
+DEF_HELPER_FLAGS_1(sxtb16, TCG_CALL_CONST | TCG_CALL_PURE, i32, i32)
+DEF_HELPER_FLAGS_1(uxtb16, TCG_CALL_CONST | TCG_CALL_PURE, i32, i32)
 
 DEF_HELPER_3(add_setq, i32, env, i32, i32)
 DEF_HELPER_3(add_saturate, i32, env, i32, i32)
@@ -10,10 +10,10 @@ DEF_HELPER_3(sub_saturate, i32, env, i32, i32)
 DEF_HELPER_3(add_usaturate, i32, env, i32, i32)
 DEF_HELPER_3(sub_usaturate, i32, env, i32, i32)
 DEF_HELPER_2(double_saturate, i32, env, s32)
-DEF_HELPER_2(sdiv, s32, s32, s32)
-DEF_HELPER_2(udiv, i32, i32, i32)
-DEF_HELPER_1(rbit, i32, i32)
-DEF_HELPER_1(abs, i32, i32)
+DEF_HELPER_FLAGS_2(sdiv, TCG_CALL_CONST | TCG_CALL_PURE, s32, s32, s32)
+DEF_HELPER_FLAGS_2(udiv, TCG_CALL_CONST | TCG_CALL_PURE, i32, i32, i32)
+DEF_HELPER_FLAGS_1(rbit, TCG_CALL_CONST | TCG_CALL_PURE, i32, i32)
+DEF_HELPER_FLAGS_1(abs, TCG_CALL_CONST | TCG_CALL_PURE, i32, i32)
 
 #define PAS_OP(pfx)  \
     DEF_HELPER_3(pfx ## add8, i32, i32, i32, ptr) \
@@ -45,11 +45,12 @@ DEF_HELPER_3(usat, i32, env, i32, i32)
 DEF_HELPER_3(ssat16, i32, env, i32, i32)
 DEF_HELPER_3(usat16, i32, env, i32, i32)
 
-DEF_HELPER_2(usad8, i32, i32, i32)
+DEF_HELPER_FLAGS_2(usad8, TCG_CALL_CONST | TCG_CALL_PURE, i32, i32, i32)
 
 DEF_HELPER_1(logicq_cc, i32, i64)
 
-DEF_HELPER_3(sel_flags, i32, i32, i32, i32)
+DEF_HELPER_FLAGS_3(sel_flags, TCG_CALL_CONST | TCG_CALL_PURE,
+                   i32, i32, i32, i32)
 DEF_HELPER_2(exception, void, env, i32)
 DEF_HELPER_1(wfi, void, env)
 
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [Qemu-devel] [PATCH v2 5/5] target-arm: use deposit instead of hardcoded version
  2012-09-21 19:33 [Qemu-devel] [PATCH v2 0/5] target-arm: misc optimizations Aurelien Jarno
                   ` (3 preceding siblings ...)
  2012-09-21 19:33 ` [Qemu-devel] [PATCH v2 4/5] target-arm: mark a few integer helpers const and pure Aurelien Jarno
@ 2012-09-21 19:33 ` Aurelien Jarno
  2012-09-25 16:41   ` Peter Maydell
  2012-09-25 16:39 ` [Qemu-devel] [PATCH v2 0/5] target-arm: misc optimizations Peter Maydell
  5 siblings, 1 reply; 15+ messages in thread
From: Aurelien Jarno @ 2012-09-21 19:33 UTC (permalink / raw)
  To: qemu-devel; +Cc: Peter Maydell, Aurelien Jarno

Use the deposit op instead of and hardcoded bit field insertion. It
allows the host to emit the corresponding instruction if available.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 target-arm/translate.c |   20 ++++++--------------
 1 file changed, 6 insertions(+), 14 deletions(-)

diff --git a/target-arm/translate.c b/target-arm/translate.c
index 9abc115..762d8ee 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -277,15 +277,6 @@ static void gen_sbfx(TCGv var, int shift, int width)
     }
 }
 
-/* Bitfield insertion.  Insert val into base.  Clobbers base and val.  */
-static void gen_bfi(TCGv dest, TCGv base, TCGv val, int shift, uint32_t mask)
-{
-    tcg_gen_andi_i32(val, val, mask);
-    tcg_gen_shli_i32(val, val, shift);
-    tcg_gen_andi_i32(base, base, ~(mask << shift));
-    tcg_gen_or_i32(dest, base, val);
-}
-
 /* Return (b << 32) + a. Mark inputs as dead */
 static TCGv_i64 gen_addq_msw(TCGv_i64 a, TCGv b)
 {
@@ -2663,12 +2654,12 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
                         switch (size) {
                         case 0:
                             tmp2 = neon_load_reg(rn, pass);
-                            gen_bfi(tmp, tmp2, tmp, offset, 0xff);
+                            tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 8);
                             tcg_temp_free_i32(tmp2);
                             break;
                         case 1:
                             tmp2 = neon_load_reg(rn, pass);
-                            gen_bfi(tmp, tmp2, tmp, offset, 0xffff);
+                            tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 16);
                             tcg_temp_free_i32(tmp2);
                             break;
                         case 2:
@@ -4024,7 +4015,8 @@ static int disas_neon_ls_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
                     }
                     if (size != 2) {
                         tmp2 = neon_load_reg(rd, pass);
-                        gen_bfi(tmp, tmp2, tmp, shift, size ? 0xffff : 0xff);
+                        tcg_gen_deposit_i32(tmp, tmp2, tmp,
+                                            shift, size ? 16 : 8);
                         tcg_temp_free_i32(tmp2);
                     }
                     neon_store_reg(rd, pass, tmp);
@@ -7628,7 +7620,7 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
                         }
                         if (i != 32) {
                             tmp2 = load_reg(s, rd);
-                            gen_bfi(tmp, tmp2, tmp, shift, (1u << i) - 1);
+                            tcg_gen_deposit_i32(tmp, tmp2, tmp, shift, i);
                             tcg_temp_free_i32(tmp2);
                         }
                         store_reg(s, rd, tmp);
@@ -8739,7 +8731,7 @@ static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw
                         imm = imm + 1 - shift;
                         if (imm != 32) {
                             tmp2 = load_reg(s, rd);
-                            gen_bfi(tmp, tmp2, tmp, shift, (1u << imm) - 1);
+                            tcg_gen_deposit_i32(tmp, tmp2, tmp, shift, imm);
                             tcg_temp_free_i32(tmp2);
                         }
                         break;
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [PATCH v2 3/5] target-arm: convert sar, shl and shr helpers to TCG
  2012-09-21 19:33 ` [Qemu-devel] [PATCH v2 3/5] target-arm: convert sar, shl and shr " Aurelien Jarno
@ 2012-09-21 23:14   ` Richard Henderson
  2012-09-22  9:33     ` Aurelien Jarno
  2012-09-25 13:57   ` Peter Maydell
  1 sibling, 1 reply; 15+ messages in thread
From: Richard Henderson @ 2012-09-21 23:14 UTC (permalink / raw)
  To: Aurelien Jarno; +Cc: Peter Maydell, qemu-devel

On 09/21/2012 12:33 PM, Aurelien Jarno wrote:
> +static void gen_sar(TCGv dest, TCGv t0, TCGv t1)
> +{
> +    TCGv tmp1, tmp2, tmp3;
> +    tmp1 = tcg_temp_new_i32();
> +    tcg_gen_andi_i32(tmp1, t1, 0xff);
> +    tmp2 = tcg_const_i32(0x1f);
> +    tmp3 = tcg_const_i32(0);
> +    tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
> +    tcg_temp_free_i32(tmp3);
> +    tcg_temp_free_i32(tmp2);
> +    tcg_gen_andi_i32(tmp1, tmp1, 0x1f);
> +    tcg_gen_sar_i32(dest, t0, tmp1);
> +    tcg_temp_free_i32(tmp1);
> +}

tmp3 is unused in this version.


r~

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [PATCH v2 3/5] target-arm: convert sar, shl and shr helpers to TCG
  2012-09-21 23:14   ` Richard Henderson
@ 2012-09-22  9:33     ` Aurelien Jarno
  0 siblings, 0 replies; 15+ messages in thread
From: Aurelien Jarno @ 2012-09-22  9:33 UTC (permalink / raw)
  To: Richard Henderson; +Cc: Peter Maydell, qemu-devel

On Fri, Sep 21, 2012 at 04:14:29PM -0700, Richard Henderson wrote:
> On 09/21/2012 12:33 PM, Aurelien Jarno wrote:
> > +static void gen_sar(TCGv dest, TCGv t0, TCGv t1)
> > +{
> > +    TCGv tmp1, tmp2, tmp3;
> > +    tmp1 = tcg_temp_new_i32();
> > +    tcg_gen_andi_i32(tmp1, t1, 0xff);
> > +    tmp2 = tcg_const_i32(0x1f);
> > +    tmp3 = tcg_const_i32(0);
> > +    tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
> > +    tcg_temp_free_i32(tmp3);
> > +    tcg_temp_free_i32(tmp2);
> > +    tcg_gen_andi_i32(tmp1, tmp1, 0x1f);
> > +    tcg_gen_sar_i32(dest, t0, tmp1);
> > +    tcg_temp_free_i32(tmp1);
> > +}
> 
> tmp3 is unused in this version.
> 

Good catch, fix locally.

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [PATCH v2 3/5] target-arm: convert sar, shl and shr helpers to TCG
  2012-09-21 19:33 ` [Qemu-devel] [PATCH v2 3/5] target-arm: convert sar, shl and shr " Aurelien Jarno
  2012-09-21 23:14   ` Richard Henderson
@ 2012-09-25 13:57   ` Peter Maydell
  2012-09-25 22:41     ` Aurelien Jarno
  1 sibling, 1 reply; 15+ messages in thread
From: Peter Maydell @ 2012-09-25 13:57 UTC (permalink / raw)
  To: Aurelien Jarno; +Cc: qemu-devel

On 21 September 2012 20:33, Aurelien Jarno <aurelien@aurel32.net> wrote:
> +static void gen_sar(TCGv dest, TCGv t0, TCGv t1)
> +{
> +    TCGv tmp1, tmp2, tmp3;
> +    tmp1 = tcg_temp_new_i32();
> +    tcg_gen_andi_i32(tmp1, t1, 0xff);
> +    tmp2 = tcg_const_i32(0x1f);
> +    tmp3 = tcg_const_i32(0);
> +    tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
> +    tcg_temp_free_i32(tmp3);
> +    tcg_temp_free_i32(tmp2);
> +    tcg_gen_andi_i32(tmp1, tmp1, 0x1f);
> +    tcg_gen_sar_i32(dest, t0, tmp1);
> +    tcg_temp_free_i32(tmp1);
> +}

I don't think you need the "tcg_gen_andi_i32(tmp1, tmp1, 0x1f);"
for sar, do you? The movcond is doing "shift = (shift > 31) ? 31 : shift"
so we know it's going to be in [0..31] and the and will do nothing,
right?

thanks
-- PMM

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [PATCH v2 0/5] target-arm: misc optimizations
  2012-09-21 19:33 [Qemu-devel] [PATCH v2 0/5] target-arm: misc optimizations Aurelien Jarno
                   ` (4 preceding siblings ...)
  2012-09-21 19:33 ` [Qemu-devel] [PATCH v2 5/5] target-arm: use deposit instead of hardcoded version Aurelien Jarno
@ 2012-09-25 16:39 ` Peter Maydell
  5 siblings, 0 replies; 15+ messages in thread
From: Peter Maydell @ 2012-09-25 16:39 UTC (permalink / raw)
  To: Aurelien Jarno; +Cc: qemu-devel

On 21 September 2012 20:33, Aurelien Jarno <aurelien@aurel32.net> wrote:
> This patch series optimizes the ARM target by:
>  - using globals instead of ld/st function
>  - using TCG code instead of helpers
>  - marking some helpers const and pure

My slightly dubious benchmark script seems to show improvements
of between 5 and 20% depending on the sub-benchmark, which is
pretty good.

-- PMM

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [PATCH v2 2/5] target-arm: convert add_cc and sub_cc helpers to TCG
  2012-09-21 19:33 ` [Qemu-devel] [PATCH v2 2/5] target-arm: convert add_cc and sub_cc helpers to TCG Aurelien Jarno
@ 2012-09-25 16:40   ` Peter Maydell
  0 siblings, 0 replies; 15+ messages in thread
From: Peter Maydell @ 2012-09-25 16:40 UTC (permalink / raw)
  To: Aurelien Jarno; +Cc: qemu-devel

On 21 September 2012 20:33, Aurelien Jarno <aurelien@aurel32.net> wrote:
> Now that the setcond TCG op is available, it's possible to replace
> add_cc and sub_cc helpers by TCG code. The code generated by TCG is
> actually very close to the one generated by GCC for the helper, and
> this avoid all the consequences of using an helper: globals saved back
> to memory, no possible optimization, call overhead, etc.
>
> Cc: Peter Maydell <peter.maydell@linaro.org>
> Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

-- PMM

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [PATCH v2 4/5] target-arm: mark a few integer helpers const and pure
  2012-09-21 19:33 ` [Qemu-devel] [PATCH v2 4/5] target-arm: mark a few integer helpers const and pure Aurelien Jarno
@ 2012-09-25 16:40   ` Peter Maydell
  0 siblings, 0 replies; 15+ messages in thread
From: Peter Maydell @ 2012-09-25 16:40 UTC (permalink / raw)
  To: Aurelien Jarno; +Cc: qemu-devel

On 21 September 2012 20:33, Aurelien Jarno <aurelien@aurel32.net> wrote:
> Cc: Peter Maydell <peter.maydell@linaro.org>
> Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

-- PMM

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [PATCH v2 1/5] target-arm: use globals for CC flags
  2012-09-21 19:33 ` [Qemu-devel] [PATCH v2 1/5] target-arm: use globals for CC flags Aurelien Jarno
@ 2012-09-25 16:40   ` Peter Maydell
  0 siblings, 0 replies; 15+ messages in thread
From: Peter Maydell @ 2012-09-25 16:40 UTC (permalink / raw)
  To: Aurelien Jarno; +Cc: qemu-devel

On 21 September 2012 20:33, Aurelien Jarno <aurelien@aurel32.net> wrote:
> Use globals for CC flags instead of loading/storing them each they are
> accessed. This allows some optimizations to be performed by the TCG
> optimization passes.
>
> Cc: Peter Maydell <peter.maydell@linaro.org>
> Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

-- PMM

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [PATCH v2 5/5] target-arm: use deposit instead of hardcoded version
  2012-09-21 19:33 ` [Qemu-devel] [PATCH v2 5/5] target-arm: use deposit instead of hardcoded version Aurelien Jarno
@ 2012-09-25 16:41   ` Peter Maydell
  0 siblings, 0 replies; 15+ messages in thread
From: Peter Maydell @ 2012-09-25 16:41 UTC (permalink / raw)
  To: Aurelien Jarno; +Cc: qemu-devel

On 21 September 2012 20:33, Aurelien Jarno <aurelien@aurel32.net> wrote:
> Use the deposit op instead of and hardcoded bit field insertion. It
> allows the host to emit the corresponding instruction if available.
>
> Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

-- PMM

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [PATCH v2 3/5] target-arm: convert sar, shl and shr helpers to TCG
  2012-09-25 13:57   ` Peter Maydell
@ 2012-09-25 22:41     ` Aurelien Jarno
  0 siblings, 0 replies; 15+ messages in thread
From: Aurelien Jarno @ 2012-09-25 22:41 UTC (permalink / raw)
  To: Peter Maydell; +Cc: qemu-devel

On Tue, Sep 25, 2012 at 02:57:20PM +0100, Peter Maydell wrote:
> On 21 September 2012 20:33, Aurelien Jarno <aurelien@aurel32.net> wrote:
> > +static void gen_sar(TCGv dest, TCGv t0, TCGv t1)
> > +{
> > +    TCGv tmp1, tmp2, tmp3;
> > +    tmp1 = tcg_temp_new_i32();
> > +    tcg_gen_andi_i32(tmp1, t1, 0xff);
> > +    tmp2 = tcg_const_i32(0x1f);
> > +    tmp3 = tcg_const_i32(0);
> > +    tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
> > +    tcg_temp_free_i32(tmp3);
> > +    tcg_temp_free_i32(tmp2);
> > +    tcg_gen_andi_i32(tmp1, tmp1, 0x1f);
> > +    tcg_gen_sar_i32(dest, t0, tmp1);
> > +    tcg_temp_free_i32(tmp1);
> > +}
> 
> I don't think you need the "tcg_gen_andi_i32(tmp1, tmp1, 0x1f);"
> for sar, do you? The movcond is doing "shift = (shift > 31) ? 31 : shift"
> so we know it's going to be in [0..31] and the and will do nothing,
> right?
> 

Indeed, that's a leftover from the previous version based on setcond. 
It should be removed.

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2012-09-25 22:41 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-09-21 19:33 [Qemu-devel] [PATCH v2 0/5] target-arm: misc optimizations Aurelien Jarno
2012-09-21 19:33 ` [Qemu-devel] [PATCH v2 1/5] target-arm: use globals for CC flags Aurelien Jarno
2012-09-25 16:40   ` Peter Maydell
2012-09-21 19:33 ` [Qemu-devel] [PATCH v2 2/5] target-arm: convert add_cc and sub_cc helpers to TCG Aurelien Jarno
2012-09-25 16:40   ` Peter Maydell
2012-09-21 19:33 ` [Qemu-devel] [PATCH v2 3/5] target-arm: convert sar, shl and shr " Aurelien Jarno
2012-09-21 23:14   ` Richard Henderson
2012-09-22  9:33     ` Aurelien Jarno
2012-09-25 13:57   ` Peter Maydell
2012-09-25 22:41     ` Aurelien Jarno
2012-09-21 19:33 ` [Qemu-devel] [PATCH v2 4/5] target-arm: mark a few integer helpers const and pure Aurelien Jarno
2012-09-25 16:40   ` Peter Maydell
2012-09-21 19:33 ` [Qemu-devel] [PATCH v2 5/5] target-arm: use deposit instead of hardcoded version Aurelien Jarno
2012-09-25 16:41   ` Peter Maydell
2012-09-25 16:39 ` [Qemu-devel] [PATCH v2 0/5] target-arm: misc optimizations Peter Maydell

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).