qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v2 0/9] tcg-arm improvements
@ 2013-03-12  6:43 Richard Henderson
  2013-03-12  6:43 ` [Qemu-devel] [PATCH v2 1/9] tcg-arm: Use bic to implement and with constant Richard Henderson
                   ` (10 more replies)
  0 siblings, 11 replies; 12+ messages in thread
From: Richard Henderson @ 2013-03-12  6:43 UTC (permalink / raw)
  To: qemu-devel; +Cc: jay.foad

Changes v1-v2:
  * Use more helper functions to handle K and N constraints.
  * Improve add2/sub2.  
  * Improve epilogues, as suggested in the previous thread.
  * Fix a typo in the name of the deposit helper.
  * Implement division for cortex-a15.


r~


Richard Henderson (9):
  tcg-arm: Use bic to implement and with constant
  tcg-arm: Handle negated constant arguments to and/sub
  tcg-arm: Allow constant first argument to sub
  tcg-arm: Use tcg_out_dat_rIN for compares
  tcg-arm: Handle constant arguments to add2/sub2
  tcg-arm: Improve constant generation
  tcg-arm: Fold epilogue into INDEX_op_exit_tb
  tcg-arm: Implement deposit for armv7
  tcg-arm: Implement division instructions

 disas/arm.c          |   4 +
 tcg/arm/tcg-target.c | 410 ++++++++++++++++++++++++++++++++++++---------------
 tcg/arm/tcg-target.h |  14 +-
 3 files changed, 304 insertions(+), 124 deletions(-)

-- 
1.8.1.2

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Qemu-devel] [PATCH v2 1/9] tcg-arm: Use bic to implement and with constant
  2013-03-12  6:43 [Qemu-devel] [PATCH v2 0/9] tcg-arm improvements Richard Henderson
@ 2013-03-12  6:43 ` Richard Henderson
  2013-03-12  6:43 ` [Qemu-devel] [PATCH v2 2/9] tcg-arm: Handle negated constant arguments to and/sub Richard Henderson
                   ` (9 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: Richard Henderson @ 2013-03-12  6:43 UTC (permalink / raw)
  To: qemu-devel; +Cc: jay.foad

This greatly improves the code we can produce for deposit
without armv7 support.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/arm/tcg-target.c | 52 ++++++++++++++++++++++++++++++++++++++++++----------
 tcg/arm/tcg-target.h |  2 --
 2 files changed, 42 insertions(+), 12 deletions(-)

diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index 94c6ca4..9972792 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -145,6 +145,9 @@ static void patch_reloc(uint8_t *code_ptr, int type,
     }
 }
 
+#define TCG_CT_CONST_ARM 0x100
+#define TCG_CT_CONST_INV 0x200
+
 /* parse target specific constraints */
 static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
 {
@@ -155,6 +158,9 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
     case 'I':
          ct->ct |= TCG_CT_CONST_ARM;
          break;
+    case 'K':
+         ct->ct |= TCG_CT_CONST_INV;
+         break;
 
     case 'r':
         ct->ct |= TCG_CT_REG;
@@ -275,16 +281,19 @@ static inline int check_fit_imm(uint32_t imm)
  * add, sub, eor...: ditto
  */
 static inline int tcg_target_const_match(tcg_target_long val,
-                const TCGArgConstraint *arg_ct)
+                                         const TCGArgConstraint *arg_ct)
 {
     int ct;
     ct = arg_ct->ct;
-    if (ct & TCG_CT_CONST)
+    if (ct & TCG_CT_CONST) {
         return 1;
-    else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val))
+    } else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val)) {
         return 1;
-    else
+    } else if ((ct & TCG_CT_CONST_INV) && check_fit_imm(~val)) {
+        return 1;
+    } else {
         return 0;
+    }
 }
 
 enum arm_data_opc_e {
@@ -482,6 +491,27 @@ static inline void tcg_out_dat_rI(TCGContext *s, int cond, int opc, TCGArg dst,
     }
 }
 
+static void tcg_out_dat_rIK(TCGContext *s, int cond, int opc, int opinv,
+                            TCGReg dst, TCGReg lhs, TCGArg rhs,
+                            bool rhs_is_const)
+{
+    /* Emit either the reg,imm or reg,reg form of a data-processing insn.
+     * rhs must satisfy the "rIK" constraint.
+     */
+    if (rhs_is_const) {
+        int rot = encode_imm(rhs);
+        if (rot < 0) {
+            rhs = ~rhs;
+            rot = encode_imm(rhs);
+            assert(rot >= 0);
+            opc = opinv;
+        }
+        tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
+    } else {
+        tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
+    }
+}
+
 static inline void tcg_out_mul32(TCGContext *s,
                 int cond, int rd, int rs, int rm)
 {
@@ -1610,11 +1640,13 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         c = ARITH_SUB;
         goto gen_arith;
     case INDEX_op_and_i32:
-        c = ARITH_AND;
-        goto gen_arith;
+        tcg_out_dat_rIK(s, COND_AL, ARITH_AND, ARITH_BIC,
+                        args[0], args[1], args[2], const_args[2]);
+        break;
     case INDEX_op_andc_i32:
-        c = ARITH_BIC;
-        goto gen_arith;
+        tcg_out_dat_rIK(s, COND_AL, ARITH_BIC, ARITH_AND,
+                        args[0], args[1], args[2], const_args[2]);
+        break;
     case INDEX_op_or_i32:
         c = ARITH_ORR;
         goto gen_arith;
@@ -1802,8 +1834,8 @@ static const TCGTargetOpDef arm_op_defs[] = {
     { INDEX_op_mul_i32, { "r", "r", "r" } },
     { INDEX_op_mulu2_i32, { "r", "r", "r", "r" } },
     { INDEX_op_muls2_i32, { "r", "r", "r", "r" } },
-    { INDEX_op_and_i32, { "r", "r", "rI" } },
-    { INDEX_op_andc_i32, { "r", "r", "rI" } },
+    { INDEX_op_and_i32, { "r", "r", "rIK" } },
+    { INDEX_op_andc_i32, { "r", "r", "rIK" } },
     { INDEX_op_or_i32, { "r", "r", "rI" } },
     { INDEX_op_xor_i32, { "r", "r", "rI" } },
     { INDEX_op_neg_i32, { "r", "r" } },
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index b6eed1f..354dd8a 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -49,8 +49,6 @@ typedef enum {
 
 #define TCG_TARGET_NB_REGS 16
 
-#define TCG_CT_CONST_ARM 0x100
-
 /* used for function call generation */
 #define TCG_REG_CALL_STACK		TCG_REG_R13
 #define TCG_TARGET_STACK_ALIGN		8
-- 
1.8.1.2

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [Qemu-devel] [PATCH v2 2/9] tcg-arm: Handle negated constant arguments to and/sub
  2013-03-12  6:43 [Qemu-devel] [PATCH v2 0/9] tcg-arm improvements Richard Henderson
  2013-03-12  6:43 ` [Qemu-devel] [PATCH v2 1/9] tcg-arm: Use bic to implement and with constant Richard Henderson
@ 2013-03-12  6:43 ` Richard Henderson
  2013-03-12  6:43 ` [Qemu-devel] [PATCH v2 3/9] tcg-arm: Allow constant first argument to sub Richard Henderson
                   ` (8 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: Richard Henderson @ 2013-03-12  6:43 UTC (permalink / raw)
  To: qemu-devel; +Cc: jay.foad

This greatly improves code generation for addition of small
negative constants.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/arm/tcg-target.c | 41 +++++++++++++++++++++++++++++++++++------
 1 file changed, 35 insertions(+), 6 deletions(-)

diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index 9972792..f470caa 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -147,6 +147,7 @@ static void patch_reloc(uint8_t *code_ptr, int type,
 
 #define TCG_CT_CONST_ARM 0x100
 #define TCG_CT_CONST_INV 0x200
+#define TCG_CT_CONST_NEG 0x400
 
 /* parse target specific constraints */
 static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
@@ -161,6 +162,9 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
     case 'K':
          ct->ct |= TCG_CT_CONST_INV;
          break;
+    case 'N': /* The gcc constraint letter is L, already used here.  */
+         ct->ct |= TCG_CT_CONST_NEG;
+         break;
 
     case 'r':
         ct->ct |= TCG_CT_REG;
@@ -291,6 +295,8 @@ static inline int tcg_target_const_match(tcg_target_long val,
         return 1;
     } else if ((ct & TCG_CT_CONST_INV) && check_fit_imm(~val)) {
         return 1;
+    } else if ((ct & TCG_CT_CONST_NEG) && check_fit_imm(-val)) {
+        return 1;
     } else {
         return 0;
     }
@@ -512,6 +518,27 @@ static void tcg_out_dat_rIK(TCGContext *s, int cond, int opc, int opinv,
     }
 }
 
+static void tcg_out_dat_rIN(TCGContext *s, int cond, int opc, int opneg,
+                            TCGArg dst, TCGArg lhs, TCGArg rhs,
+                            bool rhs_is_const)
+{
+    /* Emit either the reg,imm or reg,reg form of a data-processing insn.
+     * rhs must satisfy the "rIN" constraint.
+     */
+    if (rhs_is_const) {
+        int rot = encode_imm(rhs);
+        if (rot < 0) {
+            rhs = -rhs;
+            rot = encode_imm(rhs);
+            assert(rot >= 0);
+            opc = opneg;
+        }
+        tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
+    } else {
+        tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
+    }
+}
+
 static inline void tcg_out_mul32(TCGContext *s,
                 int cond, int rd, int rs, int rm)
 {
@@ -1634,11 +1661,13 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                        ARITH_MOV, args[0], 0, args[3], const_args[3]);
         break;
     case INDEX_op_add_i32:
-        c = ARITH_ADD;
-        goto gen_arith;
+        tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB,
+                        args[0], args[1], args[2], const_args[2]);
+        break;
     case INDEX_op_sub_i32:
-        c = ARITH_SUB;
-        goto gen_arith;
+        tcg_out_dat_rIN(s, COND_AL, ARITH_SUB, ARITH_ADD,
+                        args[0], args[1], args[2], const_args[2]);
+        break;
     case INDEX_op_and_i32:
         tcg_out_dat_rIK(s, COND_AL, ARITH_AND, ARITH_BIC,
                         args[0], args[1], args[2], const_args[2]);
@@ -1829,8 +1858,8 @@ static const TCGTargetOpDef arm_op_defs[] = {
     { INDEX_op_st_i32, { "r", "r" } },
 
     /* TODO: "r", "r", "ri" */
-    { INDEX_op_add_i32, { "r", "r", "rI" } },
-    { INDEX_op_sub_i32, { "r", "r", "rI" } },
+    { INDEX_op_add_i32, { "r", "r", "rIN" } },
+    { INDEX_op_sub_i32, { "r", "r", "rIN" } },
     { INDEX_op_mul_i32, { "r", "r", "r" } },
     { INDEX_op_mulu2_i32, { "r", "r", "r", "r" } },
     { INDEX_op_muls2_i32, { "r", "r", "r", "r" } },
-- 
1.8.1.2

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [Qemu-devel] [PATCH v2 3/9] tcg-arm: Allow constant first argument to sub
  2013-03-12  6:43 [Qemu-devel] [PATCH v2 0/9] tcg-arm improvements Richard Henderson
  2013-03-12  6:43 ` [Qemu-devel] [PATCH v2 1/9] tcg-arm: Use bic to implement and with constant Richard Henderson
  2013-03-12  6:43 ` [Qemu-devel] [PATCH v2 2/9] tcg-arm: Handle negated constant arguments to and/sub Richard Henderson
@ 2013-03-12  6:43 ` Richard Henderson
  2013-03-12  6:43 ` [Qemu-devel] [PATCH v2 4/9] tcg-arm: Use tcg_out_dat_rIN for compares Richard Henderson
                   ` (7 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: Richard Henderson @ 2013-03-12  6:43 UTC (permalink / raw)
  To: qemu-devel; +Cc: jay.foad

This allows the generation of RSB instructions.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/arm/tcg-target.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index f470caa..7475142 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -1665,8 +1665,17 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                         args[0], args[1], args[2], const_args[2]);
         break;
     case INDEX_op_sub_i32:
-        tcg_out_dat_rIN(s, COND_AL, ARITH_SUB, ARITH_ADD,
-                        args[0], args[1], args[2], const_args[2]);
+        if (const_args[1]) {
+            if (const_args[2]) {
+                tcg_out_movi32(s, COND_AL, args[0], args[1] - args[2]);
+            } else {
+                tcg_out_dat_rI(s, COND_AL, ARITH_RSB,
+                               args[0], args[2], args[1], 1);
+            }
+        } else {
+            tcg_out_dat_rIN(s, COND_AL, ARITH_SUB, ARITH_ADD,
+                            args[0], args[1], args[2], const_args[2]);
+        }
         break;
     case INDEX_op_and_i32:
         tcg_out_dat_rIK(s, COND_AL, ARITH_AND, ARITH_BIC,
@@ -1859,7 +1868,7 @@ static const TCGTargetOpDef arm_op_defs[] = {
 
     /* TODO: "r", "r", "ri" */
     { INDEX_op_add_i32, { "r", "r", "rIN" } },
-    { INDEX_op_sub_i32, { "r", "r", "rIN" } },
+    { INDEX_op_sub_i32, { "r", "rI", "rIN" } },
     { INDEX_op_mul_i32, { "r", "r", "r" } },
     { INDEX_op_mulu2_i32, { "r", "r", "r", "r" } },
     { INDEX_op_muls2_i32, { "r", "r", "r", "r" } },
-- 
1.8.1.2

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [Qemu-devel] [PATCH v2 4/9] tcg-arm: Use tcg_out_dat_rIN for compares
  2013-03-12  6:43 [Qemu-devel] [PATCH v2 0/9] tcg-arm improvements Richard Henderson
                   ` (2 preceding siblings ...)
  2013-03-12  6:43 ` [Qemu-devel] [PATCH v2 3/9] tcg-arm: Allow constant first argument to sub Richard Henderson
@ 2013-03-12  6:43 ` Richard Henderson
  2013-03-12  6:43 ` [Qemu-devel] [PATCH v2 5/9] tcg-arm: Handle constant arguments to add2/sub2 Richard Henderson
                   ` (6 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: Richard Henderson @ 2013-03-12  6:43 UTC (permalink / raw)
  To: qemu-devel; +Cc: jay.foad

This allows us to emit CMN instructions.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/arm/tcg-target.c | 40 ++++++++++++++++++++--------------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index 7475142..3152798 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -1655,10 +1655,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         /* Constraints mean that v2 is always in the same register as dest,
          * so we only need to do "if condition passed, move v1 to dest".
          */
-        tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0,
-                       args[1], args[2], const_args[2]);
-        tcg_out_dat_rI(s, tcg_cond_to_arm_cond[args[5]],
-                       ARITH_MOV, args[0], 0, args[3], const_args[3]);
+        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
+                        args[1], args[2], const_args[2]);
+        tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[args[5]], ARITH_MOV,
+                        ARITH_MVN, args[0], 0, args[3], const_args[3]);
         break;
     case INDEX_op_add_i32:
         tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB,
@@ -1755,7 +1755,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
 
     case INDEX_op_brcond_i32:
-        tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0,
+        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
                        args[0], args[1], const_args[1]);
         tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]], args[3]);
         break;
@@ -1768,15 +1768,15 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
          * TCG_COND_LE(U) --> (a0 <= a2 && a1 == a3) || (a1 <= a3 && a1 != a3),
          * TCG_COND_GT(U) --> (a0 >  a2 && a1 == a3) ||  a1 >  a3,
          */
-        tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0,
-                        args[1], args[3], SHIFT_IMM_LSL(0));
-        tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
-                        args[0], args[2], SHIFT_IMM_LSL(0));
+        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
+                        args[1], args[3], const_args[3]);
+        tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
+                        args[0], args[2], const_args[2]);
         tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[4]], args[5]);
         break;
     case INDEX_op_setcond_i32:
-        tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0,
-                       args[1], args[2], const_args[2]);
+        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
+                        args[1], args[2], const_args[2]);
         tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]],
                         ARITH_MOV, args[0], 0, 1);
         tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])],
@@ -1784,10 +1784,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
     case INDEX_op_setcond2_i32:
         /* See brcond2_i32 comment */
-        tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0,
-                        args[2], args[4], SHIFT_IMM_LSL(0));
-        tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
-                        args[1], args[3], SHIFT_IMM_LSL(0));
+        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
+                        args[2], args[4], const_args[4]);
+        tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
+                        args[1], args[3], const_args[3]);
         tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[5]],
                         ARITH_MOV, args[0], 0, 1);
         tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[5])],
@@ -1885,15 +1885,15 @@ static const TCGTargetOpDef arm_op_defs[] = {
     { INDEX_op_rotl_i32, { "r", "r", "ri" } },
     { INDEX_op_rotr_i32, { "r", "r", "ri" } },
 
-    { INDEX_op_brcond_i32, { "r", "rI" } },
-    { INDEX_op_setcond_i32, { "r", "r", "rI" } },
-    { INDEX_op_movcond_i32, { "r", "r", "rI", "rI", "0" } },
+    { INDEX_op_brcond_i32, { "r", "rIN" } },
+    { INDEX_op_setcond_i32, { "r", "r", "rIN" } },
+    { INDEX_op_movcond_i32, { "r", "r", "rIN", "rIK", "0" } },
 
     /* TODO: "r", "r", "r", "r", "ri", "ri" */
     { INDEX_op_add2_i32, { "r", "r", "r", "r", "r", "r" } },
     { INDEX_op_sub2_i32, { "r", "r", "r", "r", "r", "r" } },
-    { INDEX_op_brcond2_i32, { "r", "r", "r", "r" } },
-    { INDEX_op_setcond2_i32, { "r", "r", "r", "r", "r" } },
+    { INDEX_op_brcond2_i32, { "r", "r", "rIN", "rIN" } },
+    { INDEX_op_setcond2_i32, { "r", "r", "r", "rIN", "rIN" } },
 
 #if TARGET_LONG_BITS == 32
     { INDEX_op_qemu_ld8u, { "r", "l" } },
-- 
1.8.1.2

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [Qemu-devel] [PATCH v2 5/9] tcg-arm: Handle constant arguments to add2/sub2
  2013-03-12  6:43 [Qemu-devel] [PATCH v2 0/9] tcg-arm improvements Richard Henderson
                   ` (3 preceding siblings ...)
  2013-03-12  6:43 ` [Qemu-devel] [PATCH v2 4/9] tcg-arm: Use tcg_out_dat_rIN for compares Richard Henderson
@ 2013-03-12  6:43 ` Richard Henderson
  2013-03-12  6:43 ` [Qemu-devel] [PATCH v2 6/9] tcg-arm: Improve constant generation Richard Henderson
                   ` (5 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: Richard Henderson @ 2013-03-12  6:43 UTC (permalink / raw)
  To: qemu-devel; +Cc: jay.foad

We get to re-use the _rIN and _rIK subroutines to handle the various
combinations of add vs sub.  Fold the << 21 into the opcode enum values
so that we can explicitly add TO_CPSR as desired.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/arm/tcg-target.c | 106 ++++++++++++++++++++++++++++-----------------------
 1 file changed, 58 insertions(+), 48 deletions(-)

diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index 3152798..48cc186 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -302,27 +302,26 @@ static inline int tcg_target_const_match(tcg_target_long val,
     }
 }
 
+#define TO_CPSR (1 << 20)
+
 enum arm_data_opc_e {
-    ARITH_AND = 0x0,
-    ARITH_EOR = 0x1,
-    ARITH_SUB = 0x2,
-    ARITH_RSB = 0x3,
-    ARITH_ADD = 0x4,
-    ARITH_ADC = 0x5,
-    ARITH_SBC = 0x6,
-    ARITH_RSC = 0x7,
-    ARITH_TST = 0x8,
-    ARITH_CMP = 0xa,
-    ARITH_CMN = 0xb,
-    ARITH_ORR = 0xc,
-    ARITH_MOV = 0xd,
-    ARITH_BIC = 0xe,
-    ARITH_MVN = 0xf,
+    ARITH_AND = 0x0 << 21,
+    ARITH_EOR = 0x1 << 21,
+    ARITH_SUB = 0x2 << 21,
+    ARITH_RSB = 0x3 << 21,
+    ARITH_ADD = 0x4 << 21,
+    ARITH_ADC = 0x5 << 21,
+    ARITH_SBC = 0x6 << 21,
+    ARITH_RSC = 0x7 << 21,
+    ARITH_TST = 0x8 << 21 | TO_CPSR,
+    ARITH_CMP = 0xa << 21 | TO_CPSR,
+    ARITH_CMN = 0xb << 21 | TO_CPSR,
+    ARITH_ORR = 0xc << 21,
+    ARITH_MOV = 0xd << 21,
+    ARITH_BIC = 0xe << 21,
+    ARITH_MVN = 0xf << 21,
 };
 
-#define TO_CPSR(opc) \
-  ((opc == ARITH_CMP || opc == ARITH_CMN || opc == ARITH_TST) << 20)
-
 #define SHIFT_IMM_LSL(im)	(((im) << 7) | 0x00)
 #define SHIFT_IMM_LSR(im)	(((im) << 7) | 0x20)
 #define SHIFT_IMM_ASR(im)	(((im) << 7) | 0x40)
@@ -409,7 +408,7 @@ static inline void tcg_out_blx_imm(TCGContext *s, int32_t offset)
 static inline void tcg_out_dat_reg(TCGContext *s,
                 int cond, int opc, int rd, int rn, int rm, int shift)
 {
-    tcg_out32(s, (cond << 28) | (0 << 25) | (opc << 21) | TO_CPSR(opc) |
+    tcg_out32(s, (cond << 28) | (0 << 25) | opc |
                     (rn << 16) | (rd << 12) | shift | rm);
 }
 
@@ -421,29 +420,10 @@ static inline void tcg_out_mov_reg(TCGContext *s, int cond, int rd, int rm)
     }
 }
 
-static inline void tcg_out_dat_reg2(TCGContext *s,
-                int cond, int opc0, int opc1, int rd0, int rd1,
-                int rn0, int rn1, int rm0, int rm1, int shift)
-{
-    if (rd0 == rn1 || rd0 == rm1) {
-        tcg_out32(s, (cond << 28) | (0 << 25) | (opc0 << 21) | (1 << 20) |
-                        (rn0 << 16) | (8 << 12) | shift | rm0);
-        tcg_out32(s, (cond << 28) | (0 << 25) | (opc1 << 21) |
-                        (rn1 << 16) | (rd1 << 12) | shift | rm1);
-        tcg_out_dat_reg(s, cond, ARITH_MOV,
-                        rd0, 0, TCG_REG_R8, SHIFT_IMM_LSL(0));
-    } else {
-        tcg_out32(s, (cond << 28) | (0 << 25) | (opc0 << 21) | (1 << 20) |
-                        (rn0 << 16) | (rd0 << 12) | shift | rm0);
-        tcg_out32(s, (cond << 28) | (0 << 25) | (opc1 << 21) |
-                        (rn1 << 16) | (rd1 << 12) | shift | rm1);
-    }
-}
-
 static inline void tcg_out_dat_imm(TCGContext *s,
                 int cond, int opc, int rd, int rn, int im)
 {
-    tcg_out32(s, (cond << 28) | (1 << 25) | (opc << 21) | TO_CPSR(opc) |
+    tcg_out32(s, (cond << 28) | (1 << 25) | opc |
                     (rn << 16) | (rd << 12) | im);
 }
 
@@ -1563,6 +1543,7 @@ static uint8_t *tb_ret_addr;
 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                 const TCGArg *args, const int *const_args)
 {
+    TCGArg a0, a1, a2, a3, a4, a5;
     int c;
 
     switch (opc) {
@@ -1695,14 +1676,44 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_dat_rI(s, COND_AL, c, args[0], args[1], args[2], const_args[2]);
         break;
     case INDEX_op_add2_i32:
-        tcg_out_dat_reg2(s, COND_AL, ARITH_ADD, ARITH_ADC,
-                        args[0], args[1], args[2], args[3],
-                        args[4], args[5], SHIFT_IMM_LSL(0));
+        a0 = args[0], a1 = args[1], a2 = args[2];
+        a3 = args[3], a4 = args[4], a5 = args[5];
+        if (a0 == a3 || (a0 == a5 && !const_args[5])) {
+            a0 = TCG_REG_R8;
+        }
+        tcg_out_dat_rIN(s, COND_AL, ARITH_ADD | TO_CPSR, ARITH_SUB | TO_CPSR,
+                        a0, a2, a4, const_args[4]);
+        tcg_out_dat_rIK(s, COND_AL, ARITH_ADC, ARITH_SBC,
+                        a1, a3, a5, const_args[5]);
+        tcg_out_mov_reg(s, COND_AL, args[0], a0);
         break;
     case INDEX_op_sub2_i32:
-        tcg_out_dat_reg2(s, COND_AL, ARITH_SUB, ARITH_SBC,
-                        args[0], args[1], args[2], args[3],
-                        args[4], args[5], SHIFT_IMM_LSL(0));
+        a0 = args[0], a1 = args[1], a2 = args[2];
+        a3 = args[3], a4 = args[4], a5 = args[5];
+        if ((a0 == a3 && !const_args[3]) || (a0 == a5 && !const_args[5])) {
+            a0 = TCG_REG_R8;
+        }
+        if (const_args[2]) {
+            if (const_args[4]) {
+                tcg_out_movi32(s, COND_AL, a0, a4);
+                a4 = a0;
+            }
+            tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR, a0, a4, a2, 1);
+        } else {
+            tcg_out_dat_rIN(s, COND_AL, ARITH_SUB | TO_CPSR,
+                            ARITH_ADD | TO_CPSR, a0, a2, a4, const_args[4]);
+        }
+        if (const_args[3]) {
+            if (const_args[5]) {
+                tcg_out_movi32(s, COND_AL, a1, a5);
+                a5 = a1;
+            }
+            tcg_out_dat_rI(s, COND_AL, ARITH_RSC, a1, a5, a3, 1);
+        } else {
+            tcg_out_dat_rIK(s, COND_AL, ARITH_SBC, ARITH_ADC,
+                            a1, a3, a5, const_args[5]);
+        }
+        tcg_out_mov_reg(s, COND_AL, args[0], a0);
         break;
     case INDEX_op_neg_i32:
         tcg_out_dat_imm(s, COND_AL, ARITH_RSB, args[0], args[1], 0);
@@ -1889,9 +1900,8 @@ static const TCGTargetOpDef arm_op_defs[] = {
     { INDEX_op_setcond_i32, { "r", "r", "rIN" } },
     { INDEX_op_movcond_i32, { "r", "r", "rIN", "rIK", "0" } },
 
-    /* TODO: "r", "r", "r", "r", "ri", "ri" */
-    { INDEX_op_add2_i32, { "r", "r", "r", "r", "r", "r" } },
-    { INDEX_op_sub2_i32, { "r", "r", "r", "r", "r", "r" } },
+    { INDEX_op_add2_i32, { "r", "r", "r", "r", "rIN", "rIK" } },
+    { INDEX_op_sub2_i32, { "r", "r", "rI", "rI", "rIN", "rIK" } },
     { INDEX_op_brcond2_i32, { "r", "r", "rIN", "rIN" } },
     { INDEX_op_setcond2_i32, { "r", "r", "r", "rIN", "rIN" } },
 
-- 
1.8.1.2

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [Qemu-devel] [PATCH v2 6/9] tcg-arm: Improve constant generation
  2013-03-12  6:43 [Qemu-devel] [PATCH v2 0/9] tcg-arm improvements Richard Henderson
                   ` (4 preceding siblings ...)
  2013-03-12  6:43 ` [Qemu-devel] [PATCH v2 5/9] tcg-arm: Handle constant arguments to add2/sub2 Richard Henderson
@ 2013-03-12  6:43 ` Richard Henderson
  2013-03-12  6:43 ` [Qemu-devel] [PATCH v2 7/9] tcg-arm: Fold epilogue into INDEX_op_exit_tb Richard Henderson
                   ` (4 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: Richard Henderson @ 2013-03-12  6:43 UTC (permalink / raw)
  To: qemu-devel; +Cc: jay.foad

Try fully rotated arguments to mov and mvn before trying movt
or full decomposition.  Begin decomposition with mvn when it
looks like it'll help.  Examples include

-:        mov   r9, #0x00000fa0
-:        orr   r9, r9, #0x000ee000
-:        orr   r9, r9, #0x0ff00000
-:        orr   r9, r9, #0xf0000000
+:        mvn   r9, #0x0000005f
+:        eor   r9, r9, #0x00011000

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/arm/tcg-target.c | 67 ++++++++++++++++++++++++++++++++++------------------
 1 file changed, 44 insertions(+), 23 deletions(-)

diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index 48cc186..5333fad 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -427,15 +427,31 @@ static inline void tcg_out_dat_imm(TCGContext *s,
                     (rn << 16) | (rd << 12) | im);
 }
 
-static inline void tcg_out_movi32(TCGContext *s,
-                int cond, int rd, uint32_t arg)
-{
-    /* TODO: This is very suboptimal, we can easily have a constant
-     * pool somewhere after all the instructions.  */
-    if ((int)arg < 0 && (int)arg >= -0x100) {
-        tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0, (~arg) & 0xff);
-    } else if (use_armv7_instructions) {
-        /* use movw/movt */
+static void tcg_out_movi32(TCGContext *s, int cond, int rd, uint32_t arg)
+{
+    int rot, opc, rn;
+
+    /* For armv7, make sure not to use movw+movt when mov/mvn would do.
+       Speed things up by only checking when movt would be required.
+       Prior to armv7, have one go at fully rotated immediates before
+       doing the decomposition thing below.  */
+    if (!use_armv7_instructions || (arg & 0xffff0000)) {
+        rot = encode_imm(arg);
+        if (rot >= 0) {
+            tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0,
+                            rotl(arg, rot) | (rot << 7));
+            return;
+        }
+        rot = encode_imm(~arg);
+        if (rot >= 0) {
+            tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0,
+                            rotl(~arg, rot) | (rot << 7));
+            return;
+        }
+    }
+
+    /* Use movw + movt.  */
+    if (use_armv7_instructions) {
         /* movw */
         tcg_out32(s, (cond << 28) | 0x03000000 | (rd << 12)
                   | ((arg << 4) & 0x000f0000) | (arg & 0xfff));
@@ -444,22 +460,27 @@ static inline void tcg_out_movi32(TCGContext *s,
             tcg_out32(s, (cond << 28) | 0x03400000 | (rd << 12)
                       | ((arg >> 12) & 0x000f0000) | ((arg >> 16) & 0xfff));
         }
-    } else {
-        int opc = ARITH_MOV;
-        int rn = 0;
-
-        do {
-            int i, rot;
-
-            i = ctz32(arg) & ~1;
-            rot = ((32 - i) << 7) & 0xf00;
-            tcg_out_dat_imm(s, cond, opc, rd, rn, ((arg >> i) & 0xff) | rot);
-            arg &= ~(0xff << i);
+        return;
+    }
 
-            opc = ARITH_ORR;
-            rn = rd;
-        } while (arg);
+    /* TODO: This is very suboptimal, we can easily have a constant
+       pool somewhere after all the instructions.  */
+    opc = ARITH_MOV;
+    rn = 0;
+    /* If we have lots of leading 1's, we can shorten the sequence by
+       beginning with mvn and then clearing higher bits with eor.  */
+    if (clz32(~arg) > clz32(arg)) {
+        opc = ARITH_MVN, arg = ~arg;
     }
+    do {
+        int i = ctz32(arg) & ~1;
+        rot = ((32 - i) << 7) & 0xf00;
+        tcg_out_dat_imm(s, cond, opc, rd, rn, ((arg >> i) & 0xff) | rot);
+        arg &= ~(0xff << i);
+
+        opc = ARITH_EOR;
+        rn = rd;
+    } while (arg);
 }
 
 static inline void tcg_out_dat_rI(TCGContext *s, int cond, int opc, TCGArg dst,
-- 
1.8.1.2

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [Qemu-devel] [PATCH v2 7/9] tcg-arm: Fold epilogue into INDEX_op_exit_tb
  2013-03-12  6:43 [Qemu-devel] [PATCH v2 0/9] tcg-arm improvements Richard Henderson
                   ` (5 preceding siblings ...)
  2013-03-12  6:43 ` [Qemu-devel] [PATCH v2 6/9] tcg-arm: Improve constant generation Richard Henderson
@ 2013-03-12  6:43 ` Richard Henderson
  2013-03-12  6:43 ` [Qemu-devel] [PATCH v2 8/9] tcg-arm: Implement deposit for armv7 Richard Henderson
                   ` (3 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: Richard Henderson @ 2013-03-12  6:43 UTC (permalink / raw)
  To: qemu-devel; +Cc: jay.foad

The epilogue on ARM is one pop instruction, that pops the return
address into PC.  Avoid the jump to jump for this case.  Use the
standard movi32 routine for loading the return value if it's easy.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/arm/tcg-target.c | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index 5333fad..88f5689 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -1559,7 +1559,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
 #endif
 }
 
-static uint8_t *tb_ret_addr;
+static uint32_t tb_pop_ret;
 
 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                 const TCGArg *args, const int *const_args)
@@ -1569,17 +1569,15 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
 
     switch (opc) {
     case INDEX_op_exit_tb:
-        {
-            uint8_t *ld_ptr = s->code_ptr;
-            if (args[0] >> 8)
-                tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_PC, 0);
-            else
-                tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R0, 0, args[0]);
-            tcg_out_goto(s, COND_AL, (tcg_target_ulong) tb_ret_addr);
-            if (args[0] >> 8) {
-                *ld_ptr = (uint8_t) (s->code_ptr - ld_ptr) - 8;
-                tcg_out32(s, args[0]);
-            }
+        a0 = args[0];
+        if (use_armv7_instructions || check_fit_imm(a0)) {
+            tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]);
+            tcg_out32(s, tb_pop_ret);
+        } else {
+            /* pc is always current address + 8, so 0 reads the word.  */
+            tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_PC, 0);
+            tcg_out32(s, tb_pop_ret);
+            tcg_out32(s, args[0]);
         }
         break;
     case INDEX_op_goto_tb:
@@ -2025,8 +2023,7 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
 
     tcg_out_bx(s, COND_AL, tcg_target_call_iarg_regs[1]);
-    tb_ret_addr = s->code_ptr;
 
     /* ldmia sp!, { r4 - r12, pc } */
-    tcg_out32(s, (COND_AL << 28) | 0x08bd9ff0);
+    tb_pop_ret = (COND_AL << 28) | 0x08bd9ff0;
 }
-- 
1.8.1.2

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [Qemu-devel] [PATCH v2 8/9] tcg-arm: Implement deposit for armv7
  2013-03-12  6:43 [Qemu-devel] [PATCH v2 0/9] tcg-arm improvements Richard Henderson
                   ` (6 preceding siblings ...)
  2013-03-12  6:43 ` [Qemu-devel] [PATCH v2 7/9] tcg-arm: Fold epilogue into INDEX_op_exit_tb Richard Henderson
@ 2013-03-12  6:43 ` Richard Henderson
  2013-03-12  6:43 ` [Qemu-devel] [PATCH v2 9/9] tcg-arm: Implement division instructions Richard Henderson
                   ` (2 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: Richard Henderson @ 2013-03-12  6:43 UTC (permalink / raw)
  To: qemu-devel; +Cc: jay.foad

We have BFI and BFC available for implementing it.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/arm/tcg-target.c | 36 ++++++++++++++++++++++++++++++++++++
 tcg/arm/tcg-target.h |  5 ++++-
 2 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index 88f5689..4950eaf 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -702,6 +702,35 @@ static inline void tcg_out_bswap32(TCGContext *s, int cond, int rd, int rn)
     }
 }
 
+bool tcg_target_deposit_valid(int ofs, int len)
+{
+    /* ??? Without bfi, we could improve over generic code by combining
+       the right-shift from a non-zero ofs with the orr.  We do run into
+       problems when rd == rs, and the mask generated from ofs+len don't
+       fit into an immediate.  We would have to be careful not to pessimize
+       wrt the optimizations performed on the expanded code.  */
+    return use_armv7_instructions;
+}
+
+static inline void tcg_out_deposit(TCGContext *s, int cond, TCGReg rd,
+                                   TCGArg a1, int ofs, int len, bool const_a1)
+{
+    if (const_a1) {
+        uint32_t mask = (2u << (len - 1)) - 1;
+        a1 &= mask;
+        if (a1 == 0) {
+            /* bfi becomes bfc with rn == 15.  */
+            a1 = 15;
+        } else {
+            tcg_out_movi32(s, cond, TCG_REG_R8, a1);
+            a1 = TCG_REG_R8;
+        }
+    }
+    /* bfi/bfc */
+    tcg_out32(s, 0x07c00010 | (cond << 28) | (rd << 12) | a1
+              | (ofs << 7) | ((ofs + len - 1) << 16));
+}
+
 static inline void tcg_out_ld32_12(TCGContext *s, int cond,
                 int rd, int rn, tcg_target_long im)
 {
@@ -1873,6 +1902,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_ext16u(s, COND_AL, args[0], args[1]);
         break;
 
+    case INDEX_op_deposit_i32:
+        tcg_out_deposit(s, COND_AL, args[0], args[2],
+                        args[3], args[4], const_args[2]);
+        break;
+
     default:
         tcg_abort();
     }
@@ -1957,6 +1991,8 @@ static const TCGTargetOpDef arm_op_defs[] = {
     { INDEX_op_ext16s_i32, { "r", "r" } },
     { INDEX_op_ext16u_i32, { "r", "r" } },
 
+    { INDEX_op_deposit_i32, { "r", "0", "ri" } },
+
     { -1 },
 };
 
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 354dd8a..209f585 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -71,10 +71,13 @@ typedef enum {
 #define TCG_TARGET_HAS_eqv_i32          0
 #define TCG_TARGET_HAS_nand_i32         0
 #define TCG_TARGET_HAS_nor_i32          0
-#define TCG_TARGET_HAS_deposit_i32      0
+#define TCG_TARGET_HAS_deposit_i32      1
 #define TCG_TARGET_HAS_movcond_i32      1
 #define TCG_TARGET_HAS_muls2_i32        1
 
+extern bool tcg_target_deposit_valid(int ofs, int len);
+#define TCG_TARGET_deposit_i32_valid  tcg_target_deposit_valid
+
 enum {
     TCG_AREG0 = TCG_REG_R6,
 };
-- 
1.8.1.2

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [Qemu-devel] [PATCH v2 9/9] tcg-arm: Implement division instructions
  2013-03-12  6:43 [Qemu-devel] [PATCH v2 0/9] tcg-arm improvements Richard Henderson
                   ` (7 preceding siblings ...)
  2013-03-12  6:43 ` [Qemu-devel] [PATCH v2 8/9] tcg-arm: Implement deposit for armv7 Richard Henderson
@ 2013-03-12  6:43 ` Richard Henderson
  2013-03-13 13:52 ` [Qemu-devel] [PATCH v2 0/9] tcg-arm improvements Laurent Desnogues
  2013-03-20  0:17 ` Richard Henderson
  10 siblings, 0 replies; 12+ messages in thread
From: Richard Henderson @ 2013-03-12  6:43 UTC (permalink / raw)
  To: qemu-devel; +Cc: jay.foad

An armv7 extension implements division, present on Cortex A15.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 disas/arm.c          |  4 ++++
 tcg/arm/tcg-target.c | 36 ++++++++++++++++++++++++++++++++++++
 tcg/arm/tcg-target.h |  7 ++++++-
 3 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/disas/arm.c b/disas/arm.c
index 4927d8a..76e97a8 100644
--- a/disas/arm.c
+++ b/disas/arm.c
@@ -819,6 +819,10 @@ static const struct opcode32 arm_opcodes[] =
   {ARM_EXT_V3M, 0x00800090, 0x0fa000f0, "%22?sumull%20's%c\t%12-15r, %16-19r, %0-3r, %8-11r"},
   {ARM_EXT_V3M, 0x00a00090, 0x0fa000f0, "%22?sumlal%20's%c\t%12-15r, %16-19r, %0-3r, %8-11r"},
 
+  /* IDIV instructions.  */
+  {ARM_EXT_DIV, 0x0710f010, 0x0ff0f0f0, "sdiv%c\t%16-19r, %0-3r, %8-11r"},
+  {ARM_EXT_DIV, 0x0730f010, 0x0ff0f0f0, "udiv%c\t%16-19r, %0-3r, %8-11r"},
+
   /* V7 instructions.  */
   {ARM_EXT_V7, 0xf450f000, 0xfd70f000, "pli\t%P"},
   {ARM_EXT_V7, 0x0320f0f0, 0x0ffffff0, "dbg%c\t#%0-3d"},
diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index 4950eaf..e599794 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -591,6 +591,16 @@ static inline void tcg_out_smull32(TCGContext *s,
     }
 }
 
+static inline void tcg_out_sdiv(TCGContext *s, int cond, int rd, int rn, int rm)
+{
+    tcg_out32(s, 0x0710f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
+}
+
+static inline void tcg_out_udiv(TCGContext *s, int cond, int rd, int rn, int rm)
+{
+    tcg_out32(s, 0x0730f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
+}
+
 static inline void tcg_out_ext8s(TCGContext *s, int cond,
                                  int rd, int rn)
 {
@@ -1907,6 +1917,25 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                         args[3], args[4], const_args[2]);
         break;
 
+    case INDEX_op_div_i32:
+        tcg_out_sdiv(s, COND_AL, args[0], args[1], args[2]);
+        break;
+    case INDEX_op_divu_i32:
+        tcg_out_udiv(s, COND_AL, args[0], args[1], args[2]);
+        break;
+    case INDEX_op_rem_i32:
+        tcg_out_sdiv(s, COND_AL, TCG_REG_R8, args[1], args[2]);
+        tcg_out_mul32(s, COND_AL, TCG_REG_R8, TCG_REG_R8, args[2]);
+        tcg_out_dat_reg(s, COND_AL, ARITH_SUB, args[0], args[1], TCG_REG_R8,
+                        SHIFT_IMM_LSL(0));
+        break;
+    case INDEX_op_remu_i32:
+        tcg_out_udiv(s, COND_AL, TCG_REG_R8, args[1], args[2]);
+        tcg_out_mul32(s, COND_AL, TCG_REG_R8, TCG_REG_R8, args[2]);
+        tcg_out_dat_reg(s, COND_AL, ARITH_SUB, args[0], args[1], TCG_REG_R8,
+                        SHIFT_IMM_LSL(0));
+        break;
+
     default:
         tcg_abort();
     }
@@ -1993,6 +2022,13 @@ static const TCGTargetOpDef arm_op_defs[] = {
 
     { INDEX_op_deposit_i32, { "r", "0", "ri" } },
 
+#if TCG_TARGET_HAS_div_i32
+    { INDEX_op_div_i32, { "r", "r", "r" } },
+    { INDEX_op_rem_i32, { "r", "r", "r" } },
+    { INDEX_op_divu_i32, { "r", "r", "r" } },
+    { INDEX_op_remu_i32, { "r", "r", "r" } },
+#endif
+
     { -1 },
 };
 
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 209f585..3be41cc 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -56,7 +56,6 @@ typedef enum {
 #define TCG_TARGET_CALL_STACK_OFFSET	0
 
 /* optional instructions */
-#define TCG_TARGET_HAS_div_i32          0
 #define TCG_TARGET_HAS_ext8s_i32        1
 #define TCG_TARGET_HAS_ext16s_i32       1
 #define TCG_TARGET_HAS_ext8u_i32        0 /* and r0, r1, #0xff */
@@ -75,6 +74,12 @@ typedef enum {
 #define TCG_TARGET_HAS_movcond_i32      1
 #define TCG_TARGET_HAS_muls2_i32        1
 
+#ifdef __ARM_ARCH_EXT_IDIV__
+#define TCG_TARGET_HAS_div_i32          1
+#else
+#define TCG_TARGET_HAS_div_i32          0
+#endif
+
 extern bool tcg_target_deposit_valid(int ofs, int len);
 #define TCG_TARGET_deposit_i32_valid  tcg_target_deposit_valid
 
-- 
1.8.1.2

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [Qemu-devel] [PATCH v2 0/9] tcg-arm improvements
  2013-03-12  6:43 [Qemu-devel] [PATCH v2 0/9] tcg-arm improvements Richard Henderson
                   ` (8 preceding siblings ...)
  2013-03-12  6:43 ` [Qemu-devel] [PATCH v2 9/9] tcg-arm: Implement division instructions Richard Henderson
@ 2013-03-13 13:52 ` Laurent Desnogues
  2013-03-20  0:17 ` Richard Henderson
  10 siblings, 0 replies; 12+ messages in thread
From: Laurent Desnogues @ 2013-03-13 13:52 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, jay.foad

On Tue, Mar 12, 2013 at 7:43 AM, Richard Henderson <rth@twiddle.net> wrote:
> Changes v1-v2:
>   * Use more helper functions to handle K and N constraints.
>   * Improve add2/sub2.
>   * Improve epilogues, as suggested in the previous thread.
>   * Fix a typo in the name of the deposit helper.
>   * Implement division for cortex-a15.

I applied all of the patch series and tested nbench in user mode
on a quad core Cortex-A9 (which means I didn't test the div part
and the add2/sub2 part since it's i386 code).

It works and gave some nice speedup.

Thanks,

Laurent

>
> r~
>
>
> Richard Henderson (9):
>   tcg-arm: Use bic to implement and with constant
>   tcg-arm: Handle negated constant arguments to and/sub
>   tcg-arm: Allow constant first argument to sub
>   tcg-arm: Use tcg_out_dat_rIN for compares
>   tcg-arm: Handle constant arguments to add2/sub2
>   tcg-arm: Improve constant generation
>   tcg-arm: Fold epilogue into INDEX_op_exit_tb
>   tcg-arm: Implement deposit for armv7
>   tcg-arm: Implement division instructions
>
>  disas/arm.c          |   4 +
>  tcg/arm/tcg-target.c | 410 ++++++++++++++++++++++++++++++++++++---------------
>  tcg/arm/tcg-target.h |  14 +-
>  3 files changed, 304 insertions(+), 124 deletions(-)
>
> --
> 1.8.1.2
>
>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Qemu-devel] [PATCH v2 0/9] tcg-arm improvements
  2013-03-12  6:43 [Qemu-devel] [PATCH v2 0/9] tcg-arm improvements Richard Henderson
                   ` (9 preceding siblings ...)
  2013-03-13 13:52 ` [Qemu-devel] [PATCH v2 0/9] tcg-arm improvements Laurent Desnogues
@ 2013-03-20  0:17 ` Richard Henderson
  10 siblings, 0 replies; 12+ messages in thread
From: Richard Henderson @ 2013-03-20  0:17 UTC (permalink / raw)
  To: qemu-devel

Ping.

r~

On 2013-03-11 23:43, Richard Henderson wrote:
> Changes v1-v2:
>    * Use more helper functions to handle K and N constraints.
>    * Improve add2/sub2.
>    * Improve epilogues, as suggested in the previous thread.
>    * Fix a typo in the name of the deposit helper.
>    * Implement division for cortex-a15.
>
>
> r~
>
>
> Richard Henderson (9):
>    tcg-arm: Use bic to implement and with constant
>    tcg-arm: Handle negated constant arguments to and/sub
>    tcg-arm: Allow constant first argument to sub
>    tcg-arm: Use tcg_out_dat_rIN for compares
>    tcg-arm: Handle constant arguments to add2/sub2
>    tcg-arm: Improve constant generation
>    tcg-arm: Fold epilogue into INDEX_op_exit_tb
>    tcg-arm: Implement deposit for armv7
>    tcg-arm: Implement division instructions
>
>   disas/arm.c          |   4 +
>   tcg/arm/tcg-target.c | 410 ++++++++++++++++++++++++++++++++++++---------------
>   tcg/arm/tcg-target.h |  14 +-
>   3 files changed, 304 insertions(+), 124 deletions(-)
>

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2013-03-20  0:18 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-03-12  6:43 [Qemu-devel] [PATCH v2 0/9] tcg-arm improvements Richard Henderson
2013-03-12  6:43 ` [Qemu-devel] [PATCH v2 1/9] tcg-arm: Use bic to implement and with constant Richard Henderson
2013-03-12  6:43 ` [Qemu-devel] [PATCH v2 2/9] tcg-arm: Handle negated constant arguments to and/sub Richard Henderson
2013-03-12  6:43 ` [Qemu-devel] [PATCH v2 3/9] tcg-arm: Allow constant first argument to sub Richard Henderson
2013-03-12  6:43 ` [Qemu-devel] [PATCH v2 4/9] tcg-arm: Use tcg_out_dat_rIN for compares Richard Henderson
2013-03-12  6:43 ` [Qemu-devel] [PATCH v2 5/9] tcg-arm: Handle constant arguments to add2/sub2 Richard Henderson
2013-03-12  6:43 ` [Qemu-devel] [PATCH v2 6/9] tcg-arm: Improve constant generation Richard Henderson
2013-03-12  6:43 ` [Qemu-devel] [PATCH v2 7/9] tcg-arm: Fold epilogue into INDEX_op_exit_tb Richard Henderson
2013-03-12  6:43 ` [Qemu-devel] [PATCH v2 8/9] tcg-arm: Implement deposit for armv7 Richard Henderson
2013-03-12  6:43 ` [Qemu-devel] [PATCH v2 9/9] tcg-arm: Implement division instructions Richard Henderson
2013-03-13 13:52 ` [Qemu-devel] [PATCH v2 0/9] tcg-arm improvements Laurent Desnogues
2013-03-20  0:17 ` Richard Henderson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).