qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Richard Henderson <rth@twiddle.net>
To: qemu-devel@nongnu.org
Cc: Aurelien Jarno <aurelien@aurel32.net>
Subject: [Qemu-devel] [PATCH v4 07/18] tcg-arm: Improve constant generation
Date: Sat, 30 Mar 2013 13:43:16 -0700	[thread overview]
Message-ID: <1364676207-21516-8-git-send-email-rth@twiddle.net> (raw)
In-Reply-To: <1364676207-21516-1-git-send-email-rth@twiddle.net>

Try fully rotated arguments to mov and mvn before trying movt
or full decomposition.  Begin the decomposition with mvn when the
constant has more leading ones than leading zeros.  For example,
loading 0xfffeefa0 without armv7 shrinks from four insns to two:

-:        mov   r9, #0x00000fa0
-:        orr   r9, r9, #0x000ee000
-:        orr   r9, r9, #0x0ff00000
-:        orr   r9, r9, #0xf0000000
+:        mvn   r9, #0x0000005f
+:        eor   r9, r9, #0x00011000

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/arm/tcg-target.c | 67 ++++++++++++++++++++++++++++++++++------------------
 1 file changed, 44 insertions(+), 23 deletions(-)

diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index bef2e66..ca76902 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -427,15 +427,31 @@ static inline void tcg_out_dat_imm(TCGContext *s,
                     (rn << 16) | (rd << 12) | im);
 }
 
-static inline void tcg_out_movi32(TCGContext *s,
-                int cond, int rd, uint32_t arg)
-{
-    /* TODO: This is very suboptimal, we can easily have a constant
-     * pool somewhere after all the instructions.  */
-    if ((int)arg < 0 && (int)arg >= -0x100) {
-        tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0, (~arg) & 0xff);
-    } else if (use_armv7_instructions) {
-        /* use movw/movt */
+static void tcg_out_movi32(TCGContext *s, int cond, int rd, uint32_t arg)
+{
+    int rot, opc, rn;
+
+    /* For armv7, make sure not to use movw+movt when mov/mvn would do.
+       Speed things up by only checking when movt would be required.
+       Prior to armv7, have one go at fully rotated immediates before
+       doing the decomposition thing below.  */
+    if (!use_armv7_instructions || (arg & 0xffff0000)) {
+        rot = encode_imm(arg);
+        if (rot >= 0) {
+            tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0,
+                            rotl(arg, rot) | (rot << 7));
+            return;
+        }
+        rot = encode_imm(~arg);
+        if (rot >= 0) {
+            tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0,
+                            rotl(~arg, rot) | (rot << 7));
+            return;
+        }
+    }
+
+    /* Use movw + movt.  */
+    if (use_armv7_instructions) {
         /* movw */
         tcg_out32(s, (cond << 28) | 0x03000000 | (rd << 12)
                   | ((arg << 4) & 0x000f0000) | (arg & 0xfff));
@@ -444,22 +460,27 @@ static inline void tcg_out_movi32(TCGContext *s,
             tcg_out32(s, (cond << 28) | 0x03400000 | (rd << 12)
                       | ((arg >> 12) & 0x000f0000) | ((arg >> 16) & 0xfff));
         }
-    } else {
-        int opc = ARITH_MOV;
-        int rn = 0;
-
-        do {
-            int i, rot;
-
-            i = ctz32(arg) & ~1;
-            rot = ((32 - i) << 7) & 0xf00;
-            tcg_out_dat_imm(s, cond, opc, rd, rn, ((arg >> i) & 0xff) | rot);
-            arg &= ~(0xff << i);
+        return;
+    }
 
-            opc = ARITH_ORR;
-            rn = rd;
-        } while (arg);
+    /* TODO: This is very suboptimal, we can easily have a constant
+       pool somewhere after all the instructions.  */
+    opc = ARITH_MOV;
+    rn = 0;
+    /* If we have lots of leading 1's, we can shorten the sequence by
+       beginning with mvn and then clearing higher bits with eor.  */
+    if (clz32(~arg) > clz32(arg)) {
+        opc = ARITH_MVN, arg = ~arg;
     }
+    do {
+        int i = ctz32(arg) & ~1;
+        rot = ((32 - i) << 7) & 0xf00;
+        tcg_out_dat_imm(s, cond, opc, rd, rn, ((arg >> i) & 0xff) | rot);
+        arg &= ~(0xff << i);
+
+        opc = ARITH_EOR;
+        rn = rd;
+    } while (arg);
 }
 
 static inline void tcg_out_dat_rI(TCGContext *s, int cond, int opc, TCGArg dst,
-- 
1.8.1.4

  parent reply	other threads:[~2013-03-30 20:44 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-03-30 20:43 [Qemu-devel] [PATCH v4 00/18] tcg-arm improvements Richard Henderson
2013-03-30 20:43 ` [Qemu-devel] [PATCH v4 01/18] tcg-arm: Fix local stack frame Richard Henderson
2013-03-30 21:14   ` Peter Maydell
2013-03-30 21:19     ` Richard Henderson
2013-03-30 21:45       ` Peter Maydell
2013-03-30 20:43 ` [Qemu-devel] [PATCH v4 02/18] tcg-arm: Use bic to implement and with constant Richard Henderson
2013-03-30 20:43 ` [Qemu-devel] [PATCH v4 03/18] tcg-arm: Handle negated constant arguments to and/sub Richard Henderson
2013-03-30 20:43 ` [Qemu-devel] [PATCH v4 04/18] tcg-arm: Allow constant first argument to sub Richard Henderson
2013-03-30 20:43 ` [Qemu-devel] [PATCH v4 05/18] tcg-arm: Use tcg_out_dat_rIN for compares Richard Henderson
2013-03-30 20:43 ` [Qemu-devel] [PATCH v4 06/18] tcg-arm: Handle constant arguments to add2/sub2 Richard Henderson
2013-03-30 20:43 ` Richard Henderson [this message]
2013-03-30 20:43 ` [Qemu-devel] [PATCH v4 08/18] tcg-arm: Implement deposit for armv7 Richard Henderson
2013-03-30 20:43 ` [Qemu-devel] [PATCH v4 09/18] tcg-arm: Implement division instructions Richard Henderson
2013-03-30 20:43 ` [Qemu-devel] [PATCH v4 10/18] tcg-arm: Use TCG_REG_TMP name for the tcg temporary Richard Henderson
2013-03-30 20:43 ` [Qemu-devel] [PATCH v4 11/18] tcg-arm: Use R12 " Richard Henderson
2013-03-30 20:43 ` [Qemu-devel] [PATCH v4 12/18] tcg-arm: Cleanup multiply subroutines Richard Henderson
2013-03-30 20:43 ` [Qemu-devel] [PATCH v4 13/18] tcg-arm: Cleanup most primitive load store subroutines Richard Henderson
2013-03-30 20:43 ` [Qemu-devel] [PATCH v4 14/18] tcg-arm: Split out tcg_out_tlb_read Richard Henderson
2013-03-30 20:43 ` [Qemu-devel] [PATCH v4 15/18] tcg-arm: Improve scheduling of tcg_out_tlb_read Richard Henderson
2013-03-30 20:43 ` [Qemu-devel] [PATCH v4 16/18] tcg-arm: Use movi32 + blx for calls on v7 Richard Henderson
2013-03-30 20:43 ` [Qemu-devel] [PATCH v4 17/18] tcg-arm: Convert to CONFIG_QEMU_LDST_OPTIMIZATION Richard Henderson
2013-03-30 20:43 ` [Qemu-devel] [PATCH v4 18/18] tcg-arm: Tidy exit_tb Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1364676207-21516-8-git-send-email-rth@twiddle.net \
    --to=rth@twiddle.net \
    --cc=aurelien@aurel32.net \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).