qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Richard Henderson <rth@twiddle.net>
To: qemu-devel@nongnu.org
Cc: peter.maydell@linaro.org
Subject: [Qemu-devel] [PULL 10/10] tcg: Further optimizations for add2 and sub2_i32
Date: Fri,  9 Jan 2015 13:23:20 -0800	[thread overview]
Message-ID: <1420838600-22369-11-git-send-email-rth@twiddle.net> (raw)
In-Reply-To: <1420838600-22369-1-git-send-email-rth@twiddle.net>

Notice when the low parts of the operation cannot produce a carry
(add2) or borrow (sub2), and thus reduce the high part to a simple
add/sub.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/optimize.c | 83 +++++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 62 insertions(+), 21 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index c674fe2..bf3dc60 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -1129,33 +1129,74 @@ static void tcg_constant_folding(TCGContext *s)
         case INDEX_op_add2_i32:
         case INDEX_op_sub2_i32:
             if (temps[args[2]].state == TCG_TEMP_CONST
-                && temps[args[3]].state == TCG_TEMP_CONST
-                && temps[args[4]].state == TCG_TEMP_CONST
-                && temps[args[5]].state == TCG_TEMP_CONST) {
+                && temps[args[4]].state == TCG_TEMP_CONST) {
                 uint32_t al = temps[args[2]].val;
-                uint32_t ah = temps[args[3]].val;
                 uint32_t bl = temps[args[4]].val;
-                uint32_t bh = temps[args[5]].val;
-                uint64_t a = ((uint64_t)ah << 32) | al;
-                uint64_t b = ((uint64_t)bh << 32) | bl;
-                TCGArg rl, rh;
-                TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
-                TCGArg *args2 = &s->gen_opparam_buf[op2->args];
 
-                if (opc == INDEX_op_add2_i32) {
-                    a += b;
-                } else {
-                    a -= b;
+                if (temps[args[3]].state == TCG_TEMP_CONST
+                    && temps[args[5]].state == TCG_TEMP_CONST) {
+                    /* The entire 64-bit quantity is a constant.  */
+                    uint32_t ah = temps[args[3]].val;
+                    uint32_t bh = temps[args[5]].val;
+                    uint64_t a = ((uint64_t)ah << 32) | al;
+                    uint64_t b = ((uint64_t)bh << 32) | bl;
+                    TCGArg rl, rh;
+                    TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
+                    TCGArg *args2 = &s->gen_opparam_buf[op2->args];
+
+                    if (opc == INDEX_op_add2_i32) {
+                        a += b;
+                    } else {
+                        a -= b;
+                    }
+
+                    rl = args[0];
+                    rh = args[1];
+                    tcg_opt_gen_movi(s, op, args, opc, rl, (uint32_t)a);
+                    tcg_opt_gen_movi(s, op2, args2, opc, rh,
+                                     (uint32_t)(a >> 32));
+
+                    /* We've done all we need to do with the movi.  Skip it.  */
+                    oi_next = op2->next;
+                    break;
                 }
+                if (opc == INDEX_op_add2_i32 ? al + bl >= al : al >= bl) {
+                    /* The low part of the operation is constant,
+                       and does not produce a carry/borrow.  */
+                    TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
+                    TCGArg *args2 = &s->gen_opparam_buf[op2->args];
+
+                    if (opc == INDEX_op_add2_i32) {
+                        al += bl;
+                    } else {
+                        al -= bl;
+                    }
 
-                rl = args[0];
-                rh = args[1];
-                tcg_opt_gen_movi(s, op, args, opc, rl, (uint32_t)a);
-                tcg_opt_gen_movi(s, op2, args2, opc, rh, (uint32_t)(a >> 32));
+                    tcg_opt_gen_movi(s, op2, args2, opc, args[0], al);
+                do_addsub2_high:
+                    if (opc == INDEX_op_add2_i32) {
+                        op->opc = INDEX_op_add_i32;
+                    } else {
+                        op->opc = INDEX_op_sub_i32;
+                    }
+                    args[0] = args[1];
+                    args[1] = args[3];
+                    args[2] = args[5];
 
-                /* We've done all we need to do with the movi.  Skip it.  */
-                oi_next = op2->next;
-                break;
+                    /* We may be able to simplify the new op further.  */
+                    break;
+                }
+            }
+            if (temps[args[4]].state == TCG_TEMP_CONST
+                && temps[args[4]].val == 0
+                && args[3] != args[0] && args[5] != args[0]) {
+                /* The second low part of the operation is zero,
+                   and thus cannot produce a carry/borrow.  */
+                TCGOp *op2 = insert_op_before(s, op, INDEX_op_mov_i32, 2);
+                TCGArg *args2 = &s->gen_opparam_buf[op2->args];
+
+                tcg_opt_gen_mov(s, op2, args2, opc, args[0], args[2]);
+                goto do_addsub2_high;
             }
             goto do_default;
 
-- 
2.1.0

  parent reply	other threads:[~2015-01-09 21:24 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-01-09 21:23 [Qemu-devel] [PULL 00/10] Linked list for tcg ops Richard Henderson
2015-01-09 21:23 ` [Qemu-devel] [PULL 01/10] tcg: Move some opcode generation functions out of line Richard Henderson
2015-01-09 21:23 ` [Qemu-devel] [PULL 02/10] tcg: Reduce ifdefs in tcg-op.c Richard Henderson
2015-01-09 21:23 ` [Qemu-devel] [PULL 03/10] tcg: Move emit of INDEX_op_end into gen_tb_end Richard Henderson
2015-01-09 21:23 ` [Qemu-devel] [PULL 04/10] tcg: Introduce tcg_op_buf_count and tcg_op_buf_full Richard Henderson
2015-01-09 21:23 ` [Qemu-devel] [PULL 05/10] tcg: Put opcodes in a linked list Richard Henderson
2015-01-12 19:47   ` Peter Maydell
2015-01-09 21:23 ` [Qemu-devel] [PULL 06/10] tcg: Remove opcodes instead of noping them out Richard Henderson
2015-01-09 21:23 ` [Qemu-devel] [PULL 07/10] tcg: Implement insert_op_before Richard Henderson
2015-01-09 21:23 ` [Qemu-devel] [PULL 08/10] tcg: Remove unused opcodes Richard Henderson
2015-01-09 21:23 ` [Qemu-devel] [PULL 09/10] tcg: Optimize muls2_i32 Richard Henderson
2015-01-09 21:23 ` Richard Henderson [this message]
2015-01-12 10:06 ` [Qemu-devel] [PULL 00/10] Linked list for tcg ops Peter Maydell
2015-01-12 10:09   ` Peter Maydell
2015-01-12 15:20     ` Richard Henderson
2015-01-12 19:48       ` Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1420838600-22369-11-git-send-email-rth@twiddle.net \
    --to=rth@twiddle.net \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).