[Qemu-devel] [PATCH 2/3] optimize: track nonzero bits of registers

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

From: Richard Henderson <rth@twiddle.net>
To: qemu-devel@nongnu.org
Cc: Paolo Bonzini <pbonzini@redhat.com>,
	Aurelien Jarno <aurelien@aurel32.net>
Subject: [Qemu-devel] [PATCH 2/3] optimize: track nonzero bits of registers
Date: Fri, 11 Jan 2013 15:42:52 -0800	[thread overview]
Message-ID: <1357947773-31051-3-git-send-email-rth@twiddle.net> (raw)
In-Reply-To: <1357947773-31051-1-git-send-email-rth@twiddle.net>

From: Paolo Bonzini <pbonzini@redhat.com>

Add a "mask" field to the tcg_temp_info struct.  A bit that is zero
in "mask" will always be zero in the corresponding temporary.
Zero bits in the mask can be produced from moves of immediates,
zero-extensions, ANDs with constants, shifts; they can then be
be propagated by logical operations, shifts, sign-extensions,
negations, deposit operations, and conditional moves.  Other
operations will just reset the mask to all-ones, i.e. unknown.

[rth: s/target_ulong/tcg_target_ulong/]

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/optimize.c | 132 +++++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 110 insertions(+), 22 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 9d05a72..090efbc 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -46,6 +46,7 @@ struct tcg_temp_info {
     uint16_t prev_copy;
     uint16_t next_copy;
     tcg_target_ulong val;
+    tcg_target_ulong mask;
 };
 
 static struct tcg_temp_info temps[TCG_MAX_TEMPS];
@@ -63,6 +64,7 @@ static void reset_temp(TCGArg temp)
         }
     }
     temps[temp].state = TCG_TEMP_UNDEF;
+    temps[temp].mask = -1;
 }
 
 /* Reset all temporaries, given that there are NB_TEMPS of them.  */
@@ -71,6 +73,7 @@ static void reset_all_temps(int nb_temps)
     int i;
     for (i = 0; i < nb_temps; i++) {
         temps[i].state = TCG_TEMP_UNDEF;
+        temps[i].mask = -1;
     }
 }
 
@@ -148,33 +151,35 @@ static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
 static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args,
                             TCGArg dst, TCGArg src)
 {
-        reset_temp(dst);
-        assert(temps[src].state != TCG_TEMP_CONST);
-
-        if (s->temps[src].type == s->temps[dst].type) {
-            if (temps[src].state != TCG_TEMP_COPY) {
-                temps[src].state = TCG_TEMP_COPY;
-                temps[src].next_copy = src;
-                temps[src].prev_copy = src;
-            }
-            temps[dst].state = TCG_TEMP_COPY;
-            temps[dst].next_copy = temps[src].next_copy;
-            temps[dst].prev_copy = src;
-            temps[temps[dst].next_copy].prev_copy = dst;
-            temps[src].next_copy = dst;
+    reset_temp(dst);
+    temps[dst].mask = temps[src].mask;
+    assert(temps[src].state != TCG_TEMP_CONST);
+
+    if (s->temps[src].type == s->temps[dst].type) {
+        if (temps[src].state != TCG_TEMP_COPY) {
+            temps[src].state = TCG_TEMP_COPY;
+            temps[src].next_copy = src;
+            temps[src].prev_copy = src;
         }
+        temps[dst].state = TCG_TEMP_COPY;
+        temps[dst].next_copy = temps[src].next_copy;
+        temps[dst].prev_copy = src;
+        temps[temps[dst].next_copy].prev_copy = dst;
+        temps[src].next_copy = dst;
+    }
 
-        gen_args[0] = dst;
-        gen_args[1] = src;
+    gen_args[0] = dst;
+    gen_args[1] = src;
 }
 
 static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val)
 {
-        reset_temp(dst);
-        temps[dst].state = TCG_TEMP_CONST;
-        temps[dst].val = val;
-        gen_args[0] = dst;
-        gen_args[1] = val;
+    reset_temp(dst);
+    temps[dst].state = TCG_TEMP_CONST;
+    temps[dst].val = val;
+    temps[dst].mask = val;
+    gen_args[0] = dst;
+    gen_args[1] = val;
 }
 
 static TCGOpcode op_to_mov(TCGOpcode op)
@@ -479,6 +484,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                                     TCGArg *args, TCGOpDef *tcg_op_defs)
 {
     int i, nb_ops, op_index, nb_temps, nb_globals, nb_call_args;
+    tcg_target_ulong mask;
     TCGOpcode op;
     const TCGOpDef *def;
     TCGArg *gen_args;
@@ -621,6 +627,87 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             break;
         }
 
+        /* Simplify using known-zero bits */
+        mask = -1;
+        switch (op) {
+        CASE_OP_32_64(ext8s):
+            if ((temps[args[1]].mask & 0x80) != 0) {
+                break;
+            }
+        CASE_OP_32_64(ext8u):
+            mask = 0xff;
+            goto and_const;
+        CASE_OP_32_64(ext16s):
+            if ((temps[args[1]].mask & 0x8000) != 0) {
+                break;
+            }
+        CASE_OP_32_64(ext16u):
+            mask = 0xffff;
+            goto and_const;
+        case INDEX_op_ext32s_i64:
+            if ((temps[args[1]].mask & 0x80000000) != 0) {
+                break;
+            }
+        case INDEX_op_ext32u_i64:
+            mask = 0xffffffffU;
+            goto and_const;
+
+        CASE_OP_32_64(and):
+            mask = temps[args[2]].mask;
+            if (temps[args[2]].state == TCG_TEMP_CONST) {
+        and_const:
+                ;
+            }
+            mask = temps[args[1]].mask & mask;
+            break;
+
+        CASE_OP_32_64(sar):
+            if (temps[args[2]].state == TCG_TEMP_CONST) {
+                mask = ((tcg_target_long)temps[args[1]].mask
+                        >> temps[args[2]].val);
+            }
+            break;
+
+        CASE_OP_32_64(shr):
+            if (temps[args[2]].state == TCG_TEMP_CONST) {
+                mask = temps[args[1]].mask >> temps[args[2]].val;
+            }
+            break;
+
+        CASE_OP_32_64(shl):
+            if (temps[args[2]].state == TCG_TEMP_CONST) {
+                mask = temps[args[1]].mask << temps[args[2]].val;
+            }
+            break;
+
+        CASE_OP_32_64(neg):
+            /* Set to 1 all bits to the left of the rightmost.  */
+            mask = -(temps[args[1]].mask & -temps[args[1]].mask);
+            break;
+
+        CASE_OP_32_64(deposit):
+            tmp = ((1ull << args[4]) - 1);
+            mask = ((temps[args[1]].mask & ~(tmp << args[3]))
+                    | ((temps[args[2]].mask & tmp) << args[3]));
+            break;
+
+        CASE_OP_32_64(or):
+        CASE_OP_32_64(xor):
+            mask = temps[args[1]].mask | temps[args[2]].mask;
+            break;
+
+        CASE_OP_32_64(setcond):
+            mask = 1;
+            break;
+
+        CASE_OP_32_64(movcond):
+            mask = temps[args[3]].mask | temps[args[4]].mask;
+            break;
+
+        default:
+            break;
+        }
+
         /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
         switch (op) {
         CASE_OP_32_64(and):
@@ -947,7 +1034,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             /* Default case: we know nothing about operation (or were unable
                to compute the operation result) so no propagation is done.
                We trash everything if the operation is the end of a basic
-               block, otherwise we only trash the output args.  */
+               block, otherwise we only trash the output args.  "mask" is
+               the non-zero bits mask for the first output arg.  */
             if (def->flags & TCG_OPF_BB_END) {
                 reset_all_temps(nb_temps);
             } else {
-- 
1.7.11.7

next prev parent reply	other threads:[~2013-01-11 23:43 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-01-11 23:42 [Qemu-devel] [PATCH 0/3] tcg-optimize with known-zero bits Richard Henderson
2013-01-11 23:42 ` [Qemu-devel] [PATCH 1/3] optimize: only write to state when clearing optimizer data Richard Henderson
2013-01-11 23:42 ` Richard Henderson [this message]
2013-01-11 23:42 ` [Qemu-devel] [PATCH 3/3] optimize: optimize using nonzero bits Richard Henderson
2013-01-12  7:55 ` [Qemu-devel] [PATCH 0/3] tcg-optimize with known-zero bits Paolo Bonzini
2013-01-19 13:58 ` Blue Swirl
  -- strict thread matches above, loose matches on Subject: below --
2013-01-16  9:09 [Qemu-devel] [PATCH 2/3] optimize: track nonzero bits of registers Jay Foad
2013-01-16 10:47 ` Paolo Bonzini

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:9d05a72 dfblob:090efbc )
 OR (
bs:"[Qemu-devel] [PATCH 2/3] optimize: track nonzero bits of registers" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1357947773-31051-3-git-send-email-rth@twiddle.net \
    --to=rth@twiddle.net \
    --cc=aurelien@aurel32.net \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).