All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH 2/3] optimize: track nonzero bits of registers
  2013-01-11 23:42 [Qemu-devel] [PATCH 0/3] tcg-optimize with known-zero bits Richard Henderson
@ 2013-01-11 23:42 ` Richard Henderson
  0 siblings, 0 replies; 3+ messages in thread
From: Richard Henderson @ 2013-01-11 23:42 UTC (permalink / raw)
  To: qemu-devel; +Cc: Paolo Bonzini, Aurelien Jarno

From: Paolo Bonzini <pbonzini@redhat.com>

Add a "mask" field to the tcg_temp_info struct.  A bit that is zero
in "mask" will always be zero in the corresponding temporary.
Zero bits in the mask can be produced from moves of immediates,
zero-extensions, ANDs with constants, shifts; they can then be
be propagated by logical operations, shifts, sign-extensions,
negations, deposit operations, and conditional moves.  Other
operations will just reset the mask to all-ones, i.e. unknown.

[rth: s/target_ulong/tcg_target_ulong/]

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/optimize.c | 132 +++++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 110 insertions(+), 22 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 9d05a72..090efbc 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -46,6 +46,7 @@ struct tcg_temp_info {
     uint16_t prev_copy;
     uint16_t next_copy;
     tcg_target_ulong val;
+    tcg_target_ulong mask;
 };
 
 static struct tcg_temp_info temps[TCG_MAX_TEMPS];
@@ -63,6 +64,7 @@ static void reset_temp(TCGArg temp)
         }
     }
     temps[temp].state = TCG_TEMP_UNDEF;
+    temps[temp].mask = -1;
 }
 
 /* Reset all temporaries, given that there are NB_TEMPS of them.  */
@@ -71,6 +73,7 @@ static void reset_all_temps(int nb_temps)
     int i;
     for (i = 0; i < nb_temps; i++) {
         temps[i].state = TCG_TEMP_UNDEF;
+        temps[i].mask = -1;
     }
 }
 
@@ -148,33 +151,35 @@ static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
 static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args,
                             TCGArg dst, TCGArg src)
 {
-        reset_temp(dst);
-        assert(temps[src].state != TCG_TEMP_CONST);
-
-        if (s->temps[src].type == s->temps[dst].type) {
-            if (temps[src].state != TCG_TEMP_COPY) {
-                temps[src].state = TCG_TEMP_COPY;
-                temps[src].next_copy = src;
-                temps[src].prev_copy = src;
-            }
-            temps[dst].state = TCG_TEMP_COPY;
-            temps[dst].next_copy = temps[src].next_copy;
-            temps[dst].prev_copy = src;
-            temps[temps[dst].next_copy].prev_copy = dst;
-            temps[src].next_copy = dst;
+    reset_temp(dst);
+    temps[dst].mask = temps[src].mask;
+    assert(temps[src].state != TCG_TEMP_CONST);
+
+    if (s->temps[src].type == s->temps[dst].type) {
+        if (temps[src].state != TCG_TEMP_COPY) {
+            temps[src].state = TCG_TEMP_COPY;
+            temps[src].next_copy = src;
+            temps[src].prev_copy = src;
         }
+        temps[dst].state = TCG_TEMP_COPY;
+        temps[dst].next_copy = temps[src].next_copy;
+        temps[dst].prev_copy = src;
+        temps[temps[dst].next_copy].prev_copy = dst;
+        temps[src].next_copy = dst;
+    }
 
-        gen_args[0] = dst;
-        gen_args[1] = src;
+    gen_args[0] = dst;
+    gen_args[1] = src;
 }
 
 static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val)
 {
-        reset_temp(dst);
-        temps[dst].state = TCG_TEMP_CONST;
-        temps[dst].val = val;
-        gen_args[0] = dst;
-        gen_args[1] = val;
+    reset_temp(dst);
+    temps[dst].state = TCG_TEMP_CONST;
+    temps[dst].val = val;
+    temps[dst].mask = val;
+    gen_args[0] = dst;
+    gen_args[1] = val;
 }
 
 static TCGOpcode op_to_mov(TCGOpcode op)
@@ -479,6 +484,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                                     TCGArg *args, TCGOpDef *tcg_op_defs)
 {
     int i, nb_ops, op_index, nb_temps, nb_globals, nb_call_args;
+    tcg_target_ulong mask;
     TCGOpcode op;
     const TCGOpDef *def;
     TCGArg *gen_args;
@@ -621,6 +627,87 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             break;
         }
 
+        /* Simplify using known-zero bits */
+        mask = -1;
+        switch (op) {
+        CASE_OP_32_64(ext8s):
+            if ((temps[args[1]].mask & 0x80) != 0) {
+                break;
+            }
+        CASE_OP_32_64(ext8u):
+            mask = 0xff;
+            goto and_const;
+        CASE_OP_32_64(ext16s):
+            if ((temps[args[1]].mask & 0x8000) != 0) {
+                break;
+            }
+        CASE_OP_32_64(ext16u):
+            mask = 0xffff;
+            goto and_const;
+        case INDEX_op_ext32s_i64:
+            if ((temps[args[1]].mask & 0x80000000) != 0) {
+                break;
+            }
+        case INDEX_op_ext32u_i64:
+            mask = 0xffffffffU;
+            goto and_const;
+
+        CASE_OP_32_64(and):
+            mask = temps[args[2]].mask;
+            if (temps[args[2]].state == TCG_TEMP_CONST) {
+        and_const:
+                ;
+            }
+            mask = temps[args[1]].mask & mask;
+            break;
+
+        CASE_OP_32_64(sar):
+            if (temps[args[2]].state == TCG_TEMP_CONST) {
+                mask = ((tcg_target_long)temps[args[1]].mask
+                        >> temps[args[2]].val);
+            }
+            break;
+
+        CASE_OP_32_64(shr):
+            if (temps[args[2]].state == TCG_TEMP_CONST) {
+                mask = temps[args[1]].mask >> temps[args[2]].val;
+            }
+            break;
+
+        CASE_OP_32_64(shl):
+            if (temps[args[2]].state == TCG_TEMP_CONST) {
+                mask = temps[args[1]].mask << temps[args[2]].val;
+            }
+            break;
+
+        CASE_OP_32_64(neg):
+            /* Set to 1 all bits to the left of the rightmost.  */
+            mask = -(temps[args[1]].mask & -temps[args[1]].mask);
+            break;
+
+        CASE_OP_32_64(deposit):
+            tmp = ((1ull << args[4]) - 1);
+            mask = ((temps[args[1]].mask & ~(tmp << args[3]))
+                    | ((temps[args[2]].mask & tmp) << args[3]));
+            break;
+
+        CASE_OP_32_64(or):
+        CASE_OP_32_64(xor):
+            mask = temps[args[1]].mask | temps[args[2]].mask;
+            break;
+
+        CASE_OP_32_64(setcond):
+            mask = 1;
+            break;
+
+        CASE_OP_32_64(movcond):
+            mask = temps[args[3]].mask | temps[args[4]].mask;
+            break;
+
+        default:
+            break;
+        }
+
         /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
         switch (op) {
         CASE_OP_32_64(and):
@@ -947,7 +1034,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             /* Default case: we know nothing about operation (or were unable
                to compute the operation result) so no propagation is done.
                We trash everything if the operation is the end of a basic
-               block, otherwise we only trash the output args.  */
+               block, otherwise we only trash the output args.  "mask" is
+               the non-zero bits mask for the first output arg.  */
             if (def->flags & TCG_OPF_BB_END) {
                 reset_all_temps(nb_temps);
             } else {
-- 
1.7.11.7

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [Qemu-devel] [PATCH 2/3] optimize: track nonzero bits of registers
@ 2013-01-16  9:09 Jay Foad
  2013-01-16 10:47 ` Paolo Bonzini
  0 siblings, 1 reply; 3+ messages in thread
From: Jay Foad @ 2013-01-16  9:09 UTC (permalink / raw)
  To: Richard Henderson; +Cc: Paolo Bonzini, qemu-devel

> @@ -621,6 +627,87 @@ static TCGArg *tcg_constant_folding(TCGContext *s,
> uint16_t *tcg_opc_ptr,
>              break;
>          }
>
> +        /* Simplify using known-zero bits */
> +        mask = -1;
> +        switch (op) {
> +        CASE_OP_32_64(ext8s):
> +            if ((temps[args[1]].mask & 0x80) != 0) {
> +                break;
> +            }
> +        CASE_OP_32_64(ext8u):
> +            mask = 0xff;
> +            goto and_const;
> +        CASE_OP_32_64(ext16s):
> +            if ((temps[args[1]].mask & 0x8000) != 0) {
> +                break;
> +            }
> +        CASE_OP_32_64(ext16u):
> +            mask = 0xffff;
> +            goto and_const;
> +        case INDEX_op_ext32s_i64:
> +            if ((temps[args[1]].mask & 0x80000000) != 0) {
> +                break;
> +            }
> +        case INDEX_op_ext32u_i64:
> +            mask = 0xffffffffU;
> +            goto and_const;
> +
> +        CASE_OP_32_64(and):
> +            mask = temps[args[2]].mask;
> +            if (temps[args[2]].state == TCG_TEMP_CONST) {
> +        and_const:
> +                ;
> +            }

What's the purpose of this "if"?

Jay.

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [Qemu-devel] [PATCH 2/3] optimize: track nonzero bits of registers
  2013-01-16  9:09 [Qemu-devel] [PATCH 2/3] optimize: track nonzero bits of registers Jay Foad
@ 2013-01-16 10:47 ` Paolo Bonzini
  0 siblings, 0 replies; 3+ messages in thread
From: Paolo Bonzini @ 2013-01-16 10:47 UTC (permalink / raw)
  To: Jay Foad; +Cc: qemu-devel, Richard Henderson

Il 16/01/2013 10:09, Jay Foad ha scritto:
>> @@ -621,6 +627,87 @@ static TCGArg *tcg_constant_folding(TCGContext *s,
>> uint16_t *tcg_opc_ptr,
>>              break;
>>          }
>>
>> +        /* Simplify using known-zero bits */
>> +        mask = -1;
>> +        switch (op) {
>> +        CASE_OP_32_64(ext8s):
>> +            if ((temps[args[1]].mask & 0x80) != 0) {
>> +                break;
>> +            }
>> +        CASE_OP_32_64(ext8u):
>> +            mask = 0xff;
>> +            goto and_const;
>> +        CASE_OP_32_64(ext16s):
>> +            if ((temps[args[1]].mask & 0x8000) != 0) {
>> +                break;
>> +            }
>> +        CASE_OP_32_64(ext16u):
>> +            mask = 0xffff;
>> +            goto and_const;
>> +        case INDEX_op_ext32s_i64:
>> +            if ((temps[args[1]].mask & 0x80000000) != 0) {
>> +                break;
>> +            }
>> +        case INDEX_op_ext32u_i64:
>> +            mask = 0xffffffffU;
>> +            goto and_const;
>> +
>> +        CASE_OP_32_64(and):
>> +            mask = temps[args[2]].mask;
>> +            if (temps[args[2]].state == TCG_TEMP_CONST) {
>> +        and_const:
>> +                ;
>> +            }
> 
> What's the purpose of this "if"?

It is filled in by patch 3/3.

Paolo

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2013-01-16 12:11 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-01-16  9:09 [Qemu-devel] [PATCH 2/3] optimize: track nonzero bits of registers Jay Foad
2013-01-16 10:47 ` Paolo Bonzini
  -- strict thread matches above, loose matches on Subject: below --
2013-01-11 23:42 [Qemu-devel] [PATCH 0/3] tcg-optimize with known-zero bits Richard Henderson
2013-01-11 23:42 ` [Qemu-devel] [PATCH 2/3] optimize: track nonzero bits of registers Richard Henderson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.