* [Qemu-devel] [PATCH 0/2] tcg/optimize fix for known-zero bits

From: Richard Henderson
Date: 2014-05-23 18:57 UTC
To: qemu-devel
Cc: HPENNER, aurelien

A bug reported by Hartmut Penner, visible with an s390 host and a ppc64
guest:

IN:
0x00000000001ff378:  lwz     r3,436(r7)
0x00000000001ff37c:  lis     r0,-17874
0x00000000001ff380:  ori     r0,r0,35747
0x00000000001ff384:  mulhwu  r4,r3,r0
0x00000000001ff388:  rlwinm  r5,r4,29,3,31
0x00000000001ff38c:  rldicr  r6,r5,4,59
0x00000000001ff390:  rlwinm  r4,r4,31,1,29
0x00000000001ff394:  subf    r4,r4,r6
0x00000000001ff398:  subf    r4,r5,r4
0x00000000001ff39c:  subf    r3,r4,r3
0x00000000001ff3a0:  cmpwi   r3,0
0x00000000001ff3a4:  bne-    0x1ff494

Excerpting the relevant opcodes from the op and op_opt dumps:

 ---- 0x1ff378
 qemu_ld32u r3,tmp2,$0x1
 ---- 0x1ff37c
 movi_i64 r0,$0xffffffffba2e0000
 ---- 0x1ff384
 mov_i32 tmp0,r3
 mov_i32 tmp1,r0
 ext32u_i64 tmp2,tmp0

becomes

 ---- 0x1ff378
 qemu_ld32u r3,tmp2,$0x1
 ---- 0x1ff380
 movi_i64 r0,$0xffffffffba2e8ba3
 ---- 0x1ff384
 mov_i32 tmp0,r3
 *** mov_i64 tmp2,tmp0 ***

We dropped the ext32u opcode because we "proved" the value was already
zero-extended, except that we allowed the proof to pass through a 32-bit
temporary, which is illegal.  Ideally we'd have transformed that last op
into

 mov_i64 tmp2,r3

which would have been fine, but we have no optimization pass that looks
across multiple tcg opcodes.

This specific bug may only be visible on s390, which is unique in having
a 32-bit register move operation that does not modify the high bits of
the 64-bit register; x86_64 and aarch64 zero the high bits, while
sparc64 and ppc64 copy all 64 bits even for a 32-bit move.  But the
general point still stands: the high bits of a 32-bit operation's result
must be treated as garbage.

Hartmut reports success after this patch set, but didn't explicitly give
me a Tested-by.  ;-)

r~

Richard Henderson (2):
  tcg/optimize: Move updating of gen_opc_buf into tcg_opt_gen_mov*
  tcg/optimize: Remember garbage high bits for 32-bit ops

 tcg/optimize.c | 150 +++++++++++++++++++++++++++++++--------------------------
 1 file changed, 82 insertions(+), 68 deletions(-)

-- 
1.9.0
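For readers unfamiliar with the optimizer's bookkeeping, a minimal
sketch of the invariant at issue (illustrative names and simplified
logic, not the actual tcg/optimize.c structures):

    #include <stdint.h>

    /* A set bit in 'mask' means "this bit may be nonzero"; a clear bit
       means "this bit is provably zero". */
    struct temp_info {
        uint64_t mask;
    };

    /* qemu_ld32u zero-extends its result, so the high 32 bits of the
       destination become provably zero. */
    static void after_ld32u(struct temp_info *dst)
    {
        dst->mask &= 0xffffffffull;
    }

    /* A 32-bit mov on a 64-bit host may leave anything in the high half
       of the destination register (s390 preserves the old contents), so
       the high bits must be marked unknown.  Dropping the ext32u based
       on a mask that crossed such a mov is exactly the bug above. */
    static void after_mov_i32(struct temp_info *dst,
                              const struct temp_info *src)
    {
        dst->mask = src->mask | ~0xffffffffull;
    }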
* [Qemu-devel] [PATCH 1/2] tcg/optimize: Move updating of gen_opc_buf into tcg_opt_gen_mov*

From: Richard Henderson
Date: 2014-05-23 18:57 UTC
To: qemu-devel
Cc: HPENNER, aurelien

No functional change, just reduce a bit of redundancy.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/optimize.c | 117 +++++++++++++++++++++++++++------------------------
 1 file changed, 56 insertions(+), 61 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 3a504a1..83e1387 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -83,6 +83,20 @@ static int op_bits(TCGOpcode op)
     return def->flags & TCG_OPF_64BIT ? 64 : 32;
 }
 
+static TCGOpcode op_to_mov(TCGOpcode op)
+{
+    switch (op_bits(op)) {
+    case 32:
+        return INDEX_op_mov_i32;
+    case 64:
+        return INDEX_op_mov_i64;
+    default:
+        fprintf(stderr, "op_to_mov: unexpected return value of "
+                "function op_bits.\n");
+        tcg_abort();
+    }
+}
+
 static TCGOpcode op_to_movi(TCGOpcode op)
 {
     switch (op_bits(op)) {
@@ -148,9 +162,13 @@ static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
     return false;
 }
 
-static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args,
-                            TCGArg dst, TCGArg src)
+static void tcg_opt_gen_mov(TCGContext *s, int op_index, TCGArg *gen_args,
+                            TCGOpcode old_op, TCGArg dst, TCGArg src)
 {
+    TCGOpcode new_op = op_to_mov(old_op);
+
+    s->gen_opc_buf[op_index] = new_op;
+
     reset_temp(dst);
     temps[dst].mask = temps[src].mask;
     assert(temps[src].state != TCG_TEMP_CONST);
@@ -172,8 +190,13 @@
     gen_args[1] = src;
 }
 
-static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val)
+static void tcg_opt_gen_movi(TCGContext *s, int op_index, TCGArg *gen_args,
+                             TCGOpcode old_op, TCGArg dst, TCGArg val)
 {
+    TCGOpcode new_op = op_to_movi(old_op);
+
+    s->gen_opc_buf[op_index] = new_op;
+
     reset_temp(dst);
     temps[dst].state = TCG_TEMP_CONST;
     temps[dst].val = val;
@@ -182,20 +205,6 @@
     gen_args[1] = val;
 }
 
-static TCGOpcode op_to_mov(TCGOpcode op)
-{
-    switch (op_bits(op)) {
-    case 32:
-        return INDEX_op_mov_i32;
-    case 64:
-        return INDEX_op_mov_i64;
-    default:
-        fprintf(stderr, "op_to_mov: unexpected return value of "
-                "function op_bits.\n");
-        tcg_abort();
-    }
-}
-
 static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
 {
     uint64_t l64, h64;
@@ -619,8 +628,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         CASE_OP_32_64(rotr):
             if (temps[args[1]].state == TCG_TEMP_CONST
                 && temps[args[1]].val == 0) {
-                s->gen_opc_buf[op_index] = op_to_movi(op);
-                tcg_opt_gen_movi(gen_args, args[0], 0);
+                tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], 0);
                 args += 3;
                 gen_args += 2;
                 continue;
@@ -749,8 +757,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             if (temps_are_copies(args[0], args[1])) {
                 s->gen_opc_buf[op_index] = INDEX_op_nop;
             } else {
-                s->gen_opc_buf[op_index] = op_to_mov(op);
-                tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
+                tcg_opt_gen_mov(s, op_index, gen_args, op, args[0], args[1]);
                 gen_args += 2;
             }
             args += 3;
@@ -902,8 +909,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         if (mask == 0) {
             assert(nb_oargs == 1);
-            s->gen_opc_buf[op_index] = op_to_movi(op);
-            tcg_opt_gen_movi(gen_args, args[0], 0);
+            tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], 0);
             args += nb_args;
             gen_args += 2;
             continue;
@@ -913,12 +919,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             if (temps_are_copies(args[0], args[1])) {
                 s->gen_opc_buf[op_index] = INDEX_op_nop;
             } else if (temps[args[1]].state != TCG_TEMP_CONST) {
-                s->gen_opc_buf[op_index] = op_to_mov(op);
-                tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
+                tcg_opt_gen_mov(s, op_index, gen_args, op, args[0], args[1]);
                 gen_args += 2;
             } else {
-                s->gen_opc_buf[op_index] = op_to_movi(op);
-                tcg_opt_gen_movi(gen_args, args[0], temps[args[1]].val);
+                tcg_opt_gen_movi(s, op_index, gen_args, op,
+                                 args[0], temps[args[1]].val);
                 gen_args += 2;
             }
             args += nb_args;
@@ -933,8 +938,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         CASE_OP_32_64(mulsh):
             if ((temps[args[2]].state == TCG_TEMP_CONST
                 && temps[args[2]].val == 0)) {
-                s->gen_opc_buf[op_index] = op_to_movi(op);
-                tcg_opt_gen_movi(gen_args, args[0], 0);
+                tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], 0);
                 args += 3;
                 gen_args += 2;
                 continue;
@@ -952,8 +956,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             if (temps_are_copies(args[0], args[1])) {
                 s->gen_opc_buf[op_index] = INDEX_op_nop;
             } else {
-                s->gen_opc_buf[op_index] = op_to_mov(op);
-                tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
+                tcg_opt_gen_mov(s, op_index, gen_args, op,
+                                args[0], args[1]);
                 gen_args += 2;
             }
             args += 3;
@@ -970,8 +974,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         CASE_OP_32_64(sub):
         CASE_OP_32_64(xor):
             if (temps_are_copies(args[1], args[2])) {
-                s->gen_opc_buf[op_index] = op_to_movi(op);
-                tcg_opt_gen_movi(gen_args, args[0], 0);
+                tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], 0);
                 gen_args += 2;
                 args += 3;
                 continue;
@@ -992,19 +995,17 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 break;
             }
             if (temps[args[1]].state != TCG_TEMP_CONST) {
-                tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
+                tcg_opt_gen_mov(s, op_index, gen_args, op, args[0], args[1]);
                 gen_args += 2;
                 args += 2;
                 break;
             }
             /* Source argument is constant.  Rewrite the operation and
                let movi case handle it. */
-            op = op_to_movi(op);
-            s->gen_opc_buf[op_index] = op;
             args[1] = temps[args[1]].val;
             /* fallthrough */
         CASE_OP_32_64(movi):
-            tcg_opt_gen_movi(gen_args, args[0], args[1]);
+            tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], args[1]);
             gen_args += 2;
             args += 2;
             break;
@@ -1018,9 +1019,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         case INDEX_op_ext32s_i64:
         case INDEX_op_ext32u_i64:
             if (temps[args[1]].state == TCG_TEMP_CONST) {
-                s->gen_opc_buf[op_index] = op_to_movi(op);
                 tmp = do_constant_folding(op, temps[args[1]].val, 0);
-                tcg_opt_gen_movi(gen_args, args[0], tmp);
+                tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp);
                 gen_args += 2;
                 args += 2;
                 break;
@@ -1029,9 +1029,8 @@
         case INDEX_op_trunc_shr_i32:
             if (temps[args[1]].state == TCG_TEMP_CONST) {
-                s->gen_opc_buf[op_index] = op_to_movi(op);
                 tmp = do_constant_folding(op, temps[args[1]].val, args[2]);
-                tcg_opt_gen_movi(gen_args, args[0], tmp);
+                tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp);
                 gen_args += 2;
                 args += 3;
                 break;
@@ -1062,10 +1061,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         CASE_OP_32_64(remu):
             if (temps[args[1]].state == TCG_TEMP_CONST
                 && temps[args[2]].state == TCG_TEMP_CONST) {
-                s->gen_opc_buf[op_index] = op_to_movi(op);
                 tmp = do_constant_folding(op, temps[args[1]].val,
                                           temps[args[2]].val);
-                tcg_opt_gen_movi(gen_args, args[0], tmp);
+                tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp);
                 gen_args += 2;
                 args += 3;
                 break;
@@ -1075,10 +1073,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         CASE_OP_32_64(deposit):
             if (temps[args[1]].state == TCG_TEMP_CONST
                 && temps[args[2]].state == TCG_TEMP_CONST) {
-                s->gen_opc_buf[op_index] = op_to_movi(op);
                 tmp = deposit64(temps[args[1]].val, args[3], args[4],
                                 temps[args[2]].val);
-                tcg_opt_gen_movi(gen_args, args[0], tmp);
+                tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp);
                 gen_args += 2;
                 args += 5;
                 break;
@@ -1088,8 +1085,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         CASE_OP_32_64(setcond):
             tmp = do_constant_folding_cond(op, args[1], args[2], args[3]);
             if (tmp != 2) {
-                s->gen_opc_buf[op_index] = op_to_movi(op);
-                tcg_opt_gen_movi(gen_args, args[0], tmp);
+                tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp);
                 gen_args += 2;
                 args += 4;
                 break;
@@ -1118,12 +1114,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             if (temps_are_copies(args[0], args[4-tmp])) {
                 s->gen_opc_buf[op_index] = INDEX_op_nop;
             } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) {
-                s->gen_opc_buf[op_index] = op_to_movi(op);
-                tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val);
+                tcg_opt_gen_movi(s, op_index, gen_args, op,
+                                 args[0], temps[args[4-tmp]].val);
                 gen_args += 2;
             } else {
-                s->gen_opc_buf[op_index] = op_to_mov(op);
-                tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]);
+                tcg_opt_gen_mov(s, op_index, gen_args, op,
+                                args[0], args[4-tmp]);
                 gen_args += 2;
             }
             args += 6;
@@ -1156,10 +1152,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             rl = args[0];
             rh = args[1];
-            s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
-            s->gen_opc_buf[++op_index] = INDEX_op_movi_i32;
-            tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)a);
-            tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(a >> 32));
+            tcg_opt_gen_movi(s, op_index, &gen_args[0],
+                             op, rl, (uint32_t)a);
+            tcg_opt_gen_movi(s, ++op_index, &gen_args[2],
+                             op, rh, (uint32_t)(a >> 32));
             gen_args += 4;
             args += 6;
             break;
@@ -1179,10 +1175,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             rl = args[0];
             rh = args[1];
-            s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
-            s->gen_opc_buf[++op_index] = INDEX_op_movi_i32;
-            tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)r);
-            tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(r >> 32));
+            tcg_opt_gen_movi(s, op_index, &gen_args[0],
+                             op, rl, (uint32_t)r);
+            tcg_opt_gen_movi(s, ++op_index, &gen_args[2],
+                             op, rh, (uint32_t)(r >> 32));
             gen_args += 4;
             args += 4;
             break;
@@ -1223,8 +1219,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         case INDEX_op_setcond2_i32:
             tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
             if (tmp != 2) {
-                s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
-                tcg_opt_gen_movi(gen_args, args[0], tmp);
+                tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp);
                 gen_args += 2;
             } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
                        && temps[args[3]].state == TCG_TEMP_CONST
-- 
1.9.0
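The shape of the change at each call site, distilled from the hunks
above (schematic fragments, not standalone code):

    /* Before: every caller rewrote the opcode in gen_opc_buf by hand. */
    s->gen_opc_buf[op_index] = op_to_movi(op);
    tcg_opt_gen_movi(gen_args, args[0], 0);

    /* After: the helper takes the context, the op index and the original
       opcode, and performs the rewrite itself. */
    tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], 0);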
* [Qemu-devel] [PATCH 2/2] tcg/optimize: Remember garbage high bits for 32-bit ops

From: Richard Henderson
Date: 2014-05-23 18:57 UTC
To: qemu-devel
Cc: HPENNER, aurelien

For a 64-bit host, the high bits of a register after a 32-bit operation
are undefined.  Adjust the temps mask for all 32-bit ops to reflect that.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/optimize.c | 33 ++++++++++++++++++++++++++-------
 1 file changed, 26 insertions(+), 7 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 83e1387..19e4831 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -166,11 +166,18 @@ static void tcg_opt_gen_mov(TCGContext *s, int op_index, TCGArg *gen_args,
                             TCGOpcode old_op, TCGArg dst, TCGArg src)
 {
     TCGOpcode new_op = op_to_mov(old_op);
+    tcg_target_ulong mask;
 
     s->gen_opc_buf[op_index] = new_op;
 
     reset_temp(dst);
-    temps[dst].mask = temps[src].mask;
+    mask = temps[src].mask;
+    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
+        /* High bits of the destination are now garbage. */
+        mask |= ~0xffffffffull;
+    }
+    temps[dst].mask = mask;
+
     assert(temps[src].state != TCG_TEMP_CONST);
 
     if (s->temps[src].type == s->temps[dst].type) {
@@ -194,13 +201,20 @@ static void tcg_opt_gen_movi(TCGContext *s, int op_index, TCGArg *gen_args,
                              TCGOpcode old_op, TCGArg dst, TCGArg val)
 {
     TCGOpcode new_op = op_to_movi(old_op);
+    tcg_target_ulong mask;
 
     s->gen_opc_buf[op_index] = new_op;
 
     reset_temp(dst);
     temps[dst].state = TCG_TEMP_CONST;
     temps[dst].val = val;
-    temps[dst].mask = val;
+    mask = val;
+    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) {
+        /* High bits of the destination are now garbage. */
+        mask |= ~0xffffffffull;
+    }
+    temps[dst].mask = mask;
+
     gen_args[0] = dst;
     gen_args[1] = val;
 }
@@ -539,7 +553,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
     for (op_index = 0; op_index < nb_ops; op_index++) {
         TCGOpcode op = s->gen_opc_buf[op_index];
         const TCGOpDef *def = &tcg_op_defs[op];
-        tcg_target_ulong mask, affected;
+        tcg_target_ulong mask, partmask, affected;
         int nb_oargs, nb_iargs, nb_args, i;
         TCGArg tmp;
 
@@ -901,13 +915,18 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             break;
         }
 
-        /* 32-bit ops (non 64-bit ops and non load/store ops) generate 32-bit
-           results */
+        /* 32-bit ops (non 64-bit ops and non load/store ops) generate
+           32-bit results.  For the result-is-zero test below, we can
+           ignore high bits, but for further optimizations we need to
+           record that the high bits contain garbage. */
+        partmask = mask;
         if (!(def->flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_64BIT))) {
-            mask &= 0xffffffffu;
+            mask |= ~(tcg_target_ulong)0xffffffffu;
+            partmask &= 0xffffffffu;
+            affected &= 0xffffffffu;
         }
 
-        if (mask == 0) {
+        if (partmask == 0) {
             assert(nb_oargs == 1);
             tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], 0);
             args += nb_args;
-- 
1.9.0
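Applied to the sequence from the cover letter, the new bookkeeping works
out as follows (a runnable worked example with illustrative mask values,
not actual optimizer output):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        /* qemu_ld32u r3: a zero-extending 32-bit load, so the high 32
           bits of r3 are known zero (a set mask bit = "may be nonzero"). */
        uint64_t r3_mask = 0x00000000ffffffffull;

        /* mov_i32 tmp0,r3: a 32-bit op on a 64-bit host.  Patch 2 keeps
           the low 32 bits of the source mask but marks the high bits as
           garbage. */
        uint64_t tmp0_mask = r3_mask | ~0xffffffffull;

        /* The optimizer can therefore no longer "prove" tmp0 to be
           zero-extended, and the ext32u_i64 from the cover letter is
           kept rather than dropped. */
        assert((tmp0_mask & ~0xffffffffull) == 0xffffffff00000000ull);
        return 0;
    }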
* Re: [Qemu-devel] [PATCH 2/2] tcg/optimize: Remember garbage high bits for 32-bit ops

From: Paolo Bonzini
Date: 2014-05-23 19:46 UTC
To: Richard Henderson, qemu-devel
Cc: aurelien, HPENNER

On 23/05/2014 20:57, Richard Henderson wrote:
> For a 64-bit host, the high bits of a register after a 32-bit operation
> are undefined.  Adjust the temps mask for all 32-bit ops to reflect that.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/optimize.c | 33 ++++++++++++++++++++++++++-------
>  1 file changed, 26 insertions(+), 7 deletions(-)
>
> diff --git a/tcg/optimize.c b/tcg/optimize.c
> index 83e1387..19e4831 100644
> --- a/tcg/optimize.c
> +++ b/tcg/optimize.c
> @@ -166,11 +166,18 @@ static void tcg_opt_gen_mov(TCGContext *s, int op_index, TCGArg *gen_args,
>                              TCGOpcode old_op, TCGArg dst, TCGArg src)
>  {
>      TCGOpcode new_op = op_to_mov(old_op);
> +    tcg_target_ulong mask;
>
>      s->gen_opc_buf[op_index] = new_op;
>
>      reset_temp(dst);
> -    temps[dst].mask = temps[src].mask;
> +    mask = temps[src].mask;
> +    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
> +        /* High bits of the destination are now garbage. */

Or they are zero on x86_64... perhaps this could be some kind of TCG
target hook.

> +        mask |= ~0xffffffffull;
> +    }
> +    temps[dst].mask = mask;
> +
>      assert(temps[src].state != TCG_TEMP_CONST);
>
>      if (s->temps[src].type == s->temps[dst].type) {
> [...]

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
* Re: [Qemu-devel] [PATCH 2/2] tcg/optimize: Remember garbage high bits for 32-bit ops

From: Richard Henderson
Date: 2014-05-23 20:01 UTC
To: Paolo Bonzini, qemu-devel
Cc: aurelien, HPENNER

On 05/23/2014 12:46 PM, Paolo Bonzini wrote:
>> @@ -166,11 +166,18 @@ static void tcg_opt_gen_mov(TCGContext *s, int op_index, TCGArg *gen_args,
>>                              TCGOpcode old_op, TCGArg dst, TCGArg src)
>>  {
>>      TCGOpcode new_op = op_to_mov(old_op);
>> +    tcg_target_ulong mask;
>>
>>      s->gen_opc_buf[op_index] = new_op;
>>
>>      reset_temp(dst);
>> -    temps[dst].mask = temps[src].mask;
>> +    mask = temps[src].mask;
>> +    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
>> +        /* High bits of the destination are now garbage. */
>
> Or they are zero on x86_64... perhaps this could be some kind of TCG
> target hook.

Only if we actually issued a "mov" insn.  If the register allocator
decided to adjust its tables instead, we won't issue an insn, at which
point we really do have garbage in the high bits.

I don't think we should be thinking about the target at all at this point.

r~
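To make the allocator scenario concrete, a self-contained sketch
(hypothetical structure and function names, not TCG's actual register
allocator):

    /* Hypothetical model of how "mov_i32 dst, src" can compile to no
       host instruction at all. */
    typedef struct {
        int reg;    /* host register currently holding the temp */
    } Temp;

    static void alloc_mov_i32(Temp *dst, Temp *src)
    {
        if (src->reg >= 0) {
            /* Coalesce: dst takes over src's register.  No host insn
               is emitted, so the high 32 bits of that register still
               hold stale data, even on a host whose real 32-bit mov
               would zero-extend. */
            dst->reg = src->reg;
            return;
        }
        /* Otherwise a real 32-bit host move would be emitted here. */
    }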