qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] Shifts, ppc[64], xtensa
@ 2012-09-18 19:52 malc
  2012-09-18 23:20 ` Max Filippov
                   ` (2 more replies)
  0 siblings, 3 replies; 22+ messages in thread
From: malc @ 2012-09-18 19:52 UTC (permalink / raw)
  To: qemu-devel; +Cc: Max Filippov, Richard Henderson


Looks like PPC/PPC64 is also hit by shift issues, on top of that xtensa 
exposed another bug in power's tcg - gototb's target was expected to be
always filled via tb_set_jmp_target (even though it's clearly not what
tcg/README prescribes, sorry about that).

Thanks to Max Filippov for pointing to xtensa test suite that helped to
narrow the search to gototb.

Testing of the following with other targets on ppc flavours is welcome..

P.S. Xtensa does mighty weird things with shifts i must say...

diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index 26c4b33..08f62fa 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -409,6 +409,7 @@ static int tcg_target_const_match(tcg_target_long val,
 
 #define TW     XO31(4)
 #define TRAP   (TW | TO (31))
+#define NOP    0x60000000
 
 #define RT(r) ((r)<<21)
 #define RS(r) ((r)<<21)
@@ -1306,10 +1307,10 @@ void ppc_tb_set_jmp_target (unsigned long jmp_addr, unsigned long addr)
             *ptr = 0x48000000 | (disp & 0x03fffffc); /* b disp */
             patch_size = 4;
         } else {
-            ptr[0] = 0x60000000; /* nop */
-            ptr[1] = 0x60000000;
-            ptr[2] = 0x60000000;
-            ptr[3] = 0x60000000;
+            ptr[0] = NOP;
+            ptr[1] = NOP;
+            ptr[2] = NOP;
+            ptr[3] = NOP;
             patch_size = 16;
         }
     }
@@ -1330,7 +1331,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
             /* direct jump method */
 
             s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
-            s->code_ptr += 16;
+            tcg_out32 (s, NOP);
+            tcg_out32 (s, NOP);
+            tcg_out32 (s, NOP);
+            tcg_out32 (s, NOP);
         }
         else {
             tcg_abort ();
@@ -1565,12 +1569,16 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
 
     case INDEX_op_shl_i32:
         if (const_args[2]) {
+            if (args[2] > 31) {
+                tcg_out_movi (s, TCG_TYPE_I32, 0, args[2]);
+                tcg_out32 (s, SLW | SAB (args[1], args[0], 0));
+            }
             tcg_out32 (s, (RLWINM
                            | RA (args[0])
                            | RS (args[1])
-                           | SH (args[2])
+                           | SH (args[2] & 31)
                            | MB (0)
-                           | ME (31 - args[2])
+                           | ME (31 - (args[2] & 31))
                            )
                 );
         }
@@ -1579,21 +1587,35 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         break;
     case INDEX_op_shr_i32:
         if (const_args[2]) {
-            tcg_out32 (s, (RLWINM
-                           | RA (args[0])
-                           | RS (args[1])
-                           | SH (32 - args[2])
-                           | MB (args[2])
-                           | ME (31)
-                           )
-                );
+            if (args[2] > 31) {
+                tcg_out_movi (s, TCG_TYPE_I32, 0, args[2]);
+                tcg_out32 (s, SRW | SAB (args[1], args[0], 0));
+            }
+            else {
+                tcg_out32 (s, (RLWINM
+                               | RA (args[0])
+                               | RS (args[1])
+                               | SH (32 - args[2])
+                               | MB (args[2])
+                               | ME (31)
+                               )
+                    );
+            }
         }
-        else
+        else {
             tcg_out32 (s, SRW | SAB (args[1], args[0], args[2]));
+        }
         break;
     case INDEX_op_sar_i32:
-        if (const_args[2])
-            tcg_out32 (s, SRAWI | RS (args[1]) | RA (args[0]) | SH (args[2]));
+        if (const_args[2]) {
+            if (args[2] > 31) {
+                tcg_out_movi (s, TCG_TYPE_I32, 0, args[2]);
+                tcg_out32 (s, SRAW | SAB (args[1], args[0], 0));
+            }
+            else {
+                tcg_out32 (s, SRAWI | RS (args[1]) | RA (args[0]) | SH (args[2]));
+            }
+        }
         else
             tcg_out32 (s, SRAW | SAB (args[1], args[0], args[2]));
         break;
@@ -1604,7 +1626,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                 | RS (args[1])
                 | MB (0)
                 | ME (31)
-                | (const_args[2] ? RLWINM | SH (args[2])
+                | (const_args[2] ? RLWINM | SH (args[2] & 31)
                                  : RLWNM | RB (args[2]))
                 ;
             tcg_out32 (s, op);
@@ -1619,7 +1641,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                 tcg_out32 (s, RLWINM
                            | RA (args[0])
                            | RS (args[1])
-                           | SH (32 - args[2])
+                           | SH (32 - (args[2] & 31))
                            | MB (0)
                            | ME (31)
                     );
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 337cd41..5da7dc4 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -390,6 +390,7 @@ static int tcg_target_const_match (tcg_target_long val,
 
 #define TW     XO31( 4)
 #define TRAP   (TW | TO (31))
+#define NOP    0x60000000
 
 #define RT(r) ((r)<<21)
 #define RS(r) ((r)<<21)
@@ -1225,7 +1226,13 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
             /* direct jump method */
 
             s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
-            s->code_ptr += 28;
+            tcg_out32 (s, NOP);
+            tcg_out32 (s, NOP);
+            tcg_out32 (s, NOP);
+            tcg_out32 (s, NOP);
+            tcg_out32 (s, NOP);
+            tcg_out32 (s, NOP);
+            tcg_out32 (s, NOP);
         }
         else {
             tcg_abort ();
@@ -1430,21 +1437,36 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
         break;
     case INDEX_op_shr_i32:
         if (const_args[2]) {
-            tcg_out32 (s, (RLWINM
-                           | RA (args[0])
-                           | RS (args[1])
-                           | SH (32 - args[2])
-                           | MB (args[2])
-                           | ME (31)
-                           )
-                );
+            if (args[2] > 31) {
+                tcg_out_movi (s, TCG_TYPE_I32, 0, args[2]);
+                tcg_out32 (s, SRW | SAB (args[1], args[0], 0));
+            }
+            else {
+                tcg_out32 (s, (RLWINM
+                               | RA (args[0])
+                               | RS (args[1])
+                               | SH (32 - args[2])
+                               | MB (args[2])
+                               | ME (31)
+                               )
+                    );
+            }
         }
-        else
+        else {
             tcg_out32 (s, SRW | SAB (args[1], args[0], args[2]));
+        }
+        break;
         break;
     case INDEX_op_sar_i32:
-        if (const_args[2])
-            tcg_out32 (s, SRAWI | RS (args[1]) | RA (args[0]) | SH (args[2]));
+        if (const_args[2]) {
+            if (args[2] > 31) {
+                tcg_out_movi (s, TCG_TYPE_I32, 0, args[2]);
+                tcg_out32 (s, SRAW | SAB (args[1], args[0], 0));
+            }
+            else {
+                tcg_out32 (s, SRAWI | RS (args[1]) | RA (args[0]) | SH (args[2]));
+            }
+        }
         else
             tcg_out32 (s, SRAW | SAB (args[1], args[0], args[2]));
         break;

-- 
mailto:av1474@comtv.ru

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [Qemu-devel] Shifts, ppc[64], xtensa
  2012-09-18 19:52 [Qemu-devel] Shifts, ppc[64], xtensa malc
@ 2012-09-18 23:20 ` Max Filippov
  2012-09-19 12:49   ` malc
  2012-09-19  0:10 ` Richard Henderson
  2012-09-19 12:57 ` Peter Maydell
  2 siblings, 1 reply; 22+ messages in thread
From: Max Filippov @ 2012-09-18 23:20 UTC (permalink / raw)
  To: malc; +Cc: qemu-devel, Richard Henderson

On Tue, Sep 18, 2012 at 11:52 PM, malc <av1474@comtv.ru> wrote:
>
> Looks like PPC/PPC64 is also hit by shift issues, on top of that xtensa

malc, could you please expand a little bit what are these shift issues?
(sounds like a modern trend, I must have missed something)

> exposed another bug in power's tcg - gototb's target was expected to be
> always filled via tb_set_jmp_target (even though it's clearly not what
> tcg/README prescribes, sorry about that).

-- 
Thanks.
-- Max

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [Qemu-devel] Shifts, ppc[64], xtensa
  2012-09-18 19:52 [Qemu-devel] Shifts, ppc[64], xtensa malc
  2012-09-18 23:20 ` Max Filippov
@ 2012-09-19  0:10 ` Richard Henderson
  2012-09-19 12:46   ` malc
  2012-09-19 12:57 ` Peter Maydell
  2 siblings, 1 reply; 22+ messages in thread
From: Richard Henderson @ 2012-09-19  0:10 UTC (permalink / raw)
  To: malc; +Cc: Max Filippov, qemu-devel

On 09/18/2012 12:52 PM, malc wrote:
>      case INDEX_op_shl_i32:
>          if (const_args[2]) {
> +            if (args[2] > 31) {
> +                tcg_out_movi (s, TCG_TYPE_I32, 0, args[2]);
> +                tcg_out32 (s, SLW | SAB (args[1], args[0], 0));
> +            }

What's this bit for?

AFAIK all you should need are the added & 31 below.


r~

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [Qemu-devel] Shifts, ppc[64], xtensa
  2012-09-19  0:10 ` Richard Henderson
@ 2012-09-19 12:46   ` malc
  0 siblings, 0 replies; 22+ messages in thread
From: malc @ 2012-09-19 12:46 UTC (permalink / raw)
  To: Richard Henderson; +Cc: Max Filippov, qemu-devel

On Tue, 18 Sep 2012, Richard Henderson wrote:

> On 09/18/2012 12:52 PM, malc wrote:
> >      case INDEX_op_shl_i32:
> >          if (const_args[2]) {
> > +            if (args[2] > 31) {
> > +                tcg_out_movi (s, TCG_TYPE_I32, 0, args[2]);
> > +                tcg_out32 (s, SLW | SAB (args[1], args[0], 0));
> > +            }
> 
> What's this bit for?

This bit is to offset for the fact that i haven't slept in a while, and...

> 
> AFAIK all you should need are the added & 31 below.
> 

Not really - due to the 0 corner case, following should be better:

diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index 26c4b33..784b157 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -409,6 +409,7 @@ static int tcg_target_const_match(tcg_target_long val,
 
 #define TW     XO31(4)
 #define TRAP   (TW | TO (31))
+#define NOP    0x60000000
 
 #define RT(r) ((r)<<21)
 #define RS(r) ((r)<<21)
@@ -1306,10 +1307,10 @@ void ppc_tb_set_jmp_target (unsigned long jmp_addr, unsigned long addr)
             *ptr = 0x48000000 | (disp & 0x03fffffc); /* b disp */
             patch_size = 4;
         } else {
-            ptr[0] = 0x60000000; /* nop */
-            ptr[1] = 0x60000000;
-            ptr[2] = 0x60000000;
-            ptr[3] = 0x60000000;
+            ptr[0] = NOP;
+            ptr[1] = NOP;
+            ptr[2] = NOP;
+            ptr[3] = NOP;
             patch_size = 16;
         }
     }
@@ -1330,7 +1331,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
             /* direct jump method */
 
             s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
-            s->code_ptr += 16;
+            tcg_out32 (s, NOP);
+            tcg_out32 (s, NOP);
+            tcg_out32 (s, NOP);
+            tcg_out32 (s, NOP);
         }
         else {
             tcg_abort ();
@@ -1565,35 +1569,54 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
 
     case INDEX_op_shl_i32:
         if (const_args[2]) {
-            tcg_out32 (s, (RLWINM
-                           | RA (args[0])
-                           | RS (args[1])
-                           | SH (args[2])
-                           | MB (0)
-                           | ME (31 - args[2])
-                           )
-                );
+            int sh = args[2] & 31;
+            if (!sh) {
+                tcg_out_mov (s, TCG_TYPE_I32, args[0], args[1]);
+            }
+            else {
+                tcg_out32 (s, (RLWINM
+                               | RA (args[0])
+                               | RS (args[1])
+                               | SH (sh)
+                               | MB (0)
+                               | ME (31 - sh)
+                               )
+                    );
+            }
         }
         else
             tcg_out32 (s, SLW | SAB (args[1], args[0], args[2]));
         break;
     case INDEX_op_shr_i32:
         if (const_args[2]) {
-            tcg_out32 (s, (RLWINM
-                           | RA (args[0])
-                           | RS (args[1])
-                           | SH (32 - args[2])
-                           | MB (args[2])
-                           | ME (31)
-                           )
-                );
+            int sh = args[2] & 31;
+            if (!sh) {
+                tcg_out_mov (s, TCG_TYPE_I32, args[0], args[1]);
+            }
+            else {
+                tcg_out32 (s, (RLWINM
+                               | RA (args[0])
+                               | RS (args[1])
+                               | SH (32 - sh)
+                               | MB (sh)
+                               | ME (31)
+                               )
+                    );
+            }
         }
         else
             tcg_out32 (s, SRW | SAB (args[1], args[0], args[2]));
         break;
     case INDEX_op_sar_i32:
-        if (const_args[2])
-            tcg_out32 (s, SRAWI | RS (args[1]) | RA (args[0]) | SH (args[2]));
+        if (const_args[2]) {
+            int sh = args[2] & 31;
+            if (!sh) {
+                tcg_out_mov (s, TCG_TYPE_I32, args[0], args[1]);
+            }
+            else {
+                tcg_out32 (s, SRAWI | RS (args[1]) | RA (args[0]) | SH (sh));
+            }
+        }
         else
             tcg_out32 (s, SRAW | SAB (args[1], args[0], args[2]));
         break;
@@ -1604,7 +1627,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                 | RS (args[1])
                 | MB (0)
                 | ME (31)
-                | (const_args[2] ? RLWINM | SH (args[2])
+                | (const_args[2] ? RLWINM | SH (args[2] & 31)
                                  : RLWNM | RB (args[2]))
                 ;
             tcg_out32 (s, op);


This expects deposit/rotr constant arguments to be sane,
i'm no longer certain this assumption is a sane one though...

-- 
mailto:av1474@comtv.ru

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [Qemu-devel] Shifts, ppc[64], xtensa
  2012-09-18 23:20 ` Max Filippov
@ 2012-09-19 12:49   ` malc
  2012-09-19 15:00     ` Max Filippov
  0 siblings, 1 reply; 22+ messages in thread
From: malc @ 2012-09-19 12:49 UTC (permalink / raw)
  To: Max Filippov; +Cc: qemu-devel, Richard Henderson

On Wed, 19 Sep 2012, Max Filippov wrote:

> On Tue, Sep 18, 2012 at 11:52 PM, malc <av1474@comtv.ru> wrote:
> >
> > Looks like PPC/PPC64 is also hit by shift issues, on top of that xtensa
> 
> malc, could you please expand a little bit what are these shift issues?
> (sounds like a modern trend, I must have missed something)
> 

Just audit op_opt output of extensa on 32bit host for shr_i32, i'm getting
things like:

OP after optimization and liveness analysis:
 movi_i32 tmp0,$0xd00793f4
 movi_i32 tmp1,$0x1
 movi_i32 tmp2,$0x1
 movi_i32 tmp3,$advance_ccount
 call tmp3,$0x0,$0,env,tmp2
 movi_i32 tmp2,$window_check
 call tmp2,$0x0,$0,env,tmp0,tmp1
 movi_i32 tmp0,$0x1
 add_i32 ar4,ar4,tmp0
 movi_i32 tmp1,$0xd00793f6
 movi_i32 tmp2,$0x3
 movi_i32 tmp3,$0x1
 movi_i32 tmp4,$advance_ccount
 call tmp4,$0x0,$0,env,tmp3
 movi_i32 tmp3,$window_check
 call tmp3,$0x0,$0,env,tmp1,tmp2
 mov_i32 tmp0,ar4
 qemu_ld8u ar12,tmp0,$0x0
 movi_i32 tmp0,$0xffffffe0
 add_i32 ar9,ar12,tmp0
<<<
 movi_i32 tmp1,$0x40
 shr_i32 tmp0,ar9,tmp1
<<<

# I don't get the above
 movi_i32 tmp1,$0xff
 and_i32 ar9,tmp0,tmp1
# Nor the continuation

 movi_i32 tmp0,$0x3
 movi_i32 tmp1,$advance_ccount
 call tmp1,$0x0,$0,env,tmp0
 brcond_i32 ar6,ar9,ltu,$0x0
 nopn $0x2,$0x2
 movi_i32 pc,$0xd0079402
 goto_tb $0x0
 exit_tb $0x4a036c68
 set_label $0x0
 nopn $0x2,$0x2
 movi_i32 pc,$0xd0079430
 goto_tb $0x1
 exit_tb $0x4a036c69
 end

Just adding some debugging printfs to constant shift paths in tcg target
can also be useful.

> > exposed another bug in power's tcg - gototb's target was expected to be
> > always filled via tb_set_jmp_target (even though it's clearly not what
> > tcg/README prescribes, sorry about that).
> 
> 

-- 
mailto:av1474@comtv.ru

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [Qemu-devel] Shifts, ppc[64], xtensa
  2012-09-18 19:52 [Qemu-devel] Shifts, ppc[64], xtensa malc
  2012-09-18 23:20 ` Max Filippov
  2012-09-19  0:10 ` Richard Henderson
@ 2012-09-19 12:57 ` Peter Maydell
  2012-09-19 17:00   ` Richard Henderson
  2 siblings, 1 reply; 22+ messages in thread
From: Peter Maydell @ 2012-09-19 12:57 UTC (permalink / raw)
  To: malc; +Cc: Max Filippov, qemu-devel, Aurelien Jarno, Richard Henderson

On 18 September 2012 20:52, malc <av1474@comtv.ru> wrote:
>
> Looks like PPC/PPC64 is also hit by shift issues, on top of that xtensa
> exposed another bug in power's tcg - gototb's target was expected to be
> always filled via tb_set_jmp_target (even though it's clearly not what
> tcg/README prescribes, sorry about that).
>
> Thanks to Max Filippov for pointing to xtensa test suite that helped to
> narrow the search to gototb.
>
> Testing of the following with other targets on ppc flavours is welcome..
>
> P.S. Xtensa does mighty weird things with shifts i must say...
>
> diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
> index 26c4b33..08f62fa 100644
> --- a/tcg/ppc/tcg-target.c
> +++ b/tcg/ppc/tcg-target.c
> @@ -409,6 +409,7 @@ static int tcg_target_const_match(tcg_target_long val,
>
>  #define TW     XO31(4)
>  #define TRAP   (TW | TO (31))
> +#define NOP    0x60000000
>
>  #define RT(r) ((r)<<21)
>  #define RS(r) ((r)<<21)
> @@ -1306,10 +1307,10 @@ void ppc_tb_set_jmp_target (unsigned long jmp_addr, unsigned long addr)
>              *ptr = 0x48000000 | (disp & 0x03fffffc); /* b disp */
>              patch_size = 4;
>          } else {
> -            ptr[0] = 0x60000000; /* nop */
> -            ptr[1] = 0x60000000;
> -            ptr[2] = 0x60000000;
> -            ptr[3] = 0x60000000;
> +            ptr[0] = NOP;
> +            ptr[1] = NOP;
> +            ptr[2] = NOP;
> +            ptr[3] = NOP;
>              patch_size = 16;
>          }
>      }
> @@ -1330,7 +1331,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
>              /* direct jump method */
>
>              s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
> -            s->code_ptr += 16;
> +            tcg_out32 (s, NOP);
> +            tcg_out32 (s, NOP);
> +            tcg_out32 (s, NOP);
> +            tcg_out32 (s, NOP);

Not too familiar with the PPC backend, but doesn't this mean that
in the retranslation case we will overwrite a correct jump destination
with these NOP words and then rewrite it again with the correct
destination? That can cause problems with cache incoherency;
compare the fix applied in commit c69806ab8276 for ARM.

thanks
-- PMM

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [Qemu-devel] Shifts, ppc[64], xtensa
  2012-09-19 12:49   ` malc
@ 2012-09-19 15:00     ` Max Filippov
  0 siblings, 0 replies; 22+ messages in thread
From: Max Filippov @ 2012-09-19 15:00 UTC (permalink / raw)
  To: malc; +Cc: qemu-devel, Richard Henderson

On Wed, Sep 19, 2012 at 4:49 PM, malc <av1474@comtv.ru> wrote:
> On Wed, 19 Sep 2012, Max Filippov wrote:
>
>> On Tue, Sep 18, 2012 at 11:52 PM, malc <av1474@comtv.ru> wrote:
>> >
>> > Looks like PPC/PPC64 is also hit by shift issues, on top of that xtensa
>>
>> malc, could you please expand a little bit what are these shift issues?
>> (sounds like a modern trend, I must have missed something)
>>
>
> Just audit op_opt output of extensa on 32bit host for shr_i32, i'm getting
> things like:
>
> OP after optimization and liveness analysis:
>  movi_i32 tmp0,$0xd00793f4
>  movi_i32 tmp1,$0x1
>  movi_i32 tmp2,$0x1
>  movi_i32 tmp3,$advance_ccount
>  call tmp3,$0x0,$0,env,tmp2
>  movi_i32 tmp2,$window_check
>  call tmp2,$0x0,$0,env,tmp0,tmp1
>  movi_i32 tmp0,$0x1
>  add_i32 ar4,ar4,tmp0
>  movi_i32 tmp1,$0xd00793f6
>  movi_i32 tmp2,$0x3
>  movi_i32 tmp3,$0x1
>  movi_i32 tmp4,$advance_ccount
>  call tmp4,$0x0,$0,env,tmp3
>  movi_i32 tmp3,$window_check
>  call tmp3,$0x0,$0,env,tmp1,tmp2
>  mov_i32 tmp0,ar4
>  qemu_ld8u ar12,tmp0,$0x0
>  movi_i32 tmp0,$0xffffffe0
>  add_i32 ar9,ar12,tmp0
> <<<
>  movi_i32 tmp1,$0x40
>  shr_i32 tmp0,ar9,tmp1
> <<<

Thanks for the report, this stands for extui, should be fixed
with the following:

-- >8 --
From: Max Filippov <jcmvbkbc@gmail.com>
Date: Wed, 19 Sep 2012 18:58:30 +0400
Subject: [PATCH] target-xtensa: fix extui shift amount

extui opcode only uses lowermost op1 bit for sa4.

Reported-by: malc <av1474@comtv.ru>
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 target-xtensa/translate.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/target-xtensa/translate.c b/target-xtensa/translate.c
index 1900bd5..63b37b3 100644
--- a/target-xtensa/translate.c
+++ b/target-xtensa/translate.c
@@ -1778,7 +1778,7 @@ static void disas_xtensa_insn(DisasContext *dc)
         case 5:
             gen_window_check2(dc, RRR_R, RRR_T);
             {
-                int shiftimm = RRR_S | (OP1 << 4);
+                int shiftimm = RRR_S | ((OP1 & 1) << 4);
                 int maskimm = (1 << (OP2 + 1)) - 1;

                 TCGv_i32 tmp = tcg_temp_new_i32();
--
1.7.5.4

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [Qemu-devel] Shifts, ppc[64], xtensa
  2012-09-19 12:57 ` Peter Maydell
@ 2012-09-19 17:00   ` Richard Henderson
  2012-09-19 17:02     ` Richard Henderson
  2012-09-19 17:11     ` Peter Maydell
  0 siblings, 2 replies; 22+ messages in thread
From: Richard Henderson @ 2012-09-19 17:00 UTC (permalink / raw)
  To: Peter Maydell; +Cc: Max Filippov, qemu-devel, Aurelien Jarno

On 09/19/2012 05:57 AM, Peter Maydell wrote:
>> > -            s->code_ptr += 16;
>> > +            tcg_out32 (s, NOP);
>> > +            tcg_out32 (s, NOP);
>> > +            tcg_out32 (s, NOP);
>> > +            tcg_out32 (s, NOP);
> Not too familiar with the PPC backend, but doesn't this mean that
> in the retranslation case we will overwrite a correct jump destination
> with these NOP words and then rewrite it again with the correct
> destination? That can cause problems with cache incoherency;
> compare the fix applied in commit c69806ab8276 for ARM.

Well, i386 certainly doesn't care about re-translation here:

            /* direct jump method */
            tcg_out8(s, OPC_JMP_long); /* jmp im */
            s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
            tcg_out32(s, 0);

That creates an explicit branch to next.

And as far as the referenced change, that has to do with "real"
branches, i.e. INDEX_op_brcond et at.  Which *do* need to be
protected against retranslation.  But INDEX_op_goto_tb is a
different case.


r~

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [Qemu-devel] Shifts, ppc[64], xtensa
  2012-09-19 17:00   ` Richard Henderson
@ 2012-09-19 17:02     ` Richard Henderson
  2012-09-19 17:11     ` Peter Maydell
  1 sibling, 0 replies; 22+ messages in thread
From: Richard Henderson @ 2012-09-19 17:02 UTC (permalink / raw)
  To: Peter Maydell; +Cc: Max Filippov, qemu-devel, Aurelien Jarno

On 09/19/2012 10:00 AM, Richard Henderson wrote:
> And as far as the referenced change, that has to do with "real"
> branches, i.e. INDEX_op_brcond et at.  Which *do* need to be
> protected against retranslation.  But INDEX_op_goto_tb is a
> different case.

Err, well, that patch *does* change INDEX_op_goto_tb, but I think that's
wrong.  It should have only changed INDEX_op_brcond.


r~

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [Qemu-devel] Shifts, ppc[64], xtensa
  2012-09-19 17:00   ` Richard Henderson
  2012-09-19 17:02     ` Richard Henderson
@ 2012-09-19 17:11     ` Peter Maydell
  2012-09-19 17:30       ` Richard Henderson
  1 sibling, 1 reply; 22+ messages in thread
From: Peter Maydell @ 2012-09-19 17:11 UTC (permalink / raw)
  To: Richard Henderson; +Cc: Max Filippov, qemu-devel, Aurelien Jarno

On 19 September 2012 18:00, Richard Henderson <rth@twiddle.net> wrote:
> On 09/19/2012 05:57 AM, Peter Maydell wrote:
>>> > -            s->code_ptr += 16;
>>> > +            tcg_out32 (s, NOP);
>>> > +            tcg_out32 (s, NOP);
>>> > +            tcg_out32 (s, NOP);
>>> > +            tcg_out32 (s, NOP);
>> Not too familiar with the PPC backend, but doesn't this mean that
>> in the retranslation case we will overwrite a correct jump destination
>> with these NOP words and then rewrite it again with the correct
>> destination? That can cause problems with cache incoherency;
>> compare the fix applied in commit c69806ab8276 for ARM.
>
> Well, i386 certainly doesn't care about re-translation here:

i386 is weird (as usual) in that it maintains i/d side cache
coherency entirely automatically. I don't know about PPC,
but explicit cache maintenance is more common for RISC...

> And as far as the referenced change, that has to do with "real"
> branches, i.e. INDEX_op_brcond et at.  Which *do* need to be
> protected against retranslation.  But INDEX_op_goto_tb is a
> different case.

Can you elaborate? If we're emitting a native branch insn
and we're potentially changing the value in memory several
times during retranslate I would have thought it still applied.

-- PMM

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [Qemu-devel] Shifts, ppc[64], xtensa
  2012-09-19 17:11     ` Peter Maydell
@ 2012-09-19 17:30       ` Richard Henderson
  2012-09-19 17:51         ` Aurelien Jarno
  0 siblings, 1 reply; 22+ messages in thread
From: Richard Henderson @ 2012-09-19 17:30 UTC (permalink / raw)
  To: Peter Maydell; +Cc: Max Filippov, qemu-devel, Aurelien Jarno

On 09/19/2012 10:11 AM, Peter Maydell wrote:
> Can you elaborate? If we're emitting a native branch insn
> and we're potentially changing the value in memory several
> times during retranslate I would have thought it still applied.

For brcond, we always apply the relocation before we ever try to execute the TB.

For goto_tb, we expect the contents of the patch to contain valid insns from the
start.  We never apply a "null" relocation there.  Perhaps this should be considerd
a bug in cpu_gen_code, but that's where we are.

I'm frankly surprised this ever works on ARM...



r~

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [Qemu-devel] Shifts, ppc[64], xtensa
  2012-09-19 17:30       ` Richard Henderson
@ 2012-09-19 17:51         ` Aurelien Jarno
  2012-09-19 18:01           ` Richard Henderson
  0 siblings, 1 reply; 22+ messages in thread
From: Aurelien Jarno @ 2012-09-19 17:51 UTC (permalink / raw)
  To: Richard Henderson; +Cc: Peter Maydell, qemu-devel, Max Filippov

On Wed, Sep 19, 2012 at 10:30:04AM -0700, Richard Henderson wrote:
> On 09/19/2012 10:11 AM, Peter Maydell wrote:
> > Can you elaborate? If we're emitting a native branch insn
> > and we're potentially changing the value in memory several
> > times during retranslate I would have thought it still applied.
> 
> For brcond, we always apply the relocation before we ever try to execute the TB.
> 
> For goto_tb, we expect the contents of the patch to contain valid insns from the
> start.  We never apply a "null" relocation there.  Perhaps this should be considerd
> a bug in cpu_gen_code, but that's where we are.
> 

There is no cache problem in that case, because the cache are
synchronized just after the value is modified (see exec-all.h).

That said it is not a valid reason to not keep the value during
re-translation, as it means the TB will exit instead of linking to 
the next one. The consequences are only the performance.

The real problem in the PPC case is that changing the jump address is 
not atomic. In some rare cases (but given the number of executed TB in
system mode, it should be considered as probable), this might cause a 
jump to the wrong address. This is especially true when doing TB 
unlinking. That's one of the reason we are not doing direct jump on MIPS
for example (that should be something possible, but with a lot of
constraints on the size and location of the code gen buffer).

-- 
Aurelien Jarno	                        GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [Qemu-devel] Shifts, ppc[64], xtensa
  2012-09-19 17:51         ` Aurelien Jarno
@ 2012-09-19 18:01           ` Richard Henderson
  2012-09-19 18:30             ` Peter Maydell
  0 siblings, 1 reply; 22+ messages in thread
From: Richard Henderson @ 2012-09-19 18:01 UTC (permalink / raw)
  To: Aurelien Jarno; +Cc: Peter Maydell, qemu-devel, Max Filippov

On 09/19/2012 10:51 AM, Aurelien Jarno wrote:
> That said it is not a valid reason to not keep the value during
> re-translation, as it means the TB will exit instead of linking to 
> the next one. The consequences are only the performance.

We still have the problem of when is the goto_tb link initialized the *first* time?
Where we expect the goto_tb to fall through to stuff+exit_tb?

For i386 it's during translation, with no care for re-translation.

For ARM?  I can't see that it is.

For PPC, malc has already verified that it *never* happens.  If he
puts "trap" insns there instead of "nop" insns, he'll see the trap.


r~

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [Qemu-devel] Shifts, ppc[64], xtensa
  2012-09-19 18:01           ` Richard Henderson
@ 2012-09-19 18:30             ` Peter Maydell
  2012-09-19 18:35               ` Richard Henderson
  2012-09-19 19:53               ` Richard Henderson
  0 siblings, 2 replies; 22+ messages in thread
From: Peter Maydell @ 2012-09-19 18:30 UTC (permalink / raw)
  To: Richard Henderson; +Cc: Max Filippov, qemu-devel, Aurelien Jarno

On 19 September 2012 19:01, Richard Henderson <rth@twiddle.net> wrote:
> On 09/19/2012 10:51 AM, Aurelien Jarno wrote:
>> That said it is not a valid reason to not keep the value during
>> re-translation, as it means the TB will exit instead of linking to
>> the next one. The consequences are only the performance.
>
> We still have the problem of when is the goto_tb link initialized the *first* time?
> Where we expect the goto_tb to fall through to stuff+exit_tb?
>
> For i386 it's during translation, with no care for re-translation.
>
> For ARM?  I can't see that it is.

I think the answer to this is that the only caller of cpu_gen_code()
is tb_gen_code(), which always then calls tb_link_page()
which calls tb_reset_jump() which calls tb_set_jmp_target().

> For PPC, malc has already verified that it *never* happens.  If he
> puts "trap" insns there instead of "nop" insns, he'll see the trap.

...but on the other hand that ought to work for PPC too, so
presumably my analysis is wrong somewhere.

-- PMM

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [Qemu-devel] Shifts, ppc[64], xtensa
  2012-09-19 18:30             ` Peter Maydell
@ 2012-09-19 18:35               ` Richard Henderson
  2012-09-19 19:53               ` Richard Henderson
  1 sibling, 0 replies; 22+ messages in thread
From: Richard Henderson @ 2012-09-19 18:35 UTC (permalink / raw)
  To: Peter Maydell; +Cc: Max Filippov, qemu-devel, Aurelien Jarno

On 09/19/2012 11:30 AM, Peter Maydell wrote:
> On 19 September 2012 19:01, Richard Henderson <rth@twiddle.net> wrote:
>> On 09/19/2012 10:51 AM, Aurelien Jarno wrote:
>>> That said it is not a valid reason to not keep the value during
>>> re-translation, as it means the TB will exit instead of linking to
>>> the next one. The consequences are only the performance.
>>
>> We still have the problem of when is the goto_tb link initialized the *first* time?
>> Where we expect the goto_tb to fall through to stuff+exit_tb?
>>
>> For i386 it's during translation, with no care for re-translation.
>>
>> For ARM?  I can't see that it is.
> 
> I think the answer to this is that the only caller of cpu_gen_code()
> is tb_gen_code(), which always then calls tb_link_page()
> which calls tb_reset_jump() which calls tb_set_jmp_target().

That looks correct.  If convoluted.  ;-)

>> For PPC, malc has already verified that it *never* happens.  If he
>> puts "trap" insns there instead of "nop" insns, he'll see the trap.
> 
> ...but on the other hand that ought to work for PPC too, so
> presumably my analysis is wrong somewhere.

malc?  Breakpoint on ppc_tb_set_jmp_target?


r~

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [Qemu-devel] Shifts, ppc[64], xtensa
  2012-09-19 18:30             ` Peter Maydell
  2012-09-19 18:35               ` Richard Henderson
@ 2012-09-19 19:53               ` Richard Henderson
  2012-09-19 20:05                 ` Peter Maydell
  2012-09-20  0:29                 ` Max Filippov
  1 sibling, 2 replies; 22+ messages in thread
From: Richard Henderson @ 2012-09-19 19:53 UTC (permalink / raw)
  To: Max Filippov; +Cc: Peter Maydell, qemu-devel, Aurelien Jarno

On 09/19/2012 11:30 AM, Peter Maydell wrote:
> ...but on the other hand that ought to work for PPC too, so
> presumably my analysis is wrong somewhere.

It isn't.  It's a target-xtensa bug.

> OP:
>  ---- 0xd0079cff
>  movi_i32 tmp0,$0xd0079cff
>  movi_i32 tmp1,$0x2
>  movi_i32 tmp2,$0x1
>  movi_i32 tmp3,$advance_ccount
>  call tmp3,$0x0,$0,env,tmp2
>  movi_i32 tmp2,$window_check
>  call tmp2,$0x0,$0,env,tmp0,tmp1
>  and_i32 tmp0,ar9,ar8
>  movi_i32 tmp1,$0x0
>  brcond_i32 tmp0,tmp1,eq,$0x0
>  movi_i32 tmp2,$0x0
>  brcond_i32 LCOUNT,tmp2,eq,$0x1
>  movi_i32 tmp2,$0x1
>  sub_i32 LCOUNT,LCOUNT,tmp2
>  movi_i32 tmp2,$0xd0079cf2
>  mov_i32 pc,tmp2
>  goto_tb $0x0
>  exit_tb $0x4a116558
>  set_label $0x1
>  movi_i32 tmp2,$0xd0079d02
>  mov_i32 pc,tmp2
>  exit_tb $0x0
>  set_label $0x0
>  movi_i32 tmp2,$0xd0079d1a
>  mov_i32 pc,tmp2
>  goto_tb $0x1
>  exit_tb $0x4a116559
>  movi_i32 tmp0,$0x0
>  brcond_i32 LCOUNT,tmp0,eq,$0x2
>  movi_i32 tmp0,$0x1
>  sub_i32 LCOUNT,LCOUNT,tmp0
>  movi_i32 tmp0,$0xd0079cf2
>  mov_i32 pc,tmp0
>  goto_tb $0x0
>  exit_tb $0x4a116558
>  set_label $0x2
>  movi_i32 tmp0,$0xd0079d02
>  mov_i32 pc,tmp0
>  exit_tb $0x0

There are two instances of goto_tb $0 in here.
And, amusingly, two checks for LCOUNT.

Since there's no disassembler for xtensa, I'll
leave it to the maintainer to track down from
whence this mistake stems.


r~

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [Qemu-devel] Shifts, ppc[64], xtensa
  2012-09-19 19:53               ` Richard Henderson
@ 2012-09-19 20:05                 ` Peter Maydell
  2012-09-19 21:21                   ` Richard Henderson
  2012-09-20  0:29                 ` Max Filippov
  1 sibling, 1 reply; 22+ messages in thread
From: Peter Maydell @ 2012-09-19 20:05 UTC (permalink / raw)
  To: Richard Henderson; +Cc: Max Filippov, qemu-devel, Aurelien Jarno

On 19 September 2012 20:53, Richard Henderson <rth@twiddle.net> wrote:
> On 09/19/2012 11:30 AM, Peter Maydell wrote:
>> ...but on the other hand that ought to work for PPC too, so
>> presumably my analysis is wrong somewhere.
>
> It isn't.  It's a target-xtensa bug.

> There are two instances of goto_tb $0 in here.

I notice tcg/README doesn't actually mention this restriction
on goto_tb, incidentally.

(I wonder if there are enough easily checkable and plausibly
violated constraints on generated TCG to make it worth having
a debug mode pass which checks them? "Only one goto_tb X per
TB" is an easy check...)

-- PMM

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [Qemu-devel] Shifts, ppc[64], xtensa
  2012-09-19 20:05                 ` Peter Maydell
@ 2012-09-19 21:21                   ` Richard Henderson
  0 siblings, 0 replies; 22+ messages in thread
From: Richard Henderson @ 2012-09-19 21:21 UTC (permalink / raw)
  To: Peter Maydell; +Cc: Max Filippov, qemu-devel, Aurelien Jarno

On 09/19/2012 01:05 PM, Peter Maydell wrote:
> I notice tcg/README doesn't actually mention this restriction
> on goto_tb, incidentally.

Nor does it mention that only indicies 0 and 1 are valid.
I.e. no use of goto_tb $2 for multi-way branches.

> (I wonder if there are enough easily checkable and plausibly
> violated constraints on generated TCG to make it worth having
> a debug mode pass which checks them? "Only one goto_tb X per
> TB" is an easy check...)

Sure.  Enabled via the existing --enable-tcg-debug I would presume.


r~

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [Qemu-devel] Shifts, ppc[64], xtensa
  2012-09-19 19:53               ` Richard Henderson
  2012-09-19 20:05                 ` Peter Maydell
@ 2012-09-20  0:29                 ` Max Filippov
  2012-09-20 14:03                   ` Richard Henderson
  2012-09-20 22:53                   ` malc
  1 sibling, 2 replies; 22+ messages in thread
From: Max Filippov @ 2012-09-20  0:29 UTC (permalink / raw)
  To: Richard Henderson; +Cc: Peter Maydell, qemu-devel, Aurelien Jarno

On Wed, Sep 19, 2012 at 11:53 PM, Richard Henderson <rth@twiddle.net> wrote:
> On 09/19/2012 11:30 AM, Peter Maydell wrote:
>> ...but on the other hand that ought to work for PPC too, so
>> presumably my analysis is wrong somewhere.
>
> It isn't.  It's a target-xtensa bug.
>
>> OP:
>>  ---- 0xd0079cff
>>  movi_i32 tmp0,$0xd0079cff
>>  movi_i32 tmp1,$0x2
>>  movi_i32 tmp2,$0x1
>>  movi_i32 tmp3,$advance_ccount
>>  call tmp3,$0x0,$0,env,tmp2
>>  movi_i32 tmp2,$window_check
>>  call tmp2,$0x0,$0,env,tmp0,tmp1
>>  and_i32 tmp0,ar9,ar8
>>  movi_i32 tmp1,$0x0
>>  brcond_i32 tmp0,tmp1,eq,$0x0
>>  movi_i32 tmp2,$0x0
>>  brcond_i32 LCOUNT,tmp2,eq,$0x1
>>  movi_i32 tmp2,$0x1
>>  sub_i32 LCOUNT,LCOUNT,tmp2
>>  movi_i32 tmp2,$0xd0079cf2
>>  mov_i32 pc,tmp2
>>  goto_tb $0x0
>>  exit_tb $0x4a116558
>>  set_label $0x1
>>  movi_i32 tmp2,$0xd0079d02
>>  mov_i32 pc,tmp2
>>  exit_tb $0x0
>>  set_label $0x0
>>  movi_i32 tmp2,$0xd0079d1a
>>  mov_i32 pc,tmp2
>>  goto_tb $0x1
>>  exit_tb $0x4a116559
>>  movi_i32 tmp0,$0x0
>>  brcond_i32 LCOUNT,tmp0,eq,$0x2
>>  movi_i32 tmp0,$0x1
>>  sub_i32 LCOUNT,LCOUNT,tmp0
>>  movi_i32 tmp0,$0xd0079cf2
>>  mov_i32 pc,tmp0
>>  goto_tb $0x0
>>  exit_tb $0x4a116558
>>  set_label $0x2
>>  movi_i32 tmp0,$0xd0079d02
>>  mov_i32 pc,tmp0
>>  exit_tb $0x0
>
> There are two instances of goto_tb $0 in here.
> And, amusingly, two checks for LCOUNT.
>
> Since there's no disassembler for xtensa, I'll
> leave it to the maintainer to track down from
> whence this mistake stems.

This code is generated when TB ends on LEND
(zero-overhead loop ending) with branching instruction.
So, in addition to 3 way branch there's extra looping code
generated by unconditional gen_check_loop_end(dc, 0);
at the end of disas_xtensa_insn. I was pretty sure that this
dead code would make no harm. --enable-debug-tcg says
nothing on x86_64 host. The following should fix that:

-- >8 --
From: Max Filippov <jcmvbkbc@gmail.com>
Date: Thu, 20 Sep 2012 04:07:20 +0400
Subject: [PATCH] target-xtensa: don't emit extra gen_check_loop_end

Unconditional gen_check_loop_end at the end of disas_xtensa_insn
can emit tcg_gen_goto_tb with slot id already used in the TB (e.g. when
TB ends at LEND with a branch). Not all tcg backends can handle that.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 target-xtensa/translate.c |    4 +++-
 1 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/target-xtensa/translate.c b/target-xtensa/translate.c
index 63b37b3..57a2b6f 100644
--- a/target-xtensa/translate.c
+++ b/target-xtensa/translate.c
@@ -2502,7 +2502,9 @@ static void disas_xtensa_insn(DisasContext *dc)
         break;
     }

-    gen_check_loop_end(dc, 0);
+    if (dc->is_jmp == DISAS_NEXT) {
+        gen_check_loop_end(dc, 0);
+    }
     dc->pc = dc->next_pc;

     return;
-- 
1.7.7.6

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [Qemu-devel] Shifts, ppc[64], xtensa
  2012-09-20  0:29                 ` Max Filippov
@ 2012-09-20 14:03                   ` Richard Henderson
  2012-09-20 14:22                     ` Max Filippov
  2012-09-20 22:53                   ` malc
  1 sibling, 1 reply; 22+ messages in thread
From: Richard Henderson @ 2012-09-20 14:03 UTC (permalink / raw)
  To: Max Filippov; +Cc: Peter Maydell, qemu-devel, Aurelien Jarno

On 09/19/2012 05:29 PM, Max Filippov wrote:
> Not all tcg backends can handle that.

*No* tcg backends can handle that.

If we fix the bug wherein i386 clobbers the goto_tb target
during re-translation you'll see the crash there too.

And we were just talking about enhancing --enable-debug-tcg
to detect this case...


r~

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [Qemu-devel] Shifts, ppc[64], xtensa
  2012-09-20 14:03                   ` Richard Henderson
@ 2012-09-20 14:22                     ` Max Filippov
  0 siblings, 0 replies; 22+ messages in thread
From: Max Filippov @ 2012-09-20 14:22 UTC (permalink / raw)
  To: Richard Henderson; +Cc: Peter Maydell, qemu-devel, Aurelien Jarno

On Thu, Sep 20, 2012 at 6:03 PM, Richard Henderson <rth@twiddle.net> wrote:
> On 09/19/2012 05:29 PM, Max Filippov wrote:
>> Not all tcg backends can handle that.
>
> *No* tcg backends can handle that.
>
> If we fix the bug wherein i386 clobbers the goto_tb target
> during re-translation you'll see the crash there too.

Ok, I will document it in the places that I know.

> And we were just talking about enhancing --enable-debug-tcg
> to detect this case...

I can try to do it too as my homework (:

-- 
Thanks.
-- Max

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [Qemu-devel] Shifts, ppc[64], xtensa
  2012-09-20  0:29                 ` Max Filippov
  2012-09-20 14:03                   ` Richard Henderson
@ 2012-09-20 22:53                   ` malc
  1 sibling, 0 replies; 22+ messages in thread
From: malc @ 2012-09-20 22:53 UTC (permalink / raw)
  To: Max Filippov; +Cc: Peter Maydell, qemu-devel, Aurelien Jarno, Richard Henderson

On Thu, 20 Sep 2012, Max Filippov wrote:

> On Wed, Sep 19, 2012 at 11:53 PM, Richard Henderson <rth@twiddle.net> wrote:
> > On 09/19/2012 11:30 AM, Peter Maydell wrote:
> >> ...but on the other hand that ought to work for PPC too, so
> >> presumably my analysis is wrong somewhere.
> >
> > It isn't.  It's a target-xtensa bug.
> >

[..snip..]

> 
> This code is generated when TB ends on LEND
> (zero-overhead loop ending) with branching instruction.
> So, in addition to 3 way branch there's extra looping code
> generated by unconditional gen_check_loop_end(dc, 0);
> at the end of disas_xtensa_insn. I was pretty sure that this
> dead code would make no harm. --enable-debug-tcg says
> nothing on x86_64 host. The following should fix that:

It does, once the commit message is fixed i can apply it.

> 
> -- >8 --
> From: Max Filippov <jcmvbkbc@gmail.com>
> Date: Thu, 20 Sep 2012 04:07:20 +0400
> Subject: [PATCH] target-xtensa: don't emit extra gen_check_loop_end
> 
> Unconditional gen_check_loop_end at the end of disas_xtensa_insn
> can emit tcg_gen_goto_tb with slot id already used in the TB (e.g. when
> TB ends at LEND with a branch). Not all tcg backends can handle that.
> 
> Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
> ---
>  target-xtensa/translate.c |    4 +++-
>  1 files changed, 3 insertions(+), 1 deletions(-)
> 
> diff --git a/target-xtensa/translate.c b/target-xtensa/translate.c
> index 63b37b3..57a2b6f 100644
> --- a/target-xtensa/translate.c
> +++ b/target-xtensa/translate.c
> @@ -2502,7 +2502,9 @@ static void disas_xtensa_insn(DisasContext *dc)
>          break;
>      }
> 
> -    gen_check_loop_end(dc, 0);
> +    if (dc->is_jmp == DISAS_NEXT) {
> +        gen_check_loop_end(dc, 0);
> +    }
>      dc->pc = dc->next_pc;
> 
>      return;
> 

-- 
mailto:av1474@comtv.ru

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2012-09-20 22:53 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-09-18 19:52 [Qemu-devel] Shifts, ppc[64], xtensa malc
2012-09-18 23:20 ` Max Filippov
2012-09-19 12:49   ` malc
2012-09-19 15:00     ` Max Filippov
2012-09-19  0:10 ` Richard Henderson
2012-09-19 12:46   ` malc
2012-09-19 12:57 ` Peter Maydell
2012-09-19 17:00   ` Richard Henderson
2012-09-19 17:02     ` Richard Henderson
2012-09-19 17:11     ` Peter Maydell
2012-09-19 17:30       ` Richard Henderson
2012-09-19 17:51         ` Aurelien Jarno
2012-09-19 18:01           ` Richard Henderson
2012-09-19 18:30             ` Peter Maydell
2012-09-19 18:35               ` Richard Henderson
2012-09-19 19:53               ` Richard Henderson
2012-09-19 20:05                 ` Peter Maydell
2012-09-19 21:21                   ` Richard Henderson
2012-09-20  0:29                 ` Max Filippov
2012-09-20 14:03                   ` Richard Henderson
2012-09-20 14:22                     ` Max Filippov
2012-09-20 22:53                   ` malc

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).