* [Qemu-devel] [PATCH] TCG: fix copy propagation
@ 2011-08-06 14:06 Blue Swirl
2011-08-06 20:13 ` Stefan Weil
0 siblings, 1 reply; 7+ messages in thread
From: Blue Swirl @ 2011-08-06 14:06 UTC (permalink / raw)
To: Kirill Batuzov, zhur, qemu-devel, Stefan Weil, TeLeMan
[-- Attachment #1: Type: text/plain, Size: 3348 bytes --]
Copy propagation introduced in 22613af4a6d9602001e6d0e7b6d98aa40aa018dc
considered only global registers. However, register temps and stack
allocated locals must be handled differently because register temps
don't survive across brcond.
Fix by propagating only within same class of temps.
Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
---
tcg/optimize.c | 13 +++++++------
tcg/tcg.h | 5 +++++
2 files changed, 12 insertions(+), 6 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index a3bfa5e..748ecf9 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -185,12 +185,13 @@ static int op_to_movi(int op)
}
}
-static void tcg_opt_gen_mov(TCGArg *gen_args, TCGArg dst, TCGArg src,
- int nb_temps, int nb_globals)
+static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args, TCGArg dst,
+ TCGArg src, int nb_temps, int nb_globals)
{
reset_temp(dst, nb_temps, nb_globals);
assert(temps[src].state != TCG_TEMP_COPY);
- if (src >= nb_globals) {
+ if (src >= nb_globals &&
+ tcg_arg_is_local(s, src) == tcg_arg_is_local(s, dst)) {
assert(temps[src].state != TCG_TEMP_CONST);
if (temps[src].state != TCG_TEMP_HAS_COPY) {
temps[src].state = TCG_TEMP_HAS_COPY;
@@ -474,7 +475,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s,
uint16_t *tcg_opc_ptr,
gen_opc_buf[op_index] = INDEX_op_nop;
} else {
gen_opc_buf[op_index] = op_to_mov(op);
- tcg_opt_gen_mov(gen_args, args[0], args[1],
+ tcg_opt_gen_mov(s, gen_args, args[0], args[1],
nb_temps, nb_globals);
gen_args += 2;
args += 3;
@@ -500,7 +501,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s,
uint16_t *tcg_opc_ptr,
gen_opc_buf[op_index] = INDEX_op_nop;
} else {
gen_opc_buf[op_index] = op_to_mov(op);
- tcg_opt_gen_mov(gen_args, args[0], args[1], nb_temps,
+ tcg_opt_gen_mov(s, gen_args, args[0], args[1], nb_temps,
nb_globals);
gen_args += 2;
args += 3;
@@ -523,7 +524,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s,
uint16_t *tcg_opc_ptr,
break;
}
if (temps[args[1]].state != TCG_TEMP_CONST) {
- tcg_opt_gen_mov(gen_args, args[0], args[1],
+ tcg_opt_gen_mov(s, gen_args, args[0], args[1],
nb_temps, nb_globals);
gen_args += 2;
args += 2;
diff --git a/tcg/tcg.h b/tcg/tcg.h
index e76f9af..e2a7095 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -410,6 +410,11 @@ static inline TCGv_i64 tcg_temp_local_new_i64(void)
void tcg_temp_free_i64(TCGv_i64 arg);
char *tcg_get_arg_str_i64(TCGContext *s, char *buf, int buf_size,
TCGv_i64 arg);
+static inline bool tcg_arg_is_local(TCGContext *s, TCGArg arg)
+{
+ return s->temps[arg].temp_local;
+}
+
#if defined(CONFIG_DEBUG_TCG)
/* If you call tcg_clear_temp_count() at the start of a section of
* code which is not supposed to leak any TCG temporaries, then
--
1.6.2.4
[-- Attachment #2: 0001-TCG-fix-copy-propagation.patch --]
[-- Type: text/x-patch, Size: 3634 bytes --]
From f8f16733f469eebae8f6132abc70e7357e41cf7f Mon Sep 17 00:00:00 2001
Message-Id: <f8f16733f469eebae8f6132abc70e7357e41cf7f.1312639467.git.blauwirbel@gmail.com>
From: Blue Swirl <blauwirbel@gmail.com>
Date: Sat, 6 Aug 2011 13:58:47 +0000
Subject: [PATCH] TCG: fix copy propagation
Copy propagation introduced in 22613af4a6d9602001e6d0e7b6d98aa40aa018dc
considered only global registers. However, register temps and stack
allocated locals must be handled differently because register temps
don't survive across brcond.
Fix by propagating only within same class of temps.
Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
---
tcg/optimize.c | 13 +++++++------
tcg/tcg.h | 5 +++++
2 files changed, 12 insertions(+), 6 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index a3bfa5e..748ecf9 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -185,12 +185,13 @@ static int op_to_movi(int op)
}
}
-static void tcg_opt_gen_mov(TCGArg *gen_args, TCGArg dst, TCGArg src,
- int nb_temps, int nb_globals)
+static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args, TCGArg dst,
+ TCGArg src, int nb_temps, int nb_globals)
{
reset_temp(dst, nb_temps, nb_globals);
assert(temps[src].state != TCG_TEMP_COPY);
- if (src >= nb_globals) {
+ if (src >= nb_globals &&
+ tcg_arg_is_local(s, src) == tcg_arg_is_local(s, dst)) {
assert(temps[src].state != TCG_TEMP_CONST);
if (temps[src].state != TCG_TEMP_HAS_COPY) {
temps[src].state = TCG_TEMP_HAS_COPY;
@@ -474,7 +475,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
gen_opc_buf[op_index] = INDEX_op_nop;
} else {
gen_opc_buf[op_index] = op_to_mov(op);
- tcg_opt_gen_mov(gen_args, args[0], args[1],
+ tcg_opt_gen_mov(s, gen_args, args[0], args[1],
nb_temps, nb_globals);
gen_args += 2;
args += 3;
@@ -500,7 +501,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
gen_opc_buf[op_index] = INDEX_op_nop;
} else {
gen_opc_buf[op_index] = op_to_mov(op);
- tcg_opt_gen_mov(gen_args, args[0], args[1], nb_temps,
+ tcg_opt_gen_mov(s, gen_args, args[0], args[1], nb_temps,
nb_globals);
gen_args += 2;
args += 3;
@@ -523,7 +524,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
break;
}
if (temps[args[1]].state != TCG_TEMP_CONST) {
- tcg_opt_gen_mov(gen_args, args[0], args[1],
+ tcg_opt_gen_mov(s, gen_args, args[0], args[1],
nb_temps, nb_globals);
gen_args += 2;
args += 2;
diff --git a/tcg/tcg.h b/tcg/tcg.h
index e76f9af..e2a7095 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -410,6 +410,11 @@ static inline TCGv_i64 tcg_temp_local_new_i64(void)
void tcg_temp_free_i64(TCGv_i64 arg);
char *tcg_get_arg_str_i64(TCGContext *s, char *buf, int buf_size, TCGv_i64 arg);
+static inline bool tcg_arg_is_local(TCGContext *s, TCGArg arg)
+{
+ return s->temps[arg].temp_local;
+}
+
#if defined(CONFIG_DEBUG_TCG)
/* If you call tcg_clear_temp_count() at the start of a section of
* code which is not supposed to leak any TCG temporaries, then
--
1.7.2.5
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [Qemu-devel] [PATCH] TCG: fix copy propagation
2011-08-06 14:06 [Qemu-devel] [PATCH] TCG: fix copy propagation Blue Swirl
@ 2011-08-06 20:13 ` Stefan Weil
2011-08-06 20:33 ` Stefan Weil
0 siblings, 1 reply; 7+ messages in thread
From: Stefan Weil @ 2011-08-06 20:13 UTC (permalink / raw)
To: Blue Swirl; +Cc: TeLeMan, qemu-devel, zhur, Kirill Batuzov
Am 06.08.2011 16:06, schrieb Blue Swirl:
> Copy propagation introduced in 22613af4a6d9602001e6d0e7b6d98aa40aa018dc
> considered only global registers. However, register temps and stack
> allocated locals must be handled differently because register temps
> don't survive across brcond.
>
> Fix by propagating only within same class of temps.
>
> Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
> ---
> tcg/optimize.c | 13 +++++++------
> tcg/tcg.h | 5 +++++
> 2 files changed, 12 insertions(+), 6 deletions(-)
>
> diff --git a/tcg/optimize.c b/tcg/optimize.c
> index a3bfa5e..748ecf9 100644
> --- a/tcg/optimize.c
> +++ b/tcg/optimize.c
> @@ -185,12 +185,13 @@ static int op_to_movi(int op)
> }
> }
>
> -static void tcg_opt_gen_mov(TCGArg *gen_args, TCGArg dst, TCGArg src,
> - int nb_temps, int nb_globals)
> +static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args, TCGArg dst,
> + TCGArg src, int nb_temps, int nb_globals)
> {
> reset_temp(dst, nb_temps, nb_globals);
> assert(temps[src].state != TCG_TEMP_COPY);
> - if (src >= nb_globals) {
> + if (src >= nb_globals &&
> + tcg_arg_is_local(s, src) == tcg_arg_is_local(s, dst)) {
> assert(temps[src].state != TCG_TEMP_CONST);
[snip]
Hi Blue,
your patch fixes qemu-system-x86_64 which now seems to work on 32 bit
hosts, too.
qemu-system-mips64(el) still fail with the same abort. They work when I
remove the
if block in tcg_opt_gen_mov.
The Debian kernel for qemu-system-mips64 which I used for the test is
available on
http://qemu.weilnetz.de/mips64/.
I could not reproduce the crash with qemu-system-ppc64 - neither with
nor without
your patch.
Kind regards,
Stefan
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [Qemu-devel] [PATCH] TCG: fix copy propagation
2011-08-06 20:13 ` Stefan Weil
@ 2011-08-06 20:33 ` Stefan Weil
2011-08-06 21:07 ` Blue Swirl
0 siblings, 1 reply; 7+ messages in thread
From: Stefan Weil @ 2011-08-06 20:33 UTC (permalink / raw)
To: Blue Swirl; +Cc: TeLeMan, qemu-devel, zhur, Kirill Batuzov
Am 06.08.2011 22:13, schrieb Stefan Weil:
> Am 06.08.2011 16:06, schrieb Blue Swirl:
>> Copy propagation introduced in 22613af4a6d9602001e6d0e7b6d98aa40aa018dc
>> considered only global registers. However, register temps and stack
>> allocated locals must be handled differently because register temps
>> don't survive across brcond.
>>
>> Fix by propagating only within same class of temps.
>>
>> Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
>> ---
>> tcg/optimize.c | 13 +++++++------
>> tcg/tcg.h | 5 +++++
>> 2 files changed, 12 insertions(+), 6 deletions(-)
>>
>> diff --git a/tcg/optimize.c b/tcg/optimize.c
>> index a3bfa5e..748ecf9 100644
>> --- a/tcg/optimize.c
>> +++ b/tcg/optimize.c
>> @@ -185,12 +185,13 @@ static int op_to_movi(int op)
>> }
>> }
>>
>> -static void tcg_opt_gen_mov(TCGArg *gen_args, TCGArg dst, TCGArg src,
>> - int nb_temps, int nb_globals)
>> +static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args, TCGArg
>> dst,
>> + TCGArg src, int nb_temps, int nb_globals)
>> {
>> reset_temp(dst, nb_temps, nb_globals);
>> assert(temps[src].state != TCG_TEMP_COPY);
>> - if (src >= nb_globals) {
>> + if (src >= nb_globals &&
>> + tcg_arg_is_local(s, src) == tcg_arg_is_local(s, dst)) {
>> assert(temps[src].state != TCG_TEMP_CONST);
> [snip]
>
> Hi Blue,
>
> your patch fixes qemu-system-x86_64 which now seems to work on 32 bit
> hosts, too.
>
> qemu-system-mips64(el) still fail with the same abort. They work when
> I remove the
> if block in tcg_opt_gen_mov.
>
> The Debian kernel for qemu-system-mips64 which I used for the test is
> available on
> http://qemu.weilnetz.de/mips64/.
>
> I could not reproduce the crash with qemu-system-ppc64 - neither with
> nor without
> your patch.
>
> Kind regards,
> Stefan
The patch works with qemu-system-mips64(el) after a small modification:
if (src >= nb_globals &&
tcg_arg_is_local(s, src) && tcg_arg_is_local(s, dst)) {
Cheers,
Stefan
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [Qemu-devel] [PATCH] TCG: fix copy propagation
2011-08-06 20:33 ` Stefan Weil
@ 2011-08-06 21:07 ` Blue Swirl
0 siblings, 0 replies; 7+ messages in thread
From: Blue Swirl @ 2011-08-06 21:07 UTC (permalink / raw)
To: Stefan Weil; +Cc: TeLeMan, qemu-devel, zhur, Kirill Batuzov
On Sat, Aug 6, 2011 at 8:33 PM, Stefan Weil <weil@mail.berlios.de> wrote:
> Am 06.08.2011 22:13, schrieb Stefan Weil:
>>
>> Am 06.08.2011 16:06, schrieb Blue Swirl:
>>>
>>> Copy propagation introduced in 22613af4a6d9602001e6d0e7b6d98aa40aa018dc
>>> considered only global registers. However, register temps and stack
>>> allocated locals must be handled differently because register temps
>>> don't survive across brcond.
>>>
>>> Fix by propagating only within same class of temps.
>>>
>>> Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
>>> ---
>>> tcg/optimize.c | 13 +++++++------
>>> tcg/tcg.h | 5 +++++
>>> 2 files changed, 12 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/tcg/optimize.c b/tcg/optimize.c
>>> index a3bfa5e..748ecf9 100644
>>> --- a/tcg/optimize.c
>>> +++ b/tcg/optimize.c
>>> @@ -185,12 +185,13 @@ static int op_to_movi(int op)
>>> }
>>> }
>>>
>>> -static void tcg_opt_gen_mov(TCGArg *gen_args, TCGArg dst, TCGArg src,
>>> - int nb_temps, int nb_globals)
>>> +static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args, TCGArg dst,
>>> + TCGArg src, int nb_temps, int nb_globals)
>>> {
>>> reset_temp(dst, nb_temps, nb_globals);
>>> assert(temps[src].state != TCG_TEMP_COPY);
>>> - if (src >= nb_globals) {
>>> + if (src >= nb_globals &&
>>> + tcg_arg_is_local(s, src) == tcg_arg_is_local(s, dst)) {
>>> assert(temps[src].state != TCG_TEMP_CONST);
>>
>> [snip]
>>
>> Hi Blue,
>>
>> your patch fixes qemu-system-x86_64 which now seems to work on 32 bit
>> hosts, too.
>>
>> qemu-system-mips64(el) still fail with the same abort. They work when I
>> remove the
>> if block in tcg_opt_gen_mov.
>>
>> The Debian kernel for qemu-system-mips64 which I used for the test is
>> available on
>> http://qemu.weilnetz.de/mips64/.
>>
>> I could not reproduce the crash with qemu-system-ppc64 - neither with nor
>> without
>> your patch.
>>
>> Kind regards,
>> Stefan
>
> The patch works with qemu-system-mips64(el) after a small modification:
>
> if (src >= nb_globals &&
> tcg_arg_is_local(s, src) && tcg_arg_is_local(s, dst)) {
This removes the optimization completely for register temps. I think
there is a better solution.
The problem is similar to x86_64:
IN: free_area_init_nodes
0xffffffff806004d8: sw v0,21664(s5)
0xffffffff806004dc: lw v1,0(s0)
0xffffffff806004e0: lui v0,0x8062
0xffffffff806004e4: sw v1,21676(v0)
0xffffffff806004e8: sw v1,4(s8)
0xffffffff806004ec: lw a0,4(s0)
0xffffffff806004f0: move a1,zero
0xffffffff806004f4: sltu v0,a0,v1
0xffffffff806004f8: movz v1,a0,v0
0xffffffff806004fc: lui v0,0x8062
0xffffffff80600500: addiu s4,v0,21676
0xffffffff80600504: lui v0,0x8062
0xffffffff80600508: sw v1,4(s4)
0xffffffff8060050c: addiu a0,v0,21688
0xffffffff80600510: li a2,4
0xffffffff80600514: sw zero,8(s8)
0xffffffff80600518: jal 0xffffffff80104000
0xffffffff8060051c: sw zero,8(s4)
OP:
---- 0xffffffff806004d8
movi_i32 tmp0,$0x54a0
movi_i32 tmp1,$0x0
add2_i32 tmp0,tmp1,s5_0,s5_1,tmp0,tmp1
mov_i32 tmp2,v0_0
mov_i32 tmp3,v0_1
qemu_st32 tmp2,tmp0,tmp1,$0x0
---- 0xffffffff806004dc
mov_i32 tmp2,s0_0
mov_i32 tmp3,s0_1
qemu_ld32 tmp2,tmp2,tmp3,$0x0
movi_i32 tmp4,$0x1f
sar_i32 tmp3,tmp2,tmp4
mov_i32 v1_0,tmp2
mov_i32 v1_1,tmp3
---- 0xffffffff806004e0
movi_i32 v0_0,$0x80620000
movi_i32 v0_1,$0xffffffff
---- 0xffffffff806004e4
movi_i32 tmp0,$0x54ac
movi_i32 tmp1,$0x0
add2_i32 tmp0,tmp1,v0_0,v0_1,tmp0,tmp1
mov_i32 tmp2,v1_0
mov_i32 tmp3,v1_1
qemu_st32 tmp2,tmp0,tmp1,$0x0
---- 0xffffffff806004e8
movi_i32 tmp2,$0x4
movi_i32 tmp3,$0x0
add2_i32 tmp2,tmp3,s8_0,s8_1,tmp2,tmp3
mov_i32 tmp0,v1_0
mov_i32 tmp1,v1_1
qemu_st32 tmp0,tmp2,tmp3,$0x0
---- 0xffffffff806004ec
movi_i32 tmp0,$0x4
movi_i32 tmp1,$0x0
add2_i32 tmp0,tmp1,s0_0,s0_1,tmp0,tmp1
qemu_ld32 tmp0,tmp0,tmp1,$0x0
movi_i32 tmp4,$0x1f
sar_i32 tmp1,tmp0,tmp4
mov_i32 a0_0,tmp0
mov_i32 a0_1,tmp1
---- 0xffffffff806004f0
movi_i32 a1_0,$0x0
movi_i32 a1_1,$0x0
---- 0xffffffff806004f4
mov_i32 tmp2,a0_0
mov_i32 tmp3,a0_1
mov_i32 tmp0,v1_0
mov_i32 tmp1,v1_1
setcond2_i32 v0_0,tmp2,tmp3,tmp0,tmp1,ltu
movi_i32 v0_1,$0x0
---- 0xffffffff806004f8
movi_i32 tmp0,$0x0
movi_i32 tmp1,$0x0
brcond2_i32 v0_0,v0_1,tmp0,tmp1,ne,$0x0
mov_i32 v1_0,a0_0
mov_i32 v1_1,a0_1
set_label $0x0
Here a0 locals are used after brcond.
---- 0xffffffff806004fc
movi_i32 v0_0,$0x80620000
movi_i32 v0_1,$0xffffffff
---- 0xffffffff80600500
movi_i32 tmp0,$0x54ac
movi_i32 tmp1,$0x0
add2_i32 s4_0,s4_1,v0_0,v0_1,tmp0,tmp1
movi_i32 tmp4,$0x1f
sar_i32 s4_1,s4_0,tmp4
---- 0xffffffff80600504
movi_i32 v0_0,$0x80620000
movi_i32 v0_1,$0xffffffff
---- 0xffffffff80600508
movi_i32 tmp0,$0x4
movi_i32 tmp1,$0x0
add2_i32 tmp0,tmp1,s4_0,s4_1,tmp0,tmp1
mov_i32 tmp2,v1_0
mov_i32 tmp3,v1_1
qemu_st32 tmp2,tmp0,tmp1,$0x0
---- 0xffffffff8060050c
movi_i32 tmp2,$0x54b8
movi_i32 tmp3,$0x0
add2_i32 a0_0,a0_1,v0_0,v0_1,tmp2,tmp3
movi_i32 tmp4,$0x1f
sar_i32 a0_1,a0_0,tmp4
---- 0xffffffff80600510
movi_i32 a2_0,$0x4
movi_i32 a2_1,$0x0
---- 0xffffffff80600514
movi_i32 tmp2,$0x8
movi_i32 tmp3,$0x0
add2_i32 tmp2,tmp3,s8_0,s8_1,tmp2,tmp3
movi_i32 tmp0,$0x0
movi_i32 tmp1,$0x0
qemu_st32 tmp0,tmp2,tmp3,$0x0
---- 0xffffffff80600518
movi_i32 ra_0,$0x80600520
movi_i32 ra_1,$0xffffffff
---- 0xffffffff8060051c
movi_i32 tmp2,$0x8
movi_i32 tmp3,$0x0
add2_i32 tmp2,tmp3,s4_0,s4_1,tmp2,tmp3
movi_i32 tmp0,$0x0
movi_i32 tmp1,$0x0
movi_i32 hflags,$0x10898
movi_i32 btarget_0,$0x80104000
movi_i32 btarget_1,$0xffffffff
qemu_st32 tmp0,tmp2,tmp3,$0x0
movi_i32 hflags,$0x98
movi_i32 PC_0,$0x80104000
movi_i32 PC_1,$0xffffffff
exit_tb $0x0
OP after liveness analysis:
---- 0xffffffff806004d8
movi_i32 tmp0,$0x54a0
movi_i32 tmp1,$0x0
add2_i32 tmp0,tmp1,s5_0,s5_1,tmp0,tmp1
mov_i32 tmp2,v0_0
nopn $0x2,$0x2
qemu_st32 tmp2,tmp0,tmp1,$0x0
---- 0xffffffff806004dc
mov_i32 tmp2,s0_0
mov_i32 tmp3,s0_1
qemu_ld32 tmp2,tmp2,tmp3,$0x0
movi_i32 tmp4,$0x1f
sar_i32 tmp3,tmp2,tmp4
mov_i32 v1_0,tmp2
mov_i32 v1_1,tmp3
---- 0xffffffff806004e0
movi_i32 v0_0,$0x80620000
movi_i32 v0_1,$0xffffffff
---- 0xffffffff806004e4
movi_i32 tmp0,$0x54ac
movi_i32 tmp1,$0x0
add2_i32 tmp0,tmp1,v0_0,v0_1,tmp0,tmp1
nop
nop
qemu_st32 tmp2,tmp0,tmp1,$0x0
---- 0xffffffff806004e8
movi_i32 tmp2,$0x4
movi_i32 tmp3,$0x0
add2_i32 tmp2,tmp3,s8_0,s8_1,tmp2,tmp3
mov_i32 tmp0,v1_0
nopn $0x2,$0x2
qemu_st32 tmp0,tmp2,tmp3,$0x0
---- 0xffffffff806004ec
movi_i32 tmp0,$0x4
movi_i32 tmp1,$0x0
add2_i32 tmp0,tmp1,s0_0,s0_1,tmp0,tmp1
qemu_ld32 tmp0,tmp0,tmp1,$0x0
movi_i32 tmp4,$0x1f
sar_i32 tmp1,tmp0,tmp4
mov_i32 a0_0,tmp0
mov_i32 a0_1,tmp1
---- 0xffffffff806004f0
movi_i32 a1_0,$0x0
movi_i32 a1_1,$0x0
---- 0xffffffff806004f4
mov_i32 tmp2,tmp0
mov_i32 tmp3,tmp1
mov_i32 tmp0,v1_0
mov_i32 tmp1,v1_1
setcond2_i32 v0_0,tmp2,tmp3,tmp0,tmp1,ltu
movi_i32 v0_1,$0x0
---- 0xffffffff806004f8
movi_i32 tmp0,$0x0
movi_i32 tmp1,$0x0
brcond2_i32 v0_0,v0_1,tmp0,tmp1,ne,$0x0
mov_i32 v1_0,tmp2
mov_i32 v1_1,tmp3
set_label $0x0
But here tmp2 and tmp3 which are dead after brcond. They are set to a0
values in 0xffffffff806004ec.
Maybe my patch doesn't work because tmp2&3 are register temps while
a0_0&1 are globals. So this should work:
if (src >= nb_globals && dst >= nb_globals &&
tcg_arg_is_local(s, src) == tcg_arg_is_local(s, dst)) {
---- 0xffffffff806004fc
movi_i32 v0_0,$0x80620000
movi_i32 v0_1,$0xffffffff
---- 0xffffffff80600500
movi_i32 tmp0,$0x54ac
movi_i32 tmp1,$0x0
add2_i32 s4_0,s4_1,v0_0,v0_1,tmp0,tmp1
movi_i32 tmp4,$0x1f
sar_i32 s4_1,s4_0,tmp4
---- 0xffffffff80600504
movi_i32 v0_0,$0x80620000
movi_i32 v0_1,$0xffffffff
---- 0xffffffff80600508
movi_i32 tmp0,$0x4
movi_i32 tmp1,$0x0
add2_i32 tmp0,tmp1,s4_0,s4_1,tmp0,tmp1
mov_i32 tmp2,v1_0
nopn $0x2,$0x2
qemu_st32 tmp2,tmp0,tmp1,$0x0
---- 0xffffffff8060050c
movi_i32 tmp2,$0x54b8
movi_i32 tmp3,$0x0
add2_i32 a0_0,a0_1,v0_0,v0_1,tmp2,tmp3
movi_i32 tmp4,$0x1f
sar_i32 a0_1,a0_0,tmp4
---- 0xffffffff80600510
movi_i32 a2_0,$0x4
movi_i32 a2_1,$0x0
---- 0xffffffff80600514
movi_i32 tmp2,$0x8
movi_i32 tmp3,$0x0
add2_i32 tmp2,tmp3,s8_0,s8_1,tmp2,tmp3
movi_i32 tmp0,$0x0
nopn $0x2,$0x2
qemu_st32 tmp0,tmp2,tmp3,$0x0
---- 0xffffffff80600518
movi_i32 ra_0,$0x80600520
movi_i32 ra_1,$0xffffffff
---- 0xffffffff8060051c
movi_i32 tmp2,$0x8
movi_i32 tmp3,$0x0
add2_i32 tmp2,tmp3,s4_0,s4_1,tmp2,tmp3
movi_i32 tmp0,$0x0
nopn $0x2,$0x2
movi_i32 hflags,$0x10898
movi_i32 btarget_0,$0x80104000
movi_i32 btarget_1,$0xffffffff
qemu_st32 tmp0,tmp2,tmp3,$0x0
movi_i32 hflags,$0x98
movi_i32 PC_0,$0x80104000
movi_i32 PC_1,$0xffffffff
exit_tb $0x0
end
^ permalink raw reply [flat|nested] 7+ messages in thread
* [Qemu-devel] [PATCH] TCG: fix copy propagation
@ 2011-08-06 21:26 Blue Swirl
2011-08-07 6:48 ` Stefan Weil
0 siblings, 1 reply; 7+ messages in thread
From: Blue Swirl @ 2011-08-06 21:26 UTC (permalink / raw)
To: Kirill Batuzov, zhur, Stefan Weil, TeLeMan, Kenneth Salerno,
qemu-devel
[-- Attachment #1: Type: text/plain, Size: 3491 bytes --]
Copy propagation introduced in 22613af4a6d9602001e6d0e7b6d98aa40aa018dc
considered only global registers. However, register temps and stack
allocated locals must be handled differently because register temps
don't survive across brcond.
Fix by propagating only within same class of temps.
Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
---
tcg/optimize.c | 15 +++++++++------
tcg/tcg.h | 5 +++++
2 files changed, 14 insertions(+), 6 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index a3bfa5e..7eb5eb1 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -185,12 +185,15 @@ static int op_to_movi(int op)
}
}
-static void tcg_opt_gen_mov(TCGArg *gen_args, TCGArg dst, TCGArg src,
- int nb_temps, int nb_globals)
+static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args, TCGArg dst,
+ TCGArg src, int nb_temps, int nb_globals)
{
reset_temp(dst, nb_temps, nb_globals);
assert(temps[src].state != TCG_TEMP_COPY);
- if (src >= nb_globals) {
+ /* Don't try to copy if one of temps is a global or either one
+ is local and another is register */
+ if (src >= nb_globals && dst >= nb_globals &&
+ tcg_arg_is_local(s, src) == tcg_arg_is_local(s, dst)) {
assert(temps[src].state != TCG_TEMP_CONST);
if (temps[src].state != TCG_TEMP_HAS_COPY) {
temps[src].state = TCG_TEMP_HAS_COPY;
@@ -474,7 +477,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s,
uint16_t *tcg_opc_ptr,
gen_opc_buf[op_index] = INDEX_op_nop;
} else {
gen_opc_buf[op_index] = op_to_mov(op);
- tcg_opt_gen_mov(gen_args, args[0], args[1],
+ tcg_opt_gen_mov(s, gen_args, args[0], args[1],
nb_temps, nb_globals);
gen_args += 2;
args += 3;
@@ -500,7 +503,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s,
uint16_t *tcg_opc_ptr,
gen_opc_buf[op_index] = INDEX_op_nop;
} else {
gen_opc_buf[op_index] = op_to_mov(op);
- tcg_opt_gen_mov(gen_args, args[0], args[1], nb_temps,
+ tcg_opt_gen_mov(s, gen_args, args[0], args[1], nb_temps,
nb_globals);
gen_args += 2;
args += 3;
@@ -523,7 +526,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s,
uint16_t *tcg_opc_ptr,
break;
}
if (temps[args[1]].state != TCG_TEMP_CONST) {
- tcg_opt_gen_mov(gen_args, args[0], args[1],
+ tcg_opt_gen_mov(s, gen_args, args[0], args[1],
nb_temps, nb_globals);
gen_args += 2;
args += 2;
diff --git a/tcg/tcg.h b/tcg/tcg.h
index e76f9af..e2a7095 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -410,6 +410,11 @@ static inline TCGv_i64 tcg_temp_local_new_i64(void)
void tcg_temp_free_i64(TCGv_i64 arg);
char *tcg_get_arg_str_i64(TCGContext *s, char *buf, int buf_size,
TCGv_i64 arg);
+static inline bool tcg_arg_is_local(TCGContext *s, TCGArg arg)
+{
+ return s->temps[arg].temp_local;
+}
+
#if defined(CONFIG_DEBUG_TCG)
/* If you call tcg_clear_temp_count() at the start of a section of
* code which is not supposed to leak any TCG temporaries, then
--
1.6.2.4
[-- Attachment #2: 0001-TCG-fix-copy-propagation.patch --]
[-- Type: text/x-patch, Size: 3777 bytes --]
From c06136b49409ba1c68457ce3ceffb5eeb355ec47 Mon Sep 17 00:00:00 2001
Message-Id: <c06136b49409ba1c68457ce3ceffb5eeb355ec47.1312665937.git.blauwirbel@gmail.com>
From: Blue Swirl <blauwirbel@gmail.com>
Date: Sat, 6 Aug 2011 13:58:47 +0000
Subject: [PATCH] TCG: fix copy propagation
Copy propagation introduced in 22613af4a6d9602001e6d0e7b6d98aa40aa018dc
considered only global registers. However, register temps and stack
allocated locals must be handled differently because register temps
don't survive across brcond.
Fix by propagating only within same class of temps.
Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
---
tcg/optimize.c | 15 +++++++++------
tcg/tcg.h | 5 +++++
2 files changed, 14 insertions(+), 6 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index a3bfa5e..7eb5eb1 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -185,12 +185,15 @@ static int op_to_movi(int op)
}
}
-static void tcg_opt_gen_mov(TCGArg *gen_args, TCGArg dst, TCGArg src,
- int nb_temps, int nb_globals)
+static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args, TCGArg dst,
+ TCGArg src, int nb_temps, int nb_globals)
{
reset_temp(dst, nb_temps, nb_globals);
assert(temps[src].state != TCG_TEMP_COPY);
- if (src >= nb_globals) {
+ /* Don't try to copy if one of temps is a global or either one
+ is local and another is register */
+ if (src >= nb_globals && dst >= nb_globals &&
+ tcg_arg_is_local(s, src) == tcg_arg_is_local(s, dst)) {
assert(temps[src].state != TCG_TEMP_CONST);
if (temps[src].state != TCG_TEMP_HAS_COPY) {
temps[src].state = TCG_TEMP_HAS_COPY;
@@ -474,7 +477,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
gen_opc_buf[op_index] = INDEX_op_nop;
} else {
gen_opc_buf[op_index] = op_to_mov(op);
- tcg_opt_gen_mov(gen_args, args[0], args[1],
+ tcg_opt_gen_mov(s, gen_args, args[0], args[1],
nb_temps, nb_globals);
gen_args += 2;
args += 3;
@@ -500,7 +503,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
gen_opc_buf[op_index] = INDEX_op_nop;
} else {
gen_opc_buf[op_index] = op_to_mov(op);
- tcg_opt_gen_mov(gen_args, args[0], args[1], nb_temps,
+ tcg_opt_gen_mov(s, gen_args, args[0], args[1], nb_temps,
nb_globals);
gen_args += 2;
args += 3;
@@ -523,7 +526,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
break;
}
if (temps[args[1]].state != TCG_TEMP_CONST) {
- tcg_opt_gen_mov(gen_args, args[0], args[1],
+ tcg_opt_gen_mov(s, gen_args, args[0], args[1],
nb_temps, nb_globals);
gen_args += 2;
args += 2;
diff --git a/tcg/tcg.h b/tcg/tcg.h
index e76f9af..e2a7095 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -410,6 +410,11 @@ static inline TCGv_i64 tcg_temp_local_new_i64(void)
void tcg_temp_free_i64(TCGv_i64 arg);
char *tcg_get_arg_str_i64(TCGContext *s, char *buf, int buf_size, TCGv_i64 arg);
+static inline bool tcg_arg_is_local(TCGContext *s, TCGArg arg)
+{
+ return s->temps[arg].temp_local;
+}
+
#if defined(CONFIG_DEBUG_TCG)
/* If you call tcg_clear_temp_count() at the start of a section of
* code which is not supposed to leak any TCG temporaries, then
--
1.7.2.5
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [Qemu-devel] [PATCH] TCG: fix copy propagation
2011-08-06 21:26 Blue Swirl
@ 2011-08-07 6:48 ` Stefan Weil
2011-08-07 9:36 ` Blue Swirl
0 siblings, 1 reply; 7+ messages in thread
From: Stefan Weil @ 2011-08-07 6:48 UTC (permalink / raw)
To: Blue Swirl; +Cc: TeLeMan, Kenneth Salerno, qemu-devel, zhur, Kirill Batuzov
Am 06.08.2011 23:26, schrieb Blue Swirl:
> Copy propagation introduced in 22613af4a6d9602001e6d0e7b6d98aa40aa018dc
> considered only global registers. However, register temps and stack
> allocated locals must be handled differently because register temps
> don't survive across brcond.
>
> Fix by propagating only within same class of temps.
>
> Signed-off-by: Blue Swirl<blauwirbel@gmail.com>
> ---
> tcg/optimize.c | 15 +++++++++------
> tcg/tcg.h | 5 +++++
> 2 files changed, 14 insertions(+), 6 deletions(-)
>
> diff --git a/tcg/optimize.c b/tcg/optimize.c
> index a3bfa5e..7eb5eb1 100644
> --- a/tcg/optimize.c
> +++ b/tcg/optimize.c
> @@ -185,12 +185,15 @@ static int op_to_movi(int op)
> }
> }
>
> -static void tcg_opt_gen_mov(TCGArg *gen_args, TCGArg dst, TCGArg src,
> - int nb_temps, int nb_globals)
> +static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args, TCGArg dst,
> + TCGArg src, int nb_temps, int nb_globals)
> {
> reset_temp(dst, nb_temps, nb_globals);
> assert(temps[src].state != TCG_TEMP_COPY);
> - if (src>= nb_globals) {
> + /* Don't try to copy if one of temps is a global or either one
> + is local and another is register */
> + if (src>= nb_globals&& dst>= nb_globals&&
> + tcg_arg_is_local(s, src) == tcg_arg_is_local(s, dst)) {
> assert(temps[src].state != TCG_TEMP_CONST);
> if (temps[src].state != TCG_TEMP_HAS_COPY) {
> temps[src].state = TCG_TEMP_HAS_COPY;
> @@ -474,7 +477,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s,
> uint16_t *tcg_opc_ptr,
> gen_opc_buf[op_index] = INDEX_op_nop;
> } else {
> gen_opc_buf[op_index] = op_to_mov(op);
> - tcg_opt_gen_mov(gen_args, args[0], args[1],
> + tcg_opt_gen_mov(s, gen_args, args[0], args[1],
> nb_temps, nb_globals);
> gen_args += 2;
> args += 3;
> @@ -500,7 +503,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s,
> uint16_t *tcg_opc_ptr,
> gen_opc_buf[op_index] = INDEX_op_nop;
> } else {
> gen_opc_buf[op_index] = op_to_mov(op);
> - tcg_opt_gen_mov(gen_args, args[0], args[1], nb_temps,
> + tcg_opt_gen_mov(s, gen_args, args[0], args[1], nb_temps,
> nb_globals);
> gen_args += 2;
> args += 3;
> @@ -523,7 +526,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s,
> uint16_t *tcg_opc_ptr,
> break;
> }
> if (temps[args[1]].state != TCG_TEMP_CONST) {
> - tcg_opt_gen_mov(gen_args, args[0], args[1],
> + tcg_opt_gen_mov(s, gen_args, args[0], args[1],
> nb_temps, nb_globals);
> gen_args += 2;
> args += 2;
> diff --git a/tcg/tcg.h b/tcg/tcg.h
> index e76f9af..e2a7095 100644
> --- a/tcg/tcg.h
> +++ b/tcg/tcg.h
> @@ -410,6 +410,11 @@ static inline TCGv_i64 tcg_temp_local_new_i64(void)
> void tcg_temp_free_i64(TCGv_i64 arg);
> char *tcg_get_arg_str_i64(TCGContext *s, char *buf, int buf_size,
> TCGv_i64 arg);
>
> +static inline bool tcg_arg_is_local(TCGContext *s, TCGArg arg)
> +{
> + return s->temps[arg].temp_local;
> +}
> +
> #if defined(CONFIG_DEBUG_TCG)
> /* If you call tcg_clear_temp_count() at the start of a section of
> * code which is not supposed to leak any TCG temporaries, then
This fixes qemu-system-x86_64 and qemu-system-mips64(el) on 32 bit hosts.
Tested-by: Stefan Weil <weil@mail.berlios.de>
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [Qemu-devel] [PATCH] TCG: fix copy propagation
2011-08-07 6:48 ` Stefan Weil
@ 2011-08-07 9:36 ` Blue Swirl
0 siblings, 0 replies; 7+ messages in thread
From: Blue Swirl @ 2011-08-07 9:36 UTC (permalink / raw)
To: Stefan Weil; +Cc: TeLeMan, Kenneth Salerno, qemu-devel, zhur, Kirill Batuzov
Thanks for testing, applied.
On Sun, Aug 7, 2011 at 6:48 AM, Stefan Weil <weil@mail.berlios.de> wrote:
> Am 06.08.2011 23:26, schrieb Blue Swirl:
>>
>> Copy propagation introduced in 22613af4a6d9602001e6d0e7b6d98aa40aa018dc
>> considered only global registers. However, register temps and stack
>> allocated locals must be handled differently because register temps
>> don't survive across brcond.
>>
>> Fix by propagating only within same class of temps.
>>
>> Signed-off-by: Blue Swirl<blauwirbel@gmail.com>
>> ---
>> tcg/optimize.c | 15 +++++++++------
>> tcg/tcg.h | 5 +++++
>> 2 files changed, 14 insertions(+), 6 deletions(-)
>>
>> diff --git a/tcg/optimize.c b/tcg/optimize.c
>> index a3bfa5e..7eb5eb1 100644
>> --- a/tcg/optimize.c
>> +++ b/tcg/optimize.c
>> @@ -185,12 +185,15 @@ static int op_to_movi(int op)
>> }
>> }
>>
>> -static void tcg_opt_gen_mov(TCGArg *gen_args, TCGArg dst, TCGArg src,
>> - int nb_temps, int nb_globals)
>> +static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args, TCGArg dst,
>> + TCGArg src, int nb_temps, int nb_globals)
>> {
>> reset_temp(dst, nb_temps, nb_globals);
>> assert(temps[src].state != TCG_TEMP_COPY);
>> - if (src>= nb_globals) {
>> + /* Don't try to copy if one of temps is a global or either one
>> + is local and another is register */
>> + if (src>= nb_globals&& dst>= nb_globals&&
>> + tcg_arg_is_local(s, src) == tcg_arg_is_local(s, dst)) {
>> assert(temps[src].state != TCG_TEMP_CONST);
>> if (temps[src].state != TCG_TEMP_HAS_COPY) {
>> temps[src].state = TCG_TEMP_HAS_COPY;
>> @@ -474,7 +477,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s,
>> uint16_t *tcg_opc_ptr,
>> gen_opc_buf[op_index] = INDEX_op_nop;
>> } else {
>> gen_opc_buf[op_index] = op_to_mov(op);
>> - tcg_opt_gen_mov(gen_args, args[0], args[1],
>> + tcg_opt_gen_mov(s, gen_args, args[0], args[1],
>> nb_temps, nb_globals);
>> gen_args += 2;
>> args += 3;
>> @@ -500,7 +503,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s,
>> uint16_t *tcg_opc_ptr,
>> gen_opc_buf[op_index] = INDEX_op_nop;
>> } else {
>> gen_opc_buf[op_index] = op_to_mov(op);
>> - tcg_opt_gen_mov(gen_args, args[0], args[1], nb_temps,
>> + tcg_opt_gen_mov(s, gen_args, args[0], args[1],
>> nb_temps,
>> nb_globals);
>> gen_args += 2;
>> args += 3;
>> @@ -523,7 +526,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s,
>> uint16_t *tcg_opc_ptr,
>> break;
>> }
>> if (temps[args[1]].state != TCG_TEMP_CONST) {
>> - tcg_opt_gen_mov(gen_args, args[0], args[1],
>> + tcg_opt_gen_mov(s, gen_args, args[0], args[1],
>> nb_temps, nb_globals);
>> gen_args += 2;
>> args += 2;
>> diff --git a/tcg/tcg.h b/tcg/tcg.h
>> index e76f9af..e2a7095 100644
>> --- a/tcg/tcg.h
>> +++ b/tcg/tcg.h
>> @@ -410,6 +410,11 @@ static inline TCGv_i64 tcg_temp_local_new_i64(void)
>> void tcg_temp_free_i64(TCGv_i64 arg);
>> char *tcg_get_arg_str_i64(TCGContext *s, char *buf, int buf_size,
>> TCGv_i64 arg);
>>
>> +static inline bool tcg_arg_is_local(TCGContext *s, TCGArg arg)
>> +{
>> + return s->temps[arg].temp_local;
>> +}
>> +
>> #if defined(CONFIG_DEBUG_TCG)
>> /* If you call tcg_clear_temp_count() at the start of a section of
>> * code which is not supposed to leak any TCG temporaries, then
>
> This fixes qemu-system-x86_64 and qemu-system-mips64(el) on 32 bit hosts.
>
> Tested-by: Stefan Weil <weil@mail.berlios.de>
>
>
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2011-08-07 9:36 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-08-06 14:06 [Qemu-devel] [PATCH] TCG: fix copy propagation Blue Swirl
2011-08-06 20:13 ` Stefan Weil
2011-08-06 20:33 ` Stefan Weil
2011-08-06 21:07 ` Blue Swirl
-- strict thread matches above, loose matches on Subject: below --
2011-08-06 21:26 Blue Swirl
2011-08-07 6:48 ` Stefan Weil
2011-08-07 9:36 ` Blue Swirl
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).