* [Qemu-devel] [PATCH] ppc: Convert op_440_dlmzb to TCG
@ 2008-11-29 14:35 Andreas Färber
2008-11-30 16:31 ` Aurélien Jarno
0 siblings, 1 reply; 7+ messages in thread
From: Andreas Färber @ 2008-11-29 14:35 UTC (permalink / raw)
To: qemu-devel; +Cc: Aurélien Jarno
[-- Attachment #1: Type: text/plain, Size: 4066 bytes --]
Replace {op,do}_440_dlmzb and op_440_dlmzb_update_Rc with inline TCG
instructions.
The two loops of do_440_dlmzb are unrolled.
Signed-off-by: Andreas Faerber <andreas.faerber@web.de>
---
Compile-tested on Linux/amd64.
diff --git a/target-ppc/op.c b/target-ppc/op.c
index 5d2cfa1..a26b1da 100644
--- a/target-ppc/op.c
+++ b/target-ppc/op.c
@@ -839,25 +839,6 @@ void OPPROTO op_4xx_tlbwe_hi (void)
}
#endif
-/* SPR micro-ops */
-/* 440 specific */
-void OPPROTO op_440_dlmzb (void)
-{
- do_440_dlmzb();
- RETURN();
-}
-
-void OPPROTO op_440_dlmzb_update_Rc (void)
-{
- if (T0 == 8)
- T0 = 0x2;
- else if (T0 < 4)
- T0 = 0x4;
- else
- T0 = 0x8;
- RETURN();
-}
-
#if !defined(CONFIG_USER_ONLY)
void OPPROTO op_store_pir (void)
{
diff --git a/target-ppc/op_helper.c b/target-ppc/op_helper.c
index 6addc74..a055ee6 100644
--- a/target-ppc/op_helper.c
+++ b/target-ppc/op_helper.c
@@ -1754,27 +1754,6 @@ void do_store_403_pb (int num)
}
#endif
-/* 440 specific */
-void do_440_dlmzb (void)
-{
- target_ulong mask;
- int i;
-
- i = 1;
- for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
- if ((T0 & mask) == 0)
- goto done;
- i++;
- }
- for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
- if ((T1 & mask) == 0)
- break;
- i++;
- }
- done:
- T0 = i;
-}
-
/*****************************************************************************/
/* SPE extension helpers */
/* Use a table to make this quicker */
diff --git a/target-ppc/op_helper.h b/target-ppc/op_helper.h
index 1c046d8..aaaba5c 100644
--- a/target-ppc/op_helper.h
+++ b/target-ppc/op_helper.h
@@ -112,9 +112,6 @@ void do_4xx_tlbwe_lo (void);
void do_4xx_tlbwe_hi (void);
#endif
-/* PowerPC 440 specific helpers */
-void do_440_dlmzb (void);
-
/* PowerPC 403 specific helpers */
#if !defined(CONFIG_USER_ONLY)
void do_load_403_pb (int num);
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 95cb482..59533ac 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -5872,12 +5872,49 @@ GEN_HANDLER(dlmzb, 0x1F, 0x0E, 0x02, 0x00000000, PPC_440_SPEC)
{
tcg_gen_mov_tl(cpu_T[0], cpu_gpr[rS(ctx->opcode)]);
tcg_gen_mov_tl(cpu_T[1], cpu_gpr[rB(ctx->opcode)]);
- gen_op_440_dlmzb();
+ TCGv t0 = tcg_temp_new();
+ int endLabel = gen_new_label();
+ int i = 1;
+ target_ulong mask;
+ for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
+ tcg_gen_andi_tl(t0, cpu_T[0], mask);
+ int nextLabel = gen_new_label();
+ tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0, nextLabel);
+ tcg_gen_movi_tl(cpu_T[0], i++);
+ tcg_gen_br(endLabel);
+ gen_set_label(nextLabel);
+ }
+ for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
+ tcg_gen_andi_tl(t0, cpu_T[1], mask);
+ int nextLabel = gen_new_label();
+ tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0, nextLabel);
+ tcg_gen_movi_tl(cpu_T[0], i++);
+ tcg_gen_br(endLabel);
+ gen_set_label(nextLabel);
+ }
+ tcg_gen_movi_tl(cpu_T[0], i);
+ gen_set_label(endLabel);
+ tcg_temp_free(t0);
tcg_gen_mov_tl(cpu_gpr[rA(ctx->opcode)], cpu_T[0]);
tcg_gen_andi_tl(cpu_xer, cpu_xer, ~0x7F);
tcg_gen_or_tl(cpu_xer, cpu_xer, cpu_T[0]);
if (Rc(ctx->opcode)) {
- gen_op_440_dlmzb_update_Rc();
+ endLabel = gen_new_label();
+ int nextLabel = gen_new_label();
+ tcg_gen_brcondi_tl(TCG_COND_NE, cpu_T[0], 8, nextLabel);
+ tcg_gen_movi_tl(cpu_T[0], 0x2);
+ tcg_gen_br(endLabel);
+
+ gen_set_label(nextLabel);
+ nextLabel = gen_new_label();
+ tcg_gen_brcondi_tl(TCG_COND_GE, cpu_T[0], 4, nextLabel);
+ tcg_gen_movi_tl(cpu_T[0], 0x4);
+ tcg_gen_br(endLabel);
+
+ gen_set_label(nextLabel);
+ tcg_gen_movi_tl(cpu_T[0], 0x8);
+
+ gen_set_label(endLabel);
tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_T[0]);
tcg_gen_andi_i32(cpu_crf[0], cpu_crf[0], 0xf);
}
[-- Attachment #2: op_440_dlmzb.diff --]
[-- Type: application/octet-stream, Size: 3813 bytes --]
diff --git a/target-ppc/op.c b/target-ppc/op.c
index 5d2cfa1..a26b1da 100644
--- a/target-ppc/op.c
+++ b/target-ppc/op.c
@@ -839,25 +839,6 @@ void OPPROTO op_4xx_tlbwe_hi (void)
}
#endif
-/* SPR micro-ops */
-/* 440 specific */
-void OPPROTO op_440_dlmzb (void)
-{
- do_440_dlmzb();
- RETURN();
-}
-
-void OPPROTO op_440_dlmzb_update_Rc (void)
-{
- if (T0 == 8)
- T0 = 0x2;
- else if (T0 < 4)
- T0 = 0x4;
- else
- T0 = 0x8;
- RETURN();
-}
-
#if !defined(CONFIG_USER_ONLY)
void OPPROTO op_store_pir (void)
{
diff --git a/target-ppc/op_helper.c b/target-ppc/op_helper.c
index 6addc74..a055ee6 100644
--- a/target-ppc/op_helper.c
+++ b/target-ppc/op_helper.c
@@ -1754,27 +1754,6 @@ void do_store_403_pb (int num)
}
#endif
-/* 440 specific */
-void do_440_dlmzb (void)
-{
- target_ulong mask;
- int i;
-
- i = 1;
- for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
- if ((T0 & mask) == 0)
- goto done;
- i++;
- }
- for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
- if ((T1 & mask) == 0)
- break;
- i++;
- }
- done:
- T0 = i;
-}
-
/*****************************************************************************/
/* SPE extension helpers */
/* Use a table to make this quicker */
diff --git a/target-ppc/op_helper.h b/target-ppc/op_helper.h
index 1c046d8..aaaba5c 100644
--- a/target-ppc/op_helper.h
+++ b/target-ppc/op_helper.h
@@ -112,9 +112,6 @@ void do_4xx_tlbwe_lo (void);
void do_4xx_tlbwe_hi (void);
#endif
-/* PowerPC 440 specific helpers */
-void do_440_dlmzb (void);
-
/* PowerPC 403 specific helpers */
#if !defined(CONFIG_USER_ONLY)
void do_load_403_pb (int num);
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 95cb482..59533ac 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -5872,12 +5872,49 @@ GEN_HANDLER(dlmzb, 0x1F, 0x0E, 0x02, 0x00000000, PPC_440_SPEC)
{
tcg_gen_mov_tl(cpu_T[0], cpu_gpr[rS(ctx->opcode)]);
tcg_gen_mov_tl(cpu_T[1], cpu_gpr[rB(ctx->opcode)]);
- gen_op_440_dlmzb();
+ TCGv t0 = tcg_temp_new();
+ int endLabel = gen_new_label();
+ int i = 1;
+ target_ulong mask;
+ for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
+ tcg_gen_andi_tl(t0, cpu_T[0], mask);
+ int nextLabel = gen_new_label();
+ tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0, nextLabel);
+ tcg_gen_movi_tl(cpu_T[0], i++);
+ tcg_gen_br(endLabel);
+ gen_set_label(nextLabel);
+ }
+ for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
+ tcg_gen_andi_tl(t0, cpu_T[1], mask);
+ int nextLabel = gen_new_label();
+ tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0, nextLabel);
+ tcg_gen_movi_tl(cpu_T[0], i++);
+ tcg_gen_br(endLabel);
+ gen_set_label(nextLabel);
+ }
+ tcg_gen_movi_tl(cpu_T[0], i);
+ gen_set_label(endLabel);
+ tcg_temp_free(t0);
tcg_gen_mov_tl(cpu_gpr[rA(ctx->opcode)], cpu_T[0]);
tcg_gen_andi_tl(cpu_xer, cpu_xer, ~0x7F);
tcg_gen_or_tl(cpu_xer, cpu_xer, cpu_T[0]);
if (Rc(ctx->opcode)) {
- gen_op_440_dlmzb_update_Rc();
+ endLabel = gen_new_label();
+ int nextLabel = gen_new_label();
+ tcg_gen_brcondi_tl(TCG_COND_NE, cpu_T[0], 8, nextLabel);
+ tcg_gen_movi_tl(cpu_T[0], 0x2);
+ tcg_gen_br(endLabel);
+
+ gen_set_label(nextLabel);
+ nextLabel = gen_new_label();
+ tcg_gen_brcondi_tl(TCG_COND_GE, cpu_T[0], 4, nextLabel);
+ tcg_gen_movi_tl(cpu_T[0], 0x4);
+ tcg_gen_br(endLabel);
+
+ gen_set_label(nextLabel);
+ tcg_gen_movi_tl(cpu_T[0], 0x8);
+
+ gen_set_label(endLabel);
tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_T[0]);
tcg_gen_andi_i32(cpu_crf[0], cpu_crf[0], 0xf);
}
[-- Attachment #3: Type: text/plain, Size: 1 bytes --]
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [Qemu-devel] [PATCH] ppc: Convert op_440_dlmzb to TCG
2008-11-29 14:35 [Qemu-devel] [PATCH] ppc: Convert op_440_dlmzb to TCG Andreas Färber
@ 2008-11-30 16:31 ` Aurélien Jarno
2008-11-30 17:25 ` Andreas Färber
2008-11-30 17:27 ` Aurélien Jarno
0 siblings, 2 replies; 7+ messages in thread
From: Aurélien Jarno @ 2008-11-30 16:31 UTC (permalink / raw)
To: qemu-devel
On Sat, Nov 29, 2008 at 03:35:21PM +0100, Andreas Färber wrote:
> Replace {op,do}_440_dlmzb and op_440_dlmzb_update_Rc with inline TCG
> instructions.
>
> The two loops of do_440_dlmzb are unrolled.
>
> Signed-off-by: Andreas Faerber <andreas.faerber@web.de>
Doing loops with TCG is a bad idea as it is very inefficient.
> ---
> Compile-tested on Linux/amd64.
>
> diff --git a/target-ppc/op.c b/target-ppc/op.c
> index 5d2cfa1..a26b1da 100644
> --- a/target-ppc/op.c
> +++ b/target-ppc/op.c
> @@ -839,25 +839,6 @@ void OPPROTO op_4xx_tlbwe_hi (void)
> }
> #endif
>
> -/* SPR micro-ops */
> -/* 440 specific */
> -void OPPROTO op_440_dlmzb (void)
> -{
> - do_440_dlmzb();
> - RETURN();
> -}
> -
> -void OPPROTO op_440_dlmzb_update_Rc (void)
> -{
> - if (T0 == 8)
> - T0 = 0x2;
> - else if (T0 < 4)
> - T0 = 0x4;
> - else
> - T0 = 0x8;
> - RETURN();
> -}
> -
> #if !defined(CONFIG_USER_ONLY)
> void OPPROTO op_store_pir (void)
> {
> diff --git a/target-ppc/op_helper.c b/target-ppc/op_helper.c
> index 6addc74..a055ee6 100644
> --- a/target-ppc/op_helper.c
> +++ b/target-ppc/op_helper.c
> @@ -1754,27 +1754,6 @@ void do_store_403_pb (int num)
> }
> #endif
>
> -/* 440 specific */
> -void do_440_dlmzb (void)
> -{
> - target_ulong mask;
> - int i;
> -
> - i = 1;
> - for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
> - if ((T0 & mask) == 0)
> - goto done;
> - i++;
> - }
> - for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
> - if ((T1 & mask) == 0)
> - break;
> - i++;
> - }
> - done:
> - T0 = i;
> -}
> -
> /
> *****************************************************************************/
> /* SPE extension helpers */
> /* Use a table to make this quicker */
> diff --git a/target-ppc/op_helper.h b/target-ppc/op_helper.h
> index 1c046d8..aaaba5c 100644
> --- a/target-ppc/op_helper.h
> +++ b/target-ppc/op_helper.h
> @@ -112,9 +112,6 @@ void do_4xx_tlbwe_lo (void);
> void do_4xx_tlbwe_hi (void);
> #endif
>
> -/* PowerPC 440 specific helpers */
> -void do_440_dlmzb (void);
> -
> /* PowerPC 403 specific helpers */
> #if !defined(CONFIG_USER_ONLY)
> void do_load_403_pb (int num);
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index 95cb482..59533ac 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -5872,12 +5872,49 @@ GEN_HANDLER(dlmzb, 0x1F, 0x0E, 0x02,
> 0x00000000, PPC_440_SPEC)
> {
> tcg_gen_mov_tl(cpu_T[0], cpu_gpr[rS(ctx->opcode)]);
> tcg_gen_mov_tl(cpu_T[1], cpu_gpr[rB(ctx->opcode)]);
> - gen_op_440_dlmzb();
> + TCGv t0 = tcg_temp_new();
> + int endLabel = gen_new_label();
> + int i = 1;
> + target_ulong mask;
> + for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
> + tcg_gen_andi_tl(t0, cpu_T[0], mask);
> + int nextLabel = gen_new_label();
> + tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0, nextLabel);
> + tcg_gen_movi_tl(cpu_T[0], i++);
> + tcg_gen_br(endLabel);
> + gen_set_label(nextLabel);
> + }
> + for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
> + tcg_gen_andi_tl(t0, cpu_T[1], mask);
> + int nextLabel = gen_new_label();
> + tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0, nextLabel);
> + tcg_gen_movi_tl(cpu_T[0], i++);
> + tcg_gen_br(endLabel);
> + gen_set_label(nextLabel);
> + }
> + tcg_gen_movi_tl(cpu_T[0], i);
> + gen_set_label(endLabel);
> + tcg_temp_free(t0);
> tcg_gen_mov_tl(cpu_gpr[rA(ctx->opcode)], cpu_T[0]);
> tcg_gen_andi_tl(cpu_xer, cpu_xer, ~0x7F);
> tcg_gen_or_tl(cpu_xer, cpu_xer, cpu_T[0]);
> if (Rc(ctx->opcode)) {
> - gen_op_440_dlmzb_update_Rc();
> + endLabel = gen_new_label();
> + int nextLabel = gen_new_label();
> + tcg_gen_brcondi_tl(TCG_COND_NE, cpu_T[0], 8, nextLabel);
> + tcg_gen_movi_tl(cpu_T[0], 0x2);
> + tcg_gen_br(endLabel);
> +
> + gen_set_label(nextLabel);
> + nextLabel = gen_new_label();
> + tcg_gen_brcondi_tl(TCG_COND_GE, cpu_T[0], 4, nextLabel);
> + tcg_gen_movi_tl(cpu_T[0], 0x4);
> + tcg_gen_br(endLabel);
> +
> + gen_set_label(nextLabel);
> + tcg_gen_movi_tl(cpu_T[0], 0x8);
> +
> + gen_set_label(endLabel);
> tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_T[0]);
> tcg_gen_andi_i32(cpu_crf[0], cpu_crf[0], 0xf);
> }
>
>
--
.''`. Aurelien Jarno | GPG: 1024D/F1BCDB73
: :' : Debian developer | Electrical Engineer
`. `' aurel32@debian.org | aurelien@aurel32.net
`- people.debian.org/~aurel32 | www.aurel32.net
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [Qemu-devel] [PATCH] ppc: Convert op_440_dlmzb to TCG
2008-11-30 16:31 ` Aurélien Jarno
@ 2008-11-30 17:25 ` Andreas Färber
2008-11-30 17:40 ` Laurent Desnogues
2008-11-30 17:27 ` Aurélien Jarno
1 sibling, 1 reply; 7+ messages in thread
From: Andreas Färber @ 2008-11-30 17:25 UTC (permalink / raw)
To: Aurélien Jarno; +Cc: qemu-devel
Am 30.11.2008 um 17:31 schrieb Aurélien Jarno:
> On Sat, Nov 29, 2008 at 03:35:21PM +0100, Andreas Färber wrote:
>> Replace {op,do}_440_dlmzb and op_440_dlmzb_update_Rc with inline TCG
>> instructions.
>>
>> The two loops of do_440_dlmzb are unrolled.
>>
>> Signed-off-by: Andreas Faerber <andreas.faerber@web.de>
>
> Doing loops with TCG is a bad idea as it is very inefficient.
Did you read the code? There is no loop in TCG here, that's exactly
what I was saying.
The labels are for conditionals only and for exiting the non-loop.
If you have a better suggestion, please say so as this is one of the
dyngen ops that keep me from working on the conversion on my own system.
Andreas
>
>
>> ---
>> Compile-tested on Linux/amd64.
>>
>> diff --git a/target-ppc/op.c b/target-ppc/op.c
>> index 5d2cfa1..a26b1da 100644
>> --- a/target-ppc/op.c
>> +++ b/target-ppc/op.c
>> @@ -839,25 +839,6 @@ void OPPROTO op_4xx_tlbwe_hi (void)
>> }
>> #endif
>>
>> -/* SPR micro-ops */
>> -/* 440 specific */
>> -void OPPROTO op_440_dlmzb (void)
>> -{
>> - do_440_dlmzb();
>> - RETURN();
>> -}
>> -
>> -void OPPROTO op_440_dlmzb_update_Rc (void)
>> -{
>> - if (T0 == 8)
>> - T0 = 0x2;
>> - else if (T0 < 4)
>> - T0 = 0x4;
>> - else
>> - T0 = 0x8;
>> - RETURN();
>> -}
>> -
>> #if !defined(CONFIG_USER_ONLY)
>> void OPPROTO op_store_pir (void)
>> {
>> diff --git a/target-ppc/op_helper.c b/target-ppc/op_helper.c
>> index 6addc74..a055ee6 100644
>> --- a/target-ppc/op_helper.c
>> +++ b/target-ppc/op_helper.c
>> @@ -1754,27 +1754,6 @@ void do_store_403_pb (int num)
>> }
>> #endif
>>
>> -/* 440 specific */
>> -void do_440_dlmzb (void)
>> -{
>> - target_ulong mask;
>> - int i;
>> -
>> - i = 1;
>> - for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
>> - if ((T0 & mask) == 0)
>> - goto done;
>> - i++;
>> - }
>> - for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
>> - if ((T1 & mask) == 0)
>> - break;
>> - i++;
>> - }
>> - done:
>> - T0 = i;
>> -}
>> -
>> /
>> *****************************************************************************/
>> /* SPE extension helpers */
>> /* Use a table to make this quicker */
>> diff --git a/target-ppc/op_helper.h b/target-ppc/op_helper.h
>> index 1c046d8..aaaba5c 100644
>> --- a/target-ppc/op_helper.h
>> +++ b/target-ppc/op_helper.h
>> @@ -112,9 +112,6 @@ void do_4xx_tlbwe_lo (void);
>> void do_4xx_tlbwe_hi (void);
>> #endif
>>
>> -/* PowerPC 440 specific helpers */
>> -void do_440_dlmzb (void);
>> -
>> /* PowerPC 403 specific helpers */
>> #if !defined(CONFIG_USER_ONLY)
>> void do_load_403_pb (int num);
>> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
>> index 95cb482..59533ac 100644
>> --- a/target-ppc/translate.c
>> +++ b/target-ppc/translate.c
>> @@ -5872,12 +5872,49 @@ GEN_HANDLER(dlmzb, 0x1F, 0x0E, 0x02,
>> 0x00000000, PPC_440_SPEC)
>> {
>> tcg_gen_mov_tl(cpu_T[0], cpu_gpr[rS(ctx->opcode)]);
>> tcg_gen_mov_tl(cpu_T[1], cpu_gpr[rB(ctx->opcode)]);
>> - gen_op_440_dlmzb();
>> + TCGv t0 = tcg_temp_new();
>> + int endLabel = gen_new_label();
>> + int i = 1;
>> + target_ulong mask;
>> + for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
>> + tcg_gen_andi_tl(t0, cpu_T[0], mask);
>> + int nextLabel = gen_new_label();
>> + tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0, nextLabel);
>> + tcg_gen_movi_tl(cpu_T[0], i++);
>> + tcg_gen_br(endLabel);
>> + gen_set_label(nextLabel);
>> + }
>> + for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
>> + tcg_gen_andi_tl(t0, cpu_T[1], mask);
>> + int nextLabel = gen_new_label();
>> + tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0, nextLabel);
>> + tcg_gen_movi_tl(cpu_T[0], i++);
>> + tcg_gen_br(endLabel);
>> + gen_set_label(nextLabel);
>> + }
>> + tcg_gen_movi_tl(cpu_T[0], i);
>> + gen_set_label(endLabel);
>> + tcg_temp_free(t0);
>> tcg_gen_mov_tl(cpu_gpr[rA(ctx->opcode)], cpu_T[0]);
>> tcg_gen_andi_tl(cpu_xer, cpu_xer, ~0x7F);
>> tcg_gen_or_tl(cpu_xer, cpu_xer, cpu_T[0]);
>> if (Rc(ctx->opcode)) {
>> - gen_op_440_dlmzb_update_Rc();
>> + endLabel = gen_new_label();
>> + int nextLabel = gen_new_label();
>> + tcg_gen_brcondi_tl(TCG_COND_NE, cpu_T[0], 8, nextLabel);
>> + tcg_gen_movi_tl(cpu_T[0], 0x2);
>> + tcg_gen_br(endLabel);
>> +
>> + gen_set_label(nextLabel);
>> + nextLabel = gen_new_label();
>> + tcg_gen_brcondi_tl(TCG_COND_GE, cpu_T[0], 4, nextLabel);
>> + tcg_gen_movi_tl(cpu_T[0], 0x4);
>> + tcg_gen_br(endLabel);
>> +
>> + gen_set_label(nextLabel);
>> + tcg_gen_movi_tl(cpu_T[0], 0x8);
>> +
>> + gen_set_label(endLabel);
>> tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_T[0]);
>> tcg_gen_andi_i32(cpu_crf[0], cpu_crf[0], 0xf);
>> }
>>
>
>
>>
>
>
> --
> .''`. Aurelien Jarno | GPG: 1024D/F1BCDB73
> : :' : Debian developer | Electrical Engineer
> `. `' aurel32@debian.org | aurelien@aurel32.net
> `- people.debian.org/~aurel32 | www.aurel32.net
>
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [Qemu-devel] [PATCH] ppc: Convert op_440_dlmzb to TCG
2008-11-30 16:31 ` Aurélien Jarno
2008-11-30 17:25 ` Andreas Färber
@ 2008-11-30 17:27 ` Aurélien Jarno
1 sibling, 0 replies; 7+ messages in thread
From: Aurélien Jarno @ 2008-11-30 17:27 UTC (permalink / raw)
To: Andreas Faerber; +Cc: qemu-devel
On Sun, Nov 30, 2008 at 05:31:17PM +0100, Aurélien Jarno wrote:
> On Sat, Nov 29, 2008 at 03:35:21PM +0100, Andreas Färber wrote:
> > Replace {op,do}_440_dlmzb and op_440_dlmzb_update_Rc with inline TCG
> > instructions.
> >
> > The two loops of do_440_dlmzb are unrolled.
> >
> > Signed-off-by: Andreas Faerber <andreas.faerber@web.de>
>
> Doing loops with TCG is a bad idea as it is very inefficient.
>
I have converted this instruction in r5836.
--
.''`. Aurelien Jarno | GPG: 1024D/F1BCDB73
: :' : Debian developer | Electrical Engineer
`. `' aurel32@debian.org | aurelien@aurel32.net
`- people.debian.org/~aurel32 | www.aurel32.net
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [Qemu-devel] [PATCH] ppc: Convert op_440_dlmzb to TCG
2008-11-30 17:25 ` Andreas Färber
@ 2008-11-30 17:40 ` Laurent Desnogues
2008-11-30 18:03 ` Andreas Färber
0 siblings, 1 reply; 7+ messages in thread
From: Laurent Desnogues @ 2008-11-30 17:40 UTC (permalink / raw)
To: qemu-devel
On Sun, Nov 30, 2008 at 6:25 PM, Andreas Färber <andreas.faerber@web.de> wrote:
>
> Am 30.11.2008 um 17:31 schrieb Aurélien Jarno:
>>
>> Doing loops with TCG is a bad idea as it is very inefficient.
>
> Did you read the code? There is no loop in TCG here, that's exactly what I
> was saying.
> The labels are for conditionals only and for exiting the non-loop.
>
> If you have a better suggestion, please say so as this is one of the dyngen
> ops that keep me from working on the conversion on my own system.
Going for a helper might be better for larger sequences of TCG ops;
Fabrice mentions a threshold of about 20 ops. And your replacement
is certainly larger than 20 ops :-) Plus given it uses brcond, it would
prevent TCG liveness analysis from doing its job in the basic block
containing that instruction.
Laurent
> Andreas
>
>>
>>
>>> ---
>>> Compile-tested on Linux/amd64.
>>>
>>> diff --git a/target-ppc/op.c b/target-ppc/op.c
>>> index 5d2cfa1..a26b1da 100644
>>> --- a/target-ppc/op.c
>>> +++ b/target-ppc/op.c
>>> @@ -839,25 +839,6 @@ void OPPROTO op_4xx_tlbwe_hi (void)
>>> }
>>> #endif
>>>
>>> -/* SPR micro-ops */
>>> -/* 440 specific */
>>> -void OPPROTO op_440_dlmzb (void)
>>> -{
>>> - do_440_dlmzb();
>>> - RETURN();
>>> -}
>>> -
>>> -void OPPROTO op_440_dlmzb_update_Rc (void)
>>> -{
>>> - if (T0 == 8)
>>> - T0 = 0x2;
>>> - else if (T0 < 4)
>>> - T0 = 0x4;
>>> - else
>>> - T0 = 0x8;
>>> - RETURN();
>>> -}
>>> -
>>> #if !defined(CONFIG_USER_ONLY)
>>> void OPPROTO op_store_pir (void)
>>> {
>>> diff --git a/target-ppc/op_helper.c b/target-ppc/op_helper.c
>>> index 6addc74..a055ee6 100644
>>> --- a/target-ppc/op_helper.c
>>> +++ b/target-ppc/op_helper.c
>>> @@ -1754,27 +1754,6 @@ void do_store_403_pb (int num)
>>> }
>>> #endif
>>>
>>> -/* 440 specific */
>>> -void do_440_dlmzb (void)
>>> -{
>>> - target_ulong mask;
>>> - int i;
>>> -
>>> - i = 1;
>>> - for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
>>> - if ((T0 & mask) == 0)
>>> - goto done;
>>> - i++;
>>> - }
>>> - for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
>>> - if ((T1 & mask) == 0)
>>> - break;
>>> - i++;
>>> - }
>>> - done:
>>> - T0 = i;
>>> -}
>>> -
>>> /
>>>
>>> *****************************************************************************/
>>> /* SPE extension helpers */
>>> /* Use a table to make this quicker */
>>> diff --git a/target-ppc/op_helper.h b/target-ppc/op_helper.h
>>> index 1c046d8..aaaba5c 100644
>>> --- a/target-ppc/op_helper.h
>>> +++ b/target-ppc/op_helper.h
>>> @@ -112,9 +112,6 @@ void do_4xx_tlbwe_lo (void);
>>> void do_4xx_tlbwe_hi (void);
>>> #endif
>>>
>>> -/* PowerPC 440 specific helpers */
>>> -void do_440_dlmzb (void);
>>> -
>>> /* PowerPC 403 specific helpers */
>>> #if !defined(CONFIG_USER_ONLY)
>>> void do_load_403_pb (int num);
>>> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
>>> index 95cb482..59533ac 100644
>>> --- a/target-ppc/translate.c
>>> +++ b/target-ppc/translate.c
>>> @@ -5872,12 +5872,49 @@ GEN_HANDLER(dlmzb, 0x1F, 0x0E, 0x02,
>>> 0x00000000, PPC_440_SPEC)
>>> {
>>> tcg_gen_mov_tl(cpu_T[0], cpu_gpr[rS(ctx->opcode)]);
>>> tcg_gen_mov_tl(cpu_T[1], cpu_gpr[rB(ctx->opcode)]);
>>> - gen_op_440_dlmzb();
>>> + TCGv t0 = tcg_temp_new();
>>> + int endLabel = gen_new_label();
>>> + int i = 1;
>>> + target_ulong mask;
>>> + for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
>>> + tcg_gen_andi_tl(t0, cpu_T[0], mask);
>>> + int nextLabel = gen_new_label();
>>> + tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0, nextLabel);
>>> + tcg_gen_movi_tl(cpu_T[0], i++);
>>> + tcg_gen_br(endLabel);
>>> + gen_set_label(nextLabel);
>>> + }
>>> + for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
>>> + tcg_gen_andi_tl(t0, cpu_T[1], mask);
>>> + int nextLabel = gen_new_label();
>>> + tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0, nextLabel);
>>> + tcg_gen_movi_tl(cpu_T[0], i++);
>>> + tcg_gen_br(endLabel);
>>> + gen_set_label(nextLabel);
>>> + }
>>> + tcg_gen_movi_tl(cpu_T[0], i);
>>> + gen_set_label(endLabel);
>>> + tcg_temp_free(t0);
>>> tcg_gen_mov_tl(cpu_gpr[rA(ctx->opcode)], cpu_T[0]);
>>> tcg_gen_andi_tl(cpu_xer, cpu_xer, ~0x7F);
>>> tcg_gen_or_tl(cpu_xer, cpu_xer, cpu_T[0]);
>>> if (Rc(ctx->opcode)) {
>>> - gen_op_440_dlmzb_update_Rc();
>>> + endLabel = gen_new_label();
>>> + int nextLabel = gen_new_label();
>>> + tcg_gen_brcondi_tl(TCG_COND_NE, cpu_T[0], 8, nextLabel);
>>> + tcg_gen_movi_tl(cpu_T[0], 0x2);
>>> + tcg_gen_br(endLabel);
>>> +
>>> + gen_set_label(nextLabel);
>>> + nextLabel = gen_new_label();
>>> + tcg_gen_brcondi_tl(TCG_COND_GE, cpu_T[0], 4, nextLabel);
>>> + tcg_gen_movi_tl(cpu_T[0], 0x4);
>>> + tcg_gen_br(endLabel);
>>> +
>>> + gen_set_label(nextLabel);
>>> + tcg_gen_movi_tl(cpu_T[0], 0x8);
>>> +
>>> + gen_set_label(endLabel);
>>> tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_T[0]);
>>> tcg_gen_andi_i32(cpu_crf[0], cpu_crf[0], 0xf);
>>> }
>>>
>>
>>
>>>
>>
>>
>> --
>> .''`. Aurelien Jarno | GPG: 1024D/F1BCDB73
>> : :' : Debian developer | Electrical Engineer
>> `. `' aurel32@debian.org | aurelien@aurel32.net
>> `- people.debian.org/~aurel32 | www.aurel32.net
>>
>>
>
>
>
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [Qemu-devel] [PATCH] ppc: Convert op_440_dlmzb to TCG
2008-11-30 17:40 ` Laurent Desnogues
@ 2008-11-30 18:03 ` Andreas Färber
2008-11-30 18:45 ` Laurent Desnogues
0 siblings, 1 reply; 7+ messages in thread
From: Andreas Färber @ 2008-11-30 18:03 UTC (permalink / raw)
To: qemu-devel
Am 30.11.2008 um 18:40 schrieb Laurent Desnogues:
> On Sun, Nov 30, 2008 at 6:25 PM, Andreas Färber <andreas.faerber@web.de> wrote:
>>
>> Am 30.11.2008 um 17:31 schrieb Aurélien Jarno:
>>>
>>> Doing loops with TCG is a bad idea as it is very inefficient.
>>
>> Did you read the code? There is no loop in TCG here, that's exactly
>> what I
>> was saying.
>> The labels are for conditionals only and for exiting the non-loop.
>>
>> If you have a better suggestion, please say so as this is one of
>> the dyngen
>> ops that keep me from working on the conversion on my own system.
>
> Going for a helper might be better for larger sequences of TCG ops;
> Fabrice mentions a threshold of about 20 ops. And your replacement
> is certainly larger than 20 ops :-) Plus given it uses brcond, it would
> prevent TCG liveness analysis from doing its job in the basic block
> containing that instruction.
Thanks for the explanation!
So, if I encounter a dyngen op that calls a helper, then in general I
should convert it to TCG code calling the helper?
My next problem is op_440_tlbre, which does call a helper, but with a
PARAM1 argument. Any hint how to translate that?
Andreas
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [Qemu-devel] [PATCH] ppc: Convert op_440_dlmzb to TCG
2008-11-30 18:03 ` Andreas Färber
@ 2008-11-30 18:45 ` Laurent Desnogues
0 siblings, 0 replies; 7+ messages in thread
From: Laurent Desnogues @ 2008-11-30 18:45 UTC (permalink / raw)
To: qemu-devel
On Sun, Nov 30, 2008 at 7:03 PM, Andreas Färber <andreas.faerber@web.de> wrote:
>
> So, if I encounter a dyngen op that calls a helper, then in general I should
> convert it to TCG code calling the helper?
I guess it depends, but in most cases probably. If an instruction is
not used often and its implementation in the old helper is rather
long or uses branches (if/else, loops), you should keep it as a
helper.
> My next problem is op_440_tlbre, which does call a helper, but with a PARAM1
> argument. Any hint how to translate that?
I am not familiar at all with the old generator, so take what I say with
a grain of salt :-)
PARAM1 probably was a parameter passed at runtime. Now with
the way helper calls are generated, you explicitly pass parameters.
Your best bet is to look at how Aurélien reimplemented dlmzb or
any other complex helpers since he started the conversion to TCG.
Laurent
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2008-11-30 18:45 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-11-29 14:35 [Qemu-devel] [PATCH] ppc: Convert op_440_dlmzb to TCG Andreas Färber
2008-11-30 16:31 ` Aurélien Jarno
2008-11-30 17:25 ` Andreas Färber
2008-11-30 17:40 ` Laurent Desnogues
2008-11-30 18:03 ` Andreas Färber
2008-11-30 18:45 ` Laurent Desnogues
2008-11-30 17:27 ` Aurélien Jarno
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).