* Sparse-llvm question regarding handling of phi/phsrc instructions
@ 2017-09-07 16:04 Dibyendu Majumdar
2017-09-07 20:25 ` Luc Van Oostenryck
0 siblings, 1 reply; 4+ messages in thread
From: Dibyendu Majumdar @ 2017-09-07 16:04 UTC (permalink / raw)
To: Linux-Sparse
Hi,
The following code snippet:
static int test_do(void) {
int a = 0;
int count = 27;
switch (count % 8) {
case 0: do { a++;
case 7: a++;
case 6: a++;
case 5: a++;
case 4: a++;
case 3: a++;
case 2: a++;
case 1: a++;
} while ((count -= 8) > 0);
}
return a;
}
Results in following linear output:
test_do:
.L0:
<entry-point>
phisrc.32 %phi3(a) <- $0
phisrc.32 %phi4(a) <- $0
phisrc.32 %phi6(a) <- $0
phisrc.32 %phi8(a) <- $0
phisrc.32 %phi10(a) <- $0
phisrc.32 %phi12(a) <- $0
phisrc.32 %phi14(a) <- $0
phisrc.32 %phi16(a) <- $0
phisrc.32 %phi18(a) <- $0
phisrc.32 %phi20(count) <- $27
br .L7
.L10:
phi.32 %r3 <- %phi18(a), %phi19(a)
add.32 %r4 <- %r3, $1
phisrc.32 %phi17(a) <- %r4
phi.32 %r5 <- %phi16(a), %phi17(a)
add.32 %r6 <- %r5, $1
phisrc.32 %phi15(a) <- %r6
phi.32 %r7 <- %phi14(a), %phi15(a)
add.32 %r8 <- %r7, $1
phisrc.32 %phi13(a) <- %r8
phi.32 %r9 <- %phi12(a), %phi13(a)
add.32 %r10 <- %r9, $1
phisrc.32 %phi11(a) <- %r10
phi.32 %r11 <- %phi10(a), %phi11(a)
add.32 %r12 <- %r11, $1
phisrc.32 %phi9(a) <- %r12
br .L7
.L7:
phi.32 %r13 <- %phi8(a), %phi9(a)
add.32 %r14 <- %r13, $1
phisrc.32 %phi7(a) <- %r14
phi.32 %r15 <- %phi6(a), %phi7(a)
add.32 %r16 <- %r15, $1
phisrc.32 %phi5(a) <- %r16
phi.32 %r17 <- %phi4(a), %phi5(a)
add.32 %r18(a) <- %r17, $1
phisrc.32 %phi2(a) <- %r18(a)
phisrc.32 %phi19(a) <- %r18(a)
phi.32 %r19 <- %phi20(count), %phi21(count)
add.32 %r21 <- %r19, $-8
setgt.32 %r23 <- %r21, $0
phisrc.32 %phi21(count) <- %r21
cbr %r23, .L10, .L1
.L1:
phi.32 %r24 <- %phi2(a), %phi3(a)
ret.32 %r24
This is translated to following LLVM IR:
define internal i32 @test_do() {
L0:
%0 = alloca i32
%1 = alloca i32
%2 = alloca i32
%3 = alloca i32
%4 = alloca i32
%5 = alloca i32
%6 = alloca i32
%7 = alloca i32
%8 = alloca i32
%9 = alloca i32
store i32 0, i32* %9
store i32 0, i32* %7
store i32 0, i32* %6
store i32 0, i32* %5
store i32 0, i32* %4
store i32 0, i32* %3
store i32 0, i32* %2
store i32 0, i32* %1
store i32 0, i32* %0
store i32 27, i32* %8
br label %L2
L1: ; preds = %L2
%10 = load i32, i32* %0
%R4 = add i32 %10, 1
store i32 %R4, i32* %1
%11 = load i32, i32* %1
%R6 = add i32 %11, 1
store i32 %R6, i32* %2
%12 = load i32, i32* %2
%R8 = add i32 %12, 1
store i32 %R8, i32* %3
%13 = load i32, i32* %3
%R10 = add i32 %13, 1
store i32 %R10, i32* %4
%14 = load i32, i32* %4
%R12 = add i32 %14, 1
store i32 %R12, i32* %5
br label %L2
L2: ; preds = %L1, %L0
%15 = load i32, i32* %5
%R14 = add i32 %15, 1
store i32 %R14, i32* %6
%16 = load i32, i32* %6
%R16 = add i32 %16, 1
store i32 %R16, i32* %7
%17 = load i32, i32* %7
%R18 = add i32 %17, 1
store i32 %R18, i32* %9
store i32 %R18, i32* %0
%18 = load i32, i32* %8
%R21 = add i32 %18, -8
%R23 = icmp sgt i32 %R21, 0
%R231 = zext i1 %R23 to i32
store i32 %R21, i32* %8
%cond = icmp ne i32 %R231, 0
br i1 %cond, label %L1, label %L3
L3: ; preds = %L2
%19 = load i32, i32* %9
ret i32 %19
}
It seems to me that 'a' ends up having 9 different slots on the stack,
but it should really be merged into one slot, i.e. we should detect
that the phis all related to the same symbol, and create only one
alloca?
Regards
Dibyendu
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: Sparse-llvm question regarding handling of phi/phsrc instructions
2017-09-07 16:04 Sparse-llvm question regarding handling of phi/phsrc instructions Dibyendu Majumdar
@ 2017-09-07 20:25 ` Luc Van Oostenryck
2017-09-07 20:33 ` Dibyendu Majumdar
0 siblings, 1 reply; 4+ messages in thread
From: Luc Van Oostenryck @ 2017-09-07 20:25 UTC (permalink / raw)
To: Dibyendu Majumdar; +Cc: Linux-Sparse
2017-09-07 18:04 GMT+02:00 Dibyendu Majumdar <mobile@majumdar.org.uk>:
> Hi,
>
> The following code snippet:
>
> static int test_do(void) {
> int a = 0;
> int count = 27;
> switch (count % 8) {
> case 0: do { a++;
> case 7: a++;
> case 6: a++;
> case 5: a++;
> case 4: a++;
> case 3: a++;
> case 2: a++;
> case 1: a++;
> } while ((count -= 8) > 0);
> }
> return a;
> }
>
> Results in following linear output:
>
> test_do:
> .L0:
> <entry-point>
> phisrc.32 %phi3(a) <- $0
> phisrc.32 %phi4(a) <- $0
> phisrc.32 %phi6(a) <- $0
> phisrc.32 %phi8(a) <- $0
> phisrc.32 %phi10(a) <- $0
> phisrc.32 %phi12(a) <- $0
> phisrc.32 %phi14(a) <- $0
> phisrc.32 %phi16(a) <- $0
> phisrc.32 %phi18(a) <- $0
> phisrc.32 %phi20(count) <- $27
> br .L7
>
> .L10:
> phi.32 %r3 <- %phi18(a), %phi19(a)
> add.32 %r4 <- %r3, $1
> phisrc.32 %phi17(a) <- %r4
> phi.32 %r5 <- %phi16(a), %phi17(a)
> ...
>
> This is translated to following LLVM IR:
>
> define internal i32 @test_do() {
> L0:
> %0 = alloca i32
> %1 = alloca i32
> %2 = alloca i32
> %3 = alloca i32
> %4 = alloca i32
> %5 = alloca i32
> %6 = alloca i32
> %7 = alloca i32
> %8 = alloca i32
> %9 = alloca i32
> store i32 0, i32* %9
> store i32 0, i32* %7
> store i32 0, i32* %6
> store i32 0, i32* %5
> store i32 0, i32* %4
> store i32 0, i32* %3
> store i32 0, i32* %2
> store i32 0, i32* %1
> store i32 0, i32* %0
> store i32 27, i32* %8
> br label %L2
>
> L1: ; preds = %L2
> %10 = load i32, i32* %0
> %R4 = add i32 %10, 1
> ...
>
> It seems to me that 'a' ends up having 9 different slots on the stack,
> but it should really be merged into one slot, i.e. we should detect
> that the phis all related to the same symbol, and create only one
> alloca?
Yes and no. It's just one of the problem with the SSA conversion
as done in rc5.
If you try the same code with newssa, you have something much
saner, no?
-- Luc
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: Sparse-llvm question regarding handling of phi/phsrc instructions
2017-09-07 20:25 ` Luc Van Oostenryck
@ 2017-09-07 20:33 ` Dibyendu Majumdar
2017-09-08 10:11 ` Dibyendu Majumdar
0 siblings, 1 reply; 4+ messages in thread
From: Dibyendu Majumdar @ 2017-09-07 20:33 UTC (permalink / raw)
To: Luc Van Oostenryck; +Cc: Linux-Sparse
On 7 September 2017 at 21:25, Luc Van Oostenryck
<luc.vanoostenryck@gmail.com> wrote:
> 2017-09-07 18:04 GMT+02:00 Dibyendu Majumdar <mobile@majumdar.org.uk>:
>> The following code snippet:
>>
>> static int test_do(void) {
>> int a = 0;
>> int count = 27;
>> switch (count % 8) {
>> case 0: do { a++;
>> case 7: a++;
>> case 6: a++;
>> case 5: a++;
>> case 4: a++;
>> case 3: a++;
>> case 2: a++;
>> case 1: a++;
>> } while ((count -= 8) > 0);
>> }
>> return a;
>> }
>>
>> Results in following linear output:
>>
>> test_do:
>> .L0:
>> <entry-point>
>> phisrc.32 %phi3(a) <- $0
>> phisrc.32 %phi4(a) <- $0
>> phisrc.32 %phi6(a) <- $0
>> phisrc.32 %phi8(a) <- $0
>> phisrc.32 %phi10(a) <- $0
>> phisrc.32 %phi12(a) <- $0
>> phisrc.32 %phi14(a) <- $0
>> phisrc.32 %phi16(a) <- $0
>> phisrc.32 %phi18(a) <- $0
>> phisrc.32 %phi20(count) <- $27
>> br .L7
>>
>> .L10:
>> phi.32 %r3 <- %phi18(a), %phi19(a)
>> add.32 %r4 <- %r3, $1
>> phisrc.32 %phi17(a) <- %r4
>> phi.32 %r5 <- %phi16(a), %phi17(a)
>> ...
>>
>> This is translated to following LLVM IR:
>>
>> It seems to me that 'a' ends up having 9 different slots on the stack,
>> but it should really be merged into one slot, i.e. we should detect
>> that the phis all related to the same symbol, and create only one
>> alloca?
>
>
> Yes and no. It's just one of the problem with the SSA conversion
> as done in rc5.
> If you try the same code with newssa, you have something much
> saner, no?
>
Seems worse actually:
Here is a part of the linearized output pre simplifications. Note that
I haven't applied the fix you sent yesterday.
test_do:
.L0:
<entry-point>
mods.32 %r1 <- $27, $8
phisrc.32 %phi1(a) <- $0
phisrc.32 %phi3(a) <- $0
phisrc.32 %phi5(a) <- $0
phisrc.32 %phi7(a) <- $0
phisrc.32 %phi9(a) <- $0
phisrc.32 %phi11(a) <- $0
phisrc.32 %phi13(a) <- $0
phisrc.32 %phi15(count) <- $27
phisrc.32 %phi16(count) <- $27
phisrc.32 %phi17(count) <- $27
phisrc.32 %phi18(count) <- $27
phisrc.32 %phi19(count) <- $27
phisrc.32 %phi20(count) <- $27
phisrc.32 %phi21(count) <- $27
phisrc.32 %phi34(a) <- $0
switch.32 %r1, 0 -> .L2, 1 -> .L9, 2 -> .L8, 3 -> .L7, 4 -> .L6, 5
-> .L5, 6 -> .L4, 7 -> .L3, default -> .L1
.L2:
phisrc.32 %phi29(a) <- $0
phisrc.32 %phi31(count) <- $27
br .L10
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: Sparse-llvm question regarding handling of phi/phsrc instructions
2017-09-07 20:33 ` Dibyendu Majumdar
@ 2017-09-08 10:11 ` Dibyendu Majumdar
0 siblings, 0 replies; 4+ messages in thread
From: Dibyendu Majumdar @ 2017-09-08 10:11 UTC (permalink / raw)
To: Luc Van Oostenryck; +Cc: Linux-Sparse
Hi Luc,
On 7 September 2017 at 21:33, Dibyendu Majumdar <mobile@majumdar.org.uk> wrote:
> On 7 September 2017 at 21:25, Luc Van Oostenryck
> <luc.vanoostenryck@gmail.com> wrote:
>> 2017-09-07 18:04 GMT+02:00 Dibyendu Majumdar <mobile@majumdar.org.uk>:
>>> The following code snippet:
>>>
>>> static int test_do(void) {
>>> int a = 0;
>>> int count = 27;
>>> switch (count % 8) {
>>> case 0: do { a++;
>>> case 7: a++;
>>> case 6: a++;
>>> case 5: a++;
>>> case 4: a++;
>>> case 3: a++;
>>> case 2: a++;
>>> case 1: a++;
>>> } while ((count -= 8) > 0);
>>> }
>>> return a;
>>> }
>>>
>>> Results in following linear output:
>>>
>>> test_do:
>>> .L0:
>>> <entry-point>
>>> phisrc.32 %phi3(a) <- $0
>>> phisrc.32 %phi4(a) <- $0
>>> phisrc.32 %phi6(a) <- $0
>>> phisrc.32 %phi8(a) <- $0
>>> phisrc.32 %phi10(a) <- $0
>>> phisrc.32 %phi12(a) <- $0
>>> phisrc.32 %phi14(a) <- $0
>>> phisrc.32 %phi16(a) <- $0
>>> phisrc.32 %phi18(a) <- $0
>>> phisrc.32 %phi20(count) <- $27
>>> br .L7
>>>
>>> .L10:
>>> phi.32 %r3 <- %phi18(a), %phi19(a)
>>> add.32 %r4 <- %r3, $1
>>> phisrc.32 %phi17(a) <- %r4
>>> phi.32 %r5 <- %phi16(a), %phi17(a)
>>> ...
>>>
>>> It seems to me that 'a' ends up having 9 different slots on the stack,
>>> but it should really be merged into one slot, i.e. we should detect
>>> that the phis all related to the same symbol, and create only one
>>> alloca?
>>
>>
What is also interesting is that in the old way by switching off the
simplifications the generated code avoids all the phis and is actually
simpler in terms of memory operations in the backend. Now LLVM can
sort this out easily as it transforms this to use phis anyway, but my
other backend is simpler, and for that non SSA version is better.
Regards
Dibyendu
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2017-09-08 10:11 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-09-07 16:04 Sparse-llvm question regarding handling of phi/phsrc instructions Dibyendu Majumdar
2017-09-07 20:25 ` Luc Van Oostenryck
2017-09-07 20:33 ` Dibyendu Majumdar
2017-09-08 10:11 ` Dibyendu Majumdar
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).