Sparse-llvm question regarding handling of phi/phsrc instructions

linux-sparse.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* Sparse-llvm question regarding handling of phi/phsrc instructions
@ 2017-09-07 16:04 Dibyendu Majumdar
  2017-09-07 20:25 ` Luc Van Oostenryck
  0 siblings, 1 reply; 4+ messages in thread
From: Dibyendu Majumdar @ 2017-09-07 16:04 UTC (permalink / raw)
  To: Linux-Sparse

Hi,

The following code snippet:

static int test_do(void) {
    int a = 0;
    int count = 27;
    switch (count % 8) {
    case 0: do {  a++;
    case 7:       a++;
    case 6:       a++;
    case 5:       a++;
    case 4:       a++;
    case 3:       a++;
    case 2:       a++;
    case 1:       a++;
            } while ((count -= 8) > 0);
    }
    return a;
}

Results in following linear output:

test_do:
.L0:
        <entry-point>
        phisrc.32   %phi3(a) <- $0
        phisrc.32   %phi4(a) <- $0
        phisrc.32   %phi6(a) <- $0
        phisrc.32   %phi8(a) <- $0
        phisrc.32   %phi10(a) <- $0
        phisrc.32   %phi12(a) <- $0
        phisrc.32   %phi14(a) <- $0
        phisrc.32   %phi16(a) <- $0
        phisrc.32   %phi18(a) <- $0
        phisrc.32   %phi20(count) <- $27
        br          .L7

.L10:
        phi.32      %r3 <- %phi18(a), %phi19(a)
        add.32      %r4 <- %r3, $1
        phisrc.32   %phi17(a) <- %r4
        phi.32      %r5 <- %phi16(a), %phi17(a)
        add.32      %r6 <- %r5, $1
        phisrc.32   %phi15(a) <- %r6
        phi.32      %r7 <- %phi14(a), %phi15(a)
        add.32      %r8 <- %r7, $1
        phisrc.32   %phi13(a) <- %r8
        phi.32      %r9 <- %phi12(a), %phi13(a)
        add.32      %r10 <- %r9, $1
        phisrc.32   %phi11(a) <- %r10
        phi.32      %r11 <- %phi10(a), %phi11(a)
        add.32      %r12 <- %r11, $1
        phisrc.32   %phi9(a) <- %r12
        br          .L7

.L7:
        phi.32      %r13 <- %phi8(a), %phi9(a)
        add.32      %r14 <- %r13, $1
        phisrc.32   %phi7(a) <- %r14
        phi.32      %r15 <- %phi6(a), %phi7(a)
        add.32      %r16 <- %r15, $1
        phisrc.32   %phi5(a) <- %r16
        phi.32      %r17 <- %phi4(a), %phi5(a)
        add.32      %r18(a) <- %r17, $1
        phisrc.32   %phi2(a) <- %r18(a)
        phisrc.32   %phi19(a) <- %r18(a)
        phi.32      %r19 <- %phi20(count), %phi21(count)
        add.32      %r21 <- %r19, $-8
        setgt.32    %r23 <- %r21, $0
        phisrc.32   %phi21(count) <- %r21
        cbr         %r23, .L10, .L1

.L1:
        phi.32      %r24 <- %phi2(a), %phi3(a)
        ret.32      %r24


This is translated to following LLVM IR:

define internal i32 @test_do() {
L0:
  %0 = alloca i32
  %1 = alloca i32
  %2 = alloca i32
  %3 = alloca i32
  %4 = alloca i32
  %5 = alloca i32
  %6 = alloca i32
  %7 = alloca i32
  %8 = alloca i32
  %9 = alloca i32
  store i32 0, i32* %9
  store i32 0, i32* %7
  store i32 0, i32* %6
  store i32 0, i32* %5
  store i32 0, i32* %4
  store i32 0, i32* %3
  store i32 0, i32* %2
  store i32 0, i32* %1
  store i32 0, i32* %0
  store i32 27, i32* %8
  br label %L2

L1:                                               ; preds = %L2
  %10 = load i32, i32* %0
  %R4 = add i32 %10, 1
  store i32 %R4, i32* %1
  %11 = load i32, i32* %1
  %R6 = add i32 %11, 1
  store i32 %R6, i32* %2
  %12 = load i32, i32* %2
  %R8 = add i32 %12, 1
  store i32 %R8, i32* %3
  %13 = load i32, i32* %3
  %R10 = add i32 %13, 1
  store i32 %R10, i32* %4
  %14 = load i32, i32* %4
  %R12 = add i32 %14, 1
  store i32 %R12, i32* %5
  br label %L2

L2:                                               ; preds = %L1, %L0
  %15 = load i32, i32* %5
  %R14 = add i32 %15, 1
  store i32 %R14, i32* %6
  %16 = load i32, i32* %6
  %R16 = add i32 %16, 1
  store i32 %R16, i32* %7
  %17 = load i32, i32* %7
  %R18 = add i32 %17, 1
  store i32 %R18, i32* %9
  store i32 %R18, i32* %0
  %18 = load i32, i32* %8
  %R21 = add i32 %18, -8
  %R23 = icmp sgt i32 %R21, 0
  %R231 = zext i1 %R23 to i32
  store i32 %R21, i32* %8
  %cond = icmp ne i32 %R231, 0
  br i1 %cond, label %L1, label %L3

L3:                                               ; preds = %L2
  %19 = load i32, i32* %9
  ret i32 %19
}

It seems to me that 'a' ends up having 9 different slots on the stack,
but it should really be merged into one slot, i.e. we should detect
that the phis all related to the same symbol, and create only one
alloca?

Regards
Dibyendu

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: Sparse-llvm question regarding handling of phi/phsrc instructions
  2017-09-07 16:04 Sparse-llvm question regarding handling of phi/phsrc instructions Dibyendu Majumdar
@ 2017-09-07 20:25 ` Luc Van Oostenryck
  2017-09-07 20:33   ` Dibyendu Majumdar
  0 siblings, 1 reply; 4+ messages in thread
From: Luc Van Oostenryck @ 2017-09-07 20:25 UTC (permalink / raw)
  To: Dibyendu Majumdar; +Cc: Linux-Sparse

2017-09-07 18:04 GMT+02:00 Dibyendu Majumdar <mobile@majumdar.org.uk>:
> Hi,
>
> The following code snippet:
>
> static int test_do(void) {
>     int a = 0;
>     int count = 27;
>     switch (count % 8) {
>     case 0: do {  a++;
>     case 7:       a++;
>     case 6:       a++;
>     case 5:       a++;
>     case 4:       a++;
>     case 3:       a++;
>     case 2:       a++;
>     case 1:       a++;
>             } while ((count -= 8) > 0);
>     }
>     return a;
> }
>
> Results in following linear output:
>
> test_do:
> .L0:
>         <entry-point>
>         phisrc.32   %phi3(a) <- $0
>         phisrc.32   %phi4(a) <- $0
>         phisrc.32   %phi6(a) <- $0
>         phisrc.32   %phi8(a) <- $0
>         phisrc.32   %phi10(a) <- $0
>         phisrc.32   %phi12(a) <- $0
>         phisrc.32   %phi14(a) <- $0
>         phisrc.32   %phi16(a) <- $0
>         phisrc.32   %phi18(a) <- $0
>         phisrc.32   %phi20(count) <- $27
>         br          .L7
>
> .L10:
>         phi.32      %r3 <- %phi18(a), %phi19(a)
>         add.32      %r4 <- %r3, $1
>         phisrc.32   %phi17(a) <- %r4
>         phi.32      %r5 <- %phi16(a), %phi17(a)
> ...
>
> This is translated to following LLVM IR:
>
> define internal i32 @test_do() {
> L0:
>   %0 = alloca i32
>   %1 = alloca i32
>   %2 = alloca i32
>   %3 = alloca i32
>   %4 = alloca i32
>   %5 = alloca i32
>   %6 = alloca i32
>   %7 = alloca i32
>   %8 = alloca i32
>   %9 = alloca i32
>   store i32 0, i32* %9
>   store i32 0, i32* %7
>   store i32 0, i32* %6
>   store i32 0, i32* %5
>   store i32 0, i32* %4
>   store i32 0, i32* %3
>   store i32 0, i32* %2
>   store i32 0, i32* %1
>   store i32 0, i32* %0
>   store i32 27, i32* %8
>   br label %L2
>
> L1:                                               ; preds = %L2
>   %10 = load i32, i32* %0
>   %R4 = add i32 %10, 1
> ...
>
> It seems to me that 'a' ends up having 9 different slots on the stack,
> but it should really be merged into one slot, i.e. we should detect
> that the phis all related to the same symbol, and create only one
> alloca?


Yes and no. It's just one of the problem with the SSA conversion
as done in rc5.
If you try the same code with newssa, you have something much
saner, no?

-- Luc

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: Sparse-llvm question regarding handling of phi/phsrc instructions
  2017-09-07 20:25 ` Luc Van Oostenryck
@ 2017-09-07 20:33   ` Dibyendu Majumdar
  2017-09-08 10:11     ` Dibyendu Majumdar
  0 siblings, 1 reply; 4+ messages in thread
From: Dibyendu Majumdar @ 2017-09-07 20:33 UTC (permalink / raw)
  To: Luc Van Oostenryck; +Cc: Linux-Sparse

On 7 September 2017 at 21:25, Luc Van Oostenryck
<luc.vanoostenryck@gmail.com> wrote:
> 2017-09-07 18:04 GMT+02:00 Dibyendu Majumdar <mobile@majumdar.org.uk>:
>> The following code snippet:
>>
>> static int test_do(void) {
>>     int a = 0;
>>     int count = 27;
>>     switch (count % 8) {
>>     case 0: do {  a++;
>>     case 7:       a++;
>>     case 6:       a++;
>>     case 5:       a++;
>>     case 4:       a++;
>>     case 3:       a++;
>>     case 2:       a++;
>>     case 1:       a++;
>>             } while ((count -= 8) > 0);
>>     }
>>     return a;
>> }
>>
>> Results in following linear output:
>>
>> test_do:
>> .L0:
>>         <entry-point>
>>         phisrc.32   %phi3(a) <- $0
>>         phisrc.32   %phi4(a) <- $0
>>         phisrc.32   %phi6(a) <- $0
>>         phisrc.32   %phi8(a) <- $0
>>         phisrc.32   %phi10(a) <- $0
>>         phisrc.32   %phi12(a) <- $0
>>         phisrc.32   %phi14(a) <- $0
>>         phisrc.32   %phi16(a) <- $0
>>         phisrc.32   %phi18(a) <- $0
>>         phisrc.32   %phi20(count) <- $27
>>         br          .L7
>>
>> .L10:
>>         phi.32      %r3 <- %phi18(a), %phi19(a)
>>         add.32      %r4 <- %r3, $1
>>         phisrc.32   %phi17(a) <- %r4
>>         phi.32      %r5 <- %phi16(a), %phi17(a)
>> ...
>>
>> This is translated to following LLVM IR:
>>
>> It seems to me that 'a' ends up having 9 different slots on the stack,
>> but it should really be merged into one slot, i.e. we should detect
>> that the phis all related to the same symbol, and create only one
>> alloca?
>
>
> Yes and no. It's just one of the problem with the SSA conversion
> as done in rc5.
> If you try the same code with newssa, you have something much
> saner, no?
>

Seems worse actually:

Here is a part of the linearized output pre simplifications. Note that
I haven't applied the fix you sent yesterday.

test_do:
.L0:
<entry-point>
mods.32     %r1 <- $27, $8
phisrc.32   %phi1(a) <- $0
phisrc.32   %phi3(a) <- $0
phisrc.32   %phi5(a) <- $0
phisrc.32   %phi7(a) <- $0
phisrc.32   %phi9(a) <- $0
phisrc.32   %phi11(a) <- $0
phisrc.32   %phi13(a) <- $0
phisrc.32   %phi15(count) <- $27
phisrc.32   %phi16(count) <- $27
phisrc.32   %phi17(count) <- $27
phisrc.32   %phi18(count) <- $27
phisrc.32   %phi19(count) <- $27
phisrc.32   %phi20(count) <- $27
phisrc.32   %phi21(count) <- $27
phisrc.32   %phi34(a) <- $0
switch.32   %r1, 0 -> .L2, 1 -> .L9, 2 -> .L8, 3 -> .L7, 4 -> .L6, 5
-> .L5, 6 -> .L4, 7 -> .L3, default -> .L1

.L2:
phisrc.32   %phi29(a) <- $0
phisrc.32   %phi31(count) <- $27
br          .L10

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: Sparse-llvm question regarding handling of phi/phsrc instructions
  2017-09-07 20:33   ` Dibyendu Majumdar
@ 2017-09-08 10:11     ` Dibyendu Majumdar
  0 siblings, 0 replies; 4+ messages in thread
From: Dibyendu Majumdar @ 2017-09-08 10:11 UTC (permalink / raw)
  To: Luc Van Oostenryck; +Cc: Linux-Sparse

Hi Luc,

On 7 September 2017 at 21:33, Dibyendu Majumdar <mobile@majumdar.org.uk> wrote:
> On 7 September 2017 at 21:25, Luc Van Oostenryck
> <luc.vanoostenryck@gmail.com> wrote:
>> 2017-09-07 18:04 GMT+02:00 Dibyendu Majumdar <mobile@majumdar.org.uk>:
>>> The following code snippet:
>>>
>>> static int test_do(void) {
>>>     int a = 0;
>>>     int count = 27;
>>>     switch (count % 8) {
>>>     case 0: do {  a++;
>>>     case 7:       a++;
>>>     case 6:       a++;
>>>     case 5:       a++;
>>>     case 4:       a++;
>>>     case 3:       a++;
>>>     case 2:       a++;
>>>     case 1:       a++;
>>>             } while ((count -= 8) > 0);
>>>     }
>>>     return a;
>>> }
>>>
>>> Results in following linear output:
>>>
>>> test_do:
>>> .L0:
>>>         <entry-point>
>>>         phisrc.32   %phi3(a) <- $0
>>>         phisrc.32   %phi4(a) <- $0
>>>         phisrc.32   %phi6(a) <- $0
>>>         phisrc.32   %phi8(a) <- $0
>>>         phisrc.32   %phi10(a) <- $0
>>>         phisrc.32   %phi12(a) <- $0
>>>         phisrc.32   %phi14(a) <- $0
>>>         phisrc.32   %phi16(a) <- $0
>>>         phisrc.32   %phi18(a) <- $0
>>>         phisrc.32   %phi20(count) <- $27
>>>         br          .L7
>>>
>>> .L10:
>>>         phi.32      %r3 <- %phi18(a), %phi19(a)
>>>         add.32      %r4 <- %r3, $1
>>>         phisrc.32   %phi17(a) <- %r4
>>>         phi.32      %r5 <- %phi16(a), %phi17(a)
>>> ...
>>>
>>> It seems to me that 'a' ends up having 9 different slots on the stack,
>>> but it should really be merged into one slot, i.e. we should detect
>>> that the phis all related to the same symbol, and create only one
>>> alloca?
>>
>>

What is also interesting is that in the old way by switching off the
simplifications the generated code avoids all the phis and is actually
simpler in terms of memory operations in the backend. Now LLVM can
sort this out easily as it transforms this to use phis anyway, but my
other backend is simpler, and for that non SSA version is better.

Regards
Dibyendu

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2017-09-08 10:11 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-09-07 16:04 Sparse-llvm question regarding handling of phi/phsrc instructions Dibyendu Majumdar
2017-09-07 20:25 ` Luc Van Oostenryck
2017-09-07 20:33   ` Dibyendu Majumdar
2017-09-08 10:11     ` Dibyendu Majumdar

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).