* [PATCH 1/3] arm64: optimize mov_q assembler macro
From: Rémi Denis-Courmont @ 2020-03-04 9:36 UTC
To: linux-arm-kernel
From: Remi Denis-Courmont <remi.denis.courmont@huawei.com>
This reduces the number of MOVZ and MOVK instructions needed to expand
a constant. It makes several changes:

1) Save one instruction for unsigned values of exactly 32 or 48
significant bits, by using an unsigned MOV relocation rather than a
signed one.

2) Save one instruction for unsigned values of 16 bits or fewer, by not
treating them as 32-bit values.

3) Skip any redundant MOVK instruction whose 16-bit immediate would be
zero.

4) Use MOVN whenever it saves one or more MOVK instructions over MOVZ.

Note that the assembler uses -1 as the truth value (not +1 as in C), so
each comparison in the leading .if evaluates to -1 when true; the two
sums thus count matching halfwords negatively, and the <= selects the
MOVZ path whenever the value has at least as many zero halfwords as
0xffff halfwords.
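For illustration (not part of the patch), a few expansions the updated
macro is expected to produce, assuming GAS resolves the :abs_gN:
relocations as usual:

    mov_q x0, 0x1234             // movz x0, #0x1234                 (1 insn, was 2)
    mov_q x0, 0x89abcdef         // movz :abs_g1: + movk :abs_g0_nc:      (2, was 3)
    mov_q x0, 0x1000000000001    // movz :abs_g3: + movk :abs_g0_nc:      (2, was 4)
    mov_q x0, 0xfffffffffffffffe // movn x0, #0x1                         (1, was 2)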
Signed-off-by: Remi Denis-Courmont <remi.denis.courmont@huawei.com>
---
arch/arm64/include/asm/assembler.h | 45 ++++++++++++++++++++++++++----
1 file changed, 40 insertions(+), 5 deletions(-)
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index af03001293c6..2b98e2c5c8a2 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -473,22 +473,57 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU
/*
* mov_q - move an immediate constant into a 64-bit register using
- * between 2 and 4 movz/movk instructions (depending on the
+ * between 1 and 4 mov/movk instructions (depending on the
* magnitude and sign of the operand)
*/
.macro mov_q, reg, val
- .if (((\val) >> 31) == 0 || ((\val) >> 31) == 0x1ffffffff)
- movz \reg, :abs_g1_s:\val
+ .if ((((\val) & 0xffff) == 0) + ((((\val) >> 16) & 0xffff) == 0) + ((((\val) >> 32) & 0xffff) == 0) + ((((\val) >> 48) & 0xffff) == 0) <= (((\val) & 0xffff) == 0xffff) + ((((\val) >> 16) & 0xffff) == 0xffff) + ((((\val) >> 32) & 0xffff) == 0xffff) + ((((\val) >> 48) & 0xffff) == 0xffff))
+ .if (((\val) >> 16) == 0)
+ movz \reg, :abs_g0:\val
.else
- .if (((\val) >> 47) == 0 || ((\val) >> 47) == 0x1ffff)
- movz \reg, :abs_g2_s:\val
+ .if (((\val) >> 32) == 0)
+ movz \reg, :abs_g1:\val
+ .else
+ .if (((\val) >> 48) == 0)
+ movz \reg, :abs_g2:\val
.else
movz \reg, :abs_g3:\val
+ .if ((((\val) >> 32) & 0xffff) != 0)
movk \reg, :abs_g2_nc:\val
.endif
+ .endif
+ .if ((((\val) >> 16) & 0xffff) != 0)
movk \reg, :abs_g1_nc:\val
.endif
+ .endif
+ .if (((\val) & 0xffff) != 0)
movk \reg, :abs_g0_nc:\val
+ .endif
+ .endif
+ .else
+ .if ((((\val) >> 16) & 0xffffffffffff) == 0xffffffffffff)
+ movn \reg, :abs_g0:~\val
+ .else
+ .if ((((\val) >> 32) & 0xffffffff) == 0xffffffff)
+ movn \reg, :abs_g1:~\val
+ .else
+ .if ((((\val) >> 48) & 0xffff) == 0xffff)
+ movn \reg, :abs_g2:~\val
+ .else
+ movn \reg, :abs_g3:~\val
+ .if ((((\val) >> 32) & 0xffff) != 0xffff)
+ movk \reg, :abs_g2_nc:\val
+ .endif
+ .endif
+ .if ((((\val) >> 16) & 0xffff) != 0xffff)
+ movk \reg, :abs_g1_nc:\val
+ .endif
+ .endif
+ .if (((\val) & 0xffff) != 0xffff)
+ movk \reg, :abs_g0_nc:\val
+ .endif
+ .endif
+ .endif
.endm
/*
--
2.25.1
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH 1/3] arm64: optimize mov_q assembler macro
From: Mark Rutland @ 2020-03-04 14:34 UTC
To: Rémi Denis-Courmont
Cc: catalin.marinas, will, james.morse, linux-arm-kernel,
ard.biesheuvel
[adding arm64 folk]
When sending a series like this, please send a cover letter, and thread
the patches in reply to it.

Use `git format-patch --cover-letter` to generate one; `git send-email`
should then thread the patches that way by default.
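For example (the output directory and patch count are illustrative):

    git format-patch --cover-letter -3 -o outgoing/
    git send-email outgoing/*.patch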
On Wed, Mar 04, 2020 at 11:36:29AM +0200, Rémi Denis-Courmont wrote:
> From: Remi Denis-Courmont <remi.denis.courmont@huawei.com>
>
> This reduces the number of MOVZ and MOVK instructions needed to expand
> a constant. It makes several changes:
>
> 1) Save one instruction for unsigned values of exactly 32 or 48
> significant bits, by using an unsigned MOV relocation rather than a
> signed one.
>
> 2) Save one instruction for unsigned values of 16 bits or fewer, by not
> treating them as 32-bit values.
>
> 3) Skip any redundant MOVK instruction whose 16-bit immediate would be
> zero.
>
> 4) Use MOVN whenever it saves one or more MOVK instructions over MOVZ.
>
> Note that the assembler uses -1 as the truth value (not +1 as in C), so
> each comparison in the leading .if evaluates to -1 when true; the two
> sums thus count matching halfwords negatively, and the <= selects the
> MOVZ path whenever the value has at least as many zero halfwords as
> 0xffff halfwords.
>
> Signed-off-by: Remi Denis-Courmont <remi.denis.courmont@huawei.com>
To be honest, I don't think this is worthwhile given how complex it
makes the mov_q macro.
Given the places we use mov_q today, I don't think this is going to give
any measurable performance improvement, but it will make debugging more
painful than necessary.
Thanks,
Mark
* Re: [PATCH 1/3] arm64: optimize mov_q assembler macro
From: Ard Biesheuvel @ 2020-03-04 15:30 UTC
To: Mark Rutland
Cc: Catalin Marinas, Will Deacon, James Morse, linux-arm-kernel,
Rémi Denis-Courmont
On Wed, 4 Mar 2020 at 15:34, Mark Rutland <mark.rutland@arm.com> wrote:
>
> [adding arm64 folk]
>
> When sending a series like this, please send a cover letter, and thread
> the patches in reply to it.
>
> Use `git format-patch --cover-letter` to generate one; `git send-email`
> should then thread the patches that way by default.
>
> To be honest, I don't think this is worthwhile given how complex it
> makes the mov_q macro.
>
> Given the places we use mov_q today, I don't think this is going to give
> any measurable performance improvement, but it will make debugging more
> painful than necessary.
>
Agreed. For macros this complex, you'd better have some test code that
we can run to prove its correctness. But actually, this is something
we should bring up with the toolchain people: it would be much more
useful to have a pseudo-op for this that we can use everywhere, which
would resolve to the 4-instruction sequence (and the associated ELF
relocations) by default, and get optimized into something smaller by
the assembler if the value is known at that point.
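As a rough sketch of what such a self-test could look like (the
check_mov_q macro, the failure label, and the use of an ldr literal-pool
load as the reference value are all hypothetical):

    /* Compare mov_q against a known-good literal-pool load. */
    .macro check_mov_q val
    mov_q   x0, \val
    ldr     x1, =\val               // reference: 64-bit literal load
    cmp     x0, x1
    b.ne    mov_q_selftest_failed   // hypothetical failure handler
    .endm

    check_mov_q 0x1234
    check_mov_q 0x89abcdef
    check_mov_q 0xfffffffffffffffe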