linux-arm-kernel.lists.infradead.org archive mirror
* [PATCH 1/3] arm64: optimize mov_q assembler macro
From: Rémi Denis-Courmont @ 2020-03-04  9:36 UTC
  To: linux-arm-kernel

From: Remi Denis-Courmont <remi.denis.courmont@huawei.com>

This reduces the number of MOVZ and MOVK instructions needed to expand
a constant. It makes several changes:

1) Save one instruction for exactly 32- or 48-bit unsigned values, by
using an unsigned MOV rather than a signed MOV.

2) Save one instruction for 16-bit (or smaller) unsigned values, by not
treating them as 32-bit values.

3) Skip any redundant MOVK instruction whose 16-bit immediate would be
zero.

4) Use MOVN if it saves one or more MOVK instructions over MOVZ (see
the illustrative expansions below).

Note that the assembler uses -1 as its truth value (not +1 as in C):
the sums in the macro's first ".if" therefore count matching halfwords
negatively, so the "<=" comparison selects the MOVZ path whenever the
value has at least as many zero halfwords as 0xffff halfwords.
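
To make that concrete, a small sketch (the symbol names here are made
up for exposition and are not part of the patch):

	// GNU as evaluates a true comparison to -1, so summing four
	// halfword tests yields minus the number of matches.
	.equ	v, 0xffff00000000
	.equ	nzeros, ((v & 0xffff) == 0) + (((v >> 16) & 0xffff) == 0) + (((v >> 32) & 0xffff) == 0) + (((v >> 48) & 0xffff) == 0)
	// nzeros == -3: halfwords g0, g1 and g3 of v are zero.

And a few illustrative expansions (x0 is an arbitrary register, and
the immediates below show the decoded result; the macro itself emits
:abs_gN: relocations for the assembler to resolve):

	// mov_q x0, 0x1234: fits in 16 bits, single MOVZ (change 2);
	// the old macro emitted MOVZ+MOVK here.
	movz	x0, #0x1234			// :abs_g0:

	// mov_q x0, 0xffff00000000: one non-zero halfword, so every
	// MOVK is skipped (changes 1 and 3).
	movz	x0, #0xffff, lsl #32		// :abs_g2:

	// mov_q x0, 0xfffffffffffffffe (i.e. -2): mostly-ones value,
	// so MOVN wins in a single instruction (change 4).
	movn	x0, #0x1			// :abs_g0:~val

	// mov_q x0, 0x0000ffffffffffff: three 0xffff halfwords; MOVN
	// of the inverted value, again with every MOVK skipped.
	movn	x0, #0xffff, lsl #48		// :abs_g3:~val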

Signed-off-by: Remi Denis-Courmont <remi.denis.courmont@huawei.com>
---
 arch/arm64/include/asm/assembler.h | 45 ++++++++++++++++++++++++++----
 1 file changed, 40 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index af03001293c6..2b98e2c5c8a2 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -473,22 +473,57 @@ USER(\label, ic	ivau, \tmp2)			// invalidate I line PoU
 
 	/*
 	 * mov_q - move an immediate constant into a 64-bit register using
-	 *         between 2 and 4 movz/movk instructions (depending on the
+	 *         between 1 and 4 mov/movk instructions (depending on the
 	 *         magnitude and sign of the operand)
 	 */
 	.macro	mov_q, reg, val
-	.if (((\val) >> 31) == 0 || ((\val) >> 31) == 0x1ffffffff)
-	movz	\reg, :abs_g1_s:\val
+	.if ((((\val) & 0xffff) == 0) + ((((\val) >> 16) & 0xffff) == 0) + ((((\val) >> 32) & 0xffff) == 0) + ((((\val) >> 48) & 0xffff) == 0) <= (((\val) & 0xffff) == 0xffff) + ((((\val) >> 16) & 0xffff) == 0xffff) + ((((\val) >> 32) & 0xffff) == 0xffff) + ((((\val) >> 48) & 0xffff) == 0xffff))
+	.if (((\val) >> 16) == 0)
+	movz	\reg, :abs_g0:\val
 	.else
-	.if (((\val) >> 47) == 0 || ((\val) >> 47) == 0x1ffff)
-	movz	\reg, :abs_g2_s:\val
+	.if (((\val) >> 32) == 0)
+	movz	\reg, :abs_g1:\val
+	.else
+	.if (((\val) >> 48) == 0)
+	movz	\reg, :abs_g2:\val
 	.else
 	movz	\reg, :abs_g3:\val
+	.if ((((\val) >> 32) & 0xffff) != 0)
 	movk	\reg, :abs_g2_nc:\val
 	.endif
+	.endif
+	.if ((((\val) >> 16) & 0xffff) != 0)
 	movk	\reg, :abs_g1_nc:\val
 	.endif
+	.endif
+	.if (((\val) & 0xffff) != 0)
 	movk	\reg, :abs_g0_nc:\val
+	.endif
+	.endif
+	.else
+	.if ((((\val) >> 16) & 0xffffffffffff) == 0xffffffffffff)
+	movn	\reg, :abs_g0:~\val
+	.else
+	.if ((((\val) >> 32) & 0xffffffff) == 0xffffffff)
+	movn	\reg, :abs_g1:~\val
+	.else
+	.if ((((\val) >> 48) & 0xffff) == 0xffff)
+	movn	\reg, :abs_g2:~\val
+	.else
+	movn	\reg, :abs_g3:~\val
+	.if ((((\val) >> 32) & 0xffff) != 0xffff)
+	movk	\reg, :abs_g2_nc:\val
+	.endif
+	.endif
+	.if ((((\val) >> 16) & 0xffff) != 0xffff)
+	movk	\reg, :abs_g1_nc:\val
+	.endif
+	.endif
+	.if (((\val) & 0xffff) != 0xffff)
+	movk	\reg, :abs_g0_nc:\val
+	.endif
+	.endif
+	.endif
 	.endm
 
 /*
-- 
2.25.1



* Re: [PATCH 1/3] arm64: optimize mov_q assembler macro
From: Mark Rutland @ 2020-03-04 14:34 UTC
  To: Rémi Denis-Courmont
  Cc: catalin.marinas, will, james.morse, linux-arm-kernel,
	ard.biesheuvel

[adding arm64 folk]

When sending a series like this, please send a cover letter, and thread
patches in reply to the cover letter.

Use `git format-patch --cover` to generate that, and `git send-email`
should thread things that way by default.

On Wed, Mar 04, 2020 at 11:36:29AM +0200, Rémi Denis-Courmont wrote:
> From: Remi Denis-Courmont <remi.denis.courmont@huawei.com>
> 
> This reduces the number of MOVZ and MOVK instructions needed to expand
> a constant. It makes several changes:
> 
> 1) Save one instruction for exactly 32- or 48-bit unsigned values, by
> using an unsigned MOV rather than a signed MOV.
> 
> 2) Save one instruction for 16-bit (or smaller) unsigned values, by not
> treating them as 32-bit values.
> 
> 3) Skip any redundant MOVK instruction whose 16-bit immediate would be
> zero.
> 
> 4) Use MOVN if it saves one or more MOVK instructions over MOVZ.
> 
> Note that the assembler uses -1 as its truth value (not +1 as in C).
> 
> Signed-off-by: Remi Denis-Courmont <remi.denis.courmont@huawei.com>

To be honest, I don't think this is worthwhile given how complex it
makes the mov_q macro.

Given the places we use mov_q today, I don't think this is going to give
any measurable performance improvement, but will make debugging more
painful than necessary.

Thanks,
Mark


* Re: [PATCH 1/3] arm64: optimize mov_q assembler macro
From: Ard Biesheuvel @ 2020-03-04 15:30 UTC
  To: Mark Rutland
  Cc: Catalin Marinas, Will Deacon, James Morse, linux-arm-kernel,
	Rémi Denis-Courmont

On Wed, 4 Mar 2020 at 15:34, Mark Rutland <mark.rutland@arm.com> wrote:
>
> [adding arm64 folk]
>
> When sending a series like this, please send a cover letter, and thread
> patches in reply to the cover letter.
>
> Use `git format-patch --cover` to generate that, and `git send-email`
> should thread things that way by default.
>
> On Wed, Mar 04, 2020 at 11:36:29AM +0200, Rémi Denis-Courmont wrote:
> > From: Remi Denis-Courmont <remi.denis.courmont@huawei.com>
> >
> > This reduces the number of MOVZ and MOVK instructions needed to expand
> > a constant. It makes several changes:
> >
> > 1) Save one instruction for exactly 32- or 48-bit unsigned values, by
> > using an unsigned MOV rather than a signed MOV.
> >
> > 2) Save one instruction for 16-bit (or smaller) unsigned values, by not
> > treating them as 32-bit values.
> >
> > 3) Skip any redundant MOVK instruction whose 16-bit immediate would be
> > zero.
> >
> > 4) Use MOVN if it saves one or more MOVK instructions over MOVZ.
> >
> > Note that the assembler uses -1 as its truth value (not +1 as in C).
> >
> > Signed-off-by: Remi Denis-Courmont <remi.denis.courmont@huawei.com>
>
> To be honest, I don't think this is worthwhile given how complex it
> makes the mov_q macro.
>
> Given the places we use mov_q today, I don't think this is going to give
> any measurable performance improvement, but will make debugging more
> painful than necessary.
>

Agreed. For a macro this complex, you'd better have some test code
that we can run to prove its correctness. But actually, this is
something we should bring up with the toolchain people: it would be
much more useful to have a pseudo-op for this that we can use
everywhere, which resolves to the 4-instruction sequence (and the
associated ELF relocations) by default, and which the assembler
optimizes into something smaller when the value is known at that
point.
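
A minimal sketch of such a test (everything here is hypothetical: the
check_mov_q macro, the mov_q_fail handler and the chosen constants are
made up, and no such selftest exists in the tree):

	// Expand mov_q for a constant, load the same constant from a
	// literal pool as the reference, and branch out on a mismatch.
	.macro	check_mov_q, val
	mov_q	x0, \val
	ldr	x1, =\val		// reference via literal pool
	cmp	x0, x1
	b.ne	mov_q_fail		// hypothetical failure handler
	.endm

	check_mov_q	0x1234			// single-MOVZ path
	check_mov_q	0xffff00000000		// MOVK-skipping path
	check_mov_q	0xfffffffffffffffe	// single-MOVN path
	check_mov_q	0x0123456789abcdef	// full 4-instruction path

Iterating that over all combinations of 0x0000/0xffff/other halfwords
would exercise every branch of the macro.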



