All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] Reduce assembly code size of exception entry points
@ 2024-02-14 10:35 Frediano Ziglio
  2024-02-14 13:56 ` Jan Beulich
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Frediano Ziglio @ 2024-02-14 10:35 UTC (permalink / raw)
  To: xen-devel
  Cc: Frediano Ziglio, Jan Beulich, Andrew Cooper, Roger Pau Monné,
	Wei Liu

We just pushed an 8-byte zero and the exception constants are
small, so we can write just a single byte, saving 3 bytes per
instruction.
With ENDBR64 this reduces the size of many entry points from 32 to
16 bytes (due to alignment).
Similar code is already used in autogen_stubs.

Signed-off-by: Frediano Ziglio <frediano.ziglio@cloud.com>
--
v2:
- added missing entry points;
- add mention to autogen_stubs code, as suggested.
---
 xen/arch/x86/x86_64/entry.S | 40 ++++++++++++++++++-------------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index ecdd6e5b47..a28a0d4044 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -389,7 +389,7 @@ FUNC(entry_int80)
         ENDBR64
         ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP
         pushq $0
-        movl  $0x80, 4(%rsp)
+        movb  $0x80, 4(%rsp)
         SAVE_ALL
 
         SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */
@@ -653,7 +653,7 @@ END(ret_from_intr)
         .section .init.text, "ax", @progbits
 FUNC(early_page_fault)
         ENDBR64
-        movl  $X86_EXC_PF, 4(%rsp)
+        movb  $X86_EXC_PF, 4(%rsp)
         SAVE_ALL
         movq  %rsp, %rdi
         call  do_early_page_fault
@@ -898,105 +898,105 @@ END(handle_exception)
 FUNC(entry_DE)
         ENDBR64
         pushq $0
-        movl  $X86_EXC_DE, 4(%rsp)
+        movb  $X86_EXC_DE, 4(%rsp)
         jmp   handle_exception
 END(entry_DE)
 
 FUNC(entry_MF)
         ENDBR64
         pushq $0
-        movl  $X86_EXC_MF, 4(%rsp)
+        movb  $X86_EXC_MF, 4(%rsp)
         jmp   handle_exception
 END(entry_MF)
 
 FUNC(entry_XM)
         ENDBR64
         pushq $0
-        movl  $X86_EXC_XM, 4(%rsp)
+        movb  $X86_EXC_XM, 4(%rsp)
         jmp   handle_exception
 END(entry_XM)
 
 FUNC(entry_NM)
         ENDBR64
         pushq $0
-        movl  $X86_EXC_NM, 4(%rsp)
+        movb  $X86_EXC_NM, 4(%rsp)
         jmp   handle_exception
 END(entry_NM)
 
 FUNC(entry_DB)
         ENDBR64
         pushq $0
-        movl  $X86_EXC_DB, 4(%rsp)
+        movb  $X86_EXC_DB, 4(%rsp)
         jmp   handle_ist_exception
 END(entry_DB)
 
 FUNC(entry_BP)
         ENDBR64
         pushq $0
-        movl  $X86_EXC_BP, 4(%rsp)
+        movb  $X86_EXC_BP, 4(%rsp)
         jmp   handle_exception
 END(entry_BP)
 
 FUNC(entry_OF)
         ENDBR64
         pushq $0
-        movl  $X86_EXC_OF, 4(%rsp)
+        movb  $X86_EXC_OF, 4(%rsp)
         jmp   handle_exception
 END(entry_OF)
 
 FUNC(entry_BR)
         ENDBR64
         pushq $0
-        movl  $X86_EXC_BR, 4(%rsp)
+        movb  $X86_EXC_BR, 4(%rsp)
         jmp   handle_exception
 END(entry_BR)
 
 FUNC(entry_UD)
         ENDBR64
         pushq $0
-        movl  $X86_EXC_UD, 4(%rsp)
+        movb  $X86_EXC_UD, 4(%rsp)
         jmp   handle_exception
 END(entry_UD)
 
 FUNC(entry_TS)
         ENDBR64
-        movl  $X86_EXC_TS, 4(%rsp)
+        movb  $X86_EXC_TS, 4(%rsp)
         jmp   handle_exception
 END(entry_TS)
 
 FUNC(entry_NP)
         ENDBR64
-        movl  $X86_EXC_NP, 4(%rsp)
+        movb  $X86_EXC_NP, 4(%rsp)
         jmp   handle_exception
 END(entry_NP)
 
 FUNC(entry_SS)
         ENDBR64
-        movl  $X86_EXC_SS, 4(%rsp)
+        movb  $X86_EXC_SS, 4(%rsp)
         jmp   handle_exception
 END(entry_SS)
 
 FUNC(entry_GP)
         ENDBR64
-        movl  $X86_EXC_GP, 4(%rsp)
+        movb  $X86_EXC_GP, 4(%rsp)
         jmp   handle_exception
 END(entry_GP)
 
 FUNC(entry_AC)
         ENDBR64
-        movl  $X86_EXC_AC, 4(%rsp)
+        movb  $X86_EXC_AC, 4(%rsp)
         jmp   handle_exception
 END(entry_AC)
 
 FUNC(entry_CP)
         ENDBR64
-        movl  $X86_EXC_CP, 4(%rsp)
+        movb  $X86_EXC_CP, 4(%rsp)
         jmp   handle_exception
 END(entry_CP)
 
 FUNC(entry_DF)
         ENDBR64
-        movl  $X86_EXC_DF, 4(%rsp)
+        movb  $X86_EXC_DF, 4(%rsp)
         /* Set AC to reduce chance of further SMAP faults */
         ALTERNATIVE "", stac, X86_FEATURE_XEN_SMAP
         SAVE_ALL
@@ -1022,7 +1022,7 @@ END(entry_DF)
 FUNC(entry_NMI)
         ENDBR64
         pushq $0
-        movl  $X86_EXC_NMI, 4(%rsp)
+        movb  $X86_EXC_NMI, 4(%rsp)
 END(entry_NMI)
 
 FUNC(handle_ist_exception)
@@ -1158,7 +1158,7 @@ END(handle_ist_exception)
 FUNC(entry_MC)
         ENDBR64
         pushq $0
-        movl  $X86_EXC_MC, 4(%rsp)
+        movb  $X86_EXC_MC, 4(%rsp)
         jmp   handle_ist_exception
 END(entry_MC)
 
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] Reduce assembly code size of exception entry points
  2024-02-14 10:35 [PATCH v2] Reduce assembly code size of exception entry points Frediano Ziglio
@ 2024-02-14 13:56 ` Jan Beulich
  2024-02-14 14:20 ` Jan Beulich
  2024-02-14 15:02 ` Roger Pau Monné
  2 siblings, 0 replies; 8+ messages in thread
From: Jan Beulich @ 2024-02-14 13:56 UTC (permalink / raw)
  To: Frediano Ziglio; +Cc: Andrew Cooper, Roger Pau Monné, xen-devel, Wei Liu

On 14.02.2024 11:35, Frediano Ziglio wrote:
> We just pushed a 8-bytes zero

This part is now somewhat stale.

> and exception constants are
> small so we can just write a single byte saving 3 bytes for
> instruction.
> With ENDBR64 this reduces the size of many entry points from 32 to
> 16 bytes (due to alignment).
> Similar code is already used in autogen_stubs.
> 
> Signed-off-by: Frediano Ziglio <frediano.ziglio@cloud.com>
> --
> v2:
> - added missing entry points;

What about entry_int82?

> @@ -653,7 +653,7 @@ END(ret_from_intr)
>          .section .init.text, "ax", @progbits
>  FUNC(early_page_fault)
>          ENDBR64
> -        movl  $X86_EXC_PF, 4(%rsp)
> +        movb  $X86_EXC_PF, 4(%rsp)
>          SAVE_ALL
>          movq  %rsp, %rdi
>          call  do_early_page_fault

Between this and the next hunk there's also entry_PF.

Jan

> @@ -898,105 +898,105 @@ END(handle_exception)
>  FUNC(entry_DE)
>          ENDBR64
>          pushq $0
> -        movl  $X86_EXC_DE, 4(%rsp)
> +        movb  $X86_EXC_DE, 4(%rsp)
>          jmp   handle_exception
>  END(entry_DE)
>  
>  FUNC(entry_MF)
>          ENDBR64
>          pushq $0
> -        movl  $X86_EXC_MF, 4(%rsp)
> +        movb  $X86_EXC_MF, 4(%rsp)
>          jmp   handle_exception
>  END(entry_MF)
>  
>  FUNC(entry_XM)
>          ENDBR64
>          pushq $0
> -        movl  $X86_EXC_XM, 4(%rsp)
> +        movb  $X86_EXC_XM, 4(%rsp)
>          jmp   handle_exception
>  END(entry_XM)
>  
>  FUNC(entry_NM)
>          ENDBR64
>          pushq $0
> -        movl  $X86_EXC_NM, 4(%rsp)
> +        movb  $X86_EXC_NM, 4(%rsp)
>          jmp   handle_exception
>  END(entry_NM)
>  
>  FUNC(entry_DB)
>          ENDBR64
>          pushq $0
> -        movl  $X86_EXC_DB, 4(%rsp)
> +        movb  $X86_EXC_DB, 4(%rsp)
>          jmp   handle_ist_exception
>  END(entry_DB)
>  
>  FUNC(entry_BP)
>          ENDBR64
>          pushq $0
> -        movl  $X86_EXC_BP, 4(%rsp)
> +        movb  $X86_EXC_BP, 4(%rsp)
>          jmp   handle_exception
>  END(entry_BP)
>  
>  FUNC(entry_OF)
>          ENDBR64
>          pushq $0
> -        movl  $X86_EXC_OF, 4(%rsp)
> +        movb  $X86_EXC_OF, 4(%rsp)
>          jmp   handle_exception
>  END(entry_OF)
>  
>  FUNC(entry_BR)
>          ENDBR64
>          pushq $0
> -        movl  $X86_EXC_BR, 4(%rsp)
> +        movb  $X86_EXC_BR, 4(%rsp)
>          jmp   handle_exception
>  END(entry_BR)
>  
>  FUNC(entry_UD)
>          ENDBR64
>          pushq $0
> -        movl  $X86_EXC_UD, 4(%rsp)
> +        movb  $X86_EXC_UD, 4(%rsp)
>          jmp   handle_exception
>  END(entry_UD)
>  
>  FUNC(entry_TS)
>          ENDBR64
> -        movl  $X86_EXC_TS, 4(%rsp)
> +        movb  $X86_EXC_TS, 4(%rsp)
>          jmp   handle_exception
>  END(entry_TS)
>  
>  FUNC(entry_NP)
>          ENDBR64
> -        movl  $X86_EXC_NP, 4(%rsp)
> +        movb  $X86_EXC_NP, 4(%rsp)
>          jmp   handle_exception
>  END(entry_NP)
>  
>  FUNC(entry_SS)
>          ENDBR64
> -        movl  $X86_EXC_SS, 4(%rsp)
> +        movb  $X86_EXC_SS, 4(%rsp)
>          jmp   handle_exception
>  END(entry_SS)
>  
>  FUNC(entry_GP)
>          ENDBR64
> -        movl  $X86_EXC_GP, 4(%rsp)
> +        movb  $X86_EXC_GP, 4(%rsp)
>          jmp   handle_exception
>  END(entry_GP)
>  
>  FUNC(entry_AC)
>          ENDBR64
> -        movl  $X86_EXC_AC, 4(%rsp)
> +        movb  $X86_EXC_AC, 4(%rsp)
>          jmp   handle_exception
>  END(entry_AC)
>  
>  FUNC(entry_CP)
>          ENDBR64
> -        movl  $X86_EXC_CP, 4(%rsp)
> +        movb  $X86_EXC_CP, 4(%rsp)
>          jmp   handle_exception
>  END(entry_CP)
>  
>  FUNC(entry_DF)
>          ENDBR64
> -        movl  $X86_EXC_DF, 4(%rsp)
> +        movb  $X86_EXC_DF, 4(%rsp)
>          /* Set AC to reduce chance of further SMAP faults */
>          ALTERNATIVE "", stac, X86_FEATURE_XEN_SMAP
>          SAVE_ALL
> @@ -1022,7 +1022,7 @@ END(entry_DF)
>  FUNC(entry_NMI)
>          ENDBR64
>          pushq $0
> -        movl  $X86_EXC_NMI, 4(%rsp)
> +        movb  $X86_EXC_NMI, 4(%rsp)
>  END(entry_NMI)
>  
>  FUNC(handle_ist_exception)
> @@ -1158,7 +1158,7 @@ END(handle_ist_exception)
>  FUNC(entry_MC)
>          ENDBR64
>          pushq $0
> -        movl  $X86_EXC_MC, 4(%rsp)
> +        movb  $X86_EXC_MC, 4(%rsp)
>          jmp   handle_ist_exception
>  END(entry_MC)
>  



^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] Reduce assembly code size of exception entry points
  2024-02-14 10:35 [PATCH v2] Reduce assembly code size of exception entry points Frediano Ziglio
  2024-02-14 13:56 ` Jan Beulich
@ 2024-02-14 14:20 ` Jan Beulich
  2024-02-14 15:02 ` Roger Pau Monné
  2 siblings, 0 replies; 8+ messages in thread
From: Jan Beulich @ 2024-02-14 14:20 UTC (permalink / raw)
  To: Frediano Ziglio; +Cc: Andrew Cooper, Roger Pau Monné, xen-devel, Wei Liu

On 14.02.2024 11:35, Frediano Ziglio wrote:
> @@ -898,105 +898,105 @@ END(handle_exception)
>  FUNC(entry_DE)
>          ENDBR64
>          pushq $0
> -        movl  $X86_EXC_DE, 4(%rsp)
> +        movb  $X86_EXC_DE, 4(%rsp)

As we're trying to compact things: This writes 0 over the previously
pushed 0. The insn therefore could be replaced by
"BUILD_BUG_ON X86_EXC_DE".

Jan


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] Reduce assembly code size of exception entry points
  2024-02-14 10:35 [PATCH v2] Reduce assembly code size of exception entry points Frediano Ziglio
  2024-02-14 13:56 ` Jan Beulich
  2024-02-14 14:20 ` Jan Beulich
@ 2024-02-14 15:02 ` Roger Pau Monné
  2024-02-14 15:08   ` Jan Beulich
  2 siblings, 1 reply; 8+ messages in thread
From: Roger Pau Monné @ 2024-02-14 15:02 UTC (permalink / raw)
  To: Frediano Ziglio; +Cc: xen-devel, Jan Beulich, Andrew Cooper, Wei Liu

On Wed, Feb 14, 2024 at 10:35:58AM +0000, Frediano Ziglio wrote:
> We just pushed a 8-bytes zero and exception constants are
> small so we can just write a single byte saving 3 bytes for
> instruction.
> With ENDBR64 this reduces the size of many entry points from 32 to
> 16 bytes (due to alignment).
> Similar code is already used in autogen_stubs.

Will using movb instead of movl have any performance impact?  I don't
think we should trade speed for code size, so this needs to be
mentioned in the commit message.

Thanks, Roger.


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] Reduce assembly code size of exception entry points
  2024-02-14 15:02 ` Roger Pau Monné
@ 2024-02-14 15:08   ` Jan Beulich
  2024-02-14 15:29     ` Roger Pau Monné
  0 siblings, 1 reply; 8+ messages in thread
From: Jan Beulich @ 2024-02-14 15:08 UTC (permalink / raw)
  To: Roger Pau Monné; +Cc: xen-devel, Andrew Cooper, Wei Liu, Frediano Ziglio

On 14.02.2024 16:02, Roger Pau Monné wrote:
> On Wed, Feb 14, 2024 at 10:35:58AM +0000, Frediano Ziglio wrote:
>> We just pushed a 8-bytes zero and exception constants are
>> small so we can just write a single byte saving 3 bytes for
>> instruction.
>> With ENDBR64 this reduces the size of many entry points from 32 to
>> 16 bytes (due to alignment).
>> Similar code is already used in autogen_stubs.
> 
> Will using movb instead of movl have any performance impact?  I don't
> think we should trade speed for code size, so this needs to be
> mentioned in the commit message.

That's really what the last sentence is about (it could have been said
more explicitly though): If doing so on interrupt paths is fine, it
ought to be fine on exception paths as well. Plus, no matter what, we
have two overlapping stores in every one of these places. But yes,
their sizes may still be relevant to the overall result.

Jan


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] Reduce assembly code size of exception entry points
  2024-02-14 15:08   ` Jan Beulich
@ 2024-02-14 15:29     ` Roger Pau Monné
  2024-02-14 15:53       ` Andrew Cooper
  0 siblings, 1 reply; 8+ messages in thread
From: Roger Pau Monné @ 2024-02-14 15:29 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Andrew Cooper, Wei Liu, Frediano Ziglio

On Wed, Feb 14, 2024 at 04:08:12PM +0100, Jan Beulich wrote:
> On 14.02.2024 16:02, Roger Pau Monné wrote:
> > On Wed, Feb 14, 2024 at 10:35:58AM +0000, Frediano Ziglio wrote:
> >> We just pushed a 8-bytes zero and exception constants are
> >> small so we can just write a single byte saving 3 bytes for
> >> instruction.
> >> With ENDBR64 this reduces the size of many entry points from 32 to
> >> 16 bytes (due to alignment).
> >> Similar code is already used in autogen_stubs.
> > 
> > Will using movb instead of movl have any performance impact?  I don't
> > think we should trade speed for code size, so this needs to be
> > mentioned in the commit message.
> 
> That's really what the last sentence is about (it could have been said
> more explicitly though): If doing so on interrupt paths is fine, it
> ought to be fine on exception paths as well.

I might view it the other way around: maybe it's autogen_stubs that
needs changing to use movl instead of movb for performance reasons?

I think this needs to be clearly stated, and ideally some kind of
benchmarks should be provided to demonstrate no performance change if
there are doubts whether movl and movb might perform differently.

Thanks, Roger.


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] Reduce assembly code size of exception entry points
  2024-02-14 15:29     ` Roger Pau Monné
@ 2024-02-14 15:53       ` Andrew Cooper
  2024-02-14 16:05         ` Roger Pau Monné
  0 siblings, 1 reply; 8+ messages in thread
From: Andrew Cooper @ 2024-02-14 15:53 UTC (permalink / raw)
  To: Roger Pau Monné, Jan Beulich; +Cc: xen-devel, Wei Liu, Frediano Ziglio

On 14/02/2024 3:29 pm, Roger Pau Monné wrote:
> On Wed, Feb 14, 2024 at 04:08:12PM +0100, Jan Beulich wrote:
>> On 14.02.2024 16:02, Roger Pau Monné wrote:
>>> On Wed, Feb 14, 2024 at 10:35:58AM +0000, Frediano Ziglio wrote:
>>>> We just pushed a 8-bytes zero and exception constants are
>>>> small so we can just write a single byte saving 3 bytes for
>>>> instruction.
>>>> With ENDBR64 this reduces the size of many entry points from 32 to
>>>> 16 bytes (due to alignment).
>>>> Similar code is already used in autogen_stubs.
>>> Will using movb instead of movl have any performance impact?  I don't
>>> think we should trade speed for code size, so this needs to be
>>> mentioned in the commit message.
>> That's really what the last sentence is about (it could have been said
>> more explicitly though): If doing so on interrupt paths is fine, it
>> ought to be fine on exception paths as well.
> I might view it the other way around: maybe it's autogen_stubs that
> needs changing to use movl instead of movb for performance reasons?
>
> I think this needs to be clearly stated, and ideally some kind of
> benchmarks should be provided to demonstrate no performance change if
> there are doubts whether movl and movb might perform differently.

The push and the mov are overlapping stores either way.  Swapping
between movl and movb will make no difference at all.

However, the shorter instruction ends up halving the size of the entry
stub when alignment is considered, and that will make a marginal
difference.  Fewer cache misses (to a first approximation, even #PF will
be L1-cold), and better utilisation of branch prediction resource (~>
less likely to be BP-cold).

I doubt you'll be able to see a difference without perf counters
(whatever difference is covered here will be dwarfed by the speculation
workarounds), but a marginal win is still a win.

~Andrew


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] Reduce assembly code size of exception entry points
  2024-02-14 15:53       ` Andrew Cooper
@ 2024-02-14 16:05         ` Roger Pau Monné
  0 siblings, 0 replies; 8+ messages in thread
From: Roger Pau Monné @ 2024-02-14 16:05 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Jan Beulich, xen-devel, Wei Liu, Frediano Ziglio

On Wed, Feb 14, 2024 at 03:53:24PM +0000, Andrew Cooper wrote:
> On 14/02/2024 3:29 pm, Roger Pau Monné wrote:
> > On Wed, Feb 14, 2024 at 04:08:12PM +0100, Jan Beulich wrote:
> >> On 14.02.2024 16:02, Roger Pau Monné wrote:
> >>> On Wed, Feb 14, 2024 at 10:35:58AM +0000, Frediano Ziglio wrote:
> >>>> We just pushed a 8-bytes zero and exception constants are
> >>>> small so we can just write a single byte saving 3 bytes for
> >>>> instruction.
> >>>> With ENDBR64 this reduces the size of many entry points from 32 to
> >>>> 16 bytes (due to alignment).
> >>>> Similar code is already used in autogen_stubs.
> >>> Will using movb instead of movl have any performance impact?  I don't
> >>> think we should trade speed for code size, so this needs to be
> >>> mentioned in the commit message.
> >> That's really what the last sentence is about (it could have been said
> >> more explicitly though): If doing so on interrupt paths is fine, it
> >> ought to be fine on exception paths as well.
> > I might view it the other way around: maybe it's autogen_stubs that
> > needs changing to use movl instead of movb for performance reasons?
> >
> > I think this needs to be clearly stated, and ideally some kind of
> > benchmarks should be provided to demonstrate no performance change if
> > there are doubts whether movl and movb might perform differently.
> 
> The push and the mov are overlapping stores either way.  Swapping
> between movl and movb will make no difference at all.
> 
> However, the shorter instruction ends up halving the size of the entry
> stub when alignment is considered, and that will make a marginal
> difference.  Fewer cache misses (to a first approximation, even #PF will
> be L1-cold), and better utilisation of branch prediction resource (~>
> less likely to be BP-cold).
> 
> I doubt you'll be able to see a difference without perf counters
> (whatever difference is covered here will be dwarfed by the speculation
> workarounds), but a marginal win is still a win.

I'm happy just stating in the commit message that the change doesn't
make any performance difference.

Thanks, Roger.


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2024-02-14 16:06 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-02-14 10:35 [PATCH v2] Reduce assembly code size of exception entry points Frediano Ziglio
2024-02-14 13:56 ` Jan Beulich
2024-02-14 14:20 ` Jan Beulich
2024-02-14 15:02 ` Roger Pau Monné
2024-02-14 15:08   ` Jan Beulich
2024-02-14 15:29     ` Roger Pau Monné
2024-02-14 15:53       ` Andrew Cooper
2024-02-14 16:05         ` Roger Pau Monné

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.