public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] x86: Rewrite ret_from_fork() in C
@ 2023-06-22 12:07 Brian Gerst
  2023-06-22 12:07 ` [PATCH 1/2] x86/32: Remove schedule_tail_wrapper() Brian Gerst
  2023-06-22 12:07 ` [PATCH 2/2] x86: Rewrite ret_from_fork() in C Brian Gerst
  0 siblings, 2 replies; 8+ messages in thread
From: Brian Gerst @ 2023-06-22 12:07 UTC (permalink / raw)
  To: linux-kernel, x86
  Cc: Thomas Gleixner, Borislav Petkov, H . Peter Anvin, Peter Zijlstra,
	Sami Tolvanen, alyssa.milburn, keescook, jpoimboe, joao,
	tim.c.chen, Brian Gerst

When kCFI is enabled, special handling is needed for the indirect call
to the kernel thread function.  Handling this in pure assembly is not
simple, so moving it to C is more appropriate.  Instead of moving just
the indirect call to C as Peter Zijlstra has proposed, this patchset
rewrites the whole ret_from_fork() function in C (other than some
necessary asm glue remaning).

Brian Gerst (2):
  x86/32: Remove schedule_tail_wrapper()
  x86: Rewrite ret_from_fork() in C

 arch/x86/entry/entry_32.S        | 54 ++++++++------------------------
 arch/x86/entry/entry_64.S        | 35 ++++++---------------
 arch/x86/include/asm/switch_to.h |  4 ++-
 arch/x86/kernel/process.c        | 22 ++++++++++++-
 4 files changed, 47 insertions(+), 68 deletions(-)

-- 
2.41.0


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 1/2] x86/32: Remove schedule_tail_wrapper()
  2023-06-22 12:07 [PATCH 0/2] x86: Rewrite ret_from_fork() in C Brian Gerst
@ 2023-06-22 12:07 ` Brian Gerst
  2023-06-22 12:07 ` [PATCH 2/2] x86: Rewrite ret_from_fork() in C Brian Gerst
  1 sibling, 0 replies; 8+ messages in thread
From: Brian Gerst @ 2023-06-22 12:07 UTC (permalink / raw)
  To: linux-kernel, x86
  Cc: Thomas Gleixner, Borislav Petkov, H . Peter Anvin, Peter Zijlstra,
	Sami Tolvanen, alyssa.milburn, keescook, jpoimboe, joao,
	tim.c.chen, Brian Gerst

The unwinder expects a return address at the very top of the kernel
stack just below pt_regs and before any stack frame is created.  Instead
of calling a wrapper, set up a return address as if ret_from_fork()
was called from the syscall entry code.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
---
 arch/x86/entry/entry_32.S | 32 ++++++++++----------------------
 1 file changed, 10 insertions(+), 22 deletions(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 91397f58ac30..6c1ee76adc11 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -719,26 +719,6 @@ SYM_CODE_START(__switch_to_asm)
 SYM_CODE_END(__switch_to_asm)
 .popsection
 
-/*
- * The unwinder expects the last frame on the stack to always be at the same
- * offset from the end of the page, which allows it to validate the stack.
- * Calling schedule_tail() directly would break that convention because its an
- * asmlinkage function so its argument has to be pushed on the stack.  This
- * wrapper creates a proper "end of stack" frame header before the call.
- */
-.pushsection .text, "ax"
-SYM_FUNC_START(schedule_tail_wrapper)
-	FRAME_BEGIN
-
-	pushl	%eax
-	call	schedule_tail
-	popl	%eax
-
-	FRAME_END
-	RET
-SYM_FUNC_END(schedule_tail_wrapper)
-.popsection
-
 /*
  * A newly forked process directly context switches into this address.
  *
@@ -748,7 +728,13 @@ SYM_FUNC_END(schedule_tail_wrapper)
  */
 .pushsection .text, "ax"
 SYM_CODE_START(ret_from_fork)
-	call	schedule_tail_wrapper
+	/* return address for the stack unwinder */
+	pushl	$.Lsyscall_32_done
+	FRAME_BEGIN
+
+	pushl	%eax
+	call	schedule_tail
+	addl	$4, %esp
 
 	testl	%ebx, %ebx
 	jnz	1f		/* kernel threads are uncommon */
@@ -757,7 +743,9 @@ SYM_CODE_START(ret_from_fork)
 	/* When we fork, we trace the syscall return in the child, too. */
 	movl    %esp, %eax
 	call    syscall_exit_to_user_mode
-	jmp     .Lsyscall_32_done
+
+	FRAME_END
+	RET
 
 	/* kernel thread */
 1:	movl	%edi, %eax
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/2] x86: Rewrite ret_from_fork() in C
  2023-06-22 12:07 [PATCH 0/2] x86: Rewrite ret_from_fork() in C Brian Gerst
  2023-06-22 12:07 ` [PATCH 1/2] x86/32: Remove schedule_tail_wrapper() Brian Gerst
@ 2023-06-22 12:07 ` Brian Gerst
  2023-06-22 13:29   ` Peter Zijlstra
  2023-06-23 18:12   ` Brian Gerst
  1 sibling, 2 replies; 8+ messages in thread
From: Brian Gerst @ 2023-06-22 12:07 UTC (permalink / raw)
  To: linux-kernel, x86
  Cc: Thomas Gleixner, Borislav Petkov, H . Peter Anvin, Peter Zijlstra,
	Sami Tolvanen, alyssa.milburn, keescook, jpoimboe, joao,
	tim.c.chen, Brian Gerst

When kCFI is enabled, special handling is needed for the indirect call
to the kernel thread function.  Rewrite the ret_from_fork() function in
C so that the compiler can properly handle the indirect call.

Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Brian Gerst <brgerst@gmail.com>
---
 arch/x86/entry/entry_32.S        | 30 +++++++--------------------
 arch/x86/entry/entry_64.S        | 35 +++++++++-----------------------
 arch/x86/include/asm/switch_to.h |  4 +++-
 arch/x86/kernel/process.c        | 22 +++++++++++++++++++-
 4 files changed, 41 insertions(+), 50 deletions(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 6c1ee76adc11..7932c14199fb 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -727,37 +727,21 @@ SYM_CODE_END(__switch_to_asm)
  * edi: kernel thread arg
  */
 .pushsection .text, "ax"
-SYM_CODE_START(ret_from_fork)
+SYM_CODE_START(ret_from_fork_asm)
 	/* return address for the stack unwinder */
 	pushl	$.Lsyscall_32_done
 	FRAME_BEGIN
 
-	pushl	%eax
-	call	schedule_tail
+	/* prev already in EAX */
+	movl	%esp, %edx	/* regs */
+	movl	%ebx, %ecx	/* fn */
+	pushl	%edi		/* fn_arg */
+	call	ret_from_fork
 	addl	$4, %esp
 
-	testl	%ebx, %ebx
-	jnz	1f		/* kernel threads are uncommon */
-
-2:
-	/* When we fork, we trace the syscall return in the child, too. */
-	movl    %esp, %eax
-	call    syscall_exit_to_user_mode
-
 	FRAME_END
 	RET
-
-	/* kernel thread */
-1:	movl	%edi, %eax
-	CALL_NOSPEC ebx
-	/*
-	 * A kernel thread is allowed to return here after successfully
-	 * calling kernel_execve().  Exit to userspace to complete the execve()
-	 * syscall.
-	 */
-	movl	$0, PT_EAX(%esp)
-	jmp	2b
-SYM_CODE_END(ret_from_fork)
+SYM_CODE_END(ret_from_fork_asm)
 .popsection
 
 SYM_ENTRY(__begin_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index f31e286c2977..5ee32e7e29e8 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -284,36 +284,21 @@ SYM_FUNC_END(__switch_to_asm)
  * r12: kernel thread arg
  */
 .pushsection .text, "ax"
-	__FUNC_ALIGN
-SYM_CODE_START_NOALIGN(ret_from_fork)
+SYM_CODE_START(ret_from_fork_asm)
 	UNWIND_HINT_END_OF_STACK
 	ANNOTATE_NOENDBR // copy_thread
 	CALL_DEPTH_ACCOUNT
-	movq	%rax, %rdi
-	call	schedule_tail			/* rdi: 'prev' task parameter */
-
-	testq	%rbx, %rbx			/* from kernel_thread? */
-	jnz	1f				/* kernel threads are uncommon */
 
-2:
-	UNWIND_HINT_REGS
-	movq	%rsp, %rdi
-	call	syscall_exit_to_user_mode	/* returns with IRQs disabled */
-	jmp	swapgs_restore_regs_and_return_to_usermode
+	/* return address for the stack unwinder */
+	pushq	$swapgs_restore_regs_and_return_to_usermode
+	UNWIND_HINT_FUNC
 
-1:
-	/* kernel thread */
-	UNWIND_HINT_END_OF_STACK
-	movq	%r12, %rdi
-	CALL_NOSPEC rbx
-	/*
-	 * A kernel thread is allowed to return here after successfully
-	 * calling kernel_execve().  Exit to userspace to complete the execve()
-	 * syscall.
-	 */
-	movq	$0, RAX(%rsp)
-	jmp	2b
-SYM_CODE_END(ret_from_fork)
+	movq	%rax, %rdi		/* prev */
+	movq	%rsp, %rsi		/* regs */
+	movq	%rbx, %rdx		/* fn */
+	movq	%r12, %rcx		/* fn_arg */
+	jmp	ret_from_fork
+SYM_CODE_END(ret_from_fork_asm)
 .popsection
 
 .macro DEBUG_ENTRY_ASSERT_IRQS_OFF
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 5c91305d09d2..f42dbf17f52b 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -12,7 +12,9 @@ struct task_struct *__switch_to_asm(struct task_struct *prev,
 __visible struct task_struct *__switch_to(struct task_struct *prev,
 					  struct task_struct *next);
 
-asmlinkage void ret_from_fork(void);
+asmlinkage void ret_from_fork_asm(void);
+__visible void ret_from_fork(struct task_struct *prev, struct pt_regs *regs,
+			     int (*fn)(void *), void *fn_arg);
 
 /*
  * This is the structure pointed to by thread.sp for an inactive task.  The
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index dac41a0072ea..f5dbfebac076 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -28,6 +28,7 @@
 #include <linux/static_call.h>
 #include <trace/events/power.h>
 #include <linux/hw_breakpoint.h>
+#include <linux/entry-common.h>
 #include <asm/cpu.h>
 #include <asm/apic.h>
 #include <linux/uaccess.h>
@@ -134,6 +135,25 @@ static int set_new_tls(struct task_struct *p, unsigned long tls)
 		return do_set_thread_area_64(p, ARCH_SET_FS, tls);
 }
 
+__visible noinstr void ret_from_fork(struct task_struct *prev, struct pt_regs *regs,
+				     int (*fn)(void *), void *fn_arg)
+{
+	schedule_tail(prev);
+
+	/* Is this a kernel thread? */
+	if (unlikely(fn)) {
+		fn(fn_arg);
+		/*
+		 * A kernel thread is allowed to return here after successfully
+		 * calling kernel_execve().  Exit to userspace to complete the
+		 * execve() syscall.
+		 */
+		regs->ax = 0;
+	}
+
+	syscall_exit_to_user_mode(regs);
+}
+
 int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 {
 	unsigned long clone_flags = args->flags;
@@ -149,7 +169,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 	frame = &fork_frame->frame;
 
 	frame->bp = encode_frame_pointer(childregs);
-	frame->ret_addr = (unsigned long) ret_from_fork;
+	frame->ret_addr = (unsigned long) ret_from_fork_asm;
 	p->thread.sp = (unsigned long) fork_frame;
 	p->thread.io_bitmap = NULL;
 	p->thread.iopl_warn = 0;
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] x86: Rewrite ret_from_fork() in C
  2023-06-22 12:07 ` [PATCH 2/2] x86: Rewrite ret_from_fork() in C Brian Gerst
@ 2023-06-22 13:29   ` Peter Zijlstra
  2023-06-22 16:04     ` Brian Gerst
  2023-06-23 18:12   ` Brian Gerst
  1 sibling, 1 reply; 8+ messages in thread
From: Peter Zijlstra @ 2023-06-22 13:29 UTC (permalink / raw)
  To: Brian Gerst
  Cc: linux-kernel, x86, Thomas Gleixner, Borislav Petkov,
	H . Peter Anvin, Sami Tolvanen, alyssa.milburn, keescook,
	jpoimboe, joao, tim.c.chen

On Thu, Jun 22, 2023 at 08:07:50AM -0400, Brian Gerst wrote:
> When kCFI is enabled, special handling is needed for the indirect call
> to the kernel thread function.  Rewrite the ret_from_fork() function in
> C so that the compiler can properly handle the indirect call.
> 
> Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> Signed-off-by: Brian Gerst <brgerst@gmail.com>

This is much nicer indeed. I'll take these patches into my series and
repost later today if you don't mind.

One little niggle below..

> ---

> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> index f31e286c2977..5ee32e7e29e8 100644
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -284,36 +284,21 @@ SYM_FUNC_END(__switch_to_asm)
>   * r12: kernel thread arg
>   */
>  .pushsection .text, "ax"
> +SYM_CODE_START(ret_from_fork_asm)
>  	UNWIND_HINT_END_OF_STACK
>  	ANNOTATE_NOENDBR // copy_thread
>  	CALL_DEPTH_ACCOUNT
>  
> +	/* return address for the stack unwinder */
> +	pushq	$swapgs_restore_regs_and_return_to_usermode
> +	UNWIND_HINT_FUNC
>  
> +	movq	%rax, %rdi		/* prev */
> +	movq	%rsp, %rsi		/* regs */
> +	movq	%rbx, %rdx		/* fn */
> +	movq	%r12, %rcx		/* fn_arg */
> +	jmp	ret_from_fork
> +SYM_CODE_END(ret_from_fork_asm)
>  .popsection
>  
>  .macro DEBUG_ENTRY_ASSERT_IRQS_OFF

> diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
> index dac41a0072ea..f5dbfebac076 100644
> --- a/arch/x86/kernel/process.c
> +++ b/arch/x86/kernel/process.c
> @@ -28,6 +28,7 @@
>  #include <linux/static_call.h>
>  #include <trace/events/power.h>
>  #include <linux/hw_breakpoint.h>
> +#include <linux/entry-common.h>
>  #include <asm/cpu.h>
>  #include <asm/apic.h>
>  #include <linux/uaccess.h>
> @@ -134,6 +135,25 @@ static int set_new_tls(struct task_struct *p, unsigned long tls)
>  		return do_set_thread_area_64(p, ARCH_SET_FS, tls);
>  }
>  
> +__visible noinstr void ret_from_fork(struct task_struct *prev, struct pt_regs *regs,
> +				     int (*fn)(void *), void *fn_arg)

So I had noinstr in my initial patch, but it leads to objtool
complaints. I suppose we can actually handle tracing and all the other
gunk at this point, so I've removed it.

The alternative is to use __noinstr_section(".text") if we really want
to suppress all the funnies.

> +{
> +	schedule_tail(prev);
> +
> +	/* Is this a kernel thread? */
> +	if (unlikely(fn)) {
> +		fn(fn_arg);
> +		/*
> +		 * A kernel thread is allowed to return here after successfully
> +		 * calling kernel_execve().  Exit to userspace to complete the
> +		 * execve() syscall.
> +		 */
> +		regs->ax = 0;
> +	}
> +
> +	syscall_exit_to_user_mode(regs);
> +}

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] x86: Rewrite ret_from_fork() in C
  2023-06-22 13:29   ` Peter Zijlstra
@ 2023-06-22 16:04     ` Brian Gerst
  2023-06-22 16:33       ` H. Peter Anvin
  0 siblings, 1 reply; 8+ messages in thread
From: Brian Gerst @ 2023-06-22 16:04 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, x86, Thomas Gleixner, Borislav Petkov,
	H . Peter Anvin, Sami Tolvanen, alyssa.milburn, keescook,
	jpoimboe, joao, tim.c.chen

On Thu, Jun 22, 2023 at 9:29 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Thu, Jun 22, 2023 at 08:07:50AM -0400, Brian Gerst wrote:
> > When kCFI is enabled, special handling is needed for the indirect call
> > to the kernel thread function.  Rewrite the ret_from_fork() function in
> > C so that the compiler can properly handle the indirect call.
> >
> > Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> > Signed-off-by: Brian Gerst <brgerst@gmail.com>
>
> This is much nicer indeed. I'll take these patches into my series and
> repost later today if you don't mind.

Yes, that's fine.

> One little niggle below..
>
> > ---
>
> > diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> > index f31e286c2977..5ee32e7e29e8 100644
> > --- a/arch/x86/entry/entry_64.S
> > +++ b/arch/x86/entry/entry_64.S
> > @@ -284,36 +284,21 @@ SYM_FUNC_END(__switch_to_asm)
> >   * r12: kernel thread arg
> >   */
> >  .pushsection .text, "ax"
> > +SYM_CODE_START(ret_from_fork_asm)
> >       UNWIND_HINT_END_OF_STACK
> >       ANNOTATE_NOENDBR // copy_thread
> >       CALL_DEPTH_ACCOUNT
> >
> > +     /* return address for the stack unwinder */
> > +     pushq   $swapgs_restore_regs_and_return_to_usermode
> > +     UNWIND_HINT_FUNC
> >
> > +     movq    %rax, %rdi              /* prev */
> > +     movq    %rsp, %rsi              /* regs */
> > +     movq    %rbx, %rdx              /* fn */
> > +     movq    %r12, %rcx              /* fn_arg */
> > +     jmp     ret_from_fork
> > +SYM_CODE_END(ret_from_fork_asm)
> >  .popsection
> >
> >  .macro DEBUG_ENTRY_ASSERT_IRQS_OFF
>
> > diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
> > index dac41a0072ea..f5dbfebac076 100644
> > --- a/arch/x86/kernel/process.c
> > +++ b/arch/x86/kernel/process.c
> > @@ -28,6 +28,7 @@
> >  #include <linux/static_call.h>
> >  #include <trace/events/power.h>
> >  #include <linux/hw_breakpoint.h>
> > +#include <linux/entry-common.h>
> >  #include <asm/cpu.h>
> >  #include <asm/apic.h>
> >  #include <linux/uaccess.h>
> > @@ -134,6 +135,25 @@ static int set_new_tls(struct task_struct *p, unsigned long tls)
> >               return do_set_thread_area_64(p, ARCH_SET_FS, tls);
> >  }
> >
> > +__visible noinstr void ret_from_fork(struct task_struct *prev, struct pt_regs *regs,
> > +                                  int (*fn)(void *), void *fn_arg)
>
> So I had noinstr in my initial patch, but it leads to objtool
> complaints. I suppose we can actually handle tracing and all the other
> gunk at this point, so I've removed it.

I'm not an expert on noinstr usage, but looking at the other syscall
functions, instrumentation needs to be disabled before
syscall_exit_to_user_mode() is called.  Perhaps adding an
instrumentation_begin()/instrumentation_end() pair to this function is
needed?

Brian Gerst

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] x86: Rewrite ret_from_fork() in C
  2023-06-22 16:04     ` Brian Gerst
@ 2023-06-22 16:33       ` H. Peter Anvin
  2023-06-22 17:33         ` Brian Gerst
  0 siblings, 1 reply; 8+ messages in thread
From: H. Peter Anvin @ 2023-06-22 16:33 UTC (permalink / raw)
  To: Brian Gerst, Peter Zijlstra, xin3.li
  Cc: linux-kernel, x86, Thomas Gleixner, Borislav Petkov,
	Sami Tolvanen, alyssa.milburn, keescook, jpoimboe, joao,
	tim.c.chen

On June 22, 2023 9:04:03 AM PDT, Brian Gerst <brgerst@gmail.com> wrote:
>On Thu, Jun 22, 2023 at 9:29 AM Peter Zijlstra <peterz@infradead.org> wrote:
>>
>> On Thu, Jun 22, 2023 at 08:07:50AM -0400, Brian Gerst wrote:
>> > When kCFI is enabled, special handling is needed for the indirect call
>> > to the kernel thread function.  Rewrite the ret_from_fork() function in
>> > C so that the compiler can properly handle the indirect call.
>> >
>> > Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
>> > Signed-off-by: Brian Gerst <brgerst@gmail.com>
>>
>> This is much nicer indeed. I'll take these patches into my series and
>> repost later today if you don't mind.
>
>Yes, that's fine.
>
>> One little niggle below..
>>
>> > ---
>>
>> > diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
>> > index f31e286c2977..5ee32e7e29e8 100644
>> > --- a/arch/x86/entry/entry_64.S
>> > +++ b/arch/x86/entry/entry_64.S
>> > @@ -284,36 +284,21 @@ SYM_FUNC_END(__switch_to_asm)
>> >   * r12: kernel thread arg
>> >   */
>> >  .pushsection .text, "ax"
>> > +SYM_CODE_START(ret_from_fork_asm)
>> >       UNWIND_HINT_END_OF_STACK
>> >       ANNOTATE_NOENDBR // copy_thread
>> >       CALL_DEPTH_ACCOUNT
>> >
>> > +     /* return address for the stack unwinder */
>> > +     pushq   $swapgs_restore_regs_and_return_to_usermode
>> > +     UNWIND_HINT_FUNC
>> >
>> > +     movq    %rax, %rdi              /* prev */
>> > +     movq    %rsp, %rsi              /* regs */
>> > +     movq    %rbx, %rdx              /* fn */
>> > +     movq    %r12, %rcx              /* fn_arg */
>> > +     jmp     ret_from_fork
>> > +SYM_CODE_END(ret_from_fork_asm)
>> >  .popsection
>> >
>> >  .macro DEBUG_ENTRY_ASSERT_IRQS_OFF
>>
>> > diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
>> > index dac41a0072ea..f5dbfebac076 100644
>> > --- a/arch/x86/kernel/process.c
>> > +++ b/arch/x86/kernel/process.c
>> > @@ -28,6 +28,7 @@
>> >  #include <linux/static_call.h>
>> >  #include <trace/events/power.h>
>> >  #include <linux/hw_breakpoint.h>
>> > +#include <linux/entry-common.h>
>> >  #include <asm/cpu.h>
>> >  #include <asm/apic.h>
>> >  #include <linux/uaccess.h>
>> > @@ -134,6 +135,25 @@ static int set_new_tls(struct task_struct *p, unsigned long tls)
>> >               return do_set_thread_area_64(p, ARCH_SET_FS, tls);
>> >  }
>> >
>> > +__visible noinstr void ret_from_fork(struct task_struct *prev, struct pt_regs *regs,
>> > +                                  int (*fn)(void *), void *fn_arg)
>>
>> So I had noinstr in my initial patch, but it leads to objtool
>> complaints. I suppose we can actually handle tracing and all the other
>> gunk at this point, so I've removed it.
>
>I'm not an expert on noinstr usage, but looking at the other syscall
>functions, instrumentation needs to be disabled before
>syscall_exit_to_user_mode() is called.  Perhaps adding an
>instrumentation_begin()/instrumentation_end() pair to this function is
>needed?
>
>Brian Gerst
>

I don't have the code in front of me right now, but how does this affect FRED enabling? In the case of FRED, the exit path is much simpler; in the FRED enabling patchset we simply deal with it by alternatives-patching the terminal jump after resetting the stack pointer to the standard FRED user space exit stub (which simply pops the user space registers and executes ERETU.)

I'm assuming this is still valid/possible after your patches, since resetting the stack pointer isn't possible in C, but I wanted to double check.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] x86: Rewrite ret_from_fork() in C
  2023-06-22 16:33       ` H. Peter Anvin
@ 2023-06-22 17:33         ` Brian Gerst
  0 siblings, 0 replies; 8+ messages in thread
From: Brian Gerst @ 2023-06-22 17:33 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Peter Zijlstra, xin3.li, linux-kernel, x86, Thomas Gleixner,
	Borislav Petkov, Sami Tolvanen, alyssa.milburn, keescook,
	jpoimboe, joao, tim.c.chen

On Thu, Jun 22, 2023 at 12:33 PM H. Peter Anvin <hpa@zytor.com> wrote:
>
> On June 22, 2023 9:04:03 AM PDT, Brian Gerst <brgerst@gmail.com> wrote:
> >On Thu, Jun 22, 2023 at 9:29 AM Peter Zijlstra <peterz@infradead.org> wrote:
> >>
> >> On Thu, Jun 22, 2023 at 08:07:50AM -0400, Brian Gerst wrote:
> >> > When kCFI is enabled, special handling is needed for the indirect call
> >> > to the kernel thread function.  Rewrite the ret_from_fork() function in
> >> > C so that the compiler can properly handle the indirect call.
> >> >
> >> > Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> >> > Signed-off-by: Brian Gerst <brgerst@gmail.com>
> >>
> >> This is much nicer indeed. I'll take these patches into my series and
> >> repost later today if you don't mind.
> >
> >Yes, that's fine.
> >
> >> One little niggle below..
> >>
> >> > ---
> >>
> >> > diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> >> > index f31e286c2977..5ee32e7e29e8 100644
> >> > --- a/arch/x86/entry/entry_64.S
> >> > +++ b/arch/x86/entry/entry_64.S
> >> > @@ -284,36 +284,21 @@ SYM_FUNC_END(__switch_to_asm)
> >> >   * r12: kernel thread arg
> >> >   */
> >> >  .pushsection .text, "ax"
> >> > +SYM_CODE_START(ret_from_fork_asm)
> >> >       UNWIND_HINT_END_OF_STACK
> >> >       ANNOTATE_NOENDBR // copy_thread
> >> >       CALL_DEPTH_ACCOUNT
> >> >
> >> > +     /* return address for the stack unwinder */
> >> > +     pushq   $swapgs_restore_regs_and_return_to_usermode
> >> > +     UNWIND_HINT_FUNC
> >> >
> >> > +     movq    %rax, %rdi              /* prev */
> >> > +     movq    %rsp, %rsi              /* regs */
> >> > +     movq    %rbx, %rdx              /* fn */
> >> > +     movq    %r12, %rcx              /* fn_arg */
> >> > +     jmp     ret_from_fork
> >> > +SYM_CODE_END(ret_from_fork_asm)
> >> >  .popsection
> >> >
> >> >  .macro DEBUG_ENTRY_ASSERT_IRQS_OFF
> >>
> >> > diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
> >> > index dac41a0072ea..f5dbfebac076 100644
> >> > --- a/arch/x86/kernel/process.c
> >> > +++ b/arch/x86/kernel/process.c
> >> > @@ -28,6 +28,7 @@
> >> >  #include <linux/static_call.h>
> >> >  #include <trace/events/power.h>
> >> >  #include <linux/hw_breakpoint.h>
> >> > +#include <linux/entry-common.h>
> >> >  #include <asm/cpu.h>
> >> >  #include <asm/apic.h>
> >> >  #include <linux/uaccess.h>
> >> > @@ -134,6 +135,25 @@ static int set_new_tls(struct task_struct *p, unsigned long tls)
> >> >               return do_set_thread_area_64(p, ARCH_SET_FS, tls);
> >> >  }
> >> >
> >> > +__visible noinstr void ret_from_fork(struct task_struct *prev, struct pt_regs *regs,
> >> > +                                  int (*fn)(void *), void *fn_arg)
> >>
> >> So I had noinstr in my initial patch, but it leads to objtool
> >> complaints. I suppose we can actually handle tracing and all the other
> >> gunk at this point, so I've removed it.
> >
> >I'm not an expert on noinstr usage, but looking at the other syscall
> >functions, instrumentation needs to be disabled before
> >syscall_exit_to_user_mode() is called.  Perhaps adding an
> >instrumentation_begin()/instrumentation_end() pair to this function is
> >needed?
> >
> >Brian Gerst
> >
>
> I don't have the code in front of me right now, but how does this affect FRED enabling? In the case of FRED, the exit path is much simpler; in the FRED enabling patchset we simply deal with it by alternatives-patching the terminal jump after resetting the stack pointer to the standard FRED user space exit stub (which simply pops the user space registers and executes ERETU.)
>
> I'm assuming this is still valid/possible after your patches, since resetting the stack pointer isn't possible in C, but I wanted to double check.

It should just need to change the "pushq
$swapgs_restore_regs_and_return_to_usermode" to some other address via
an alternative.

Brian Gerst

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] x86: Rewrite ret_from_fork() in C
  2023-06-22 12:07 ` [PATCH 2/2] x86: Rewrite ret_from_fork() in C Brian Gerst
  2023-06-22 13:29   ` Peter Zijlstra
@ 2023-06-23 18:12   ` Brian Gerst
  1 sibling, 0 replies; 8+ messages in thread
From: Brian Gerst @ 2023-06-23 18:12 UTC (permalink / raw)
  To: linux-kernel, x86
  Cc: Thomas Gleixner, Borislav Petkov, H . Peter Anvin, Peter Zijlstra,
	Sami Tolvanen, alyssa.milburn, keescook, jpoimboe, joao,
	tim.c.chen

On Thu, Jun 22, 2023 at 8:08 AM Brian Gerst <brgerst@gmail.com> wrote:
>
> When kCFI is enabled, special handling is needed for the indirect call
> to the kernel thread function.  Rewrite the ret_from_fork() function in
> C so that the compiler can properly handle the indirect call.
>
> Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> Signed-off-by: Brian Gerst <brgerst@gmail.com>
> ---
>  arch/x86/entry/entry_32.S        | 30 +++++++--------------------
>  arch/x86/entry/entry_64.S        | 35 +++++++++-----------------------
>  arch/x86/include/asm/switch_to.h |  4 +++-
>  arch/x86/kernel/process.c        | 22 +++++++++++++++++++-
>  4 files changed, 41 insertions(+), 50 deletions(-)
>
> diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
> index 6c1ee76adc11..7932c14199fb 100644
> --- a/arch/x86/entry/entry_32.S
> +++ b/arch/x86/entry/entry_32.S
> @@ -727,37 +727,21 @@ SYM_CODE_END(__switch_to_asm)
>   * edi: kernel thread arg
>   */
>  .pushsection .text, "ax"
> -SYM_CODE_START(ret_from_fork)
> +SYM_CODE_START(ret_from_fork_asm)
>         /* return address for the stack unwinder */
>         pushl   $.Lsyscall_32_done
>         FRAME_BEGIN
>
> -       pushl   %eax
> -       call    schedule_tail
> +       /* prev already in EAX */
> +       movl    %esp, %edx      /* regs */
> +       movl    %ebx, %ecx      /* fn */
> +       pushl   %edi            /* fn_arg */
> +       call    ret_from_fork
>         addl    $4, %esp
>
> -       testl   %ebx, %ebx
> -       jnz     1f              /* kernel threads are uncommon */
> -
> -2:
> -       /* When we fork, we trace the syscall return in the child, too. */
> -       movl    %esp, %eax
> -       call    syscall_exit_to_user_mode
> -
>         FRAME_END
>         RET
> -
> -       /* kernel thread */
> -1:     movl    %edi, %eax
> -       CALL_NOSPEC ebx
> -       /*
> -        * A kernel thread is allowed to return here after successfully
> -        * calling kernel_execve().  Exit to userspace to complete the execve()
> -        * syscall.
> -        */
> -       movl    $0, PT_EAX(%esp)
> -       jmp     2b
> -SYM_CODE_END(ret_from_fork)
> +SYM_CODE_END(ret_from_fork_asm)
>  .popsection
>
>  SYM_ENTRY(__begin_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE)
> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> index f31e286c2977..5ee32e7e29e8 100644
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -284,36 +284,21 @@ SYM_FUNC_END(__switch_to_asm)
>   * r12: kernel thread arg
>   */
>  .pushsection .text, "ax"
> -       __FUNC_ALIGN
> -SYM_CODE_START_NOALIGN(ret_from_fork)
> +SYM_CODE_START(ret_from_fork_asm)
>         UNWIND_HINT_END_OF_STACK
>         ANNOTATE_NOENDBR // copy_thread
>         CALL_DEPTH_ACCOUNT
> -       movq    %rax, %rdi
> -       call    schedule_tail                   /* rdi: 'prev' task parameter */
> -
> -       testq   %rbx, %rbx                      /* from kernel_thread? */
> -       jnz     1f                              /* kernel threads are uncommon */
>
> -2:
> -       UNWIND_HINT_REGS
> -       movq    %rsp, %rdi
> -       call    syscall_exit_to_user_mode       /* returns with IRQs disabled */
> -       jmp     swapgs_restore_regs_and_return_to_usermode
> +       /* return address for the stack unwinder */
> +       pushq   $swapgs_restore_regs_and_return_to_usermode
> +       UNWIND_HINT_FUNC
>
> -1:
> -       /* kernel thread */
> -       UNWIND_HINT_END_OF_STACK
> -       movq    %r12, %rdi
> -       CALL_NOSPEC rbx
> -       /*
> -        * A kernel thread is allowed to return here after successfully
> -        * calling kernel_execve().  Exit to userspace to complete the execve()
> -        * syscall.
> -        */
> -       movq    $0, RAX(%rsp)
> -       jmp     2b
> -SYM_CODE_END(ret_from_fork)
> +       movq    %rax, %rdi              /* prev */
> +       movq    %rsp, %rsi              /* regs */

The push above makes this give the wrong address for regs.  New version coming.

Brian Gerst

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2023-06-23 18:13 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-06-22 12:07 [PATCH 0/2] x86: Rewrite ret_from_fork() in C Brian Gerst
2023-06-22 12:07 ` [PATCH 1/2] x86/32: Remove schedule_tail_wrapper() Brian Gerst
2023-06-22 12:07 ` [PATCH 2/2] x86: Rewrite ret_from_fork() in C Brian Gerst
2023-06-22 13:29   ` Peter Zijlstra
2023-06-22 16:04     ` Brian Gerst
2023-06-22 16:33       ` H. Peter Anvin
2023-06-22 17:33         ` Brian Gerst
2023-06-23 18:12   ` Brian Gerst

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox