Building the Linux kernel with Clang and LLVM
 help / color / mirror / Atom feed
* Re: [PATCH 1/2] um: mark rodata read-only and implement _nofault accesses
       [not found] ` <20250210160926.420133-2-benjamin@sipsolutions.net>
@ 2025-04-02 22:12   ` Nathan Chancellor
  2025-04-03  6:20     ` Benjamin Berg
  0 siblings, 1 reply; 4+ messages in thread
From: Nathan Chancellor @ 2025-04-02 22:12 UTC (permalink / raw)
  To: Benjamin Berg; +Cc: linux-um, Johannes Berg, Benjamin Berg, llvm

Hi Benjamin and Johannes,

On Mon, Feb 10, 2025 at 05:09:25PM +0100, Benjamin Berg wrote:
> From: Johannes Berg <johannes.berg@intel.com>
> 
> Mark read-only data actually read-only (simple mprotect), and
> to be able to test it also implement _nofault accesses. This
> works by setting up a new "segv_continue" pointer in current,
> and then when we hit a segfault we change the signal return
> context so that we continue at that address. The code using
> this sets it up so that it jumps to a label and then aborts
> the access that way, returning -EFAULT.
> 
> It's possible to optimize the ___backtrack_faulted() thing by
> using asm goto (compiler version dependent) and/or gcc's (not
> sure if clang has it) &&label extension, but at least in one
> attempt I made the && caused the compiler to not load -EFAULT
> into the register in case of jumping to the &&label from the
> fault handler. So leave it like this for now.
> 
> Co-developed-by: Benjamin Berg <benjamin.berg@intel.com>
> Signed-off-by: Johannes Berg <johannes.berg@intel.com>
> Signed-off-by: Benjamin Berg <benjamin.berg@intel.com>
> ---
>  arch/um/Kconfig                          |  1 +
>  arch/um/include/asm/processor-generic.h  |  2 ++
>  arch/um/include/asm/uaccess.h            | 20 ++++++++++++-----
>  arch/um/include/shared/arch.h            |  2 ++
>  arch/um/include/shared/as-layout.h       |  2 +-
>  arch/um/include/shared/irq_user.h        |  3 ++-
>  arch/um/include/shared/kern_util.h       | 12 ++++++----
>  arch/um/kernel/irq.c                     |  3 ++-
>  arch/um/kernel/mem.c                     | 10 +++++++++
>  arch/um/kernel/trap.c                    | 28 +++++++++++++++++++-----
>  arch/um/os-Linux/signal.c                |  4 ++--
>  arch/um/os-Linux/skas/process.c          |  8 +++----
>  arch/x86/um/os-Linux/mcontext.c          | 12 ++++++++++
>  arch/x86/um/shared/sysdep/faultinfo_32.h | 12 ++++++++++
>  arch/x86/um/shared/sysdep/faultinfo_64.h | 12 ++++++++++
>  15 files changed, 108 insertions(+), 23 deletions(-)
> 
> diff --git a/arch/um/Kconfig b/arch/um/Kconfig
> index 18051b1cfce0..79509c7f39de 100644
> --- a/arch/um/Kconfig
> +++ b/arch/um/Kconfig
> @@ -12,6 +12,7 @@ config UML
>  	select ARCH_HAS_KCOV
>  	select ARCH_HAS_STRNCPY_FROM_USER
>  	select ARCH_HAS_STRNLEN_USER
> +	select ARCH_HAS_STRICT_KERNEL_RWX
>  	select HAVE_ARCH_AUDITSYSCALL
>  	select HAVE_ARCH_KASAN if X86_64
>  	select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
> diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
> index 5d6356eafffe..8a789c17acd8 100644
> --- a/arch/um/include/asm/processor-generic.h
> +++ b/arch/um/include/asm/processor-generic.h
> @@ -31,6 +31,8 @@ struct thread_struct {
>  		} thread;
>  	} request;
>  
> +	void *segv_continue;
> +
>  	/* Contains variable sized FP registers */
>  	struct pt_regs regs;
>  };
> diff --git a/arch/um/include/asm/uaccess.h b/arch/um/include/asm/uaccess.h
> index 1d4b6bbc1b65..3a08f9029a3f 100644
> --- a/arch/um/include/asm/uaccess.h
> +++ b/arch/um/include/asm/uaccess.h
> @@ -9,6 +9,7 @@
>  
>  #include <asm/elf.h>
>  #include <linux/unaligned.h>
> +#include <sysdep/faultinfo.h>
>  
>  #define __under_task_size(addr, size) \
>  	(((unsigned long) (addr) < TASK_SIZE) && \
> @@ -44,19 +45,28 @@ static inline int __access_ok(const void __user *ptr, unsigned long size)
>  		 __access_ok_vsyscall(addr, size));
>  }
>  
> -/* no pagefaults for kernel addresses in um */
>  #define __get_kernel_nofault(dst, src, type, err_label)			\
>  do {									\
> -	*((type *)dst) = get_unaligned((type *)(src));			\
> -	if (0) /* make sure the label looks used to the compiler */	\
> +	int __faulted;							\
> +									\
> +	___backtrack_faulted(__faulted);				\
> +	if (__faulted) {						\
> +		*((type *)dst) = (type) 0;				\
>  		goto err_label;						\
> +	}								\
> +	*((type *)dst) = get_unaligned((type *)(src));			\
> +	current->thread.segv_continue = NULL;				\
>  } while (0)
>  
>  #define __put_kernel_nofault(dst, src, type, err_label)			\
>  do {									\
> -	put_unaligned(*((type *)src), (type *)(dst));			\
> -	if (0) /* make sure the label looks used to the compiler */	\
> +	int __faulted;							\
> +									\
> +	___backtrack_faulted(__faulted);				\
> +	if (__faulted)							\
>  		goto err_label;						\
> +	put_unaligned(*((type *)src), (type *)(dst));			\
> +	current->thread.segv_continue = NULL;				\
>  } while (0)
>  
>  #endif
...
> diff --git a/arch/x86/um/shared/sysdep/faultinfo_64.h b/arch/x86/um/shared/sysdep/faultinfo_64.h
> index ee88f88974ea..26fb4835d3e9 100644
> --- a/arch/x86/um/shared/sysdep/faultinfo_64.h
> +++ b/arch/x86/um/shared/sysdep/faultinfo_64.h
> @@ -29,4 +29,16 @@ struct faultinfo {
>  
>  #define PTRACE_FULL_FAULTINFO 1
>  
> +#define ___backtrack_faulted(_faulted)					\
> +	asm volatile (							\
> +		"mov $0, %0\n"						\
> +		"movq $__get_kernel_nofault_faulted_%=,%1\n"		\
> +		"jmp _end_%=\n"						\
> +		"__get_kernel_nofault_faulted_%=:\n"			\
> +		"mov $1, %0;"						\
> +		"_end_%=:"						\
> +		: "=r" (_faulted),					\
> +		  "=m" (current->thread.segv_continue) ::		\
> +	)
> +
>  #endif

I bisected a crash that our CI sees with this change as
commit d1d7f01f7cd3 ("um: mark rodata read-only and implement _nofault
accesses"). Unfortunately, I only see this with clang, I do not see it
with GCC 14.2.0 but I am not sure where I would start trying to figure
out what is going on here.

  $ make -skj"$(nproc)" ARCH=um LLVM=1 mrproper defconfig vmlinux

  # Get rootfs via
  #   curl -LSs https://github.com/ClangBuiltLinux/boot-utils/releases/download/20241120-044434/x86_64-rootfs.ext4.zst | zstd -d >rootfs.ext4
  # if necessary
  $ ./vmlinux ubd0=$PWD/rootfs.ext4
  Core dump limits :
          soft - NONE
          hard - NONE
  Checking that ptrace can change system call numbers...OK
  Checking syscall emulation for ptrace...OK
  Checking environment variables for a tempdir...none found
  Checking if /dev/shm is on tmpfs...OK
  Checking PROT_EXEC mmap in /dev/shm...OK
  Linux version 6.14.0-rc5-00002-gd1d7f01f7cd3 (nathan@n3-xlarge-x86) (ClangBuiltLinux clang version 20.1.2 (https://github.com/llvm/llvm-project.git 58df0ef89dd64126512e4ee27b4ac3fd8ddf6247), ClangBuiltLinux LLD 20.1.2 (https://github.com/llvm/llvm-project.git 58df0ef89dd64126512e4ee27b4ac3fd8ddf6247)) #1 Wed Apr  2 15:07:50 MST 2025
  ...
  EXT4-fs (ubda): orphan cleanup on readonly fs
  EXT4-fs (ubda): mounted filesystem 267f8e03-3e3c-4aec-979b-f4d9b964b716 ro with ordered data mode. Quota mode: none.
  VFS: Mounted root (ext4 filesystem) readonly on device 98:0.
  devtmpfs: mounted
  Run /sbin/init as init process

  Modules linked in:
  Pid: 24, comm: mount Not tainted 6.14.0-rc5-00002-gd1d7f01f7cd3
  RIP: 0033:_end_0+0x38/0x45
  RSP: 00000000648bfca0  EFLAGS: 00010206
  RAX: 0000000000000000 RBX: 00000000609c7ff8 RCX: 00000000606350b8
  RDX: 00000000648bfc5e RSI: 0000000000001000 RDI: 0000000060c0c000
  RBP: 00000000648bfcc0 R08: 0000000000000000 R09: 0000000060c0c780
  R10: 0000000000000000 R11: 0000000000000206 R12: 00000000648bfd60
  R13: 0000000060c0c900 R14: 0000000060c0c9f8 R15: 0000000000000007
  Kernel panic - not syncing: Kernel mode fault at addr 0x790, ip 0x600c268a
  CPU: 0 UID: 0 PID: 24 Comm: mount Not tainted 6.14.0-rc5-00002-gd1d7f01f7cd3 #1
  Stack:
   600c25e9 00000007 609c7ff8 608c4780
   648bfcf0 601533ca 601533ab 648bfd60
   648bfdb0 608c4780 648bfd10 60152ebe
  Call Trace:
   [<600c25e9>] ? copy_from_kernel_nofault+0x0/0x64
   [<601533ca>] prepend_copy+0x1f/0x51
   [<601533ab>] ? prepend_copy+0x0/0x51
   [<60152ebe>] prepend+0x60/0x67
   [<60153379>] prepend_name+0x1f/0x51
   [<6015335a>] ? prepend_name+0x0/0x51
   [<60152bad>] prepend_path+0xb9/0x1d8
   [<6003cb83>] ? um_set_signals+0x0/0x3b
   [<60152e17>] d_path+0xce/0x115
   [<601867f5>] proc_pid_readlink+0xa1/0x17d
   [<6012b182>] vfs_readlink+0xec/0xee
   [<60138920>] ? touch_atime+0x0/0x162
   [<60120341>] do_readlinkat+0xbe/0x13a
   [<6011f91b>] sys_readlinkat+0x10/0x14
   [<6002caad>] handle_syscall+0x7b/0xaa
   [<6002ca32>] ? handle_syscall+0x0/0xaa
   [<6003f687>] userspace+0x289/0x4a5
   [<6002a8e3>] fork_handler+0x56/0x5d

Cheers,
Nathan

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/2] um: mark rodata read-only and implement _nofault accesses
  2025-04-02 22:12   ` [PATCH 1/2] um: mark rodata read-only and implement _nofault accesses Nathan Chancellor
@ 2025-04-03  6:20     ` Benjamin Berg
  2025-04-03 19:19       ` Nathan Chancellor
  0 siblings, 1 reply; 4+ messages in thread
From: Benjamin Berg @ 2025-04-03  6:20 UTC (permalink / raw)
  To: Nathan Chancellor; +Cc: linux-um, Johannes Berg, llvm

Hi Nathan,

oops, that is a little logic bug in the code. When we are coming from
userspace, we may be running as part of a user task and have an mm. And
then the new "current->pagefault_disabled" logic is skipped. So when
prepend_copy is doing its nofault copy to evade a lock it is crashing
instead of failing gracefully and retrying.

In segv_handler, can you try moving that into a separate "else if"
block just above the "current->mm == NULL" check?

Something like the patch I copied below.

Benjamin

diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index cbe924a0fa87..8a2e68d07de6 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -330,20 +330,20 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
 			panic("Failed to sync kernel TLBs: %d", err);
 		goto out;
 	}
-	else if (current->mm == NULL) {
-		if (current->pagefault_disabled) {
-			if (!mc) {
-				show_regs(container_of(regs, struct pt_regs, regs));
-				panic("Segfault with pagefaults disabled but no mcontext");
-			}
-			if (!current->thread.segv_continue) {
-				show_regs(container_of(regs, struct pt_regs, regs));
-				panic("Segfault without recovery target");
-			}
-			mc_set_rip(mc, current->thread.segv_continue);
-			current->thread.segv_continue = NULL;
-			goto out;
+	else if (current->pagefault_disabled) {
+		if (!mc) {
+			show_regs(container_of(regs, struct pt_regs, regs));
+			panic("Segfault with pagefaults disabled but no mcontext");
 		}
+		if (!current->thread.segv_continue) {
+			show_regs(container_of(regs, struct pt_regs, regs));
+			panic("Segfault without recovery target");
+		}
+		mc_set_rip(mc, current->thread.segv_continue);
+		current->thread.segv_continue = NULL;
+		goto out;
+	}
+	else if (current->mm == NULL) {
 		show_regs(container_of(regs, struct pt_regs, regs));
 		panic("Segfault with no mm");
 	}


On Wed, 2025-04-02 at 15:12 -0700, Nathan Chancellor wrote:
> Hi Benjamin and Johannes,
> 
> On Mon, Feb 10, 2025 at 05:09:25PM +0100, Benjamin Berg wrote:
> > From: Johannes Berg <johannes.berg@intel.com>
> > 
> > Mark read-only data actually read-only (simple mprotect), and
> > to be able to test it also implement _nofault accesses. This
> > works by setting up a new "segv_continue" pointer in current,
> > and then when we hit a segfault we change the signal return
> > context so that we continue at that address. The code using
> > this sets it up so that it jumps to a label and then aborts
> > the access that way, returning -EFAULT.
> > 
> > It's possible to optimize the ___backtrack_faulted() thing by
> > using asm goto (compiler version dependent) and/or gcc's (not
> > sure if clang has it) &&label extension, but at least in one
> > attempt I made the && caused the compiler to not load -EFAULT
> > into the register in case of jumping to the &&label from the
> > fault handler. So leave it like this for now.
> > 
> > Co-developed-by: Benjamin Berg <benjamin.berg@intel.com>
> > Signed-off-by: Johannes Berg <johannes.berg@intel.com>
> > Signed-off-by: Benjamin Berg <benjamin.berg@intel.com>
> > ---
> >  arch/um/Kconfig                          |  1 +
> >  arch/um/include/asm/processor-generic.h  |  2 ++
> >  arch/um/include/asm/uaccess.h            | 20 ++++++++++++-----
> >  arch/um/include/shared/arch.h            |  2 ++
> >  arch/um/include/shared/as-layout.h       |  2 +-
> >  arch/um/include/shared/irq_user.h        |  3 ++-
> >  arch/um/include/shared/kern_util.h       | 12 ++++++----
> >  arch/um/kernel/irq.c                     |  3 ++-
> >  arch/um/kernel/mem.c                     | 10 +++++++++
> >  arch/um/kernel/trap.c                    | 28 +++++++++++++++++++-----
> >  arch/um/os-Linux/signal.c                |  4 ++--
> >  arch/um/os-Linux/skas/process.c          |  8 +++----
> >  arch/x86/um/os-Linux/mcontext.c          | 12 ++++++++++
> >  arch/x86/um/shared/sysdep/faultinfo_32.h | 12 ++++++++++
> >  arch/x86/um/shared/sysdep/faultinfo_64.h | 12 ++++++++++
> >  15 files changed, 108 insertions(+), 23 deletions(-)
> > 
> > diff --git a/arch/um/Kconfig b/arch/um/Kconfig
> > index 18051b1cfce0..79509c7f39de 100644
> > --- a/arch/um/Kconfig
> > +++ b/arch/um/Kconfig
> > @@ -12,6 +12,7 @@ config UML
> >  	select ARCH_HAS_KCOV
> >  	select ARCH_HAS_STRNCPY_FROM_USER
> >  	select ARCH_HAS_STRNLEN_USER
> > +	select ARCH_HAS_STRICT_KERNEL_RWX
> >  	select HAVE_ARCH_AUDITSYSCALL
> >  	select HAVE_ARCH_KASAN if X86_64
> >  	select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
> > diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
> > index 5d6356eafffe..8a789c17acd8 100644
> > --- a/arch/um/include/asm/processor-generic.h
> > +++ b/arch/um/include/asm/processor-generic.h
> > @@ -31,6 +31,8 @@ struct thread_struct {
> >  		} thread;
> >  	} request;
> >  
> > +	void *segv_continue;
> > +
> >  	/* Contains variable sized FP registers */
> >  	struct pt_regs regs;
> >  };
> > diff --git a/arch/um/include/asm/uaccess.h b/arch/um/include/asm/uaccess.h
> > index 1d4b6bbc1b65..3a08f9029a3f 100644
> > --- a/arch/um/include/asm/uaccess.h
> > +++ b/arch/um/include/asm/uaccess.h
> > @@ -9,6 +9,7 @@
> >  
> >  #include <asm/elf.h>
> >  #include <linux/unaligned.h>
> > +#include <sysdep/faultinfo.h>
> >  
> >  #define __under_task_size(addr, size) \
> >  	(((unsigned long) (addr) < TASK_SIZE) && \
> > @@ -44,19 +45,28 @@ static inline int __access_ok(const void __user *ptr, unsigned long size)
> >  		 __access_ok_vsyscall(addr, size));
> >  }
> >  
> > -/* no pagefaults for kernel addresses in um */
> >  #define __get_kernel_nofault(dst, src, type, err_label)			\
> >  do {									\
> > -	*((type *)dst) = get_unaligned((type *)(src));			\
> > -	if (0) /* make sure the label looks used to the compiler */	\
> > +	int __faulted;							\
> > +									\
> > +	___backtrack_faulted(__faulted);				\
> > +	if (__faulted) {						\
> > +		*((type *)dst) = (type) 0;				\
> >  		goto err_label;						\
> > +	}								\
> > +	*((type *)dst) = get_unaligned((type *)(src));			\
> > +	current->thread.segv_continue = NULL;				\
> >  } while (0)
> >  
> >  #define __put_kernel_nofault(dst, src, type, err_label)			\
> >  do {									\
> > -	put_unaligned(*((type *)src), (type *)(dst));			\
> > -	if (0) /* make sure the label looks used to the compiler */	\
> > +	int __faulted;							\
> > +									\
> > +	___backtrack_faulted(__faulted);				\
> > +	if (__faulted)							\
> >  		goto err_label;						\
> > +	put_unaligned(*((type *)src), (type *)(dst));			\
> > +	current->thread.segv_continue = NULL;				\
> >  } while (0)
> >  
> >  #endif
> ...
> > diff --git a/arch/x86/um/shared/sysdep/faultinfo_64.h b/arch/x86/um/shared/sysdep/faultinfo_64.h
> > index ee88f88974ea..26fb4835d3e9 100644
> > --- a/arch/x86/um/shared/sysdep/faultinfo_64.h
> > +++ b/arch/x86/um/shared/sysdep/faultinfo_64.h
> > @@ -29,4 +29,16 @@ struct faultinfo {
> >  
> >  #define PTRACE_FULL_FAULTINFO 1
> >  
> > +#define ___backtrack_faulted(_faulted)					\
> > +	asm volatile (							\
> > +		"mov $0, %0\n"						\
> > +		"movq $__get_kernel_nofault_faulted_%=,%1\n"		\
> > +		"jmp _end_%=\n"						\
> > +		"__get_kernel_nofault_faulted_%=:\n"			\
> > +		"mov $1, %0;"						\
> > +		"_end_%=:"						\
> > +		: "=r" (_faulted),					\
> > +		  "=m" (current->thread.segv_continue) ::		\
> > +	)
> > +
> >  #endif
> 
> I bisected a crash that our CI sees with this change as
> commit d1d7f01f7cd3 ("um: mark rodata read-only and implement _nofault
> accesses"). Unfortunately, I only see this with clang, I do not see it
> with GCC 14.2.0 but I am not sure where I would start trying to figure
> out what is going on here.
> 
>   $ make -skj"$(nproc)" ARCH=um LLVM=1 mrproper defconfig vmlinux
> 
>   # Get rootfs via
>   #   curl -LSs https://github.com/ClangBuiltLinux/boot-utils/releases/download/20241120-044434/x86_64-rootfs.ext4.zst | zstd -d >rootfs.ext4
>   # if necessary
>   $ ./vmlinux ubd0=$PWD/rootfs.ext4
>   Core dump limits :
>           soft - NONE
>           hard - NONE
>   Checking that ptrace can change system call numbers...OK
>   Checking syscall emulation for ptrace...OK
>   Checking environment variables for a tempdir...none found
>   Checking if /dev/shm is on tmpfs...OK
>   Checking PROT_EXEC mmap in /dev/shm...OK
>   Linux version 6.14.0-rc5-00002-gd1d7f01f7cd3 (nathan@n3-xlarge-x86) (ClangBuiltLinux clang version 20.1.2 (https://github.com/llvm/llvm-project.git 58df0ef89dd64126512e4ee27b4ac3fd8ddf6247), ClangBuiltLinux LLD 20.1.2 (https://github.com/llvm/llvm-project.git 58df0ef89dd64126512e4ee27b4ac3fd8ddf6247)) #1 Wed Apr  2 15:07:50 MST 2025
>   ...
>   EXT4-fs (ubda): orphan cleanup on readonly fs
>   EXT4-fs (ubda): mounted filesystem 267f8e03-3e3c-4aec-979b-f4d9b964b716 ro with ordered data mode. Quota mode: none.
>   VFS: Mounted root (ext4 filesystem) readonly on device 98:0.
>   devtmpfs: mounted
>   Run /sbin/init as init process
> 
>   Modules linked in:
>   Pid: 24, comm: mount Not tainted 6.14.0-rc5-00002-gd1d7f01f7cd3
>   RIP: 0033:_end_0+0x38/0x45
>   RSP: 00000000648bfca0  EFLAGS: 00010206
>   RAX: 0000000000000000 RBX: 00000000609c7ff8 RCX: 00000000606350b8
>   RDX: 00000000648bfc5e RSI: 0000000000001000 RDI: 0000000060c0c000
>   RBP: 00000000648bfcc0 R08: 0000000000000000 R09: 0000000060c0c780
>   R10: 0000000000000000 R11: 0000000000000206 R12: 00000000648bfd60
>   R13: 0000000060c0c900 R14: 0000000060c0c9f8 R15: 0000000000000007
>   Kernel panic - not syncing: Kernel mode fault at addr 0x790, ip 0x600c268a
>   CPU: 0 UID: 0 PID: 24 Comm: mount Not tainted 6.14.0-rc5-00002-gd1d7f01f7cd3 #1
>   Stack:
>    600c25e9 00000007 609c7ff8 608c4780
>    648bfcf0 601533ca 601533ab 648bfd60
>    648bfdb0 608c4780 648bfd10 60152ebe
>   Call Trace:
>    [<600c25e9>] ? copy_from_kernel_nofault+0x0/0x64
>    [<601533ca>] prepend_copy+0x1f/0x51
>    [<601533ab>] ? prepend_copy+0x0/0x51
>    [<60152ebe>] prepend+0x60/0x67
>    [<60153379>] prepend_name+0x1f/0x51
>    [<6015335a>] ? prepend_name+0x0/0x51
>    [<60152bad>] prepend_path+0xb9/0x1d8
>    [<6003cb83>] ? um_set_signals+0x0/0x3b
>    [<60152e17>] d_path+0xce/0x115
>    [<601867f5>] proc_pid_readlink+0xa1/0x17d
>    [<6012b182>] vfs_readlink+0xec/0xee
>    [<60138920>] ? touch_atime+0x0/0x162
>    [<60120341>] do_readlinkat+0xbe/0x13a
>    [<6011f91b>] sys_readlinkat+0x10/0x14
>    [<6002caad>] handle_syscall+0x7b/0xaa
>    [<6002ca32>] ? handle_syscall+0x0/0xaa
>    [<6003f687>] userspace+0x289/0x4a5
>    [<6002a8e3>] fork_handler+0x56/0x5d
> 
> Cheers,
> Nathan
> 


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/2] um: mark rodata read-only and implement _nofault accesses
  2025-04-03  6:20     ` Benjamin Berg
@ 2025-04-03 19:19       ` Nathan Chancellor
  2025-04-03 20:47         ` Johannes Berg
  0 siblings, 1 reply; 4+ messages in thread
From: Nathan Chancellor @ 2025-04-03 19:19 UTC (permalink / raw)
  To: Benjamin Berg; +Cc: linux-um, Johannes Berg, llvm

On Thu, Apr 03, 2025 at 08:20:23AM +0200, Benjamin Berg wrote:
> Hi Nathan,
> 
> oops, that is a little logic bug in the code. When we are coming from
> userspace, we may be running as part of a user task and have an mm. And
> then the new "current->pagefault_disabled" logic is skipped. So when
> prepend_copy is doing its nofault copy to evade a lock it is crashing
> instead of failing gracefully and retrying.
> 
> In segv_handler, can you try moving that into a separate "else if"
> block just above the "current->mm == NULL" check?
> 
> Something like the patch I copied below.

Thanks, I applied that change, which shows a slightly different crash
message now:

Modules linked in:
Pid: 24, comm: mount Not tainted 6.14.0-rc5-00002-gd1d7f01f7cd3-dirty
RIP: 0033:_end_0+0x38/0x45
RSP: 00000000648bfca0  EFLAGS: 00010206
RAX: 0000000000000000 RBX: 00000000609c7ff8 RCX: 00000000606350b8
RDX: 00000000648bfc5e RSI: 0000000000001000 RDI: 0000000060c0c000
RBP: 00000000648bfcc0 R08: 0000000000000000 R09: 0000000060c0c780
R10: 0000000000000000 R11: 0000000000000206 R12: 00000000648bfd60
R13: 0000000060c0c900 R14: 0000000060c0c9f8 R15: 0000000000000007
Kernel panic - not syncing: Segfault without recovery target
CPU: 0 UID: 0 PID: 24 Comm: mount Not tainted 6.14.0-rc5-00002-gd1d7f01f7cd3-dirty #1
Stack:
 600c25f9 00000007 609c7ff8 608c4780
 648bfcf0 601533da 601533bb 648bfd60
 648bfdb0 608c4780 648bfd10 60152ece
Call Trace:
 [<600c25f9>] ? copy_from_kernel_nofault+0x0/0x64
 [<601533da>] prepend_copy+0x1f/0x51
 [<601533bb>] ? prepend_copy+0x0/0x51
 [<60152ece>] prepend+0x60/0x67
 [<60153389>] prepend_name+0x1f/0x51
 [<6015336a>] ? prepend_name+0x0/0x51
 [<60152bbd>] prepend_path+0xb9/0x1d8
 [<6003cb91>] ? um_set_signals+0x0/0x3b
 [<60152e27>] d_path+0xce/0x115
 [<60186805>] proc_pid_readlink+0xa1/0x17d
 [<6012b192>] vfs_readlink+0xec/0xee
 [<60138930>] ? touch_atime+0x0/0x162
 [<60120351>] do_readlinkat+0xbe/0x13a
 [<6011f92b>] sys_readlinkat+0x10/0x14
 [<6002cabb>] handle_syscall+0x7b/0xaa
 [<6002ca40>] ? handle_syscall+0x0/0xaa
 [<6003f695>] userspace+0x289/0x4a5
 [<6002a8e3>] fork_handler+0x56/0x5d

> diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
> index cbe924a0fa87..8a2e68d07de6 100644
> --- a/arch/um/kernel/trap.c
> +++ b/arch/um/kernel/trap.c
> @@ -330,20 +330,20 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
>  			panic("Failed to sync kernel TLBs: %d", err);
>  		goto out;
>  	}
> -	else if (current->mm == NULL) {
> -		if (current->pagefault_disabled) {
> -			if (!mc) {
> -				show_regs(container_of(regs, struct pt_regs, regs));
> -				panic("Segfault with pagefaults disabled but no mcontext");
> -			}
> -			if (!current->thread.segv_continue) {
> -				show_regs(container_of(regs, struct pt_regs, regs));
> -				panic("Segfault without recovery target");
> -			}
> -			mc_set_rip(mc, current->thread.segv_continue);
> -			current->thread.segv_continue = NULL;
> -			goto out;
> +	else if (current->pagefault_disabled) {
> +		if (!mc) {
> +			show_regs(container_of(regs, struct pt_regs, regs));
> +			panic("Segfault with pagefaults disabled but no mcontext");
>  		}
> +		if (!current->thread.segv_continue) {
> +			show_regs(container_of(regs, struct pt_regs, regs));
> +			panic("Segfault without recovery target");
> +		}
> +		mc_set_rip(mc, current->thread.segv_continue);
> +		current->thread.segv_continue = NULL;
> +		goto out;
> +	}
> +	else if (current->mm == NULL) {
>  		show_regs(container_of(regs, struct pt_regs, regs));
>  		panic("Segfault with no mm");
>  	}

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/2] um: mark rodata read-only and implement _nofault accesses
  2025-04-03 19:19       ` Nathan Chancellor
@ 2025-04-03 20:47         ` Johannes Berg
  0 siblings, 0 replies; 4+ messages in thread
From: Johannes Berg @ 2025-04-03 20:47 UTC (permalink / raw)
  To: Nathan Chancellor, Benjamin Berg; +Cc: linux-um, llvm

On Thu, 2025-04-03 at 12:19 -0700, Nathan Chancellor wrote:
> 
> Thanks, I applied that change, which shows a slightly different crash
> message now:

Pretty sure it's all just a bug in my inline assembly, and clang
allocates registers differently:

#define ___backtrack_faulted(_faulted)                                  \
        asm volatile (                                                  \
                "mov $0, %0\n"                                          \
                "movq $__get_kernel_nofault_faulted_%=,%1\n"            \
                "jmp _end_%=\n"                                         \
                "__get_kernel_nofault_faulted_%=:\n"                    \
                "mov $1, %0;"                                           \
                "_end_%=:"                                              \
                : "=r" (_faulted),                                      \
                  "=m" (current->thread.segv_continue) ::               \
        )


It _looks_ as though both %0 and %1 are output only, but clang compiles
it to:

  51:   48 83 fb 08             cmp    $0x8,%rbx
  55:   72 44                   jb     9b <_end_0+0x2a>
  57:   48 8b 01                mov    (%rcx),%rax

// start inline assembly ---vvv--- //
  5a:   b8 00 00 00 00          mov    $0x0,%eax
  5f:   48 c7 80 90 07 00 00    movq   $0x0,0x790(%rax)       // crash
  66:   00 00 00 00 
                        66: R_X86_64_32S        .text+0x6c
  6a:   eb 05                   jmp    71 <_end_0>

000000000000006c <__get_kernel_nofault_faulted_0>:
  6c:   b8 01 00 00 00          mov    $0x1,%eax
// end inline assembly ---^^^--- //

0000000000000071 <_end_0>:
  71:   85 c0                   test   %eax,%eax
  73:   75 56                   jne    cb <_end_1+0x10>


which clearly cannot work? I must be missing something. Switching the
first two instructions fixes it, of course, but right now I can't see
what I forgot in terms of constraints to make the compiler not do that.
Probably trivial to someone more familiar with inline assembly.
Modifying the _faulted to be +r instead of =r also fixes it.

johannes

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2025-04-03 20:47 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <20250210160926.420133-1-benjamin@sipsolutions.net>
     [not found] ` <20250210160926.420133-2-benjamin@sipsolutions.net>
2025-04-02 22:12   ` [PATCH 1/2] um: mark rodata read-only and implement _nofault accesses Nathan Chancellor
2025-04-03  6:20     ` Benjamin Berg
2025-04-03 19:19       ` Nathan Chancellor
2025-04-03 20:47         ` Johannes Berg

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox