- * [PATCH 01/27] um: Switch printk calls to adhere to correct coding style
  2021-03-03 15:54 [PATCH 00/27] Implement SECCOMP based userland Benjamin Berg
@ 2021-03-03 15:54 ` Benjamin Berg
  2021-03-03 15:54 ` [PATCH 02/27] um: Declare fix_range_common as a static function Benjamin Berg
                   ` (25 subsequent siblings)
  26 siblings, 0 replies; 30+ messages in thread
From: Benjamin Berg @ 2021-03-03 15:54 UTC (permalink / raw)
  To: linux-um; +Cc: Benjamin Berg
This means having the string literal in one line and using __func__
where appropriate.
Signed-off-by: Benjamin Berg <benjamin@sipsolutions.net>
---
 arch/um/kernel/exec.c           |   4 +-
 arch/um/os-Linux/skas/mem.c     |  19 +++--
 arch/um/os-Linux/skas/process.c | 121 ++++++++++++++++----------------
 3 files changed, 69 insertions(+), 75 deletions(-)
diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c
index 4d8498100341..cd05bf98265d 100644
--- a/arch/um/kernel/exec.c
+++ b/arch/um/kernel/exec.c
@@ -28,8 +28,8 @@ void flush_thread(void)
 
 	ret = unmap(&current->mm->context.id, 0, TASK_SIZE, 1, &data);
 	if (ret) {
-		printk(KERN_ERR "flush_thread - clearing address space failed, "
-		       "err = %d\n", ret);
+		printk(KERN_ERR "%s - clearing address space failed, err = %d\n",
+		       __func__, ret);
 		force_sig(SIGKILL);
 	}
 	get_safe_registers(current_pt_regs()->regs.gp,
diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c
index 3b4975ee67e2..953fb10f3f93 100644
--- a/arch/um/os-Linux/skas/mem.c
+++ b/arch/um/os-Linux/skas/mem.c
@@ -60,8 +60,8 @@ static inline long do_syscall_stub(struct mm_id * mm_idp, void **addr)
 		printk(UM_KERN_ERR "Registers - \n");
 		for (i = 0; i < MAX_REG_NR; i++)
 			printk(UM_KERN_ERR "\t%d\t0x%lx\n", i, syscall_regs[i]);
-		panic("do_syscall_stub : PTRACE_SETREGS failed, errno = %d\n",
-		      -n);
+		panic("%s : PTRACE_SETREGS failed, errno = %d\n",
+		      __func__, -n);
 	}
 
 	err = ptrace(PTRACE_CONT, pid, 0, 0);
@@ -81,20 +81,17 @@ static inline long do_syscall_stub(struct mm_id * mm_idp, void **addr)
 	offset = *((unsigned long *) mm_idp->stack + 1);
 	if (offset) {
 		data = (unsigned long *)(mm_idp->stack + offset - STUB_DATA);
-		printk(UM_KERN_ERR "do_syscall_stub : ret = %ld, offset = %ld, "
-		       "data = %p\n", ret, offset, data);
+		printk(UM_KERN_ERR "%s : ret = %ld, offset = %ld, data = %p\n",
+		       __func__, ret, offset, data);
 		syscall = (unsigned long *)((unsigned long)data + data[0]);
-		printk(UM_KERN_ERR "do_syscall_stub: syscall %ld failed, "
-		       "return value = 0x%lx, expected return value = 0x%lx\n",
-		       syscall[0], ret, syscall[7]);
-		printk(UM_KERN_ERR "    syscall parameters: "
-		       "0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
+		printk(UM_KERN_ERR "%s: syscall %ld failed, return value = 0x%lx, expected return value = 0x%lx\n",
+		       __func__, syscall[0], ret, syscall[7]);
+		printk(UM_KERN_ERR "    syscall parameters: 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
 		       syscall[1], syscall[2], syscall[3],
 		       syscall[4], syscall[5], syscall[6]);
 		for (n = 1; n < data[0]/sizeof(long); n++) {
 			if (n == 1)
-				printk(UM_KERN_ERR "    additional syscall "
-				       "data:");
+				printk(UM_KERN_ERR "    additional syscall data:");
 			if (n % 4 == 1)
 				printk("\n" UM_KERN_ERR "      ");
 			printk("  0x%lx", data[n]);
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index fba674fac8b7..8c524e27571c 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -117,8 +117,8 @@ void wait_stub_done(int pid)
 
 		err = ptrace(PTRACE_CONT, pid, 0, 0);
 		if (err) {
-			printk(UM_KERN_ERR "wait_stub_done : continue failed, "
-			       "errno = %d\n", errno);
+			printk(UM_KERN_ERR "%s : continue failed, errno = %d\n",
+			       __func__, errno);
 			fatal_sigsegv();
 		}
 	}
@@ -129,11 +129,10 @@ void wait_stub_done(int pid)
 bad_wait:
 	err = ptrace_dump_regs(pid);
 	if (err)
-		printk(UM_KERN_ERR "Failed to get registers from stub, "
-		       "errno = %d\n", -err);
-	printk(UM_KERN_ERR "wait_stub_done : failed to wait for SIGTRAP, "
-	       "pid = %d, n = %d, errno = %d, status = 0x%x\n", pid, n, errno,
-	       status);
+		printk(UM_KERN_ERR "Failed to get registers from stub, errno = %d\n",
+		       -err);
+	printk(UM_KERN_ERR "%s : failed to wait for SIGTRAP, pid = %d, n = %d, errno = %d, status = 0x%x\n",
+	       __func__, pid, n, errno, status);
 	fatal_sigsegv();
 }
 
@@ -194,15 +193,15 @@ static void handle_trap(int pid, struct uml_pt_regs *regs,
 		err = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET,
 			     __NR_getpid);
 		if (err < 0) {
-			printk(UM_KERN_ERR "handle_trap - nullifying syscall "
-			       "failed, errno = %d\n", errno);
+			printk(UM_KERN_ERR "%s - nullifying syscall failed, errno = %d\n",
+			       __func__, errno);
 			fatal_sigsegv();
 		}
 
 		err = ptrace(PTRACE_SYSCALL, pid, 0, 0);
 		if (err < 0) {
-			printk(UM_KERN_ERR "handle_trap - continuing to end of "
-			       "syscall failed, errno = %d\n", errno);
+			printk(UM_KERN_ERR "%s - continuing to end of syscall failed, errno = %d\n",
+			       __func__, errno);
 			fatal_sigsegv();
 		}
 
@@ -211,11 +210,10 @@ static void handle_trap(int pid, struct uml_pt_regs *regs,
 		    (WSTOPSIG(status) != SIGTRAP + 0x80)) {
 			err = ptrace_dump_regs(pid);
 			if (err)
-				printk(UM_KERN_ERR "Failed to get registers "
-				       "from process, errno = %d\n", -err);
-			printk(UM_KERN_ERR "handle_trap - failed to wait at "
-			       "end of syscall, errno = %d, status = %d\n",
-			       errno, status);
+				printk(UM_KERN_ERR "Failed to get registers from process, errno = %d\n",
+				       -err);
+			printk(UM_KERN_ERR "%s - failed to wait at end of syscall, errno = %d, status = %d\n",
+			       __func__, errno, status);
 			fatal_sigsegv();
 		}
 	}
@@ -255,8 +253,8 @@ static int userspace_tramp(void *stack)
 	addr = mmap64((void *) STUB_CODE, UM_KERN_PAGE_SIZE,
 		      PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset);
 	if (addr == MAP_FAILED) {
-		printk(UM_KERN_ERR "mapping mmap stub at 0x%lx failed, "
-		       "errno = %d\n", STUB_CODE, errno);
+		printk(UM_KERN_ERR "mapping mmap stub at 0x%lx failed, errno = %d\n",
+		       STUB_CODE, errno);
 		exit(1);
 	}
 
@@ -266,8 +264,7 @@ static int userspace_tramp(void *stack)
 			    UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE,
 			    MAP_FIXED | MAP_SHARED, fd, offset);
 		if (addr == MAP_FAILED) {
-			printk(UM_KERN_ERR "mapping segfault stack "
-			       "at 0x%lx failed, errno = %d\n",
+			printk(UM_KERN_ERR "mapping segfault stack at 0x%lx failed, errno = %d\n",
 			       STUB_DATA, errno);
 			exit(1);
 		}
@@ -285,8 +282,8 @@ static int userspace_tramp(void *stack)
 		sa.sa_sigaction = (void *) v;
 		sa.sa_restorer = NULL;
 		if (sigaction(SIGSEGV, &sa, NULL) < 0) {
-			printk(UM_KERN_ERR "userspace_tramp - setting SIGSEGV "
-			       "handler failed - errno = %d\n", errno);
+			printk(UM_KERN_ERR "%s - setting SIGSEGV handler failed - errno = %d\n",
+			       __func__, errno);
 			exit(1);
 		}
 	}
@@ -321,8 +318,8 @@ int start_userspace(unsigned long stub_stack)
 		     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 	if (stack == MAP_FAILED) {
 		err = -errno;
-		printk(UM_KERN_ERR "start_userspace : mmap failed, "
-		       "errno = %d\n", errno);
+		printk(UM_KERN_ERR "%s : mmap failed, errno = %d\n",
+		       __func__, errno);
 		return err;
 	}
 
@@ -335,8 +332,8 @@ int start_userspace(unsigned long stub_stack)
 	pid = clone(userspace_tramp, (void *) sp, flags, (void *) stub_stack);
 	if (pid < 0) {
 		err = -errno;
-		printk(UM_KERN_ERR "start_userspace : clone failed, "
-		       "errno = %d\n", errno);
+		printk(UM_KERN_ERR "%s : clone failed, errno = %d\n",
+		       __func__, errno);
 		return err;
 	}
 
@@ -344,31 +341,31 @@ int start_userspace(unsigned long stub_stack)
 		CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL));
 		if (n < 0) {
 			err = -errno;
-			printk(UM_KERN_ERR "start_userspace : wait failed, "
-			       "errno = %d\n", errno);
+			printk(UM_KERN_ERR "%s : wait failed, errno = %d\n",
+			       __func__, errno);
 			goto out_kill;
 		}
 	} while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGALRM));
 
 	if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) {
 		err = -EINVAL;
-		printk(UM_KERN_ERR "start_userspace : expected SIGSTOP, got "
-		       "status = %d\n", status);
+		printk(UM_KERN_ERR "%s : expected SIGSTOP, got status = %d\n",
+		       __func__, status);
 		goto out_kill;
 	}
 
 	if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL,
 		   (void *) PTRACE_O_TRACESYSGOOD) < 0) {
 		err = -errno;
-		printk(UM_KERN_ERR "start_userspace : PTRACE_OLDSETOPTIONS "
-		       "failed, errno = %d\n", errno);
+		printk(UM_KERN_ERR "%s : PTRACE_OLDSETOPTIONS failed, errno = %d\n",
+		       __func__, errno);
 		goto out_kill;
 	}
 
 	if (munmap(stack, UM_KERN_PAGE_SIZE) < 0) {
 		err = -errno;
-		printk(UM_KERN_ERR "start_userspace : munmap failed, "
-		       "errno = %d\n", errno);
+		printk(UM_KERN_ERR "%s : munmap failed, errno = %d\n",
+		       __func__, errno);
 		goto out_kill;
 	}
 
@@ -402,14 +399,14 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
 		 * just kill the process.
 		 */
 		if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp)) {
-			printk(UM_KERN_ERR "userspace - ptrace set regs "
-			       "failed, errno = %d\n", errno);
+			printk(UM_KERN_ERR "%s - ptrace set regs failed, errno = %d\n",
+			       __func__, errno);
 			fatal_sigsegv();
 		}
 
 		if (put_fp_registers(pid, regs->fp)) {
-			printk(UM_KERN_ERR "userspace - ptrace set fp regs "
-			       "failed, errno = %d\n", errno);
+			printk(UM_KERN_ERR "%s - ptrace set fp regs failed, errno = %d\n",
+			       __func__, errno);
 			fatal_sigsegv();
 		}
 
@@ -420,28 +417,28 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
 					     singlestepping(NULL));
 
 		if (ptrace(op, pid, 0, 0)) {
-			printk(UM_KERN_ERR "userspace - ptrace continue "
-			       "failed, op = %d, errno = %d\n", op, errno);
+			printk(UM_KERN_ERR "%s - ptrace continue failed, op = %d, errno = %d\n",
+			       __func__, op, errno);
 			fatal_sigsegv();
 		}
 
 		CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL));
 		if (err < 0) {
-			printk(UM_KERN_ERR "userspace - wait failed, "
-			       "errno = %d\n", errno);
+			printk(UM_KERN_ERR "%s - wait failed, errno = %d\n",
+			       __func__, errno);
 			fatal_sigsegv();
 		}
 
 		regs->is_user = 1;
 		if (ptrace(PTRACE_GETREGS, pid, 0, regs->gp)) {
-			printk(UM_KERN_ERR "userspace - PTRACE_GETREGS failed, "
-			       "errno = %d\n", errno);
+			printk(UM_KERN_ERR "%s - PTRACE_GETREGS failed, errno = %d\n",
+			       __func__, errno);
 			fatal_sigsegv();
 		}
 
 		if (get_fp_registers(pid, regs->fp)) {
-			printk(UM_KERN_ERR "userspace -  get_fp_registers failed, "
-			       "errno = %d\n", errno);
+			printk(UM_KERN_ERR "%s -  get_fp_registers failed, errno = %d\n",
+			       __func__, errno);
 			fatal_sigsegv();
 		}
 
@@ -493,8 +490,8 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
 				unblock_signals_trace();
 				break;
 			default:
-				printk(UM_KERN_ERR "userspace - child stopped "
-				       "with signal %d\n", sig);
+				printk(UM_KERN_ERR "%s - child stopped with signal %d\n",
+				       __func__, sig);
 				fatal_sigsegv();
 			}
 			pid = userspace_pid[0];
@@ -554,15 +551,15 @@ int copy_context_skas0(unsigned long new_stack, int pid)
 	err = ptrace_setregs(pid, thread_regs);
 	if (err < 0) {
 		err = -errno;
-		printk(UM_KERN_ERR "copy_context_skas0 : PTRACE_SETREGS "
-		       "failed, pid = %d, errno = %d\n", pid, -err);
+		printk(UM_KERN_ERR "%s : PTRACE_SETREGS failed, pid = %d, errno = %d\n",
+		      __func__, pid, -err);
 		return err;
 	}
 
 	err = put_fp_registers(pid, thread_fp_regs);
 	if (err < 0) {
-		printk(UM_KERN_ERR "copy_context_skas0 : put_fp_registers "
-		       "failed, pid = %d, err = %d\n", pid, err);
+		printk(UM_KERN_ERR "%s : put_fp_registers failed, pid = %d, err = %d\n",
+		       __func__, pid, err);
 		return err;
 	}
 
@@ -573,8 +570,8 @@ int copy_context_skas0(unsigned long new_stack, int pid)
 	err = ptrace(PTRACE_CONT, pid, 0, 0);
 	if (err) {
 		err = -errno;
-		printk(UM_KERN_ERR "Failed to continue new process, pid = %d, "
-		       "errno = %d\n", pid, errno);
+		printk(UM_KERN_ERR "Failed to continue new process, pid = %d, errno = %d\n",
+		       pid, errno);
 		return err;
 	}
 
@@ -582,8 +579,8 @@ int copy_context_skas0(unsigned long new_stack, int pid)
 
 	pid = data->parent_err;
 	if (pid < 0) {
-		printk(UM_KERN_ERR "copy_context_skas0 - stub-parent reports "
-		       "error %d\n", -pid);
+		printk(UM_KERN_ERR "%s - stub-parent reports error %d\n",
+		      __func__, -pid);
 		return pid;
 	}
 
@@ -593,8 +590,8 @@ int copy_context_skas0(unsigned long new_stack, int pid)
 	 */
 	wait_stub_done(pid);
 	if (child_data->child_err != STUB_DATA) {
-		printk(UM_KERN_ERR "copy_context_skas0 - stub-child %d reports "
-		       "error %ld\n", pid, data->child_err);
+		printk(UM_KERN_ERR "%s - stub-child %d reports error %ld\n",
+		       __func__, pid, data->child_err);
 		err = data->child_err;
 		goto out_kill;
 	}
@@ -602,8 +599,8 @@ int copy_context_skas0(unsigned long new_stack, int pid)
 	if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL,
 		   (void *)PTRACE_O_TRACESYSGOOD) < 0) {
 		err = -errno;
-		printk(UM_KERN_ERR "copy_context_skas0 : PTRACE_OLDSETOPTIONS "
-		       "failed, errno = %d\n", errno);
+		printk(UM_KERN_ERR "%s : PTRACE_OLDSETOPTIONS failed, errno = %d\n",
+		       __func__, errno);
 		goto out_kill;
 	}
 
@@ -671,8 +668,8 @@ int start_idle_thread(void *stack, jmp_buf *switch_buf)
 		kmalloc_ok = 0;
 		return 1;
 	default:
-		printk(UM_KERN_ERR "Bad sigsetjmp return in "
-		       "start_idle_thread - %d\n", n);
+		printk(UM_KERN_ERR "Bad sigsetjmp return in %s - %d\n",
+		       __func__, n);
 		fatal_sigsegv();
 	}
 	longjmp(*switch_buf, 1);
-- 
2.29.2
_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um
^ permalink raw reply related	[flat|nested] 30+ messages in thread
- * [PATCH 02/27] um: Declare fix_range_common as a static function
  2021-03-03 15:54 [PATCH 00/27] Implement SECCOMP based userland Benjamin Berg
  2021-03-03 15:54 ` [PATCH 01/27] um: Switch printk calls to adhere to correct coding style Benjamin Berg
@ 2021-03-03 15:54 ` Benjamin Berg
  2021-03-03 15:54 ` [PATCH 03/27] um: Drop support for hosts without SYSEMU_SINGLESTEP support Benjamin Berg
                   ` (24 subsequent siblings)
  26 siblings, 0 replies; 30+ messages in thread
From: Benjamin Berg @ 2021-03-03 15:54 UTC (permalink / raw)
  To: linux-um; +Cc: Benjamin Berg
It is only used within the same file.
Signed-off-by: Benjamin Berg <benjamin@sipsolutions.net>
---
 arch/um/kernel/tlb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index bc38f79ca3a3..3c709e6146dc 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -314,8 +314,8 @@ static inline int update_p4d_range(pgd_t *pgd, unsigned long addr,
 	return ret;
 }
 
-void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
-		      unsigned long end_addr, int force)
+static void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
+			     unsigned long end_addr, int force)
 {
 	pgd_t *pgd;
 	struct host_vm_change hvc;
-- 
2.29.2
_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um
^ permalink raw reply related	[flat|nested] 30+ messages in thread
- * [PATCH 03/27] um: Drop support for hosts without SYSEMU_SINGLESTEP support
  2021-03-03 15:54 [PATCH 00/27] Implement SECCOMP based userland Benjamin Berg
  2021-03-03 15:54 ` [PATCH 01/27] um: Switch printk calls to adhere to correct coding style Benjamin Berg
  2021-03-03 15:54 ` [PATCH 02/27] um: Declare fix_range_common as a static function Benjamin Berg
@ 2021-03-03 15:54 ` Benjamin Berg
  2021-06-19 20:17   ` Richard Weinberger
  2021-03-03 15:55 ` [PATCH 04/27] um: Drop NULL check from start_userspace Benjamin Berg
                   ` (23 subsequent siblings)
  26 siblings, 1 reply; 30+ messages in thread
From: Benjamin Berg @ 2021-03-03 15:54 UTC (permalink / raw)
  To: linux-um; +Cc: Benjamin Berg
These features have existed since Linux 2.6.14 and can be considered
widely available at this point.
Signed-off-by: Benjamin Berg <benjamin@sipsolutions.net>
---
 arch/um/include/asm/processor-generic.h |  1 -
 arch/um/include/shared/kern_util.h      |  1 -
 arch/um/include/shared/ptrace_user.h    | 41 --------------
 arch/um/kernel/process.c                | 75 +------------------------
 arch/um/kernel/ptrace.c                 |  2 -
 arch/um/kernel/signal.c                 | 12 ----
 arch/um/os-Linux/skas/process.c         | 56 +++---------------
 arch/um/os-Linux/start_up.c             | 70 ++---------------------
 arch/x86/um/ptrace_32.c                 | 24 --------
 arch/x86/um/ptrace_64.c                 | 26 ---------
 arch/x86/um/shared/sysdep/ptrace_32.h   |  4 --
 11 files changed, 15 insertions(+), 297 deletions(-)
diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
index afd9b267cf81..1cafe30a0241 100644
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -21,7 +21,6 @@ struct mm_struct;
 struct thread_struct {
 	struct pt_regs regs;
 	struct pt_regs *segv_regs;
-	int singlestep_syscall;
 	void *fault_addr;
 	jmp_buf *fault_catcher;
 	struct task_struct *prev_sched;
diff --git a/arch/um/include/shared/kern_util.h b/arch/um/include/shared/kern_util.h
index 2888ec812f6e..d474a5f3d131 100644
--- a/arch/um/include/shared/kern_util.h
+++ b/arch/um/include/shared/kern_util.h
@@ -35,7 +35,6 @@ extern int handle_page_fault(unsigned long address, unsigned long ip,
 extern unsigned int do_IRQ(int irq, struct uml_pt_regs *regs);
 extern int smp_sigio_handler(void);
 extern void initial_thread_cb(void (*proc)(void *), void *arg);
-extern int is_syscall(unsigned long addr);
 
 extern void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs);
 
diff --git a/arch/um/include/shared/ptrace_user.h b/arch/um/include/shared/ptrace_user.h
index 95455e8996e7..8a705d8f96ce 100644
--- a/arch/um/include/shared/ptrace_user.h
+++ b/arch/um/include/shared/ptrace_user.h
@@ -12,45 +12,4 @@
 extern int ptrace_getregs(long pid, unsigned long *regs_out);
 extern int ptrace_setregs(long pid, unsigned long *regs_in);
 
-/* syscall emulation path in ptrace */
-
-#ifndef PTRACE_SYSEMU
-#define PTRACE_SYSEMU 31
-#endif
-#ifndef PTRACE_SYSEMU_SINGLESTEP
-#define PTRACE_SYSEMU_SINGLESTEP 32
-#endif
-
-/* On architectures, that started to support PTRACE_O_TRACESYSGOOD
- * in linux 2.4, there are two different definitions of
- * PTRACE_SETOPTIONS: linux 2.4 uses 21 while linux 2.6 uses 0x4200.
- * For binary compatibility, 2.6 also supports the old "21", named
- * PTRACE_OLDSETOPTION. On these architectures, UML always must use
- * "21", to ensure the kernel runs on 2.4 and 2.6 host without
- * recompilation. So, we use PTRACE_OLDSETOPTIONS in UML.
- * We also want to be able to build the kernel on 2.4, which doesn't
- * have PTRACE_OLDSETOPTIONS. So, if it is missing, we declare
- * PTRACE_OLDSETOPTIONS to be the same as PTRACE_SETOPTIONS.
- *
- * On architectures, that start to support PTRACE_O_TRACESYSGOOD on
- * linux 2.6, PTRACE_OLDSETOPTIONS never is defined, and also isn't
- * supported by the host kernel. In that case, our trick lets us use
- * the new 0x4200 with the name PTRACE_OLDSETOPTIONS.
- */
-#ifndef PTRACE_OLDSETOPTIONS
-#define PTRACE_OLDSETOPTIONS PTRACE_SETOPTIONS
-#endif
-
-void set_using_sysemu(int value);
-int get_using_sysemu(void);
-extern int sysemu_supported;
-
-#define SELECT_PTRACE_OPERATION(sysemu_mode, singlestep_mode) \
-	(((int[3][3] ) { \
-		{ PTRACE_SYSCALL, PTRACE_SYSCALL, PTRACE_SINGLESTEP }, \
-		{ PTRACE_SYSEMU, PTRACE_SYSEMU, PTRACE_SINGLESTEP }, \
-		{ PTRACE_SYSEMU, PTRACE_SYSEMU_SINGLESTEP, \
-		  PTRACE_SYSEMU_SINGLESTEP } }) \
-		[sysemu_mode][singlestep_mode])
-
 #endif
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index c5011064b5dd..a7eaad38e40e 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -268,84 +268,11 @@ int cpu(void)
 	return current_thread_info()->cpu;
 }
 
-static atomic_t using_sysemu = ATOMIC_INIT(0);
-int sysemu_supported;
-
-void set_using_sysemu(int value)
-{
-	if (value > sysemu_supported)
-		return;
-	atomic_set(&using_sysemu, value);
-}
-
-int get_using_sysemu(void)
-{
-	return atomic_read(&using_sysemu);
-}
-
-static int sysemu_proc_show(struct seq_file *m, void *v)
-{
-	seq_printf(m, "%d\n", get_using_sysemu());
-	return 0;
-}
-
-static int sysemu_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, sysemu_proc_show, NULL);
-}
-
-static ssize_t sysemu_proc_write(struct file *file, const char __user *buf,
-				 size_t count, loff_t *pos)
-{
-	char tmp[2];
-
-	if (copy_from_user(tmp, buf, 1))
-		return -EFAULT;
-
-	if (tmp[0] >= '0' && tmp[0] <= '2')
-		set_using_sysemu(tmp[0] - '0');
-	/* We use the first char, but pretend to write everything */
-	return count;
-}
-
-static const struct proc_ops sysemu_proc_ops = {
-	.proc_open	= sysemu_proc_open,
-	.proc_read	= seq_read,
-	.proc_lseek	= seq_lseek,
-	.proc_release	= single_release,
-	.proc_write	= sysemu_proc_write,
-};
-
-int __init make_proc_sysemu(void)
-{
-	struct proc_dir_entry *ent;
-	if (!sysemu_supported)
-		return 0;
-
-	ent = proc_create("sysemu", 0600, NULL, &sysemu_proc_ops);
-
-	if (ent == NULL)
-	{
-		printk(KERN_WARNING "Failed to register /proc/sysemu\n");
-		return 0;
-	}
-
-	return 0;
-}
-
-late_initcall(make_proc_sysemu);
-
 int singlestepping(void * t)
 {
 	struct task_struct *task = t ? t : current;
 
-	if (!(task->ptrace & PT_DTRACE))
-		return 0;
-
-	if (task->thread.singlestep_syscall)
-		return 1;
-
-	return 2;
+	return !!(task->ptrace & PT_DTRACE);
 }
 
 /*
diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c
index b425f47bddbb..dd56b6a80616 100644
--- a/arch/um/kernel/ptrace.c
+++ b/arch/um/kernel/ptrace.c
@@ -13,7 +13,6 @@
 void user_enable_single_step(struct task_struct *child)
 {
 	child->ptrace |= PT_DTRACE;
-	child->thread.singlestep_syscall = 0;
 
 #ifdef SUBARCH_SET_SINGLESTEPPING
 	SUBARCH_SET_SINGLESTEPPING(child, 1);
@@ -23,7 +22,6 @@ void user_enable_single_step(struct task_struct *child)
 void user_disable_single_step(struct task_struct *child)
 {
 	child->ptrace &= ~PT_DTRACE;
-	child->thread.singlestep_syscall = 0;
 
 #ifdef SUBARCH_SET_SINGLESTEPPING
 	SUBARCH_SET_SINGLESTEPPING(child, 0);
diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c
index 88cd9b5c1b74..87c9c625f29b 100644
--- a/arch/um/kernel/signal.c
+++ b/arch/um/kernel/signal.c
@@ -120,18 +120,6 @@ void do_signal(struct pt_regs *regs)
 		}
 	}
 
-	/*
-	 * This closes a way to execute a system call on the host.  If
-	 * you set a breakpoint on a system call instruction and singlestep
-	 * from it, the tracing thread used to PTRACE_SINGLESTEP the process
-	 * rather than PTRACE_SYSCALL it, allowing the system call to execute
-	 * on the host.  The tracing thread will check this flag and
-	 * PTRACE_SYSCALL if necessary.
-	 */
-	if (current->ptrace & PT_DTRACE)
-		current->thread.singlestep_syscall =
-			is_syscall(PT_REGS_IP(&current->thread.regs));
-
 	/*
 	 * if there's no signal to deliver, we just put the saved sigmask
 	 * back
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index 8c524e27571c..d30c96b599ac 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -176,48 +176,11 @@ static void handle_segv(int pid, struct uml_pt_regs *regs, unsigned long *aux_fp
 	segv(regs->faultinfo, 0, 1, NULL);
 }
 
-/*
- * To use the same value of using_sysemu as the caller, ask it that value
- * (in local_using_sysemu
- */
-static void handle_trap(int pid, struct uml_pt_regs *regs,
-			int local_using_sysemu)
+static void handle_trap(int pid, struct uml_pt_regs *regs)
 {
-	int err, status;
-
 	if ((UPT_IP(regs) >= STUB_START) && (UPT_IP(regs) < STUB_END))
 		fatal_sigsegv();
 
-	if (!local_using_sysemu)
-	{
-		err = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET,
-			     __NR_getpid);
-		if (err < 0) {
-			printk(UM_KERN_ERR "%s - nullifying syscall failed, errno = %d\n",
-			       __func__, errno);
-			fatal_sigsegv();
-		}
-
-		err = ptrace(PTRACE_SYSCALL, pid, 0, 0);
-		if (err < 0) {
-			printk(UM_KERN_ERR "%s - continuing to end of syscall failed, errno = %d\n",
-			       __func__, errno);
-			fatal_sigsegv();
-		}
-
-		CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL));
-		if ((err < 0) || !WIFSTOPPED(status) ||
-		    (WSTOPSIG(status) != SIGTRAP + 0x80)) {
-			err = ptrace_dump_regs(pid);
-			if (err)
-				printk(UM_KERN_ERR "Failed to get registers from process, errno = %d\n",
-				       -err);
-			printk(UM_KERN_ERR "%s - failed to wait at end of syscall, errno = %d, status = %d\n",
-			       __func__, errno, status);
-			fatal_sigsegv();
-		}
-	}
-
 	handle_syscall(regs);
 }
 
@@ -354,7 +317,7 @@ int start_userspace(unsigned long stub_stack)
 		goto out_kill;
 	}
 
-	if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL,
+	if (ptrace(PTRACE_SETOPTIONS, pid, NULL,
 		   (void *) PTRACE_O_TRACESYSGOOD) < 0) {
 		err = -errno;
 		printk(UM_KERN_ERR "%s : PTRACE_OLDSETOPTIONS failed, errno = %d\n",
@@ -379,8 +342,6 @@ int start_userspace(unsigned long stub_stack)
 void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
 {
 	int err, status, op, pid = userspace_pid[0];
-	/* To prevent races if using_sysemu changes under us.*/
-	int local_using_sysemu;
 	siginfo_t si;
 
 	/* Handle any immediate reschedules or signals */
@@ -410,11 +371,10 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
 			fatal_sigsegv();
 		}
 
-		/* Now we set local_using_sysemu to be used for one loop */
-		local_using_sysemu = get_using_sysemu();
-
-		op = SELECT_PTRACE_OPERATION(local_using_sysemu,
-					     singlestepping(NULL));
+		if (singlestepping(NULL))
+			op = PTRACE_SYSEMU_SINGLESTEP;
+		else
+			op = PTRACE_SYSEMU;
 
 		if (ptrace(op, pid, 0, 0)) {
 			printk(UM_KERN_ERR "%s - ptrace continue failed, op = %d, errno = %d\n",
@@ -473,7 +433,7 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
 				else handle_segv(pid, regs, aux_fp_regs);
 				break;
 			case SIGTRAP + 0x80:
-			        handle_trap(pid, regs, local_using_sysemu);
+				handle_trap(pid, regs);
 				break;
 			case SIGTRAP:
 				relay_signal(SIGTRAP, (struct siginfo *)&si, regs);
@@ -596,7 +556,7 @@ int copy_context_skas0(unsigned long new_stack, int pid)
 		goto out_kill;
 	}
 
-	if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL,
+	if (ptrace(PTRACE_SETOPTIONS, pid, NULL,
 		   (void *)PTRACE_O_TRACESYSGOOD) < 0) {
 		err = -errno;
 		printk(UM_KERN_ERR "%s : PTRACE_OLDSETOPTIONS failed, errno = %d\n",
diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c
index f79dc338279e..4f8fbe51fad6 100644
--- a/arch/um/os-Linux/start_up.c
+++ b/arch/um/os-Linux/start_up.c
@@ -143,71 +143,16 @@ static int stop_ptraced_child(int pid, int exitcode, int mustexit)
 	return ret;
 }
 
-/* Changed only during early boot */
-static int force_sysemu_disabled = 0;
-
-static int __init nosysemu_cmd_param(char *str, int* add)
-{
-	force_sysemu_disabled = 1;
-	return 0;
-}
-
-__uml_setup("nosysemu", nosysemu_cmd_param,
-"nosysemu\n"
-"    Turns off syscall emulation patch for ptrace (SYSEMU).\n"
-"    SYSEMU is a performance-patch introduced by Laurent Vivier. It changes\n"
-"    behaviour of ptrace() and helps reduce host context switch rates.\n"
-"    To make it work, you need a kernel patch for your host, too.\n"
-"    See http://perso.wanadoo.fr/laurent.vivier/UML/ for further \n"
-"    information.\n\n");
-
 static void __init check_sysemu(void)
 {
-	unsigned long regs[MAX_REG_NR];
 	int pid, n, status, count=0;
 
-	os_info("Checking syscall emulation patch for ptrace...");
-	sysemu_supported = 0;
-	pid = start_ptraced_child();
-
-	if (ptrace(PTRACE_SYSEMU, pid, 0, 0) < 0)
-		goto fail;
-
-	CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED));
-	if (n < 0)
-		fatal_perror("check_sysemu : wait failed");
-	if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP))
-		fatal("check_sysemu : expected SIGTRAP, got status = %d\n",
-		      status);
-
-	if (ptrace(PTRACE_GETREGS, pid, 0, regs) < 0)
-		fatal_perror("check_sysemu : PTRACE_GETREGS failed");
-	if (PT_SYSCALL_NR(regs) != __NR_getpid) {
-		non_fatal("check_sysemu got system call number %d, "
-			  "expected %d...", PT_SYSCALL_NR(regs), __NR_getpid);
-		goto fail;
-	}
-
-	n = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_RET_OFFSET, os_getpid());
-	if (n < 0) {
-		non_fatal("check_sysemu : failed to modify system call "
-			  "return");
-		goto fail;
-	}
-
-	if (stop_ptraced_child(pid, 0, 0) < 0)
-		goto fail_stopped;
-
-	sysemu_supported = 1;
-	os_info("OK\n");
-	set_using_sysemu(!force_sysemu_disabled);
-
-	os_info("Checking advanced syscall emulation patch for ptrace...");
+	os_info("Checking syscall emulation for ptrace...");
 	pid = start_ptraced_child();
 
-	if ((ptrace(PTRACE_OLDSETOPTIONS, pid, 0,
+	if ((ptrace(PTRACE_SETOPTIONS, pid, 0,
 		   (void *) PTRACE_O_TRACESYSGOOD) < 0))
-		fatal_perror("check_sysemu: PTRACE_OLDSETOPTIONS failed");
+		fatal_perror("check_sysemu: PTRACE_SETOPTIONS failed");
 
 	while (1) {
 		count++;
@@ -243,17 +188,14 @@ static void __init check_sysemu(void)
 	if (stop_ptraced_child(pid, 0, 0) < 0)
 		goto fail_stopped;
 
-	sysemu_supported = 2;
 	os_info("OK\n");
 
-	if (!force_sysemu_disabled)
-		set_using_sysemu(sysemu_supported);
 	return;
 
 fail:
 	stop_ptraced_child(pid, 1, 0);
 fail_stopped:
-	non_fatal("missing\n");
+	fatal("missing\n");
 }
 
 static void __init check_ptrace(void)
@@ -263,9 +205,9 @@ static void __init check_ptrace(void)
 	os_info("Checking that ptrace can change system call numbers...");
 	pid = start_ptraced_child();
 
-	if ((ptrace(PTRACE_OLDSETOPTIONS, pid, 0,
+	if ((ptrace(PTRACE_SETOPTIONS, pid, 0,
 		   (void *) PTRACE_O_TRACESYSGOOD) < 0))
-		fatal_perror("check_ptrace: PTRACE_OLDSETOPTIONS failed");
+		fatal_perror("check_ptrace: PTRACE_SETOPTIONS failed");
 
 	while (1) {
 		if (ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0)
diff --git a/arch/x86/um/ptrace_32.c b/arch/x86/um/ptrace_32.c
index 2497bac56066..0c3439aeb2d2 100644
--- a/arch/x86/um/ptrace_32.c
+++ b/arch/x86/um/ptrace_32.c
@@ -24,30 +24,6 @@ void arch_switch_to(struct task_struct *to)
 		printk(KERN_WARNING "arch_switch_tls failed, errno = EINVAL\n");
 }
 
-int is_syscall(unsigned long addr)
-{
-	unsigned short instr;
-	int n;
-
-	n = copy_from_user(&instr, (void __user *) addr, sizeof(instr));
-	if (n) {
-		/* access_process_vm() grants access to vsyscall and stub,
-		 * while copy_from_user doesn't. Maybe access_process_vm is
-		 * slow, but that doesn't matter, since it will be called only
-		 * in case of singlestepping, if copy_from_user failed.
-		 */
-		n = access_process_vm(current, addr, &instr, sizeof(instr),
-				FOLL_FORCE);
-		if (n != sizeof(instr)) {
-			printk(KERN_ERR "is_syscall : failed to read "
-			       "instruction from 0x%lx\n", addr);
-			return 1;
-		}
-	}
-	/* int 0x80 or sysenter */
-	return (instr == 0x80cd) || (instr == 0x340f);
-}
-
 /* determines which flags the user has access to. */
 /* 1 = access 0 = no access */
 #define FLAG_MASK 0x00044dd5
diff --git a/arch/x86/um/ptrace_64.c b/arch/x86/um/ptrace_64.c
index 1401899dee9b..727f3ef3a76b 100644
--- a/arch/x86/um/ptrace_64.c
+++ b/arch/x86/um/ptrace_64.c
@@ -187,32 +187,6 @@ int peek_user(struct task_struct *child, long addr, long data)
 	return put_user(tmp, (unsigned long *) data);
 }
 
-/* XXX Mostly copied from sys-i386 */
-int is_syscall(unsigned long addr)
-{
-	unsigned short instr;
-	int n;
-
-	n = copy_from_user(&instr, (void __user *) addr, sizeof(instr));
-	if (n) {
-		/*
-		 * access_process_vm() grants access to vsyscall and stub,
-		 * while copy_from_user doesn't. Maybe access_process_vm is
-		 * slow, but that doesn't matter, since it will be called only
-		 * in case of singlestepping, if copy_from_user failed.
-		 */
-		n = access_process_vm(current, addr, &instr, sizeof(instr),
-				FOLL_FORCE);
-		if (n != sizeof(instr)) {
-			printk("is_syscall : failed to read instruction from "
-			       "0x%lx\n", addr);
-			return 1;
-		}
-	}
-	/* sysenter */
-	return instr == 0x050f;
-}
-
 static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
 {
 	int err, n, cpu = ((struct thread_info *) child->stack)->cpu;
diff --git a/arch/x86/um/shared/sysdep/ptrace_32.h b/arch/x86/um/shared/sysdep/ptrace_32.h
index db8478a83a09..0c4989842fbe 100644
--- a/arch/x86/um/shared/sysdep/ptrace_32.h
+++ b/arch/x86/um/shared/sysdep/ptrace_32.h
@@ -8,10 +8,6 @@
 
 #define MAX_FP_NR HOST_FPX_SIZE
 
-void set_using_sysemu(int value);
-int get_using_sysemu(void);
-extern int sysemu_supported;
-
 #define UPT_SYSCALL_ARG1(r) UPT_BX(r)
 #define UPT_SYSCALL_ARG2(r) UPT_CX(r)
 #define UPT_SYSCALL_ARG3(r) UPT_DX(r)
-- 
2.29.2
_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um
^ permalink raw reply related	[flat|nested] 30+ messages in thread
- * Re: [PATCH 03/27] um: Drop support for hosts without SYSEMU_SINGLESTEP support
  2021-03-03 15:54 ` [PATCH 03/27] um: Drop support for hosts without SYSEMU_SINGLESTEP support Benjamin Berg
@ 2021-06-19 20:17   ` Richard Weinberger
  2021-06-20 12:05     ` Benjamin Berg
  0 siblings, 1 reply; 30+ messages in thread
From: Richard Weinberger @ 2021-06-19 20:17 UTC (permalink / raw)
  To: Benjamin Berg; +Cc: linux-um
On Thu, Mar 4, 2021 at 12:03 AM Benjamin Berg <benjamin@sipsolutions.net> wrote:
>
> These features have existed since Linux 2.6.14 and can be considered
> widely available at this point.
[...]
> -#ifndef PTRACE_SYSEMU
> -#define PTRACE_SYSEMU 31
> -#endif
> -#ifndef PTRACE_SYSEMU_SINGLESTEP
> -#define PTRACE_SYSEMU_SINGLESTEP 32
> -#endif
Where did you test this? On my OpenSUSE 15.2 machine this change
causes a build failure.
arch/um/os-Linux/start_up.c: In function ‘check_sysemu’:
arch/um/os-Linux/start_up.c:153:14: error: ‘PTRACE_SYSEMU_SINGLESTEP’
undeclared (first use in this function); did you mean
‘PTRACE_SINGLESTEP’?
   if (ptrace(PTRACE_SYSEMU_SINGLESTEP, pid, 0, 0) < 0)
              ^~~~~~~~~~~~~~~~~~~~~~~~
              PTRACE_SINGLESTEP
I think we need to carefully include the right asm headers.
-- 
Thanks,
//richard
_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um
^ permalink raw reply	[flat|nested] 30+ messages in thread
- * Re: [PATCH 03/27] um: Drop support for hosts without SYSEMU_SINGLESTEP support
  2021-06-19 20:17   ` Richard Weinberger
@ 2021-06-20 12:05     ` Benjamin Berg
  0 siblings, 0 replies; 30+ messages in thread
From: Benjamin Berg @ 2021-06-20 12:05 UTC (permalink / raw)
  To: Richard Weinberger; +Cc: linux-um
[-- Attachment #1.1: Type: text/plain, Size: 1778 bytes --]
On Sat, 2021-06-19 at 22:17 +0200, Richard Weinberger wrote:
> On Thu, Mar 4, 2021 at 12:03 AM Benjamin Berg
> <benjamin@sipsolutions.net> wrote:
> > 
> > These features have existed since Linux 2.6.14 and can be considered
> > widely available at this point.
> 
> [...]
> 
> > -#ifndef PTRACE_SYSEMU
> > -#define PTRACE_SYSEMU 31
> > -#endif
> > -#ifndef PTRACE_SYSEMU_SINGLESTEP
> > -#define PTRACE_SYSEMU_SINGLESTEP 32
> > -#endif
> 
> Where did you test this? On my OpenSUSE 15.2 machine this change
> causes a build failure.
I have been testing this on Fedora, currently F34 (x86_64).
> arch/um/os-Linux/start_up.c: In function ‘check_sysemu’:
> arch/um/os-Linux/start_up.c:153:14: error: ‘PTRACE_SYSEMU_SINGLESTEP’
> undeclared (first use in this function); did you mean
> ‘PTRACE_SINGLESTEP’?
>    if (ptrace(PTRACE_SYSEMU_SINGLESTEP, pid, 0, 0) < 0)
>               ^~~~~~~~~~~~~~~~~~~~~~~~
>               PTRACE_SINGLESTEP
> 
> I think we need to carefully include the right asm headers.
Hmm, possibly. For me the definition is coming from the glibc headers
right now:
# 26 "arch/um/os-Linux/start_up.c" 2
# 1 "./arch/um/include/shared/ptrace_user.h" 1
# 9 "./arch/um/include/shared/ptrace_user.h"
# 1 "/usr/include/sys/ptrace.h" 1 3 4
# 26 "/usr/include/sys/ptrace.h" 3 4
sys/ptrace.h is in turn provided by glibc-headers-x86-2.33-
16.fc34.noarch.
Not sure what the right thing here is. One could include
"linux/ptrace.h", but those are #defines, while the glibc header uses
an enum. I think it'll work if one first includes the glibc header for
the function definition and only then the kernel header. But, that
seems odd to me right now, and I am not sure why.
Benjamin
[-- Attachment #1.2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
[-- Attachment #2: Type: text/plain, Size: 152 bytes --]
_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um
^ permalink raw reply	[flat|nested] 30+ messages in thread 
 
 
- * [PATCH 04/27] um: Drop NULL check from start_userspace
  2021-03-03 15:54 [PATCH 00/27] Implement SECCOMP based userland Benjamin Berg
                   ` (2 preceding siblings ...)
  2021-03-03 15:54 ` [PATCH 03/27] um: Drop support for hosts without SYSEMU_SINGLESTEP support Benjamin Berg
@ 2021-03-03 15:55 ` Benjamin Berg
  2021-03-03 15:55 ` [PATCH 05/27] um: Make errors to stop ptraced child fatal during startup Benjamin Berg
                   ` (22 subsequent siblings)
  26 siblings, 0 replies; 30+ messages in thread
From: Benjamin Berg @ 2021-03-03 15:55 UTC (permalink / raw)
  To: linux-um; +Cc: Benjamin Berg
start_userspace is only called from exactly one location, and the passed
pointer for the userspace process stack cannot be NULL.
Remove the check, without changing the control flow.
Signed-off-by: Benjamin Berg <benjamin@sipsolutions.net>
---
 arch/um/os-Linux/skas/process.c | 53 +++++++++++++++------------------
 1 file changed, 24 insertions(+), 29 deletions(-)
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index d30c96b599ac..1c31f833a7ed 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -188,7 +188,7 @@ extern char __syscall_stub_start[];
 
 /**
  * userspace_tramp() - userspace trampoline
- * @stack:	pointer to the new userspace stack page, can be NULL, if? FIXME:
+ * @stack:	pointer to the new userspace stack page
  *
  * The userspace trampoline is used to setup a new userspace process in start_userspace() after it was clone()'ed.
  * This function will run on a temporary stack page.
@@ -203,9 +203,13 @@ extern char __syscall_stub_start[];
  */
 static int userspace_tramp(void *stack)
 {
+	struct sigaction sa;
 	void *addr;
 	int fd;
 	unsigned long long offset;
+	unsigned long segv_handler = STUB_CODE +
+				     (unsigned long) stub_segv_handler -
+				     (unsigned long) __syscall_stub_start;
 
 	ptrace(PTRACE_TRACEME, 0, 0, 0);
 
@@ -221,34 +225,25 @@ static int userspace_tramp(void *stack)
 		exit(1);
 	}
 
-	if (stack != NULL) {
-		fd = phys_mapping(to_phys(stack), &offset);
-		addr = mmap((void *) STUB_DATA,
-			    UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE,
-			    MAP_FIXED | MAP_SHARED, fd, offset);
-		if (addr == MAP_FAILED) {
-			printk(UM_KERN_ERR "mapping segfault stack at 0x%lx failed, errno = %d\n",
-			       STUB_DATA, errno);
-			exit(1);
-		}
+	fd = phys_mapping(to_phys(stack), &offset);
+	addr = mmap((void *) STUB_DATA,
+		    UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE,
+		    MAP_FIXED | MAP_SHARED, fd, offset);
+	if (addr == MAP_FAILED) {
+		printk(UM_KERN_ERR "mapping segfault stack at 0x%lx failed, errno = %d\n",
+		       STUB_DATA, errno);
+		exit(1);
 	}
-	if (stack != NULL) {
-		struct sigaction sa;
-
-		unsigned long v = STUB_CODE +
-				  (unsigned long) stub_segv_handler -
-				  (unsigned long) __syscall_stub_start;
-
-		set_sigstack((void *) STUB_DATA, UM_KERN_PAGE_SIZE);
-		sigemptyset(&sa.sa_mask);
-		sa.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO;
-		sa.sa_sigaction = (void *) v;
-		sa.sa_restorer = NULL;
-		if (sigaction(SIGSEGV, &sa, NULL) < 0) {
-			printk(UM_KERN_ERR "%s - setting SIGSEGV handler failed - errno = %d\n",
-			       __func__, errno);
-			exit(1);
-		}
+
+	set_sigstack((void *) STUB_DATA, UM_KERN_PAGE_SIZE);
+	sigemptyset(&sa.sa_mask);
+	sa.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO;
+	sa.sa_sigaction = (void *) segv_handler;
+	sa.sa_restorer = NULL;
+	if (sigaction(SIGSEGV, &sa, NULL) < 0) {
+		printk(UM_KERN_ERR "%s - setting SIGSEGV handler failed - errno = %d\n",
+		       __func__, errno);
+		exit(1);
 	}
 
 	kill(os_getpid(), SIGSTOP);
@@ -260,7 +255,7 @@ int kill_userspace_mm[NR_CPUS];
 
 /**
  * start_userspace() - prepare a new userspace process
- * @stub_stack:	pointer to the stub stack. Can be NULL, if? FIXME:
+ * @stub_stack:	pointer to the stub stack.
  *
  * Setups a new temporary stack page that is used while userspace_tramp() runs
  * Clones the kernel process into a new userspace process, with FDs only.
-- 
2.29.2
_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um
^ permalink raw reply related	[flat|nested] 30+ messages in thread
- * [PATCH 05/27] um: Make errors to stop ptraced child fatal during startup
  2021-03-03 15:54 [PATCH 00/27] Implement SECCOMP based userland Benjamin Berg
                   ` (3 preceding siblings ...)
  2021-03-03 15:55 ` [PATCH 04/27] um: Drop NULL check from start_userspace Benjamin Berg
@ 2021-03-03 15:55 ` Benjamin Berg
  2021-03-03 15:55 ` [PATCH 06/27] um: Don't use vfprintf() for os_info() Benjamin Berg
                   ` (21 subsequent siblings)
  26 siblings, 0 replies; 30+ messages in thread
From: Benjamin Berg @ 2021-03-03 15:55 UTC (permalink / raw)
  To: linux-um; +Cc: Benjamin Berg
For the detection code to check whether SYSEMU_SINGLESTEP works
correctly we needed some error cases while stopping to be non-fatal.
However, at this point stop_ptraced_child must always succeed, and we
can therefore simplify it slightly to exit immediately on error.
Signed-off-by: Benjamin Berg <benjamin@sipsolutions.net>
---
 arch/um/os-Linux/start_up.c | 41 +++++++++++--------------------------
 1 file changed, 12 insertions(+), 29 deletions(-)
diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c
index 4f8fbe51fad6..8e6f8c4f3f62 100644
--- a/arch/um/os-Linux/start_up.c
+++ b/arch/um/os-Linux/start_up.c
@@ -112,35 +112,20 @@ static int start_ptraced_child(void)
 	return pid;
 }
 
-/* When testing for SYSEMU support, if it is one of the broken versions, we
- * must just avoid using sysemu, not panic, but only if SYSEMU features are
- * broken.
- * So only for SYSEMU features we test mustpanic, while normal host features
- * must work anyway!
- */
-static int stop_ptraced_child(int pid, int exitcode, int mustexit)
+static void stop_ptraced_child(int pid, int exitcode)
 {
-	int status, n, ret = 0;
+	int status, n;
+
+	if (ptrace(PTRACE_CONT, pid, 0, 0) < 0)
+		fatal_perror("stop_ptraced_child : ptrace failed");
 
-	if (ptrace(PTRACE_CONT, pid, 0, 0) < 0) {
-		perror("stop_ptraced_child : ptrace failed");
-		return -1;
-	}
 	CATCH_EINTR(n = waitpid(pid, &status, 0));
 	if (!WIFEXITED(status) || (WEXITSTATUS(status) != exitcode)) {
 		int exit_with = WEXITSTATUS(status);
-		if (exit_with == 2)
-			non_fatal("check_ptrace : child exited with status 2. "
-				  "\nDisabling SYSEMU support.\n");
-		non_fatal("check_ptrace : child exited with exitcode %d, while "
-			  "expecting %d; status 0x%x\n", exit_with,
-			  exitcode, status);
-		if (mustexit)
-			exit(1);
-		ret = -1;
+		fatal("stop_ptraced_child : child exited with exitcode %d, "
+		      "while expecting %d; status 0x%x\n", exit_with,
+		      exitcode, status);
 	}
-
-	return ret;
 }
 
 static void __init check_sysemu(void)
@@ -185,16 +170,14 @@ static void __init check_sysemu(void)
 			goto fail;
 		}
 	}
-	if (stop_ptraced_child(pid, 0, 0) < 0)
-		goto fail_stopped;
+	stop_ptraced_child(pid, 0);
 
 	os_info("OK\n");
 
 	return;
 
 fail:
-	stop_ptraced_child(pid, 1, 0);
-fail_stopped:
+	stop_ptraced_child(pid, 1);
 	fatal("missing\n");
 }
 
@@ -233,7 +216,7 @@ static void __init check_ptrace(void)
 			break;
 		}
 	}
-	stop_ptraced_child(pid, 0, 1);
+	stop_ptraced_child(pid, 0);
 	os_info("OK\n");
 	check_sysemu();
 }
@@ -280,7 +263,7 @@ void __init os_early_checks(void)
 	pid = start_ptraced_child();
 	if (init_registers(pid))
 		fatal("Failed to initialize default registers");
-	stop_ptraced_child(pid, 1, 1);
+	stop_ptraced_child(pid, 1);
 }
 
 int __init parse_iomem(char *str, int *add)
-- 
2.29.2
_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um
^ permalink raw reply related	[flat|nested] 30+ messages in thread
- * [PATCH 06/27] um: Don't use vfprintf() for os_info()
  2021-03-03 15:54 [PATCH 00/27] Implement SECCOMP based userland Benjamin Berg
                   ` (4 preceding siblings ...)
  2021-03-03 15:55 ` [PATCH 05/27] um: Make errors to stop ptraced child fatal during startup Benjamin Berg
@ 2021-03-03 15:55 ` Benjamin Berg
  2021-03-03 15:55 ` [PATCH 07/27] um: Do not use printk in SIGWINCH helper thread Benjamin Berg
                   ` (20 subsequent siblings)
  26 siblings, 0 replies; 30+ messages in thread
From: Benjamin Berg @ 2021-03-03 15:55 UTC (permalink / raw)
  To: linux-um; +Cc: Benjamin Berg
The threads allocated inside the kernel have only a single page of
stack. Unfortunately, the vfprintf function in standard glibc may use
too much stack-space, overflowing it.
To make os_info safe to be used by helper threads, format the message
with the kernel vscnprintf function into a smallish on-stack buffer and
write that buffer out to stderr.
Signed-off-by: Benjamin Berg <benjamin@sipsolutions.net>
---
 arch/um/os-Linux/util.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)
diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c
index 07327425d06e..56d9589e1cd1 100644
--- a/arch/um/os-Linux/util.c
+++ b/arch/um/os-Linux/util.c
@@ -166,23 +166,38 @@ __uml_setup("quiet", quiet_cmd_param,
 "quiet\n"
 "    Turns off information messages during boot.\n\n");
 
+/*
+ * The os_info/os_warn functions will be called by helper threads. These
+ * have a very limited stack size and using the libc formatting functions
+ * may overflow the stack.
+ * So pull in the kernel vscnprintf and use that instead with a fixed
+ * on-stack buffer.
+ */
+int vscnprintf(char *buf, size_t size, const char *fmt, va_list args);
+
 void os_info(const char *fmt, ...)
 {
+	char buf[256];
 	va_list list;
+	int len;
 
 	if (quiet_info)
 		return;
 
 	va_start(list, fmt);
-	vfprintf(stderr, fmt, list);
+	len = vscnprintf(buf, sizeof(buf), fmt, list);
+	fwrite(buf, len, 1, stderr);
 	va_end(list);
 }
 
 void os_warn(const char *fmt, ...)
 {
+	char buf[256];
 	va_list list;
+	int len;
 
 	va_start(list, fmt);
-	vfprintf(stderr, fmt, list);
+	len = vscnprintf(buf, sizeof(buf), fmt, list);
+	fwrite(buf, len, 1, stderr);
 	va_end(list);
 }
-- 
2.29.2
_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um
^ permalink raw reply related	[flat|nested] 30+ messages in thread
- * [PATCH 07/27] um: Do not use printk in SIGWINCH helper thread
  2021-03-03 15:54 [PATCH 00/27] Implement SECCOMP based userland Benjamin Berg
                   ` (5 preceding siblings ...)
  2021-03-03 15:55 ` [PATCH 06/27] um: Don't use vfprintf() for os_info() Benjamin Berg
@ 2021-03-03 15:55 ` Benjamin Berg
  2021-03-03 15:55 ` [PATCH 08/27] um: Reap winch thread if it fails Benjamin Berg
                   ` (19 subsequent siblings)
  26 siblings, 0 replies; 30+ messages in thread
From: Benjamin Berg @ 2021-03-03 15:55 UTC (permalink / raw)
  To: linux-um; +Cc: Benjamin Berg
The thread is running outside of the UML kernel scope and is a helper.
As such, printk cannot work and os_info must be used instead.
Signed-off-by: Benjamin Berg <benjamin@sipsolutions.net>
---
 arch/um/drivers/chan_user.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)
diff --git a/arch/um/drivers/chan_user.c b/arch/um/drivers/chan_user.c
index d8845d4aac6a..9d87c9998741 100644
--- a/arch/um/drivers/chan_user.c
+++ b/arch/um/drivers/chan_user.c
@@ -153,8 +153,8 @@ static int winch_thread(void *arg)
 	pipe_fd = data->pipe_fd;
 	count = write(pipe_fd, &c, sizeof(c));
 	if (count != sizeof(c))
-		printk(UM_KERN_ERR "winch_thread : failed to write "
-		       "synchronization byte, err = %d\n", -count);
+		os_info("winch_thread : failed to write synchronization byte, err = %d\n",
+			-count);
 
 	/*
 	 * We are not using SIG_IGN on purpose, so don't fix it as I thought to
@@ -166,28 +166,28 @@ static int winch_thread(void *arg)
 	sigfillset(&sigs);
 	/* Block all signals possible. */
 	if (sigprocmask(SIG_SETMASK, &sigs, NULL) < 0) {
-		printk(UM_KERN_ERR "winch_thread : sigprocmask failed, "
-		       "errno = %d\n", errno);
+		os_info("winch_thread : sigprocmask failed, errno = %d\n",
+			errno);
 		exit(1);
 	}
 	/* In sigsuspend(), block anything else than SIGWINCH. */
 	sigdelset(&sigs, SIGWINCH);
 
 	if (setsid() < 0) {
-		printk(UM_KERN_ERR "winch_thread : setsid failed, errno = %d\n",
+		os_info("winch_thread : setsid failed, errno = %d\n",
 		       errno);
 		exit(1);
 	}
 
 	if (ioctl(pty_fd, TIOCSCTTY, 0) < 0) {
-		printk(UM_KERN_ERR "winch_thread : TIOCSCTTY failed on "
-		       "fd %d err = %d\n", pty_fd, errno);
+		os_info("winch_thread : TIOCSCTTY failed on "
+			"fd %d err = %d\n", pty_fd, errno);
 		exit(1);
 	}
 
 	if (tcsetpgrp(pty_fd, os_getpid()) < 0) {
-		printk(UM_KERN_ERR "winch_thread : tcsetpgrp failed on "
-		       "fd %d err = %d\n", pty_fd, errno);
+		os_info("winch_thread : tcsetpgrp failed on fd %d err = %d\n",
+			pty_fd, errno);
 		exit(1);
 	}
 
@@ -199,8 +199,8 @@ static int winch_thread(void *arg)
 	 */
 	count = read(pipe_fd, &c, sizeof(c));
 	if (count != sizeof(c))
-		printk(UM_KERN_ERR "winch_thread : failed to read "
-		       "synchronization byte, err = %d\n", errno);
+		os_info("winch_thread : failed to read synchronization byte, err = %d\n",
+			errno);
 
 	while(1) {
 		/*
@@ -211,8 +211,8 @@ static int winch_thread(void *arg)
 
 		count = write(pipe_fd, &c, sizeof(c));
 		if (count != sizeof(c))
-			printk(UM_KERN_ERR "winch_thread : write failed, "
-			       "err = %d\n", errno);
+			os_info("winch_thread : write failed, err = %d\n",
+				errno);
 	}
 }
 
-- 
2.29.2
_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um
^ permalink raw reply related	[flat|nested] 30+ messages in thread
- * [PATCH 08/27] um: Reap winch thread if it fails
  2021-03-03 15:54 [PATCH 00/27] Implement SECCOMP based userland Benjamin Berg
                   ` (6 preceding siblings ...)
  2021-03-03 15:55 ` [PATCH 07/27] um: Do not use printk in SIGWINCH helper thread Benjamin Berg
@ 2021-03-03 15:55 ` Benjamin Berg
  2021-03-03 15:55 ` [PATCH 09/27] um: Do not use printk in userspace trampoline Benjamin Berg
                   ` (18 subsequent siblings)
  26 siblings, 0 replies; 30+ messages in thread
From: Benjamin Berg @ 2021-03-03 15:55 UTC (permalink / raw)
  To: linux-um; +Cc: Benjamin Berg
When the winch thread runs into an error condition, it would exit(1) and
never be reaped until shutdown time. Change this to write a command byte
which causes the driver to kill it, therefore reaping the child.
Signed-off-by: Benjamin Berg <benjamin@sipsolutions.net>
---
 arch/um/drivers/chan_user.c | 15 ++++++++++-----
 arch/um/drivers/line.c      | 13 ++++++++-----
 2 files changed, 18 insertions(+), 10 deletions(-)
diff --git a/arch/um/drivers/chan_user.c b/arch/um/drivers/chan_user.c
index 9d87c9998741..57547bdcb5c6 100644
--- a/arch/um/drivers/chan_user.c
+++ b/arch/um/drivers/chan_user.c
@@ -141,7 +141,7 @@ struct winch_data {
 	int pipe_fd;
 };
 
-static int winch_thread(void *arg)
+static __noreturn int winch_thread(void *arg)
 {
 	struct winch_data *data = arg;
 	sigset_t sigs;
@@ -168,7 +168,7 @@ static int winch_thread(void *arg)
 	if (sigprocmask(SIG_SETMASK, &sigs, NULL) < 0) {
 		os_info("winch_thread : sigprocmask failed, errno = %d\n",
 			errno);
-		exit(1);
+		goto wait_kill;
 	}
 	/* In sigsuspend(), block anything else than SIGWINCH. */
 	sigdelset(&sigs, SIGWINCH);
@@ -176,19 +176,19 @@ static int winch_thread(void *arg)
 	if (setsid() < 0) {
 		os_info("winch_thread : setsid failed, errno = %d\n",
 		       errno);
-		exit(1);
+		goto wait_kill;
 	}
 
 	if (ioctl(pty_fd, TIOCSCTTY, 0) < 0) {
 		os_info("winch_thread : TIOCSCTTY failed on "
 			"fd %d err = %d\n", pty_fd, errno);
-		exit(1);
+		goto wait_kill;
 	}
 
 	if (tcsetpgrp(pty_fd, os_getpid()) < 0) {
 		os_info("winch_thread : tcsetpgrp failed on fd %d err = %d\n",
 			pty_fd, errno);
-		exit(1);
+		goto wait_kill;
 	}
 
 	/*
@@ -214,6 +214,11 @@ static int winch_thread(void *arg)
 			os_info("winch_thread : write failed, err = %d\n",
 				errno);
 	}
+
+wait_kill:
+	c = 2;
+	count = write(pipe_fd, &c, sizeof(c));
+	while (1) { pause(); };
 }
 
 static int winch_tramp(int fd, struct tty_port *port, int *fd_out,
diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c
index 1c70a31e7c5b..b2800a085a3b 100644
--- a/arch/um/drivers/line.c
+++ b/arch/um/drivers/line.c
@@ -629,15 +629,18 @@ static irqreturn_t winch_interrupt(int irq, void *data)
 
 	if (fd != -1) {
 		err = generic_read(fd, &c, NULL);
-		if (err < 0) {
+		/* A read of 2 means the winch thread failed and has warned */
+		if (err < 0 || (err = 1 && c == 2)) {
 			if (err != -EAGAIN) {
 				winch->fd = -1;
 				list_del(&winch->list);
 				os_close_file(fd);
-				printk(KERN_ERR "winch_interrupt : "
-				       "read failed, errno = %d\n", -err);
-				printk(KERN_ERR "fd %d is losing SIGWINCH "
-				       "support\n", winch->tty_fd);
+				if (err < 0) {
+					printk(KERN_ERR "winch_interrupt : read failed, errno = %d\n",
+					       -err);
+					printk(KERN_ERR "fd %d is losing SIGWINCH support\n",
+					       winch->tty_fd);
+				}
 				INIT_WORK(&winch->work, __free_winch);
 				schedule_work(&winch->work);
 				return IRQ_HANDLED;
-- 
2.29.2
_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um
^ permalink raw reply related	[flat|nested] 30+ messages in thread
- * [PATCH 09/27] um: Do not use printk in userspace trampoline
  2021-03-03 15:54 [PATCH 00/27] Implement SECCOMP based userland Benjamin Berg
                   ` (7 preceding siblings ...)
  2021-03-03 15:55 ` [PATCH 08/27] um: Reap winch thread if it fails Benjamin Berg
@ 2021-03-03 15:55 ` Benjamin Berg
  2021-03-03 15:55 ` [PATCH 10/27] um: Always inline stub functions Benjamin Berg
                   ` (17 subsequent siblings)
  26 siblings, 0 replies; 30+ messages in thread
From: Benjamin Berg @ 2021-03-03 15:55 UTC (permalink / raw)
  To: linux-um; +Cc: Benjamin Berg
The trampoline is running in a cloned process. It is not safe to use
printk for error printing there.
Signed-off-by: Benjamin Berg <benjamin@sipsolutions.net>
---
 arch/um/os-Linux/skas/process.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index 1c31f833a7ed..15818009731d 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -220,8 +220,8 @@ static int userspace_tramp(void *stack)
 	addr = mmap64((void *) STUB_CODE, UM_KERN_PAGE_SIZE,
 		      PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset);
 	if (addr == MAP_FAILED) {
-		printk(UM_KERN_ERR "mapping mmap stub at 0x%lx failed, errno = %d\n",
-		       STUB_CODE, errno);
+		os_info("mapping mmap stub at 0x%lx failed, errno = %d\n",
+			STUB_CODE, errno);
 		exit(1);
 	}
 
@@ -230,8 +230,8 @@ static int userspace_tramp(void *stack)
 		    UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE,
 		    MAP_FIXED | MAP_SHARED, fd, offset);
 	if (addr == MAP_FAILED) {
-		printk(UM_KERN_ERR "mapping segfault stack at 0x%lx failed, errno = %d\n",
-		       STUB_DATA, errno);
+		os_info("mapping segfault stack at 0x%lx failed, errno = %d\n",
+			STUB_DATA, errno);
 		exit(1);
 	}
 
@@ -241,8 +241,8 @@ static int userspace_tramp(void *stack)
 	sa.sa_sigaction = (void *) segv_handler;
 	sa.sa_restorer = NULL;
 	if (sigaction(SIGSEGV, &sa, NULL) < 0) {
-		printk(UM_KERN_ERR "%s - setting SIGSEGV handler failed - errno = %d\n",
-		       __func__, errno);
+		os_info("%s - setting SIGSEGV handler failed - errno = %d\n",
+			__func__, errno);
 		exit(1);
 	}
 
-- 
2.29.2
_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um
^ permalink raw reply related	[flat|nested] 30+ messages in thread
- * [PATCH 10/27] um: Always inline stub functions
  2021-03-03 15:54 [PATCH 00/27] Implement SECCOMP based userland Benjamin Berg
                   ` (8 preceding siblings ...)
  2021-03-03 15:55 ` [PATCH 09/27] um: Do not use printk in userspace trampoline Benjamin Berg
@ 2021-03-03 15:55 ` Benjamin Berg
  2021-03-03 15:55 ` [PATCH 11/27] um: Rely on PTRACE_SETREGSET to set FS/GS base registers Benjamin Berg
                   ` (16 subsequent siblings)
  26 siblings, 0 replies; 30+ messages in thread
From: Benjamin Berg @ 2021-03-03 15:55 UTC (permalink / raw)
  To: linux-um; +Cc: Benjamin Berg
The stub executable page is remapped to a different location in the
userland process. As these functions may be used by the stub, they
really need to be always inlined rather than permitting the compiler to
emit a function.
Signed-off-by: Benjamin Berg <benjamin@sipsolutions.net>
---
 arch/x86/um/shared/sysdep/stub_32.h | 21 +++++++++++----------
 arch/x86/um/shared/sysdep/stub_64.h | 15 ++++++++-------
 2 files changed, 19 insertions(+), 17 deletions(-)
diff --git a/arch/x86/um/shared/sysdep/stub_32.h b/arch/x86/um/shared/sysdep/stub_32.h
index c3891c1ada26..02511d046404 100644
--- a/arch/x86/um/shared/sysdep/stub_32.h
+++ b/arch/x86/um/shared/sysdep/stub_32.h
@@ -12,7 +12,7 @@
 #define STUB_MMAP_NR __NR_mmap2
 #define MMAP_OFFSET(o) ((o) >> UM_KERN_PAGE_SHIFT)
 
-static inline long stub_syscall0(long syscall)
+static __always_inline long stub_syscall0(long syscall)
 {
 	long ret;
 
@@ -21,7 +21,7 @@ static inline long stub_syscall0(long syscall)
 	return ret;
 }
 
-static inline long stub_syscall1(long syscall, long arg1)
+static __always_inline long stub_syscall1(long syscall, long arg1)
 {
 	long ret;
 
@@ -30,7 +30,7 @@ static inline long stub_syscall1(long syscall, long arg1)
 	return ret;
 }
 
-static inline long stub_syscall2(long syscall, long arg1, long arg2)
+static __always_inline long stub_syscall2(long syscall, long arg1, long arg2)
 {
 	long ret;
 
@@ -40,7 +40,8 @@ static inline long stub_syscall2(long syscall, long arg1, long arg2)
 	return ret;
 }
 
-static inline long stub_syscall3(long syscall, long arg1, long arg2, long arg3)
+static __always_inline long stub_syscall3(long syscall, long arg1, long arg2,
+					  long arg3)
 {
 	long ret;
 
@@ -50,8 +51,8 @@ static inline long stub_syscall3(long syscall, long arg1, long arg2, long arg3)
 	return ret;
 }
 
-static inline long stub_syscall4(long syscall, long arg1, long arg2, long arg3,
-				 long arg4)
+static __always_inline long stub_syscall4(long syscall, long arg1, long arg2,
+					  long arg3, long arg4)
 {
 	long ret;
 
@@ -61,8 +62,8 @@ static inline long stub_syscall4(long syscall, long arg1, long arg2, long arg3,
 	return ret;
 }
 
-static inline long stub_syscall5(long syscall, long arg1, long arg2, long arg3,
-				 long arg4, long arg5)
+static __always_inline long stub_syscall5(long syscall, long arg1, long arg2,
+					  long arg3, long arg4, long arg5)
 {
 	long ret;
 
@@ -72,12 +73,12 @@ static inline long stub_syscall5(long syscall, long arg1, long arg2, long arg3,
 	return ret;
 }
 
-static inline void trap_myself(void)
+static __always_inline void trap_myself(void)
 {
 	__asm("int3");
 }
 
-static void inline remap_stack_and_trap(void)
+static __always_inline void remap_stack_and_trap(void)
 {
 	__asm__ volatile (
 		"movl %%esp,%%ebx ;"
diff --git a/arch/x86/um/shared/sysdep/stub_64.h b/arch/x86/um/shared/sysdep/stub_64.h
index 6e2626b77a2e..871c770fc44b 100644
--- a/arch/x86/um/shared/sysdep/stub_64.h
+++ b/arch/x86/um/shared/sysdep/stub_64.h
@@ -15,7 +15,7 @@
 #define __syscall_clobber "r11","rcx","memory"
 #define __syscall "syscall"
 
-static inline long stub_syscall0(long syscall)
+static __always_inline long stub_syscall0(long syscall)
 {
 	long ret;
 
@@ -26,7 +26,7 @@ static inline long stub_syscall0(long syscall)
 	return ret;
 }
 
-static inline long stub_syscall2(long syscall, long arg1, long arg2)
+static __always_inline long stub_syscall2(long syscall, long arg1, long arg2)
 {
 	long ret;
 
@@ -37,7 +37,8 @@ static inline long stub_syscall2(long syscall, long arg1, long arg2)
 	return ret;
 }
 
-static inline long stub_syscall3(long syscall, long arg1, long arg2, long arg3)
+static __always_inline long stub_syscall3(long syscall, long arg1, long arg2,
+					  long arg3)
 {
 	long ret;
 
@@ -63,8 +64,8 @@ static inline long stub_syscall4(long syscall, long arg1, long arg2, long arg3,
 	return ret;
 }
 
-static inline long stub_syscall5(long syscall, long arg1, long arg2, long arg3,
-				 long arg4, long arg5)
+static __always_inline long stub_syscall5(long syscall, long arg1, long arg2,
+					  long arg3, long arg4, long arg5)
 {
 	long ret;
 
@@ -77,12 +78,12 @@ static inline long stub_syscall5(long syscall, long arg1, long arg2, long arg3,
 	return ret;
 }
 
-static inline void trap_myself(void)
+static __always_inline void trap_myself(void)
 {
 	__asm("int3");
 }
 
-static inline void remap_stack_and_trap(void)
+static __always_inline void remap_stack_and_trap(void)
 {
 	__asm__ volatile (
 		"movq %0,%%rax ;"
-- 
2.29.2
_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um
^ permalink raw reply related	[flat|nested] 30+ messages in thread
- * [PATCH 11/27] um: Rely on PTRACE_SETREGSET to set FS/GS base registers
  2021-03-03 15:54 [PATCH 00/27] Implement SECCOMP based userland Benjamin Berg
                   ` (9 preceding siblings ...)
  2021-03-03 15:55 ` [PATCH 10/27] um: Always inline stub functions Benjamin Berg
@ 2021-03-03 15:55 ` Benjamin Berg
  2021-03-03 15:55 ` [PATCH 12/27] um: Remove unused register save/restore functions Benjamin Berg
                   ` (15 subsequent siblings)
  26 siblings, 0 replies; 30+ messages in thread
From: Benjamin Berg @ 2021-03-03 15:55 UTC (permalink / raw)
  To: linux-um; +Cc: Benjamin Berg
These registers are saved/restored together with the other general
registers using ptrace. In arch_set_tls we then just need to set the
register and it will be synced back normally.
Most of this logic was introduced in commit f355559cf7845 ("[PATCH] uml:
x86_64 thread fixes"). However, at least today we can rely on ptrace to
restore the base registers for us. As such, only the part of the patch
that tracks the FS register for use as thread local storage is actually
needed.
Signed-off-by: Benjamin Berg <benjamin@sipsolutions.net>
---
 arch/um/include/shared/os.h    |  3 --
 arch/x86/um/asm/elf.h          |  4 +--
 arch/x86/um/asm/processor_64.h |  3 --
 arch/x86/um/os-Linux/Makefile  |  1 -
 arch/x86/um/os-Linux/prctl.c   | 12 -------
 arch/x86/um/syscalls_64.c      | 59 ++++++----------------------------
 arch/x86/um/tls_64.c           |  2 +-
 7 files changed, 13 insertions(+), 71 deletions(-)
 delete mode 100644 arch/x86/um/os-Linux/prctl.c
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 13d86f94cf0f..9a543aa614bb 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -325,9 +325,6 @@ extern void sigio_broken(int fd);
 extern int __add_sigio_fd(int fd);
 extern int __ignore_sigio_fd(int fd);
 
-/* prctl.c */
-extern int os_arch_prctl(int pid, int option, unsigned long *arg2);
-
 /* tty.c */
 extern int get_pty(void);
 
diff --git a/arch/x86/um/asm/elf.h b/arch/x86/um/asm/elf.h
index c907b20d4993..30eeb2e3404a 100644
--- a/arch/x86/um/asm/elf.h
+++ b/arch/x86/um/asm/elf.h
@@ -168,8 +168,8 @@ do {								\
 	(pr_reg)[18] = (_regs)->regs.gp[18];			\
 	(pr_reg)[19] = (_regs)->regs.gp[19];			\
 	(pr_reg)[20] = (_regs)->regs.gp[20];			\
-	(pr_reg)[21] = current->thread.arch.fs;			\
-	(pr_reg)[22] = 0;					\
+	(pr_reg)[21] = (_regs)->regs.gp[21];			\
+	(pr_reg)[22] = (_regs)->regs.gp[22];			\
 	(pr_reg)[23] = 0;					\
 	(pr_reg)[24] = 0;					\
 	(pr_reg)[25] = 0;					\
diff --git a/arch/x86/um/asm/processor_64.h b/arch/x86/um/asm/processor_64.h
index 1ef9c21877bc..f90159508936 100644
--- a/arch/x86/um/asm/processor_64.h
+++ b/arch/x86/um/asm/processor_64.h
@@ -10,13 +10,11 @@
 struct arch_thread {
         unsigned long debugregs[8];
         int debugregs_seq;
-        unsigned long fs;
         struct faultinfo faultinfo;
 };
 
 #define INIT_ARCH_THREAD { .debugregs  		= { [ 0 ... 7 ] = 0 }, \
 			   .debugregs_seq	= 0, \
-			   .fs			= 0, \
 			   .faultinfo		= { 0, 0, 0 } }
 
 #define STACKSLOTS_PER_LINE 4
@@ -28,7 +26,6 @@ static inline void arch_flush_thread(struct arch_thread *thread)
 static inline void arch_copy_thread(struct arch_thread *from,
                                     struct arch_thread *to)
 {
-	to->fs = from->fs;
 }
 
 #define current_sp() ({ void *sp; __asm__("movq %%rsp, %0" : "=r" (sp) : ); sp; })
diff --git a/arch/x86/um/os-Linux/Makefile b/arch/x86/um/os-Linux/Makefile
index 253bfb8cb702..2859bbf0f3db 100644
--- a/arch/x86/um/os-Linux/Makefile
+++ b/arch/x86/um/os-Linux/Makefile
@@ -6,7 +6,6 @@
 obj-y = registers.o task_size.o mcontext.o
 
 obj-$(CONFIG_X86_32) += tls.o
-obj-$(CONFIG_64BIT) += prctl.o
 
 USER_OBJS := $(obj-y)
 
diff --git a/arch/x86/um/os-Linux/prctl.c b/arch/x86/um/os-Linux/prctl.c
deleted file mode 100644
index 8431e87ac333..000000000000
--- a/arch/x86/um/os-Linux/prctl.c
+++ /dev/null
@@ -1,12 +0,0 @@
-/*
- * Copyright (C) 2007 Jeff Dike (jdike@{addtoit.com,linux.intel.com})
- * Licensed under the GPL
- */
-
-#include <sys/ptrace.h>
-#include <asm/ptrace.h>
-
-int os_arch_prctl(int pid, int option, unsigned long *arg2)
-{
-	return ptrace(PTRACE_ARCH_PRCTL, pid, (unsigned long) arg2, option);
-}
diff --git a/arch/x86/um/syscalls_64.c b/arch/x86/um/syscalls_64.c
index 58f51667e2e4..5e89a509d225 100644
--- a/arch/x86/um/syscalls_64.c
+++ b/arch/x86/um/syscalls_64.c
@@ -15,60 +15,22 @@
 long arch_prctl(struct task_struct *task, int option,
 		unsigned long __user *arg2)
 {
-	unsigned long *ptr = arg2, tmp;
-	long ret;
-	int pid = task->mm->context.id.u.pid;
-
-	/*
-	 * With ARCH_SET_FS (and ARCH_SET_GS is treated similarly to
-	 * be safe), we need to call arch_prctl on the host because
-	 * setting %fs may result in something else happening (like a
-	 * GDT or thread.fs being set instead).  So, we let the host
-	 * fiddle the registers and thread struct and restore the
-	 * registers afterwards.
-	 *
-	 * So, the saved registers are stored to the process (this
-	 * needed because a stub may have been the last thing to run),
-	 * arch_prctl is run on the host, then the registers are read
-	 * back.
-	 */
-	switch (option) {
-	case ARCH_SET_FS:
-	case ARCH_SET_GS:
-		ret = restore_registers(pid, ¤t->thread.regs.regs);
-		if (ret)
-			return ret;
-		break;
-	case ARCH_GET_FS:
-	case ARCH_GET_GS:
-		/*
-		 * With these two, we read to a local pointer and
-		 * put_user it to the userspace pointer that we were
-		 * given.  If addr isn't valid (because it hasn't been
-		 * faulted in or is just bogus), we want put_user to
-		 * fault it in (or return -EFAULT) instead of having
-		 * the host return -EFAULT.
-		 */
-		ptr = &tmp;
-	}
-
-	ret = os_arch_prctl(pid, option, ptr);
-	if (ret)
-		return ret;
+	long ret = -EINVAL;
 
 	switch (option) {
 	case ARCH_SET_FS:
-		current->thread.arch.fs = (unsigned long) ptr;
-		ret = save_registers(pid, ¤t->thread.regs.regs);
+		current->thread.regs.regs.gp[FS_BASE / sizeof(unsigned long)] = (unsigned long) arg2;
+		ret = 0;
 		break;
 	case ARCH_SET_GS:
-		ret = save_registers(pid, ¤t->thread.regs.regs);
+		current->thread.regs.regs.gp[GS_BASE / sizeof(unsigned long)] = (unsigned long) arg2;
+		ret = 0;
 		break;
 	case ARCH_GET_FS:
-		ret = put_user(tmp, arg2);
+		ret = put_user(current->thread.regs.regs.gp[FS_BASE / sizeof(unsigned long)], arg2);
 		break;
 	case ARCH_GET_GS:
-		ret = put_user(tmp, arg2);
+		ret = put_user(current->thread.regs.regs.gp[GS_BASE / sizeof(unsigned long)], arg2);
 		break;
 	}
 
@@ -82,8 +44,7 @@ SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
 
 void arch_switch_to(struct task_struct *to)
 {
-	if ((to->thread.arch.fs == 0) || (to->mm == NULL))
-		return;
-
-	arch_prctl(to, ARCH_SET_FS, (void __user *) to->thread.arch.fs);
+	/* Nothing needs to be done on x86_64.
+	 * The FS_BASE/GS_BASE registers are saved in the ptrace register set.
+	 */
 }
diff --git a/arch/x86/um/tls_64.c b/arch/x86/um/tls_64.c
index ebd3855d9b13..c51a613f6f5c 100644
--- a/arch/x86/um/tls_64.c
+++ b/arch/x86/um/tls_64.c
@@ -12,7 +12,7 @@ int arch_set_tls(struct task_struct *t, unsigned long tls)
 	 * If CLONE_SETTLS is set, we need to save the thread id
 	 * so it can be set during context switches.
 	 */
-	t->thread.arch.fs = tls;
+	t->thread.regs.regs.gp[FS_BASE / sizeof(unsigned long)] = tls;
 
 	return 0;
 }
-- 
2.29.2
_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um
^ permalink raw reply related	[flat|nested] 30+ messages in thread
- * [PATCH 12/27] um: Remove unused register save/restore functions
  2021-03-03 15:54 [PATCH 00/27] Implement SECCOMP based userland Benjamin Berg
                   ` (10 preceding siblings ...)
  2021-03-03 15:55 ` [PATCH 11/27] um: Rely on PTRACE_SETREGSET to set FS/GS base registers Benjamin Berg
@ 2021-03-03 15:55 ` Benjamin Berg
  2021-03-03 15:55 ` [PATCH 13/27] um: Mark 32bit syscall helpers as clobbering memory Benjamin Berg
                   ` (14 subsequent siblings)
  26 siblings, 0 replies; 30+ messages in thread
From: Benjamin Berg @ 2021-03-03 15:55 UTC (permalink / raw)
  To: linux-um; +Cc: Benjamin Berg
These functions were only used when calling PTRACE_ARCH_PRCTL, but this
code has been removed.
Signed-off-by: Benjamin Berg <benjamin@sipsolutions.net>
---
 arch/um/include/shared/registers.h |  2 --
 arch/um/os-Linux/registers.c       | 20 --------------------
 2 files changed, 22 deletions(-)
diff --git a/arch/um/include/shared/registers.h b/arch/um/include/shared/registers.h
index 0c50fa6e8a55..2c84e8bdb35d 100644
--- a/arch/um/include/shared/registers.h
+++ b/arch/um/include/shared/registers.h
@@ -15,8 +15,6 @@ extern int save_fp_registers(int pid, unsigned long *fp_regs);
 extern int restore_fp_registers(int pid, unsigned long *fp_regs);
 extern int save_fpx_registers(int pid, unsigned long *fp_regs);
 extern int restore_fpx_registers(int pid, unsigned long *fp_regs);
-extern int save_registers(int pid, struct uml_pt_regs *regs);
-extern int restore_registers(int pid, struct uml_pt_regs *regs);
 extern int init_registers(int pid);
 extern void get_safe_registers(unsigned long *regs, unsigned long *fp_regs);
 extern unsigned long get_thread_reg(int reg, jmp_buf *buf);
diff --git a/arch/um/os-Linux/registers.c b/arch/um/os-Linux/registers.c
index 2d9270508e15..52823368e15a 100644
--- a/arch/um/os-Linux/registers.c
+++ b/arch/um/os-Linux/registers.c
@@ -11,26 +11,6 @@
 #include <sysdep/ptrace_user.h>
 #include <registers.h>
 
-int save_registers(int pid, struct uml_pt_regs *regs)
-{
-	int err;
-
-	err = ptrace(PTRACE_GETREGS, pid, 0, regs->gp);
-	if (err < 0)
-		return -errno;
-	return 0;
-}
-
-int restore_registers(int pid, struct uml_pt_regs *regs)
-{
-	int err;
-
-	err = ptrace(PTRACE_SETREGS, pid, 0, regs->gp);
-	if (err < 0)
-		return -errno;
-	return 0;
-}
-
 /* This is set once at boot time and not changed thereafter */
 
 static unsigned long exec_regs[MAX_REG_NR];
-- 
2.29.2
_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um
^ permalink raw reply related	[flat|nested] 30+ messages in thread
- * [PATCH 13/27] um: Mark 32bit syscall helpers as clobbering memory
  2021-03-03 15:54 [PATCH 00/27] Implement SECCOMP based userland Benjamin Berg
                   ` (11 preceding siblings ...)
  2021-03-03 15:55 ` [PATCH 12/27] um: Remove unused register save/restore functions Benjamin Berg
@ 2021-03-03 15:55 ` Benjamin Berg
  2021-03-03 15:55 ` [PATCH 14/27] um: Create signal stack memory assignment in stub_data Benjamin Berg
                   ` (13 subsequent siblings)
  26 siblings, 0 replies; 30+ messages in thread
From: Benjamin Berg @ 2021-03-03 15:55 UTC (permalink / raw)
  To: linux-um; +Cc: Benjamin Berg
The 64bit helpers are marked as clobbering memory, but the 32bit ones
are not. Add the appropriate clobber to the 32bit helper routines so
that the compiler cannot perform invalid optimizations.
Signed-off-by: Benjamin Berg <benjamin@sipsolutions.net>
---
 arch/x86/um/shared/sysdep/stub_32.h | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)
diff --git a/arch/x86/um/shared/sysdep/stub_32.h b/arch/x86/um/shared/sysdep/stub_32.h
index 02511d046404..d7284dd4a850 100644
--- a/arch/x86/um/shared/sysdep/stub_32.h
+++ b/arch/x86/um/shared/sysdep/stub_32.h
@@ -16,7 +16,8 @@ static __always_inline long stub_syscall0(long syscall)
 {
 	long ret;
 
-	__asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall));
+	__asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall)
+			: "memory");
 
 	return ret;
 }
@@ -25,7 +26,8 @@ static __always_inline long stub_syscall1(long syscall, long arg1)
 {
 	long ret;
 
-	__asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1));
+	__asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1)
+			: "memory");
 
 	return ret;
 }
@@ -35,7 +37,8 @@ static __always_inline long stub_syscall2(long syscall, long arg1, long arg2)
 	long ret;
 
 	__asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1),
-			"c" (arg2));
+			"c" (arg2)
+			: "memory");
 
 	return ret;
 }
@@ -46,7 +49,8 @@ static __always_inline long stub_syscall3(long syscall, long arg1, long arg2,
 	long ret;
 
 	__asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1),
-			"c" (arg2), "d" (arg3));
+			"c" (arg2), "d" (arg3)
+			: "memory");
 
 	return ret;
 }
@@ -57,7 +61,8 @@ static __always_inline long stub_syscall4(long syscall, long arg1, long arg2,
 	long ret;
 
 	__asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1),
-			"c" (arg2), "d" (arg3), "S" (arg4));
+			"c" (arg2), "d" (arg3), "S" (arg4)
+			: "memory");
 
 	return ret;
 }
@@ -68,7 +73,8 @@ static __always_inline long stub_syscall5(long syscall, long arg1, long arg2,
 	long ret;
 
 	__asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1),
-			"c" (arg2), "d" (arg3), "S" (arg4), "D" (arg5));
+			"c" (arg2), "d" (arg3), "S" (arg4), "D" (arg5)
+			: "memory");
 
 	return ret;
 }
-- 
2.29.2
_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um
^ permalink raw reply related	[flat|nested] 30+ messages in thread
- * [PATCH 14/27] um: Create signal stack memory assignment in stub_data
  2021-03-03 15:54 [PATCH 00/27] Implement SECCOMP based userland Benjamin Berg
                   ` (12 preceding siblings ...)
  2021-03-03 15:55 ` [PATCH 13/27] um: Mark 32bit syscall helpers as clobbering memory Benjamin Berg
@ 2021-03-03 15:55 ` Benjamin Berg
  2021-03-03 15:55 ` [PATCH 15/27] um: Add generic stub_syscall6 function Benjamin Berg
                   ` (12 subsequent siblings)
  26 siblings, 0 replies; 30+ messages in thread
From: Benjamin Berg @ 2021-03-03 15:55 UTC (permalink / raw)
  To: linux-um; +Cc: Benjamin Berg
When we switch to using seccomp, we need both the signal stack and other
data (i.e. syscall information) to co-exist in the stub data. To
facilitate this, start by defining separate memory areas for the stack
and syscall data.
Only change the signal stack setup for now, as the syscall code will be
reworked later.
Signed-off-by: Benjamin Berg <benjamin@sipsolutions.net>
---
 arch/um/include/shared/skas/stub-data.h | 12 ++++++++++++
 arch/um/kernel/skas/clone.c             |  5 ++++-
 arch/um/os-Linux/skas/process.c         |  9 ++++++++-
 3 files changed, 24 insertions(+), 2 deletions(-)
diff --git a/arch/um/include/shared/skas/stub-data.h b/arch/um/include/shared/skas/stub-data.h
index 5e3ade3fb38b..2d5da12679ad 100644
--- a/arch/um/include/shared/skas/stub-data.h
+++ b/arch/um/include/shared/skas/stub-data.h
@@ -8,10 +8,22 @@
 #ifndef __STUB_DATA_H
 #define __STUB_DATA_H
 
+#include <linux/compiler_types.h>
+#include <as-layout.h>
+
 struct stub_data {
 	unsigned long offset;
 	int fd;
 	long parent_err, child_err;
+
+	/* 128 leaves enough room for additional fields in the struct. */
+	unsigned char syscall_data[UM_KERN_PAGE_SIZE - MINSIGSTKSZ - 128]
+		      __aligned(16);
+
+	/* Stack for our signal handlers and for calling into . */
+	unsigned char sigstack[MINSIGSTKSZ + 32] __aligned(16);
 };
 
+typedef char stub_data_sizecheck[sizeof(struct stub_data) <= UM_KERN_PAGE_SIZE ? 1 : -1] __always_unused;
+
 #endif
diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c
index 592cdb138441..3e2139a81475 100644
--- a/arch/um/kernel/skas/clone.c
+++ b/arch/um/kernel/skas/clone.c
@@ -28,8 +28,11 @@ stub_clone_handler(void)
 	struct stub_data *data = (void *) ((unsigned long)&stack & ~(UM_KERN_PAGE_SIZE - 1));
 	long err;
 
+	/* Use the syscall data as a temporary stack area. */
 	err = stub_syscall2(__NR_clone, CLONE_PARENT | CLONE_FILES | SIGCHLD,
-			    (unsigned long)data + UM_KERN_PAGE_SIZE / 2 - sizeof(void *));
+			    (unsigned long) data->syscall_data +
+					    sizeof(data->syscall_data) -
+					    sizeof(void *));
 	if (err) {
 		data->parent_err = err;
 		goto done;
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index 15818009731d..717cfe7400a1 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -204,6 +204,7 @@ extern char __syscall_stub_start[];
 static int userspace_tramp(void *stack)
 {
 	struct sigaction sa;
+	struct stub_data *data;
 	void *addr;
 	int fd;
 	unsigned long long offset;
@@ -234,8 +235,14 @@ static int userspace_tramp(void *stack)
 			STUB_DATA, errno);
 		exit(1);
 	}
+	data = (void *) addr;
+	/*
+	 * We need at least MINSIGSTKSZ for the kernel and glibc wants a bit more
+	 * But we cannot use BUILD_BUG_ON here.
+	 * BUILD_BUG_ON (sizeof(data->sigstack) >= MINSIGSTKSZ + sizeof(struct stack_t));
+	 */
 
-	set_sigstack((void *) STUB_DATA, UM_KERN_PAGE_SIZE);
+	set_sigstack((void *) &data->sigstack, sizeof(data->sigstack));
 	sigemptyset(&sa.sa_mask);
 	sa.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO;
 	sa.sa_sigaction = (void *) segv_handler;
-- 
2.29.2
_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um
^ permalink raw reply related	[flat|nested] 30+ messages in thread
- * [PATCH 15/27] um: Add generic stub_syscall6 function
  2021-03-03 15:54 [PATCH 00/27] Implement SECCOMP based userland Benjamin Berg
                   ` (13 preceding siblings ...)
  2021-03-03 15:55 ` [PATCH 14/27] um: Create signal stack memory assignment in stub_data Benjamin Berg
@ 2021-03-03 15:55 ` Benjamin Berg
  2021-03-03 15:55 ` [PATCH 16/27] um: Rework syscall handling Benjamin Berg
                   ` (11 subsequent siblings)
  26 siblings, 0 replies; 30+ messages in thread
From: Benjamin Berg @ 2021-03-03 15:55 UTC (permalink / raw)
  To: linux-um; +Cc: Benjamin Berg
This function will be used by the new syscall handling code.
Signed-off-by: Benjamin Berg <benjamin@sipsolutions.net>
---
 arch/x86/um/shared/sysdep/stub_32.h | 22 ++++++++++++++++++++++
 arch/x86/um/shared/sysdep/stub_64.h | 16 ++++++++++++++++
 2 files changed, 38 insertions(+)
diff --git a/arch/x86/um/shared/sysdep/stub_32.h b/arch/x86/um/shared/sysdep/stub_32.h
index d7284dd4a850..3fb8559fe994 100644
--- a/arch/x86/um/shared/sysdep/stub_32.h
+++ b/arch/x86/um/shared/sysdep/stub_32.h
@@ -79,6 +79,28 @@ static __always_inline long stub_syscall5(long syscall, long arg1, long arg2,
 	return ret;
 }
 
+static __always_inline long stub_syscall6(long syscall, long arg1, long arg2,
+					  long arg3, long arg4, long arg5,
+					  long arg6)
+{
+	struct syscall_args {
+	  int ebx, ebp;
+	} args = { arg1, arg6 };
+	long ret;
+
+	__asm__ volatile ("pushl %%ebp;"
+			"movl 0x4(%%ebx),%%ebp;"
+			"movl (%%ebx),%%ebx;"
+			"int $0x80;"
+			"popl %%ebp"
+			: "=a" (ret)
+			: "0" (syscall), "b" (&args),
+			"c" (arg2), "d" (arg3), "S" (arg4), "D" (arg5)
+			: "memory");
+
+	return ret;
+}
+
 static __always_inline void trap_myself(void)
 {
 	__asm("int3");
diff --git a/arch/x86/um/shared/sysdep/stub_64.h b/arch/x86/um/shared/sysdep/stub_64.h
index 871c770fc44b..c41ae0462c8f 100644
--- a/arch/x86/um/shared/sysdep/stub_64.h
+++ b/arch/x86/um/shared/sysdep/stub_64.h
@@ -78,6 +78,22 @@ static __always_inline long stub_syscall5(long syscall, long arg1, long arg2,
 	return ret;
 }
 
+static __always_inline long stub_syscall6(long syscall, long arg1, long arg2,
+					  long arg3, long arg4, long arg5,
+					  long arg6)
+{
+	long ret;
+
+	__asm__ volatile ("movq %5,%%r10 ; movq %6,%%r8 ; movq %7,%%r9 ; "
+		__syscall
+		: "=a" (ret)
+		: "0" (syscall), "D" (arg1), "S" (arg2), "d" (arg3),
+		  "g" (arg4), "g" (arg5), "g" (arg6)
+		: __syscall_clobber, "r10", "r8", "r9");
+
+	return ret;
+}
+
 static __always_inline void trap_myself(void)
 {
 	__asm("int3");
-- 
2.29.2
_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um
^ permalink raw reply related	[flat|nested] 30+ messages in thread
- * [PATCH 16/27] um: Rework syscall handling
  2021-03-03 15:54 [PATCH 00/27] Implement SECCOMP based userland Benjamin Berg
                   ` (14 preceding siblings ...)
  2021-03-03 15:55 ` [PATCH 15/27] um: Add generic stub_syscall6 function Benjamin Berg
@ 2021-03-03 15:55 ` Benjamin Berg
  2021-03-03 15:55 ` [PATCH 17/27] um: Store full CSGSFS and SS register from mcontext Benjamin Berg
                   ` (10 subsequent siblings)
  26 siblings, 0 replies; 30+ messages in thread
From: Benjamin Berg @ 2021-03-03 15:55 UTC (permalink / raw)
  To: linux-um; +Cc: Benjamin Berg
Rework syscall handling to be platform independent. Also create a clean
split between queueing of syscalls and flushing them out, removing the
need to keep state in the code that triggers the syscalls.
The code adds syscall_data_len to the global mm_id structure. This will
be used later to allow surrounding code to track whether syscalls still
need to run and if errors occurred.
The patch decreases the amount of memory available to queue syscalls, as
it prevents overlap between the signal stack and syscall data. This is
intentional, as such an overlap must not happen when using seccomp.
Signed-off-by: Benjamin Berg <benjamin@sipsolutions.net>
---
 arch/um/include/shared/os.h             |  24 ++-
 arch/um/include/shared/skas/mm_id.h     |   1 +
 arch/um/include/shared/skas/stub-data.h |  14 +-
 arch/um/include/shared/user.h           |   8 +
 arch/um/kernel/exec.c                   |  10 +-
 arch/um/kernel/skas/Makefile            |   4 +-
 arch/um/kernel/skas/clone.c             |   2 +-
 arch/um/kernel/skas/stub.c              |  52 ++++++
 arch/um/kernel/tlb.c                    |  42 ++---
 arch/um/os-Linux/skas/mem.c             | 239 +++++++++++++-----------
 arch/um/os-Linux/skas/process.c         |   4 +-
 arch/x86/um/Makefile                    |   2 +-
 arch/x86/um/ldt.c                       |  45 ++---
 arch/x86/um/shared/sysdep/stub.h        |   1 +
 arch/x86/um/stub_32.S                   |  56 ------
 arch/x86/um/stub_64.S                   |  50 -----
 16 files changed, 262 insertions(+), 292 deletions(-)
 create mode 100644 arch/um/kernel/skas/stub.c
 delete mode 100644 arch/x86/um/stub_32.S
 delete mode 100644 arch/x86/um/stub_64.S
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 9a543aa614bb..632c83d83c8d 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -268,19 +268,17 @@ extern long long os_persistent_clock_emulation(void);
 extern long long os_nsecs(void);
 
 /* skas/mem.c */
-extern long run_syscall_stub(struct mm_id * mm_idp,
-			     int syscall, unsigned long *args, long expected,
-			     void **addr, int done);
-extern long syscall_stub_data(struct mm_id * mm_idp,
-			      unsigned long *data, int data_count,
-			      void **addr, void **stub_addr);
-extern int map(struct mm_id * mm_idp, unsigned long virt,
-	       unsigned long len, int prot, int phys_fd,
-	       unsigned long long offset, int done, void **data);
-extern int unmap(struct mm_id * mm_idp, unsigned long addr, unsigned long len,
-		 int done, void **data);
-extern int protect(struct mm_id * mm_idp, unsigned long addr,
-		   unsigned long len, unsigned int prot, int done, void **data);
+int syscall_stub_flush(struct mm_id *mm_idp);
+struct stub_syscall *syscall_stub_alloc(struct mm_id *mm_idp,
+					unsigned long data_len,
+					unsigned long *data_addr);
+
+void map(struct mm_id *mm_idp, unsigned long virt,
+	 unsigned long len, int prot, int phys_fd,
+	 unsigned long long offset);
+void unmap(struct mm_id *mm_idp, unsigned long addr, unsigned long len);
+void protect(struct mm_id *mm_idp, unsigned long addr,
+	     unsigned long len, unsigned int prot);
 
 /* skas/process.c */
 extern int is_skas_winch(int pid, int fd, void *data);
diff --git a/arch/um/include/shared/skas/mm_id.h b/arch/um/include/shared/skas/mm_id.h
index e82e203f5f41..bcb951719b51 100644
--- a/arch/um/include/shared/skas/mm_id.h
+++ b/arch/um/include/shared/skas/mm_id.h
@@ -13,6 +13,7 @@ struct mm_id {
 	} u;
 	unsigned long stack;
 	int kill;
+	int syscall_data_len;
 };
 
 #endif
diff --git a/arch/um/include/shared/skas/stub-data.h b/arch/um/include/shared/skas/stub-data.h
index 2d5da12679ad..efa78bc359cb 100644
--- a/arch/um/include/shared/skas/stub-data.h
+++ b/arch/um/include/shared/skas/stub-data.h
@@ -11,11 +11,23 @@
 #include <linux/compiler_types.h>
 #include <as-layout.h>
 
+#define STUB_NEXT_SYSCALL(s) \
+	((struct stub_syscall *) (((unsigned long) s) + (s)->cmd_len))
+
+struct stub_syscall {
+	long syscall;
+	int cmd_len;
+	long expected_result;
+	long arg[6];
+	long data[];
+};
+
 struct stub_data {
 	unsigned long offset;
 	int fd;
-	long parent_err, child_err;
+	long err, child_err;
 
+	int syscall_data_len;
 	/* 128 leaves enough room for additional fields in the struct. */
 	unsigned char syscall_data[UM_KERN_PAGE_SIZE - MINSIGSTKSZ - 128]
 		      __aligned(16);
diff --git a/arch/um/include/shared/user.h b/arch/um/include/shared/user.h
index e793e4212f0a..619532ef92b5 100644
--- a/arch/um/include/shared/user.h
+++ b/arch/um/include/shared/user.h
@@ -40,11 +40,19 @@ extern void panic(const char *fmt, ...)
 #ifdef UML_CONFIG_PRINTK
 extern int printk(const char *fmt, ...)
 	__attribute__ ((format (printf, 1, 2)));
+extern void print_hex_dump(const char *level, const char *prefix_str,
+			   int prefix_type, int rowsize, int groupsize,
+			   const void *buf, size_t len, _Bool ascii);
 #else
 static inline int printk(const char *fmt, ...)
 {
 	return 0;
 }
+static inline void print_hex_dump(const char *level, const char *prefix_str,
+				  int prefix_type, int rowsize, int groupsize,
+				  const void *buf, size_t len, _Bool ascii)
+{
+}
 #endif
 
 extern int in_aton(char *str);
diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c
index cd05bf98265d..88e67323f1cf 100644
--- a/arch/um/kernel/exec.c
+++ b/arch/um/kernel/exec.c
@@ -21,15 +21,11 @@
 
 void flush_thread(void)
 {
-	void *data = NULL;
-	int ret;
-
 	arch_flush_thread(¤t->thread.arch);
 
-	ret = unmap(¤t->mm->context.id, 0, TASK_SIZE, 1, &data);
-	if (ret) {
-		printk(KERN_ERR "%s - clearing address space failed, err = %d\n",
-		       __func__, ret);
+	unmap(¤t->mm->context.id, 0, TASK_SIZE);
+	if (syscall_stub_flush(¤t->mm->context.id) < 0) {
+		printk(KERN_ERR "%s - clearing address space failed", __func__);
 		force_sig(SIGKILL);
 	}
 	get_safe_registers(current_pt_regs()->regs.gp,
diff --git a/arch/um/kernel/skas/Makefile b/arch/um/kernel/skas/Makefile
index f3d494a4fd9b..a863638cc1f0 100644
--- a/arch/um/kernel/skas/Makefile
+++ b/arch/um/kernel/skas/Makefile
@@ -3,14 +3,14 @@
 # Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 #
 
-obj-y := clone.o mmu.o process.o syscall.o uaccess.o
+obj-y := clone.o stub.o mmu.o process.o syscall.o uaccess.o
 
 # clone.o is in the stub, so it can't be built with profiling
 # GCC hardened also auto-enables -fpic, but we need %ebx so it can't work ->
 # disable it
 
 CFLAGS_clone.o := $(CFLAGS_NO_HARDENING)
-UNPROFILE_OBJS := clone.o
+UNPROFILE_OBJS := clone.o stub.o
 
 KCOV_INSTRUMENT := n
 
diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c
index 3e2139a81475..a680d80b3870 100644
--- a/arch/um/kernel/skas/clone.c
+++ b/arch/um/kernel/skas/clone.c
@@ -34,7 +34,7 @@ stub_clone_handler(void)
 					    sizeof(data->syscall_data) -
 					    sizeof(void *));
 	if (err) {
-		data->parent_err = err;
+		data->err = err;
 		goto done;
 	}
 
diff --git a/arch/um/kernel/skas/stub.c b/arch/um/kernel/skas/stub.c
new file mode 100644
index 000000000000..5d1bcc883866
--- /dev/null
+++ b/arch/um/kernel/skas/stub.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Benjamin Berg <benjamin@sipsolutions.net>
+ */
+
+#include <sysdep/stub.h>
+
+static __always_inline int
+syscall_handler(struct stub_data *d)
+{
+	struct stub_syscall *sc;
+	long ret;
+
+	sc = (void *) &d->syscall_data;
+	while ((unsigned long) sc - (unsigned long) d->syscall_data < d->syscall_data_len) {
+		ret = stub_syscall6(sc->syscall,
+				    sc->arg[0], sc->arg[1], sc->arg[2],
+				    sc->arg[3], sc->arg[4], sc->arg[5]);
+
+		/*
+		 * If there was an error, then set d->err and set
+		 * d->syscall_data_len to point to the failed syscall.
+		 */
+		if (ret != sc->expected_result) {
+			d->err = ret;
+			d->syscall_data_len = (unsigned long) sc - (unsigned long) d->syscall_data;
+
+			return -1;
+		}
+
+		sc = STUB_NEXT_SYSCALL(sc);
+	}
+	d->err = 0;
+	d->syscall_data_len = 0;
+
+	return 0;
+}
+
+void __section(".__syscall_stub")
+stub_syscall_handler(void)
+{
+	/*
+	 * NOTE: Putting this inside the inlined function will result in
+	 * incorrect optimizations with GCC 10.2.1.
+	 */
+	int stack;
+	struct stub_data *d = (void *) ((unsigned long)&stack & ~(UM_KERN_PAGE_SIZE - 1));
+
+	syscall_handler(d);
+
+	trap_myself();
+}
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index 3c709e6146dc..c15cac380fcd 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -70,21 +70,19 @@ static int do_ops(struct host_vm_change *hvc, int end,
 		switch (op->type) {
 		case MMAP:
 			if (hvc->userspace)
-				ret = map(&hvc->mm->context.id, op->u.mmap.addr,
-					  op->u.mmap.len, op->u.mmap.prot,
-					  op->u.mmap.fd,
-					  op->u.mmap.offset, finished,
-					  &hvc->data);
+				map(&hvc->mm->context.id, op->u.mmap.addr,
+				    op->u.mmap.len, op->u.mmap.prot,
+				    op->u.mmap.fd,
+				    op->u.mmap.offset);
 			else
 				map_memory(op->u.mmap.addr, op->u.mmap.offset,
 					   op->u.mmap.len, 1, 1, 1);
 			break;
 		case MUNMAP:
 			if (hvc->userspace)
-				ret = unmap(&hvc->mm->context.id,
-					    op->u.munmap.addr,
-					    op->u.munmap.len, finished,
-					    &hvc->data);
+				unmap(&hvc->mm->context.id,
+				      op->u.munmap.addr,
+				      op->u.munmap.len);
 			else
 				ret = os_unmap_memory(
 					(void *) op->u.munmap.addr,
@@ -93,11 +91,10 @@ static int do_ops(struct host_vm_change *hvc, int end,
 			break;
 		case MPROTECT:
 			if (hvc->userspace)
-				ret = protect(&hvc->mm->context.id,
-					      op->u.mprotect.addr,
-					      op->u.mprotect.len,
-					      op->u.mprotect.prot,
-					      finished, &hvc->data);
+				protect(&hvc->mm->context.id,
+					op->u.mprotect.addr,
+					op->u.mprotect.len,
+					op->u.mprotect.prot);
 			else
 				ret = os_protect_memory(
 					(void *) op->u.mprotect.addr,
@@ -112,6 +109,9 @@ static int do_ops(struct host_vm_change *hvc, int end,
 		}
 	}
 
+	if (hvc->userspace && finished)
+		ret = syscall_stub_flush(&hvc->mm->context.id);
+
 	if (ret == -ENOMEM)
 		report_enomem();
 
@@ -460,7 +460,6 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
 	pmd_t *pmd;
 	pte_t *pte;
 	struct mm_struct *mm = vma->vm_mm;
-	void *flush = NULL;
 	int r, w, x, prot, err = 0;
 	struct mm_id *mm_id;
 
@@ -503,14 +502,13 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
 			int fd;
 
 			fd = phys_mapping(pte_val(*pte) & PAGE_MASK, &offset);
-			err = map(mm_id, address, PAGE_SIZE, prot, fd, offset,
-				  1, &flush);
-		}
-		else err = unmap(mm_id, address, PAGE_SIZE, 1, &flush);
-	}
-	else if (pte_newprot(*pte))
-		err = protect(mm_id, address, PAGE_SIZE, prot, 1, &flush);
+			map(mm_id, address, PAGE_SIZE, prot, fd, offset);
+		} else
+			unmap(mm_id, address, PAGE_SIZE);
+	} else if (pte_newprot(*pte))
+		protect(mm_id, address, PAGE_SIZE, prot);
 
+	err = syscall_stub_flush(mm_id);
 	if (err) {
 		if (err == -ENOMEM)
 			report_enomem();
diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c
index 953fb10f3f93..b52d536d2d4d 100644
--- a/arch/um/os-Linux/skas/mem.c
+++ b/arch/um/os-Linux/skas/mem.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
+ * Copyright (C) 2021 Benjamin Berg <benjamin@sipsolutions.net>
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  */
 
@@ -18,11 +19,11 @@
 #include <sysdep/ptrace.h>
 #include <sysdep/stub.h>
 
-extern char batch_syscall_stub[], __syscall_stub_start[];
+extern char __syscall_stub_start[];
 
 extern void wait_stub_done(int pid);
 
-static inline unsigned long *check_init_stack(struct mm_id * mm_idp,
+static inline unsigned long *check_init_stack(struct mm_id *mm_idp,
 					      unsigned long *stack)
 {
 	if (stack == NULL) {
@@ -37,22 +38,22 @@ static unsigned long syscall_regs[MAX_REG_NR];
 static int __init init_syscall_regs(void)
 {
 	get_safe_registers(syscall_regs, NULL);
+
 	syscall_regs[REGS_IP_INDEX] = STUB_CODE +
-		((unsigned long) batch_syscall_stub -
+		((unsigned long) stub_syscall_handler -
 		 (unsigned long) __syscall_stub_start);
-	syscall_regs[REGS_SP_INDEX] = STUB_DATA;
+	syscall_regs[REGS_SP_INDEX] = STUB_DATA + UM_KERN_PAGE_SIZE -
+				      sizeof(void *);
 
 	return 0;
 }
 
 __initcall(init_syscall_regs);
 
-static inline long do_syscall_stub(struct mm_id * mm_idp, void **addr)
+static inline long do_syscall_stub(struct mm_id *mm_idp)
 {
+	struct stub_data *proc_data = (void *)mm_idp->stack;
 	int n, i;
-	long ret, offset;
-	unsigned long * data;
-	unsigned long * syscall;
 	int err, pid = mm_idp->u.pid;
 
 	n = ptrace_setregs(pid, syscall_regs);
@@ -64,6 +65,9 @@ static inline long do_syscall_stub(struct mm_id * mm_idp, void **addr)
 		      __func__, -n);
 	}
 
+	/* Inform process how much we have filled in. */
+	proc_data->syscall_data_len = mm_idp->syscall_data_len;
+
 	err = ptrace(PTRACE_CONT, pid, 0, 0);
 	if (err)
 		panic("Failed to continue stub, pid = %d, errno = %d\n", pid,
@@ -72,135 +76,148 @@ static inline long do_syscall_stub(struct mm_id * mm_idp, void **addr)
 	wait_stub_done(pid);
 
 	/*
-	 * When the stub stops, we find the following values on the
-	 * beginning of the stack:
-	 * (long )return_value
-	 * (long )offset to failed sycall-data (0, if no error)
+	 * proc_data->err will be non-zero if there was an (unexpected) error.
+	 * In that case, syscall_data_len points to the last executed syscall,
+	 * otherwise it will be zero (but we do not need to rely on that).
 	 */
-	ret = *((unsigned long *) mm_idp->stack);
-	offset = *((unsigned long *) mm_idp->stack + 1);
-	if (offset) {
-		data = (unsigned long *)(mm_idp->stack + offset - STUB_DATA);
-		printk(UM_KERN_ERR "%s : ret = %ld, offset = %ld, data = %p\n",
-		       __func__, ret, offset, data);
-		syscall = (unsigned long *)((unsigned long)data + data[0]);
-		printk(UM_KERN_ERR "%s: syscall %ld failed, return value = 0x%lx, expected return value = 0x%lx\n",
-		       __func__, syscall[0], ret, syscall[7]);
+	if (proc_data->err) {
+		struct stub_syscall *sc;
+
+		if (proc_data->syscall_data_len < 0 ||
+		    proc_data->syscall_data_len > (long) mm_idp->syscall_data_len - sizeof(*sc))
+			panic("Syscall data was corrupted by stub (len is: %d, expected maximum: %d)!",
+			      proc_data->syscall_data_len,
+			      mm_idp->syscall_data_len);
+
+		sc = (void *) (((unsigned long) &proc_data->syscall_data) +
+			       proc_data->syscall_data_len);
+
+		printk(UM_KERN_ERR "%s : length = %d, last offset = %d",
+		       __func__, mm_idp->syscall_data_len,
+		       proc_data->syscall_data_len);
+		printk(UM_KERN_ERR "%s : syscall %ld failed, return value = 0x%lx, expected return value = 0x%lx\n",
+		       __func__, sc->syscall, proc_data->err,
+		       sc->expected_result);
+
 		printk(UM_KERN_ERR "    syscall parameters: 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
-		       syscall[1], syscall[2], syscall[3],
-		       syscall[4], syscall[5], syscall[6]);
-		for (n = 1; n < data[0]/sizeof(long); n++) {
-			if (n == 1)
-				printk(UM_KERN_ERR "    additional syscall data:");
-			if (n % 4 == 1)
-				printk("\n" UM_KERN_ERR "      ");
-			printk("  0x%lx", data[n]);
+		       sc->arg[0], sc->arg[1], sc->arg[2],
+		       sc->arg[3], sc->arg[4], sc->arg[5]);
+
+		n = sc->cmd_len - sizeof(*sc);
+		if (n > 0) {
+			printk(UM_KERN_ERR "    syscall data 0x%lx + %d",
+			       STUB_DATA + ((unsigned long) (&sc->data) &
+					    (UM_KERN_PAGE_SIZE - 1)),
+			       n);
+			print_hex_dump(UM_KERN_ERR,
+				       "    syscall data: ", 0,
+				       16, 4, sc->data, n, 0);
 		}
-		if (n > 1)
-			printk("\n");
-	}
-	else ret = 0;
 
-	*addr = check_init_stack(mm_idp, NULL);
+		/* Store error code in case someone tries to add more syscalls */
+		mm_idp->syscall_data_len = proc_data->err;
+	} else {
+		mm_idp->syscall_data_len = 0;
+	}
 
-	return ret;
+	return mm_idp->syscall_data_len;
 }
 
-long run_syscall_stub(struct mm_id * mm_idp, int syscall,
-		      unsigned long *args, long expected, void **addr,
-		      int done)
+int syscall_stub_flush(struct mm_id *mm_idp)
 {
-	unsigned long *stack = check_init_stack(mm_idp, *addr);
-
-	*stack += sizeof(long);
-	stack += *stack / sizeof(long);
-
-	*stack++ = syscall;
-	*stack++ = args[0];
-	*stack++ = args[1];
-	*stack++ = args[2];
-	*stack++ = args[3];
-	*stack++ = args[4];
-	*stack++ = args[5];
-	*stack++ = expected;
-	*stack = 0;
-
-	if (!done && ((((unsigned long) stack) & ~UM_KERN_PAGE_MASK) <
-		     UM_KERN_PAGE_SIZE - 10 * sizeof(long))) {
-		*addr = stack;
+	int res;
+
+	if (mm_idp->syscall_data_len == 0)
 		return 0;
+
+	/* If an error happened already, report it and reset the state. */
+	if (mm_idp->syscall_data_len < 0) {
+		res = mm_idp->syscall_data_len;
+		mm_idp->syscall_data_len = 0;
+		return res;
 	}
 
-	return do_syscall_stub(mm_idp, addr);
+	res = do_syscall_stub(mm_idp);
+	mm_idp->syscall_data_len = 0;
+
+	return res;
 }
 
-long syscall_stub_data(struct mm_id * mm_idp,
-		       unsigned long *data, int data_count,
-		       void **addr, void **stub_addr)
+struct stub_syscall *syscall_stub_alloc(struct mm_id *mm_idp,
+					unsigned long data_len,
+					unsigned long *data_addr)
 {
-	unsigned long *stack;
-	int ret = 0;
-
-	/*
-	 * If *addr still is uninitialized, it *must* contain NULL.
-	 * Thus in this case do_syscall_stub correctly won't be called.
-	 */
-	if ((((unsigned long) *addr) & ~UM_KERN_PAGE_MASK) >=
-	   UM_KERN_PAGE_SIZE - (10 + data_count) * sizeof(long)) {
-		ret = do_syscall_stub(mm_idp, addr);
-		/* in case of error, don't overwrite data on stack */
-		if (ret)
-			return ret;
+	struct stub_syscall *sc;
+	struct stub_data *proc_data = (struct stub_data *) mm_idp->stack;
+	int len;
+
+	/* Align to sizeof(long) */
+	data_len = (data_len + sizeof(long) - 1) & ~(sizeof(long) - 1);
+	len = sizeof(struct stub_syscall) + data_len;
+
+	if (len > sizeof(proc_data->syscall_data))
+		panic("Syscall data too large to marshal!");
+
+	if (mm_idp->syscall_data_len > 0 &&
+	    mm_idp->syscall_data_len + len > sizeof(proc_data->syscall_data))
+		do_syscall_stub(mm_idp);
+
+	if (mm_idp->syscall_data_len < 0) {
+		/* Return dummy without changing the syscall_next_offset to
+		 * retain error state.
+		 */
+		sc = (void *) &proc_data->syscall_data;
+	} else {
+		sc = (void *) (((unsigned long) &proc_data->syscall_data) +
+			       mm_idp->syscall_data_len);
+		mm_idp->syscall_data_len += len;
 	}
+	memset(sc, 0, len);
+	sc->cmd_len = len;
 
-	stack = check_init_stack(mm_idp, *addr);
-	*addr = stack;
-
-	*stack = data_count * sizeof(long);
+	if (data_addr)
+		*data_addr = STUB_DATA +
+			     ((unsigned long) (&sc->data) &
+			      (UM_KERN_PAGE_SIZE - 1));
 
-	memcpy(stack + 1, data, data_count * sizeof(long));
-
-	*stub_addr = (void *)(((unsigned long)(stack + 1) &
-			       ~UM_KERN_PAGE_MASK) + STUB_DATA);
-
-	return 0;
+	return sc;
 }
 
-int map(struct mm_id * mm_idp, unsigned long virt, unsigned long len, int prot,
-	int phys_fd, unsigned long long offset, int done, void **data)
-{
-	int ret;
-	unsigned long args[] = { virt, len, prot,
-				 MAP_SHARED | MAP_FIXED, phys_fd,
-				 MMAP_OFFSET(offset) };
-
-	ret = run_syscall_stub(mm_idp, STUB_MMAP_NR, args, virt,
-			       data, done);
 
-	return ret;
+void map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, int prot,
+	int phys_fd, unsigned long long offset)
+{
+	struct stub_syscall *sc;
+
+	sc = syscall_stub_alloc(mm_idp, 0, NULL);
+	sc->syscall = STUB_MMAP_NR;
+	sc->expected_result = virt;
+	sc->arg[0] = virt;
+	sc->arg[1] = len;
+	sc->arg[2] = prot;
+	sc->arg[3] = MAP_SHARED | MAP_FIXED;
+	sc->arg[4] = phys_fd;
+	sc->arg[5] = MMAP_OFFSET(offset);
 }
 
-int unmap(struct mm_id * mm_idp, unsigned long addr, unsigned long len,
-	  int done, void **data)
+void unmap(struct mm_id *mm_idp, unsigned long addr, unsigned long len)
 {
-	int ret;
-	unsigned long args[] = { (unsigned long) addr, len, 0, 0, 0,
-				 0 };
+	struct stub_syscall *sc;
 
-	ret = run_syscall_stub(mm_idp, __NR_munmap, args, 0,
-			       data, done);
-
-	return ret;
+	sc = syscall_stub_alloc(mm_idp, 0, NULL);
+	sc->syscall = __NR_munmap;
+	sc->arg[0] = addr;
+	sc->arg[1] = len;
 }
 
-int protect(struct mm_id * mm_idp, unsigned long addr, unsigned long len,
-	    unsigned int prot, int done, void **data)
+void protect(struct mm_id *mm_idp, unsigned long addr, unsigned long len,
+	    unsigned int prot)
 {
-	int ret;
-	unsigned long args[] = { addr, len, prot, 0, 0, 0 };
-
-	ret = run_syscall_stub(mm_idp, __NR_mprotect, args, 0,
-			       data, done);
+	struct stub_syscall *sc;
 
-	return ret;
+	sc = syscall_stub_alloc(mm_idp, 0, NULL);
+	sc->syscall = __NR_mprotect;
+	sc->arg[0] = addr;
+	sc->arg[1] = len;
+	sc->arg[2] = prot;
 }
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index 717cfe7400a1..5a66d6558851 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -502,7 +502,7 @@ int copy_context_skas0(unsigned long new_stack, int pid)
 	*data = ((struct stub_data) {
 		.offset	= MMAP_OFFSET(new_offset),
 		.fd     = new_fd,
-		.parent_err = -ESRCH,
+		.err    = -ESRCH,
 		.child_err = 0,
 	});
 
@@ -539,7 +539,7 @@ int copy_context_skas0(unsigned long new_stack, int pid)
 
 	wait_stub_done(pid);
 
-	pid = data->parent_err;
+	pid = data->err;
 	if (pid < 0) {
 		printk(UM_KERN_ERR "%s - stub-parent reports error %d\n",
 		      __func__, -pid);
diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile
index 77f70b969d14..50ed265488d0 100644
--- a/arch/x86/um/Makefile
+++ b/arch/x86/um/Makefile
@@ -11,7 +11,7 @@ endif
 
 obj-y = bugs_$(BITS).o delay.o fault.o ldt.o \
 	ptrace_$(BITS).o ptrace_user.o setjmp_$(BITS).o signal.o \
-	stub_$(BITS).o stub_segv.o \
+	stub_segv.o \
 	sys_call_table_$(BITS).o sysrq_$(BITS).o tls_$(BITS).o \
 	mem_$(BITS).o subarch.o os-$(OS)/
 
diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c
index 3ee234b6234d..56e80c626d8a 100644
--- a/arch/x86/um/ldt.c
+++ b/arch/x86/um/ldt.c
@@ -12,31 +12,26 @@
 #include <os.h>
 #include <skas.h>
 #include <sysdep/tls.h>
+#include <stub-data.h>
 
 static inline int modify_ldt (int func, void *ptr, unsigned long bytecount)
 {
 	return syscall(__NR_modify_ldt, func, ptr, bytecount);
 }
 
-static long write_ldt_entry(struct mm_id *mm_idp, int func,
-		     struct user_desc *desc, void **addr, int done)
+static void write_ldt_entry(struct mm_id *mm_idp, int func,
+		     struct user_desc *desc)
 {
-	long res;
-	void *stub_addr;
-	res = syscall_stub_data(mm_idp, (unsigned long *)desc,
-				(sizeof(*desc) + sizeof(long) - 1) &
-				    ~(sizeof(long) - 1),
-				addr, &stub_addr);
-	if (!res) {
-		unsigned long args[] = { func,
-					 (unsigned long)stub_addr,
-					 sizeof(*desc),
-					 0, 0, 0 };
-		res = run_syscall_stub(mm_idp, __NR_modify_ldt, args,
-				       0, addr, done);
-	}
-
-	return res;
+	struct stub_syscall *sc;
+	unsigned long data_addr;
+
+	sc = syscall_stub_alloc(mm_idp, sizeof(*desc), &data_addr);
+	memcpy(sc->data, desc, sizeof(*desc));
+	sc->expected_result = 0;
+	sc->syscall = __NR_modify_ldt;
+	sc->arg[0] = func;
+	sc->arg[1] = data_addr;
+	sc->arg[2] = sizeof(*desc);
 }
 
 /*
@@ -125,7 +120,6 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int func)
 	int i, err;
 	struct user_desc ldt_info;
 	struct ldt_entry entry0, *ldt_p;
-	void *addr = NULL;
 
 	err = -EINVAL;
 	if (bytecount != sizeof(ldt_info))
@@ -146,7 +140,8 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int func)
 
 	mutex_lock(&ldt->lock);
 
-	err = write_ldt_entry(mm_idp, func, &ldt_info, &addr, 1);
+	write_ldt_entry(mm_idp, func, &ldt_info);
+	err = syscall_stub_flush(mm_idp);
 	if (err)
 		goto out_unlock;
 
@@ -164,7 +159,8 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int func)
 				err = -ENOMEM;
 				/* Undo the change in host */
 				memset(&ldt_info, 0, sizeof(ldt_info));
-				write_ldt_entry(mm_idp, 1, &ldt_info, &addr, 1);
+				write_ldt_entry(mm_idp, 1, &ldt_info);
+				err = syscall_stub_flush(mm_idp);
 				goto out_unlock;
 			}
 			if (i == 0) {
@@ -301,7 +297,6 @@ long init_new_ldt(struct mm_context *new_mm, struct mm_context *from_mm)
 	short * num_p;
 	int i;
 	long page, err=0;
-	void *addr = NULL;
 
 
 	mutex_init(&new_mm->arch.ldt.lock);
@@ -316,11 +311,9 @@ long init_new_ldt(struct mm_context *new_mm, struct mm_context *from_mm)
 		ldt_get_host_info();
 		for (num_p=host_ldt_entries; *num_p != -1; num_p++) {
 			desc.entry_number = *num_p;
-			err = write_ldt_entry(&new_mm->id, 1, &desc,
-					      &addr, *(num_p + 1) == -1);
-			if (err)
-				break;
+			write_ldt_entry(&new_mm->id, 1, &desc);
 		}
+		err = syscall_stub_flush(&new_mm->id);
 		new_mm->arch.ldt.entry_count = 0;
 
 		goto out;
diff --git a/arch/x86/um/shared/sysdep/stub.h b/arch/x86/um/shared/sysdep/stub.h
index ce0ca46ad383..579681d12158 100644
--- a/arch/x86/um/shared/sysdep/stub.h
+++ b/arch/x86/um/shared/sysdep/stub.h
@@ -12,4 +12,5 @@
 #endif
 
 extern void stub_segv_handler(int, siginfo_t *, void *);
+extern void stub_syscall_handler(void);
 extern void stub_clone_handler(void);
diff --git a/arch/x86/um/stub_32.S b/arch/x86/um/stub_32.S
deleted file mode 100644
index 8291899e6aaf..000000000000
--- a/arch/x86/um/stub_32.S
+++ /dev/null
@@ -1,56 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#include <as-layout.h>
-
-.section .__syscall_stub, "ax"
-
-	.globl batch_syscall_stub
-batch_syscall_stub:
-	/* %esp comes in as "top of page" */
-	mov %esp, %ecx
-	/* %esp has pointer to first operation */
-	add $8, %esp
-again:
-	/* load length of additional data */
-	mov	0x0(%esp), %eax
-
-	/* if(length == 0) : end of list */
-	/* write possible 0 to header */
-	mov	%eax, 0x4(%ecx)
-	cmpl	$0, %eax
-	jz	done
-
-	/* save current pointer */
-	mov	%esp, 0x4(%ecx)
-
-	/* skip additional data */
-	add	%eax, %esp
-
-	/* load syscall-# */
-	pop	%eax
-
-	/* load syscall params */
-	pop	%ebx
-	pop	%ecx
-	pop	%edx
-	pop	%esi
- 	pop	%edi
-	pop	%ebp
-
-	/* execute syscall */
-	int	$0x80
-
-	/* restore top of page pointer in %ecx */
-	mov	%esp, %ecx
-	andl	$(~UM_KERN_PAGE_SIZE) + 1, %ecx
-
-	/* check return value */
-	pop	%ebx
-	cmp	%ebx, %eax
-	je	again
-
-done:
-	/* save return value */
-	mov	%eax, (%ecx)
-
-	/* stop */
-	int3
diff --git a/arch/x86/um/stub_64.S b/arch/x86/um/stub_64.S
deleted file mode 100644
index f3404640197a..000000000000
--- a/arch/x86/um/stub_64.S
+++ /dev/null
@@ -1,50 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#include <as-layout.h>
-
-.section .__syscall_stub, "ax"
-	.globl batch_syscall_stub
-batch_syscall_stub:
-	/* %rsp has the pointer to first operation */
-	mov	%rsp, %rbx
-	add	$0x10, %rsp
-again:
-	/* load length of additional data */
-	mov	0x0(%rsp), %rax
-
-	/* if(length == 0) : end of list */
-	/* write possible 0 to header */
-	mov	%rax, 8(%rbx)
-	cmp	$0, %rax
-	jz	done
-
-	/* save current pointer */
-	mov	%rsp, 8(%rbx)
-
-	/* skip additional data */
-	add	%rax, %rsp
-
-	/* load syscall-# */
-	pop	%rax
-
-	/* load syscall params */
-	pop	%rdi
-	pop	%rsi
-	pop	%rdx
-	pop	%r10
- 	pop	%r8
-	pop	%r9
-
-	/* execute syscall */
-	syscall
-
-	/* check return value */
-	pop	%rcx
-	cmp	%rcx, %rax
-	je	again
-
-done:
-	/* save return value */
-	mov	%rax, (%rbx)
-
-	/* stop */
-	int3
-- 
2.29.2
_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um
^ permalink raw reply related	[flat|nested] 30+ messages in thread
- * [PATCH 17/27] um: Store full CSGSFS and SS register from mcontext
  2021-03-03 15:54 [PATCH 00/27] Implement SECCOMP based userland Benjamin Berg
                   ` (15 preceding siblings ...)
  2021-03-03 15:55 ` [PATCH 16/27] um: Rework syscall handling Benjamin Berg
@ 2021-03-03 15:55 ` Benjamin Berg
  2021-03-03 15:55 ` [PATCH 18/27] um: Pass full mm_id to functions creating helper processes Benjamin Berg
                   ` (9 subsequent siblings)
  26 siblings, 0 replies; 30+ messages in thread
From: Benjamin Berg @ 2021-03-03 15:55 UTC (permalink / raw)
  To: linux-um; +Cc: Benjamin Berg
Doing this allows using registers as retrieved from an mcontext to be
pushed to a process using PTRACE_SETREGS.
It is not entirely clear to me why CSGSFS was masked. Doing so creates
issues when using the mcontext as process state in seccomp and simply
copying the register appears to work perfectly fine for ptrace.
Signed-off-by: Benjamin Berg <benjamin@sipsolutions.net>
---
NOTE: I thought that a good additional test for the register store/restore
functions would be to use the registers as extracted using seccomp during
startup to run using ptrace (i.e. force using_seccomp to 0). However,
while this works great for simple test cases, I have a scenario where
python suddenly fails to parse bytecode.
This is not an issue in general. But I don't understand why it is
happening and it might mean that there is an unexpected mismatch between
the ptrace and seccomp code paths.
---
 arch/x86/um/os-Linux/mcontext.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/arch/x86/um/os-Linux/mcontext.c b/arch/x86/um/os-Linux/mcontext.c
index 49c3744cac37..81b9d1f9f4e6 100644
--- a/arch/x86/um/os-Linux/mcontext.c
+++ b/arch/x86/um/os-Linux/mcontext.c
@@ -26,7 +26,6 @@ void get_regs_from_mc(struct uml_pt_regs *regs, mcontext_t *mc)
 	COPY(RIP);
 	COPY2(EFLAGS, EFL);
 	COPY2(CS, CSGSFS);
-	regs->gp[CS / sizeof(unsigned long)] &= 0xffff;
-	regs->gp[CS / sizeof(unsigned long)] |= 3;
+	regs->gp[SS / sizeof(unsigned long)] = mc->gregs[REG_CSGSFS] >> 48;
 #endif
 }
-- 
2.29.2
_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um
^ permalink raw reply related	[flat|nested] 30+ messages in thread
- * [PATCH 18/27] um: Pass full mm_id to functions creating helper processes
  2021-03-03 15:54 [PATCH 00/27] Implement SECCOMP based userland Benjamin Berg
                   ` (16 preceding siblings ...)
  2021-03-03 15:55 ` [PATCH 17/27] um: Store full CSGSFS and SS register from mcontext Benjamin Berg
@ 2021-03-03 15:55 ` Benjamin Berg
  2021-03-03 15:55 ` [PATCH 19/27] um: Move faultinfo extraction into userspace routine Benjamin Berg
                   ` (8 subsequent siblings)
  26 siblings, 0 replies; 30+ messages in thread
From: Benjamin Berg @ 2021-03-03 15:55 UTC (permalink / raw)
  To: linux-um; +Cc: Benjamin Berg
For seccomp, we need all information about the original process in
copy_context_skas0. For consistency, change both copy_context_skas0 and
start_userspace to take the mm_id struct as parameter and directly set
PID in addition to returning it.
Signed-off-by: Benjamin Berg <benjamin@sipsolutions.net>
---
 arch/um/include/shared/os.h     |  4 +--
 arch/um/kernel/skas/mmu.c       |  6 ++--
 arch/um/os-Linux/skas/process.c | 54 ++++++++++++++++-----------------
 3 files changed, 32 insertions(+), 32 deletions(-)
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 632c83d83c8d..8514d90cd5fa 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -282,8 +282,8 @@ void protect(struct mm_id *mm_idp, unsigned long addr,
 
 /* skas/process.c */
 extern int is_skas_winch(int pid, int fd, void *data);
-extern int start_userspace(unsigned long stub_stack);
-extern int copy_context_skas0(unsigned long stack, int pid);
+extern int start_userspace(struct mm_id *id);
+extern int copy_context_skas0(struct mm_id *id, struct mm_id *from);
 extern void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs);
 extern int map_stub_pages(int fd, unsigned long code, unsigned long data,
 			  unsigned long stack);
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index 125df465e8ea..3f8fe6350234 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -31,9 +31,9 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm)
 
 	block_signals_trace();
 	if (from_mm)
-		to_mm->id.u.pid = copy_context_skas0(stack,
-						     from_mm->id.u.pid);
-	else to_mm->id.u.pid = start_userspace(stack);
+		copy_context_skas0(&to_mm->id, &from_mm->id);
+	else
+		start_userspace(&to_mm->id);
 	unblock_signals_trace();
 
 	if (to_mm->id.u.pid < 0) {
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index 5a66d6558851..08ce1798ee96 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -271,11 +271,11 @@ int kill_userspace_mm[NR_CPUS];
  *         when negative: an error number.
  * FIXME: can PIDs become negative?!
  */
-int start_userspace(unsigned long stub_stack)
+int start_userspace(struct mm_id *id)
 {
 	void *stack;
 	unsigned long sp;
-	int pid, status, n, flags, err;
+	int status, n, flags, err;
 
 	/* setup a temporary stack page */
 	stack = mmap(NULL, UM_KERN_PAGE_SIZE,
@@ -294,8 +294,8 @@ int start_userspace(unsigned long stub_stack)
 	flags = CLONE_FILES | SIGCHLD;
 
 	/* clone into new userspace process */
-	pid = clone(userspace_tramp, (void *) sp, flags, (void *) stub_stack);
-	if (pid < 0) {
+	id->u.pid = clone(userspace_tramp, (void *) sp, flags, (void *) id->stack);
+	if (id->u.pid < 0) {
 		err = -errno;
 		printk(UM_KERN_ERR "%s : clone failed, errno = %d\n",
 		       __func__, errno);
@@ -303,7 +303,7 @@ int start_userspace(unsigned long stub_stack)
 	}
 
 	do {
-		CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL));
+		CATCH_EINTR(n = waitpid(id->u.pid, &status, WUNTRACED | __WALL));
 		if (n < 0) {
 			err = -errno;
 			printk(UM_KERN_ERR "%s : wait failed, errno = %d\n",
@@ -319,7 +319,7 @@ int start_userspace(unsigned long stub_stack)
 		goto out_kill;
 	}
 
-	if (ptrace(PTRACE_SETOPTIONS, pid, NULL,
+	if (ptrace(PTRACE_SETOPTIONS, id->u.pid, NULL,
 		   (void *) PTRACE_O_TRACESYSGOOD) < 0) {
 		err = -errno;
 		printk(UM_KERN_ERR "%s : PTRACE_OLDSETOPTIONS failed, errno = %d\n",
@@ -334,10 +334,10 @@ int start_userspace(unsigned long stub_stack)
 		goto out_kill;
 	}
 
-	return pid;
+	return id->u.pid;
 
  out_kill:
-	os_kill_ptraced_process(pid, 1);
+	os_kill_ptraced_process(id->u.pid, 1);
 	return err;
 }
 
@@ -486,14 +486,14 @@ static int __init init_thread_regs(void)
 
 __initcall(init_thread_regs);
 
-int copy_context_skas0(unsigned long new_stack, int pid)
+int copy_context_skas0(struct mm_id *id, struct mm_id *from)
 {
 	int err;
 	unsigned long current_stack = current_stub_stack();
 	struct stub_data *data = (struct stub_data *) current_stack;
-	struct stub_data *child_data = (struct stub_data *) new_stack;
+	struct stub_data *child_data = (struct stub_data *) id->stack;
 	unsigned long long new_offset;
-	int new_fd = phys_mapping(to_phys((void *)new_stack), &new_offset);
+	int new_fd = phys_mapping(to_phys((void *)id->stack), &new_offset);
 
 	/*
 	 * prepare offset and fd of child's stack as argument for parent's
@@ -510,18 +510,18 @@ int copy_context_skas0(unsigned long new_stack, int pid)
 		.child_err = -ESRCH,
 	});
 
-	err = ptrace_setregs(pid, thread_regs);
+	err = ptrace_setregs(from->u.pid, thread_regs);
 	if (err < 0) {
 		err = -errno;
 		printk(UM_KERN_ERR "%s : PTRACE_SETREGS failed, pid = %d, errno = %d\n",
-		      __func__, pid, -err);
+		      __func__, from->u.pid, -err);
 		return err;
 	}
 
-	err = put_fp_registers(pid, thread_fp_regs);
+	err = put_fp_registers(from->u.pid, thread_fp_regs);
 	if (err < 0) {
 		printk(UM_KERN_ERR "%s : put_fp_registers failed, pid = %d, err = %d\n",
-		       __func__, pid, err);
+		       __func__, from->u.pid, err);
 		return err;
 	}
 
@@ -529,36 +529,36 @@ int copy_context_skas0(unsigned long new_stack, int pid)
 	 * Wait, until parent has finished its work: read child's pid from
 	 * parent's stack, and check, if bad result.
 	 */
-	err = ptrace(PTRACE_CONT, pid, 0, 0);
+	err = ptrace(PTRACE_CONT, from->u.pid, 0, 0);
 	if (err) {
 		err = -errno;
 		printk(UM_KERN_ERR "Failed to continue new process, pid = %d, errno = %d\n",
-		       pid, errno);
+		       from->u.pid, errno);
 		return err;
 	}
 
-	wait_stub_done(pid);
+	wait_stub_done(from->u.pid);
 
-	pid = data->err;
-	if (pid < 0) {
+	id->u.pid = data->err;
+	if (id->u.pid < 0) {
 		printk(UM_KERN_ERR "%s - stub-parent reports error %d\n",
-		      __func__, -pid);
-		return pid;
+		      __func__, -id->u.pid);
+		return id->u.pid;
 	}
 
 	/*
 	 * Wait, until child has finished too: read child's result from
 	 * child's stack and check it.
 	 */
-	wait_stub_done(pid);
+	wait_stub_done(id->u.pid);
 	if (child_data->child_err != STUB_DATA) {
 		printk(UM_KERN_ERR "%s - stub-child %d reports error %ld\n",
-		       __func__, pid, data->child_err);
+		       __func__, id->u.pid, data->child_err);
 		err = data->child_err;
 		goto out_kill;
 	}
 
-	if (ptrace(PTRACE_SETOPTIONS, pid, NULL,
+	if (ptrace(PTRACE_SETOPTIONS, id->u.pid, NULL,
 		   (void *)PTRACE_O_TRACESYSGOOD) < 0) {
 		err = -errno;
 		printk(UM_KERN_ERR "%s : PTRACE_OLDSETOPTIONS failed, errno = %d\n",
@@ -566,10 +566,10 @@ int copy_context_skas0(unsigned long new_stack, int pid)
 		goto out_kill;
 	}
 
-	return pid;
+	return id->u.pid;
 
  out_kill:
-	os_kill_ptraced_process(pid, 1);
+	os_kill_ptraced_process(id->u.pid, 1);
 	return err;
 }
 
-- 
2.29.2
_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um
^ permalink raw reply related	[flat|nested] 30+ messages in thread
- * [PATCH 19/27] um: Move faultinfo extraction into userspace routine
  2021-03-03 15:54 [PATCH 00/27] Implement SECCOMP based userland Benjamin Berg
                   ` (17 preceding siblings ...)
  2021-03-03 15:55 ` [PATCH 18/27] um: Pass full mm_id to functions creating helper processes Benjamin Berg
@ 2021-03-03 15:55 ` Benjamin Berg
  2021-03-03 15:55 ` [PATCH 20/27] um: Use struct uml_pt_regs for copy_context_skas0 Benjamin Berg
                   ` (7 subsequent siblings)
  26 siblings, 0 replies; 30+ messages in thread
From: Benjamin Berg @ 2021-03-03 15:55 UTC (permalink / raw)
  To: linux-um; +Cc: Benjamin Berg
The segv handler is called slightly differently depending on whether
PTRACE_FULL_FAULTINFO is set or not (32bit vs. 64bit). The only
difference is that we don't try to pass the registers and instruction
pointer to the segv handler.
It would be good to either document or remove the difference, but I do
not know why this difference exists.
Signed-off-by: Benjamin Berg <benjamin@sipsolutions.net>
---
 arch/um/os-Linux/skas/process.c | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index 08ce1798ee96..660b23389016 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -170,12 +170,6 @@ static void get_skas_faultinfo(int pid, struct faultinfo *fi, unsigned long *aux
 	}
 }
 
-static void handle_segv(int pid, struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
-{
-	get_skas_faultinfo(pid, &regs->faultinfo, aux_fp_regs);
-	segv(regs->faultinfo, 0, 1, NULL);
-}
-
 static void handle_trap(int pid, struct uml_pt_regs *regs)
 {
 	if ((UPT_IP(regs) >= STUB_START) && (UPT_IP(regs) < STUB_END))
@@ -426,13 +420,15 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
 
 			switch (sig) {
 			case SIGSEGV:
-				if (PTRACE_FULL_FAULTINFO) {
-					get_skas_faultinfo(pid,
-							   &regs->faultinfo, aux_fp_regs);
+				get_skas_faultinfo(pid,
+						   &regs->faultinfo, aux_fp_regs);
+
+				if (PTRACE_FULL_FAULTINFO)
 					(*sig_info[SIGSEGV])(SIGSEGV, (struct siginfo *)&si,
 							     regs);
-				}
-				else handle_segv(pid, regs, aux_fp_regs);
+				else
+					segv(regs->faultinfo, 0, 1, NULL);
+
 				break;
 			case SIGTRAP + 0x80:
 				handle_trap(pid, regs);
-- 
2.29.2
_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um
^ permalink raw reply related	[flat|nested] 30+ messages in thread
- * [PATCH 20/27] um: Use struct uml_pt_regs for copy_context_skas0
  2021-03-03 15:54 [PATCH 00/27] Implement SECCOMP based userland Benjamin Berg
                   ` (18 preceding siblings ...)
  2021-03-03 15:55 ` [PATCH 19/27] um: Move faultinfo extraction into userspace routine Benjamin Berg
@ 2021-03-03 15:55 ` Benjamin Berg
  2021-03-03 15:55 ` [PATCH 21/27] um: Add UML_SECCOMP configuration option Benjamin Berg
                   ` (6 subsequent siblings)
  26 siblings, 0 replies; 30+ messages in thread
From: Benjamin Berg @ 2021-03-03 15:55 UTC (permalink / raw)
  To: linux-um; +Cc: Benjamin Berg
This is done as preparation for seccomp support as we have no helper to
copy the registers from a plain array.
Signed-off-by: Benjamin Berg <benjamin@sipsolutions.net>
---
 arch/um/os-Linux/skas/process.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index 660b23389016..3bfc28d28fc0 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -462,20 +462,19 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
 	}
 }
 
-static unsigned long thread_regs[MAX_REG_NR];
-static unsigned long thread_fp_regs[FP_SIZE];
+static struct uml_pt_regs thread_regs;
 
 static int __init init_thread_regs(void)
 {
-	get_safe_registers(thread_regs, thread_fp_regs);
+	get_safe_registers(thread_regs.gp, thread_regs.fp);
 	/* Set parent's instruction pointer to start of clone-stub */
-	thread_regs[REGS_IP_INDEX] = STUB_CODE +
+	thread_regs.gp[REGS_IP_INDEX] = STUB_CODE +
 				(unsigned long) stub_clone_handler -
 				(unsigned long) __syscall_stub_start;
-	thread_regs[REGS_SP_INDEX] = STUB_DATA + UM_KERN_PAGE_SIZE -
+	thread_regs.gp[REGS_SP_INDEX] = STUB_DATA + UM_KERN_PAGE_SIZE -
 		sizeof(void *);
 #ifdef __SIGNAL_FRAMESIZE
-	thread_regs[REGS_SP_INDEX] -= __SIGNAL_FRAMESIZE;
+	thread_regs.gp[REGS_SP_INDEX] -= __SIGNAL_FRAMESIZE;
 #endif
 	return 0;
 }
@@ -506,7 +505,7 @@ int copy_context_skas0(struct mm_id *id, struct mm_id *from)
 		.child_err = -ESRCH,
 	});
 
-	err = ptrace_setregs(from->u.pid, thread_regs);
+	err = ptrace_setregs(from->u.pid, thread_regs.gp);
 	if (err < 0) {
 		err = -errno;
 		printk(UM_KERN_ERR "%s : PTRACE_SETREGS failed, pid = %d, errno = %d\n",
@@ -514,7 +513,7 @@ int copy_context_skas0(struct mm_id *id, struct mm_id *from)
 		return err;
 	}
 
-	err = put_fp_registers(from->u.pid, thread_fp_regs);
+	err = put_fp_registers(from->u.pid, thread_regs.fp);
 	if (err < 0) {
 		printk(UM_KERN_ERR "%s : put_fp_registers failed, pid = %d, err = %d\n",
 		       __func__, from->u.pid, err);
-- 
2.29.2
_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um
^ permalink raw reply related	[flat|nested] 30+ messages in thread
- * [PATCH 21/27] um: Add UML_SECCOMP configuration option
  2021-03-03 15:54 [PATCH 00/27] Implement SECCOMP based userland Benjamin Berg
                   ` (19 preceding siblings ...)
  2021-03-03 15:55 ` [PATCH 20/27] um: Use struct uml_pt_regs for copy_context_skas0 Benjamin Berg
@ 2021-03-03 15:55 ` Benjamin Berg
  2021-03-03 15:55 ` [PATCH 22/27] um: Add stub side of SECCOMP/futex based process handling Benjamin Berg
                   ` (5 subsequent siblings)
  26 siblings, 0 replies; 30+ messages in thread
From: Benjamin Berg @ 2021-03-03 15:55 UTC (permalink / raw)
  To: linux-um; +Cc: Benjamin Berg
Add the UML_SECCOMP configuration option. The next commits will add the
support itself in smaller chunks.
Only x86_64 will be supported for now.
Signed-off-by: Benjamin Berg <benjamin@sipsolutions.net>
---
 arch/um/Kconfig | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index c3030db3325f..769bc770c5fa 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -188,6 +188,25 @@ config UML_TIME_TRAVEL_SUPPORT
 
 	  It is safe to say Y, but you probably don't need this.
 
+config UML_SECCOMP
+	bool "seccomp based process tracing"
+	default n
+	depends on 64BIT
+	help
+	  Enabling this option turns on seccomp based tracing of processes.
+
+	  UML must call syscalls from within the userspace processes when
+	  mapping physical memory in response to page faults. Using seccomp
+	  based tracing permits delaying these host syscalls until userspace
+	  processes are resumed in order to run a task, thereby avoiding
+	  overhead for the host by saving context switches.
+
+	  This feature speeds up e.g. fork() heavy workloads considerably.
+	  However, the current implementation is not safe as userspace
+	  processes can trigger any syscall to the host OS.
+
+	  If in doubt say N, as the feature has security implications.
+
 endmenu
 
 source "arch/um/drivers/Kconfig"
-- 
2.29.2
_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um
^ permalink raw reply related	[flat|nested] 30+ messages in thread
- * [PATCH 22/27] um: Add stub side of SECCOMP/futex based process handling
  2021-03-03 15:54 [PATCH 00/27] Implement SECCOMP based userland Benjamin Berg
                   ` (20 preceding siblings ...)
  2021-03-03 15:55 ` [PATCH 21/27] um: Add UML_SECCOMP configuration option Benjamin Berg
@ 2021-03-03 15:55 ` Benjamin Berg
  2021-03-03 15:55 ` [PATCH 23/27] um: Add helper functions to get/set state for SECCOMP Benjamin Berg
                   ` (4 subsequent siblings)
  26 siblings, 0 replies; 30+ messages in thread
From: Benjamin Berg @ 2021-03-03 15:55 UTC (permalink / raw)
  To: linux-um; +Cc: Benjamin Berg, Johannes Berg
This adds the stub side for the new seccomp process management code. In
this case we do register save/restore through the signal handler
mcontext. For the FS_BASE/GS_BASE registers we need special handling.
Co-authored-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Benjamin Berg <benjamin@sipsolutions.net>
---
 arch/um/include/shared/skas/stub-data.h | 15 +++++++
 arch/um/kernel/skas/clone.c             | 25 ++++++++++++
 arch/um/kernel/skas/stub.c              | 53 +++++++++++++++++++++++++
 arch/x86/um/shared/sysdep/stub-data.h   | 11 +++++
 arch/x86/um/shared/sysdep/stub.h        |  3 ++
 arch/x86/um/shared/sysdep/stub_32.h     |  5 +++
 arch/x86/um/shared/sysdep/stub_64.h     | 10 +++++
 7 files changed, 122 insertions(+)
 create mode 100644 arch/x86/um/shared/sysdep/stub-data.h
diff --git a/arch/um/include/shared/skas/stub-data.h b/arch/um/include/shared/skas/stub-data.h
index efa78bc359cb..e130c428cda9 100644
--- a/arch/um/include/shared/skas/stub-data.h
+++ b/arch/um/include/shared/skas/stub-data.h
@@ -8,8 +8,13 @@
 #ifndef __STUB_DATA_H
 #define __STUB_DATA_H
 
+#include <linux/kconfig.h>
 #include <linux/compiler_types.h>
 #include <as-layout.h>
+#include <sysdep/stub-data.h>
+
+#define FUTEX_IN_CHILD 0
+#define FUTEX_IN_KERN 1
 
 #define STUB_NEXT_SYSCALL(s) \
 	((struct stub_syscall *) (((unsigned long) s) + (s)->cmd_len))
@@ -32,6 +37,16 @@ struct stub_data {
 	unsigned char syscall_data[UM_KERN_PAGE_SIZE - MINSIGSTKSZ - 128]
 		      __aligned(16);
 
+	/* data shared with signal handler (only used in seccomp mode) */
+	short restart_wait;
+	unsigned int futex;
+	int signal;
+	unsigned short si_offset;
+	unsigned short mctx_offset;
+
+	/* seccomp architecture specific state restore */
+	struct stub_data_arch arch_data;
+
 	/* Stack for our signal handlers and for calling into . */
 	unsigned char sigstack[MINSIGSTKSZ + 32] __aligned(16);
 };
diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c
index a680d80b3870..d6e0742c77fd 100644
--- a/arch/um/kernel/skas/clone.c
+++ b/arch/um/kernel/skas/clone.c
@@ -49,3 +49,28 @@ stub_clone_handler(void)
  done:
 	trap_myself();
 }
+
+#ifdef CONFIG_UML_SECCOMP
+void __attribute__ ((__section__ (".__syscall_stub")))
+stub_clone_handler_seccomp(void)
+{
+	int stack;
+	struct stub_data *data = (void *) ((unsigned long)&stack & ~(UM_KERN_PAGE_SIZE - 1));
+	long err;
+
+	/* Use the syscall data as a temporary stack area. */
+	err = stub_syscall2(__NR_clone, CLONE_PARENT | CLONE_FILES | SIGCHLD,
+			    (unsigned long) data->syscall_data +
+					    sizeof(data->syscall_data) -
+					    sizeof(void *));
+	if (err) {
+		data->err = err;
+		goto done;
+	}
+
+	remap_stack_and_trap();
+
+ done:
+	trap_myself();
+}
+#endif
diff --git a/arch/um/kernel/skas/stub.c b/arch/um/kernel/skas/stub.c
index 5d1bcc883866..4bf32fdf4599 100644
--- a/arch/um/kernel/skas/stub.c
+++ b/arch/um/kernel/skas/stub.c
@@ -5,6 +5,13 @@
 
 #include <sysdep/stub.h>
 
+#ifdef CONFIG_UML_SECCOMP
+#include <linux/futex.h>
+#include <errno.h>
+
+#define CATCH_EINTR(expr) while ((res = (expr)) && (res == -EINTR))
+#endif
+
 static __always_inline int
 syscall_handler(struct stub_data *d)
 {
@@ -50,3 +57,49 @@ stub_syscall_handler(void)
 
 	trap_myself();
 }
+
+#ifdef CONFIG_UML_SECCOMP
+void __attribute__ ((__section__ (".__syscall_stub")))
+stub_signal_interrupt(int sig, siginfo_t *info, void *p)
+{
+	int stack;
+	struct stub_data *d = (void *) ((unsigned long)&stack & ~(UM_KERN_PAGE_SIZE - 1));
+	ucontext_t *uc = p;
+	long res;
+
+	d->signal = sig;
+	d->si_offset = (unsigned long)info - (unsigned long)&d->sigstack[0];
+	d->mctx_offset = (unsigned long)&uc->uc_mcontext - (unsigned long)&d->sigstack[0];
+
+restart_wait:
+	d->futex = FUTEX_IN_KERN;
+	CATCH_EINTR(stub_syscall3(__NR_futex, (unsigned long)&d->futex,
+				  FUTEX_WAKE, 1));
+	do {
+		res = stub_syscall4(__NR_futex, (unsigned long)&d->futex,
+				    FUTEX_WAIT, FUTEX_IN_KERN, 0);
+	} while (res == -EINTR || d->futex == FUTEX_IN_KERN);
+
+	if (res < 0 && res != -EAGAIN)
+		stub_syscall2(__NR_kill, 0, SIGKILL);
+
+	/* Try running queued syscalls. */
+	if (syscall_handler(d) < 0 || d->restart_wait) {
+		/* Report SIGTRAP if we restart. */
+		d->signal = SIGTRAP;
+		d->restart_wait = 0;
+		goto restart_wait;
+	}
+
+	/* Restore arch dependent state that is not part of the mcontext */
+	stub_seccomp_restore_state(&d->arch_data);
+
+	/* Return so that the host modified mcontext is restored. */
+}
+
+void __attribute__ ((__section__ (".__syscall_stub")))
+stub_signal_restorer(void)
+{
+	stub_syscall0(__NR_rt_sigreturn);
+}
+#endif
diff --git a/arch/x86/um/shared/sysdep/stub-data.h b/arch/x86/um/shared/sysdep/stub-data.h
new file mode 100644
index 000000000000..2e71b48ebb1f
--- /dev/null
+++ b/arch/x86/um/shared/sysdep/stub-data.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifdef __i386__
+struct stub_data_arch { };
+#else
+struct stub_data_arch {
+	int sync;
+	unsigned long fs_base;
+	unsigned long gs_base;
+};
+#endif
+
diff --git a/arch/x86/um/shared/sysdep/stub.h b/arch/x86/um/shared/sysdep/stub.h
index 579681d12158..eb2e3a24d40b 100644
--- a/arch/x86/um/shared/sysdep/stub.h
+++ b/arch/x86/um/shared/sysdep/stub.h
@@ -14,3 +14,6 @@
 extern void stub_segv_handler(int, siginfo_t *, void *);
 extern void stub_syscall_handler(void);
 extern void stub_clone_handler(void);
+extern void stub_signal_interrupt(int, siginfo_t *, void *);
+extern void stub_signal_restorer(void);
+extern void stub_clone_handler_seccomp(void);
diff --git a/arch/x86/um/shared/sysdep/stub_32.h b/arch/x86/um/shared/sysdep/stub_32.h
index 3fb8559fe994..62aa7597576a 100644
--- a/arch/x86/um/shared/sysdep/stub_32.h
+++ b/arch/x86/um/shared/sysdep/stub_32.h
@@ -130,4 +130,9 @@ static __always_inline void remap_stack_and_trap(void)
 		"memory");
 }
 
+static __always_inline void stub_seccomp_restore_state(struct stub_data_arch *arch)
+{
+	/* No extra arch specific restore */
+}
+
 #endif
diff --git a/arch/x86/um/shared/sysdep/stub_64.h b/arch/x86/um/shared/sysdep/stub_64.h
index c41ae0462c8f..69f9aeca8cee 100644
--- a/arch/x86/um/shared/sysdep/stub_64.h
+++ b/arch/x86/um/shared/sysdep/stub_64.h
@@ -8,6 +8,7 @@
 
 #include <sysdep/ptrace_user.h>
 #include <generated/asm-offsets.h>
+#include <asm/prctl.h>
 
 #define STUB_MMAP_NR __NR_mmap
 #define MMAP_OFFSET(o) (o)
@@ -125,4 +126,13 @@ static __always_inline void remap_stack_and_trap(void)
 		__syscall_clobber, "r10", "r8", "r9");
 }
 
+static __always_inline void stub_seccomp_restore_state(struct stub_data_arch *arch)
+{
+	/* TODO: Use _writefsbase_u64/_writegsbase_u64 when possible */
+	if (arch->sync & 0x1)
+		stub_syscall2(__NR_arch_prctl, ARCH_SET_FS, arch->fs_base);
+	if (arch->sync & 0x2)
+		stub_syscall2(__NR_arch_prctl, ARCH_SET_GS, arch->gs_base);
+}
+
 #endif
-- 
2.29.2
_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um
^ permalink raw reply related	[flat|nested] 30+ messages in thread
- * [PATCH 23/27] um: Add helper functions to get/set state for SECCOMP
  2021-03-03 15:54 [PATCH 00/27] Implement SECCOMP based userland Benjamin Berg
                   ` (21 preceding siblings ...)
  2021-03-03 15:55 ` [PATCH 22/27] um: Add stub side of SECCOMP/futex based process handling Benjamin Berg
@ 2021-03-03 15:55 ` Benjamin Berg
  2021-03-03 15:55 ` [PATCH 24/27] um: Add SECCOMP support detection and initialization Benjamin Berg
                   ` (3 subsequent siblings)
  26 siblings, 0 replies; 30+ messages in thread
From: Benjamin Berg @ 2021-03-03 15:55 UTC (permalink / raw)
  To: linux-um; +Cc: Benjamin Berg
When not using ptrace, we need to both save and restore registers
through the mcontext as provided by the host kernel to our signal
handlers.
Add corresponding functions to store the state to an mcontext and
helpers to access the mcontext of the subprocess through the stub data.
Signed-off-by: Benjamin Berg <benjamin@sipsolutions.net>
---
 arch/x86/um/os-Linux/mcontext.c      | 100 +++++++++++++++++++++++++++
 arch/x86/um/shared/sysdep/mcontext.h |   9 +++
 2 files changed, 109 insertions(+)
diff --git a/arch/x86/um/os-Linux/mcontext.c b/arch/x86/um/os-Linux/mcontext.c
index 81b9d1f9f4e6..506ce0aa7108 100644
--- a/arch/x86/um/os-Linux/mcontext.c
+++ b/arch/x86/um/os-Linux/mcontext.c
@@ -3,6 +3,9 @@
 #define __FRAME_OFFSETS
 #include <asm/ptrace.h>
 #include <sysdep/ptrace.h>
+#include <string.h>
+#include <signal.h>
+#include <sysdep/mcontext.h>
 
 void get_regs_from_mc(struct uml_pt_regs *regs, mcontext_t *mc)
 {
@@ -16,6 +19,10 @@ void get_regs_from_mc(struct uml_pt_regs *regs, mcontext_t *mc)
 	COPY2(UESP, ESP); /* sic */
 	COPY(EBX); COPY(EDX); COPY(ECX); COPY(EAX);
 	COPY(EIP); COPY_SEG_CPL3(CS); COPY(EFL); COPY_SEG_CPL3(SS);
+#undef COPY2
+#undef COPY
+#undef COPY_SEG
+#undef COPY_SEG_CPL3
 #else
 #define COPY2(X,Y) regs->gp[X/sizeof(unsigned long)] = mc->gregs[REG_##Y]
 #define COPY(X) regs->gp[X/sizeof(unsigned long)] = mc->gregs[REG_##X]
@@ -27,5 +34,98 @@ void get_regs_from_mc(struct uml_pt_regs *regs, mcontext_t *mc)
 	COPY2(EFLAGS, EFL);
 	COPY2(CS, CSGSFS);
 	regs->gp[SS / sizeof(unsigned long)] = mc->gregs[REG_CSGSFS] >> 48;
+#undef COPY2
+#undef COPY
 #endif
 }
+
+#ifdef CONFIG_UML_SECCOMP
+/* Same thing, but the copy macros are turned around. */
+void get_mc_from_regs(struct uml_pt_regs *regs, mcontext_t *mc, int single_stepping)
+{
+#ifdef __i386__
+#define COPY2(X,Y) mc->gregs[REG_##Y] = regs->gp[X]
+#define COPY(X) mc->gregs[REG_##X] = regs->gp[X]
+#define COPY_SEG(X) mc->gregs[REG_##X] = mc->gregs[REG_##X] & 0xffff;
+#define COPY_SEG_CPL3(X) mc->gregs[REG_##X] = (regs->gp[X] & 0xffff) | 3;
+	COPY_SEG(GS); COPY_SEG(FS); COPY_SEG(ES); COPY_SEG(DS);
+	COPY(EDI); COPY(ESI); COPY(EBP);
+	COPY2(UESP, ESP); /* sic */
+	COPY(EBX); COPY(EDX); COPY(ECX); COPY(EAX);
+	COPY(EIP); COPY_SEG_CPL3(CS); COPY(EFL); COPY_SEG_CPL3(SS);
+#else
+#define COPY2(X,Y) mc->gregs[REG_##Y] = regs->gp[X/sizeof(unsigned long)]
+#define COPY(X) mc->gregs[REG_##X] = regs->gp[X/sizeof(unsigned long)]
+	COPY(R8); COPY(R9); COPY(R10); COPY(R11);
+	COPY(R12); COPY(R13); COPY(R14); COPY(R15);
+	COPY(RDI); COPY(RSI); COPY(RBP); COPY(RBX);
+	COPY(RDX); COPY(RAX); COPY(RCX); COPY(RSP);
+	COPY(RIP);
+	COPY2(EFLAGS, EFL);
+	mc->gregs[REG_CSGSFS] = mc->gregs[REG_CSGSFS] & 0xffffffffffffl;
+	mc->gregs[REG_CSGSFS] |= (regs->gp[SS / sizeof(unsigned long)] & 0xffff) << 48;
+#endif
+
+	if (single_stepping)
+		mc->gregs[REG_EFL] |= X86_EFLAGS_TF;
+	else
+		mc->gregs[REG_EFL] &= ~X86_EFLAGS_TF;
+}
+
+void get_stub_state(struct uml_pt_regs *regs, struct stub_data *data)
+{
+	mcontext_t *mcontext;
+
+	if (data->mctx_offset > sizeof(data->sigstack) - sizeof(*mcontext))
+		panic("%s - Invalid mcontext offset from child!\n", __func__);
+
+	mcontext = (void *)&data->sigstack[data->mctx_offset];
+
+	get_regs_from_mc(regs, mcontext);
+	/* Copy floating point registers. As fpregs is a pointer, we need to make some
+	 * assumptions here in order to dereference it.
+	 * As such, assume it is on the same memory page.
+	 */
+	memcpy(&regs->fp,
+	       (void *) (((unsigned long) mcontext->fpregs & (UM_KERN_PAGE_SIZE - 1)) +
+			 (unsigned long) data),
+	       sizeof(*mcontext->fpregs));
+
+	/* We do not need to read the x86_64 FS_BASE/GS_BASE registers as
+	 * we do not permit userspace to set them directly.
+	 */
+}
+
+void set_stub_state(struct uml_pt_regs *regs, struct stub_data *data, int single_stepping)
+{
+	mcontext_t *mcontext = (void *)&data->sigstack[data->mctx_offset];
+
+	get_mc_from_regs(regs, mcontext, single_stepping);
+
+	/* Copy floating point registers (note that mc->fpregs is a userspace address) */
+	memcpy((void *) ((unsigned long) mcontext->fpregs - STUB_DATA +
+			 (unsigned long) data),
+	       &regs->fp, sizeof(*mcontext->fpregs));
+
+#ifdef __i386__
+	/*
+	 * On x86, we need to sync the GDT entries for the thread local storage.
+	 */
+	#error "Not implemented"
+#else
+	/*
+	 * On x86_64, we need to sync back the FS_BASE/GS_BASE registers
+	 * using the arch specific data.
+	 */
+	data->arch_data.sync = 0;
+
+	if (data->arch_data.fs_base != regs->gp[FS_BASE / sizeof(unsigned long)])
+		data->arch_data.sync |= 0x1;
+	if (data->arch_data.gs_base != regs->gp[GS_BASE / sizeof(unsigned long)])
+		data->arch_data.sync |= 0x2;
+
+	data->arch_data.fs_base = regs->gp[FS_BASE / sizeof(unsigned long)];
+	data->arch_data.gs_base = regs->gp[GS_BASE / sizeof(unsigned long)];
+#endif
+}
+#endif
diff --git a/arch/x86/um/shared/sysdep/mcontext.h b/arch/x86/um/shared/sysdep/mcontext.h
index b724c54da316..63334c36c269 100644
--- a/arch/x86/um/shared/sysdep/mcontext.h
+++ b/arch/x86/um/shared/sysdep/mcontext.h
@@ -6,7 +6,16 @@
 #ifndef __SYS_SIGCONTEXT_X86_H
 #define __SYS_SIGCONTEXT_X86_H
 
+#include <linux/kconfig.h>
+#include <stub-data.h>
+
 extern void get_regs_from_mc(struct uml_pt_regs *, mcontext_t *);
+extern void get_mc_from_regs(struct uml_pt_regs *regs, mcontext_t *mc,
+			     int single_stepping);
+
+extern void get_stub_state(struct uml_pt_regs *regs, struct stub_data *data);
+extern void set_stub_state(struct uml_pt_regs *regs, struct stub_data *data,
+			   int single_stepping);
 
 #ifdef __i386__
 
-- 
2.29.2
_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um
^ permalink raw reply related	[flat|nested] 30+ messages in thread
- * [PATCH 24/27] um: Add SECCOMP support detection and initialization
  2021-03-03 15:54 [PATCH 00/27] Implement SECCOMP based userland Benjamin Berg
                   ` (22 preceding siblings ...)
  2021-03-03 15:55 ` [PATCH 23/27] um: Add helper functions to get/set state for SECCOMP Benjamin Berg
@ 2021-03-03 15:55 ` Benjamin Berg
  2021-03-03 15:55 ` [PATCH 25/27] um: Die if a child dies unexpectedly in seccomp mode Benjamin Berg
                   ` (2 subsequent siblings)
  26 siblings, 0 replies; 30+ messages in thread
From: Benjamin Berg @ 2021-03-03 15:55 UTC (permalink / raw)
  To: linux-um; +Cc: Benjamin Berg
This detects seccomp support, sets the global using_seccomp variable and
initializes the exec registers. For now, the implementation simply falls
through to the ptrace startup code, meaning that it is unused.
Signed-off-by: Benjamin Berg <benjamin@sipsolutions.net>
---
 arch/um/include/shared/skas/skas.h |   6 ++
 arch/um/os-Linux/registers.c       |   4 +-
 arch/um/os-Linux/skas/process.c    |   3 +
 arch/um/os-Linux/start_up.c        | 132 ++++++++++++++++++++++++++++-
 4 files changed, 141 insertions(+), 4 deletions(-)
diff --git a/arch/um/include/shared/skas/skas.h b/arch/um/include/shared/skas/skas.h
index c93d2cbc8f32..f10599995d4d 100644
--- a/arch/um/include/shared/skas/skas.h
+++ b/arch/um/include/shared/skas/skas.h
@@ -6,8 +6,14 @@
 #ifndef __SKAS_H
 #define __SKAS_H
 
+#include <linux/kconfig.h>
 #include <sysdep/ptrace.h>
 
+#ifdef CONFIG_UML_SECCOMP
+extern int using_seccomp;
+#else
+#define using_seccomp 0
+#endif
 extern int userspace_pid[];
 
 extern int user_thread(unsigned long stack, int flags);
diff --git a/arch/um/os-Linux/registers.c b/arch/um/os-Linux/registers.c
index 52823368e15a..7c86e0414db0 100644
--- a/arch/um/os-Linux/registers.c
+++ b/arch/um/os-Linux/registers.c
@@ -13,8 +13,8 @@
 
 /* This is set once at boot time and not changed thereafter */
 
-static unsigned long exec_regs[MAX_REG_NR];
-static unsigned long exec_fp_regs[FP_SIZE];
+unsigned long exec_regs[MAX_REG_NR];
+unsigned long exec_fp_regs[FP_SIZE];
 
 int init_registers(int pid)
 {
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index 3bfc28d28fc0..a3f0dd0f47d0 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -251,6 +251,9 @@ static int userspace_tramp(void *stack)
 	return 0;
 }
 
+#ifdef CONFIG_UML_SECCOMP
+int using_seccomp;
+#endif
 int userspace_pid[NR_CPUS];
 int kill_userspace_mm[NR_CPUS];
 
diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c
index 8e6f8c4f3f62..2a581ea1b0ef 100644
--- a/arch/um/os-Linux/start_up.c
+++ b/arch/um/os-Linux/start_up.c
@@ -1,8 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
+ * Copyright (C) 2021 Benjamin Berg <benjamin@sipsolutions.net>
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  */
 
+#include <linux/kconfig.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdarg.h>
@@ -22,6 +24,13 @@
 #include <os.h>
 #include <mem_user.h>
 #include <ptrace_user.h>
+#ifdef CONFIG_UML_SECCOMP
+#include <stub-data.h>
+#include <sys/prctl.h>
+#include <linux/seccomp.h>
+#include <linux/filter.h>
+#include <sysdep/mcontext.h>
+#endif
 #include <registers.h>
 #include <skas.h>
 
@@ -221,6 +230,112 @@ static void __init check_ptrace(void)
 	check_sysemu();
 }
 
+#ifdef CONFIG_UML_SECCOMP
+extern unsigned long exec_regs[MAX_REG_NR];
+extern unsigned long exec_fp_regs[FP_SIZE];
+
+static struct stub_data __initdata *seccomp_shared_page;
+
+static void __init sigsys_handler(int sig, siginfo_t *info, void *p)
+{
+	ucontext_t *uc = p;
+
+	/* Stow away the location of the mcontext in the stack */
+	seccomp_shared_page->mctx_offset = (unsigned long)&uc->uc_mcontext -
+					   (unsigned long)&seccomp_shared_page->sigstack[0];
+	exit(0);
+}
+
+static bool __init init_seccomp(void)
+{
+	int pid;
+	int status;
+	int n;
+
+	/* We check that we can install a seccomp filter and then exit(0)
+	 * from a trapped syscall.
+	 *
+	 * Note that we cannot verify that no seccomp filter already exists
+	 * for a syscall that results in the process/thread being killed.
+	 */
+
+	os_info("Checking that seccomp filters can be installed...");
+
+	seccomp_shared_page = mmap(0, UM_KERN_PAGE_SIZE,
+				   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, 0, 0);
+
+	pid = fork();
+	if (pid == 0) {
+		static struct sock_filter filter[] = {
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				offsetof(struct seccomp_data, nr)),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_exit_group, 0, 1),
+			BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
+			BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRAP),
+		};
+		static struct sock_fprog prog = {
+			.len = ARRAY_SIZE(filter),
+			.filter = filter,
+		};
+		struct sigaction sa;
+
+		set_sigstack(seccomp_shared_page->sigstack,
+			     sizeof(seccomp_shared_page->sigstack));
+
+		sa.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO;
+		sa.sa_sigaction = (void *) sigsys_handler;
+		sa.sa_restorer = NULL;
+		if (sigaction(SIGSYS, &sa, NULL) < 0)
+			exit(1);
+
+		prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+		if (syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
+			    SECCOMP_FILTER_FLAG_TSYNC, &prog) != 0)
+			exit(2);
+
+		sleep(0);
+
+		/* Never reached. */
+		exit(3);
+	}
+
+	if (pid < 0)
+		fatal_perror("check_seccomp : fork failed");
+
+	CATCH_EINTR(n = waitpid(pid, &status, 0));
+	if (n < 0)
+		fatal_perror("check_seccomp : waitpid failed");
+
+	if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
+		struct uml_pt_regs regs = { 0 };
+
+		/* XXX: Can we add something like this easily anywhere?
+		 * COMPILE_BUG_ON(sizeof(*mc->fpregs) > sizeof(regs->fp));
+		 */
+
+		/* Copy registers, the init_registers function assumes ptrace. */
+		get_stub_state(&regs, seccomp_shared_page);
+
+		memcpy(exec_regs, regs.gp, sizeof(exec_regs));
+		memcpy(exec_fp_regs, regs.fp, sizeof(exec_fp_regs));
+
+		munmap(seccomp_shared_page, sizeof(mcontext_t));
+
+		os_info("OK\n");
+
+		return true;
+	}
+
+	if (WIFEXITED(status) && WEXITSTATUS(status) == 2)
+		os_info("missing\n");
+	else
+		os_info("error\n");
+
+	munmap(seccomp_shared_page, sizeof(mcontext_t));
+	return false;
+}
+#endif
+
 extern void check_tmpexec(void);
 
 static void __init check_coredump_limit(void)
@@ -253,13 +368,26 @@ void __init os_early_checks(void)
 	/* Print out the core dump limits early */
 	check_coredump_limit();
 
-	check_ptrace();
-
 	/* Need to check this early because mmapping happens before the
 	 * kernel is running.
 	 */
 	check_tmpexec();
 
+#ifdef CONFIG_UML_SECCOMP
+	using_seccomp = 0;
+
+	if (init_seccomp()) {
+		/* Not fully implemented */
+#if 0
+		using_seccomp = 1;
+
+		return;
+#endif
+	}
+#endif
+
+	check_ptrace();
+
 	pid = start_ptraced_child();
 	if (init_registers(pid))
 		fatal("Failed to initialize default registers");
-- 
2.29.2
_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um
^ permalink raw reply related	[flat|nested] 30+ messages in thread
- * [PATCH 25/27] um: Die if a child dies unexpectedly in seccomp mode
  2021-03-03 15:54 [PATCH 00/27] Implement SECCOMP based userland Benjamin Berg
                   ` (23 preceding siblings ...)
  2021-03-03 15:55 ` [PATCH 24/27] um: Add SECCOMP support detection and initialization Benjamin Berg
@ 2021-03-03 15:55 ` Benjamin Berg
  2021-03-03 15:55 ` [PATCH 26/27] um: Implement kernel side of SECCOMP based process handling Benjamin Berg
  2021-03-03 15:55 ` [PATCH 27/27] um: Delay flushing syscalls until the thread is restarted Benjamin Berg
  26 siblings, 0 replies; 30+ messages in thread
From: Benjamin Berg @ 2021-03-03 15:55 UTC (permalink / raw)
  To: linux-um; +Cc: Benjamin Berg
When in seccomp mode, we would hang forever on the futex if a child has
died unexpectedly. In contrast, ptrace mode will notice it and kill the
corresponding thread when it fails to run it.
Fix this issue by simply printing a message and aborting. In this case
something from the outside (e.g. OOM killer) has interfered with the
machine and it is reasonable to not try to recover.
Signed-off-by: Benjamin Berg <benjamin@sipsolutions.net>
---
 arch/um/include/shared/os.h |  1 +
 arch/um/os-Linux/process.c  | 40 +++++++++++++++++++++++++++++++++++++
 arch/um/os-Linux/signal.c   |  7 +++++++
 3 files changed, 48 insertions(+)
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 8514d90cd5fa..283e95731d6f 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -192,6 +192,7 @@ extern void check_host_supports_tls(int *supports_tls, int *tls_min);
 extern int create_mem_file(unsigned long long len);
 
 /* process.c */
+void os_check_child_lost(void);
 extern unsigned long os_process_pc(int pid);
 extern int os_process_parent(int pid);
 extern void os_alarm_process(int pid);
diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c
index e52dd37ddadc..db98fc79d9e2 100644
--- a/arch/um/os-Linux/process.c
+++ b/arch/um/os-Linux/process.c
@@ -17,6 +17,7 @@
 #include <init.h>
 #include <longjmp.h>
 #include <os.h>
+#include <skas/skas.h>
 
 #define ARBITRARY_ADDR -1
 #define FAILURE_PID    -1
@@ -102,9 +103,18 @@ void os_stop_process(int pid)
 
 void os_kill_process(int pid, int reap_child)
 {
+	sigset_t chld;
+
+	/* Block SIGCHLD so that we can reap it before the handler runs. */
+	sigemptyset(&chld);
+	sigaddset(&chld, SIGCHLD);
+	sigprocmask(SIG_BLOCK, &chld, NULL);
+
 	kill(pid, SIGKILL);
 	if (reap_child)
 		CATCH_EINTR(waitpid(pid, NULL, __WALL));
+
+	sigprocmask(SIG_UNBLOCK, &chld, NULL);
 }
 
 /* Kill off a ptraced child by all means available.  kill it normally first,
@@ -114,11 +124,39 @@ void os_kill_process(int pid, int reap_child)
 
 void os_kill_ptraced_process(int pid, int reap_child)
 {
+	sigset_t chld;
+
+	/* Block SIGCHLD so that we can reap it before the handler runs. */
+	sigemptyset(&chld);
+	sigaddset(&chld, SIGCHLD);
+	sigprocmask(SIG_BLOCK, &chld, NULL);
+
 	kill(pid, SIGKILL);
 	ptrace(PTRACE_KILL, pid);
 	ptrace(PTRACE_CONT, pid);
 	if (reap_child)
 		CATCH_EINTR(waitpid(pid, NULL, __WALL));
+
+	sigprocmask(SIG_UNBLOCK, &chld, NULL);
+}
+
+void os_check_child_lost(void)
+{
+	int status;
+	pid_t pid;
+
+	/*
+	 * Check if we can reap a child.
+	 * Any expected kills will clean up without this handler being fired.
+	 */
+	pid = waitpid(-1, &status, WNOHANG);
+	if (pid <= 0)
+		return;
+
+	os_warn("Child %d died unexpectedly with status %d, cannot recover in seccomp mode!\r\n",
+		pid, status);
+	/* Kill ourselves including all children. */
+	killpg(os_getpid(), SIGABRT);
 }
 
 /* Don't use the glibc version, which caches the result in TLS. It misses some
@@ -283,5 +321,7 @@ void init_new_thread_signals(void)
 	set_handler(SIGBUS);
 	signal(SIGHUP, SIG_IGN);
 	set_handler(SIGIO);
+	if (using_seccomp)
+		set_handler(SIGCHLD);
 	signal(SIGWINCH, SIG_IGN);
 }
diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c
index 96f511d1aabe..d04b39bace53 100644
--- a/arch/um/os-Linux/signal.c
+++ b/arch/um/os-Linux/signal.c
@@ -94,6 +94,11 @@ static void timer_real_alarm_handler(mcontext_t *mc)
 	timer_handler(SIGALRM, NULL, &regs);
 }
 
+static void sig_child_handler(int sig, struct siginfo *unused_si, mcontext_t *mc)
+{
+	os_check_child_lost();
+}
+
 void timer_alarm_handler(int sig, struct siginfo *unused_si, mcontext_t *mc)
 {
 	int enabled;
@@ -155,6 +160,8 @@ static void (*handlers[_NSIG])(int sig, struct siginfo *si, mcontext_t *mc) = {
 
 	[SIGIO] = sig_handler,
 	[SIGWINCH] = sig_handler,
+	/* SIGCHLD is only registered in seccomp mode. */
+	[SIGCHLD] = sig_child_handler,
 	[SIGALRM] = timer_alarm_handler,
 
 	[SIGUSR1] = sigusr1_handler,
-- 
2.29.2
_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um
^ permalink raw reply related	[flat|nested] 30+ messages in thread
- * [PATCH 26/27] um: Implement kernel side of SECCOMP based process handling
  2021-03-03 15:54 [PATCH 00/27] Implement SECCOMP based userland Benjamin Berg
                   ` (24 preceding siblings ...)
  2021-03-03 15:55 ` [PATCH 25/27] um: Die if a child dies unexpectedly in seccomp mode Benjamin Berg
@ 2021-03-03 15:55 ` Benjamin Berg
  2021-03-03 15:55 ` [PATCH 27/27] um: Delay flushing syscalls until the thread is restarted Benjamin Berg
  26 siblings, 0 replies; 30+ messages in thread
From: Benjamin Berg @ 2021-03-03 15:55 UTC (permalink / raw)
  To: linux-um; +Cc: Benjamin Berg, Johannes Berg
This adds the kernel side of the seccomp based process handling.
Co-authored-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Benjamin Berg <benjamin@sipsolutions.net>
---
 arch/um/os-Linux/skas/mem.c     |  36 ++-
 arch/um/os-Linux/skas/process.c | 534 +++++++++++++++++++++++---------
 arch/um/os-Linux/start_up.c     |   3 -
 3 files changed, 414 insertions(+), 159 deletions(-)
diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c
index b52d536d2d4d..648d9406eb25 100644
--- a/arch/um/os-Linux/skas/mem.c
+++ b/arch/um/os-Linux/skas/mem.c
@@ -4,6 +4,7 @@
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  */
 
+#include <linux/kconfig.h>
 #include <stddef.h>
 #include <unistd.h>
 #include <errno.h>
@@ -22,6 +23,7 @@
 extern char __syscall_stub_start[];
 
 extern void wait_stub_done(int pid);
+void wait_stub_done_seccomp(int pid, struct stub_data *data, int running);
 
 static inline unsigned long *check_init_stack(struct mm_id *mm_idp,
 					      unsigned long *stack)
@@ -56,24 +58,30 @@ static inline long do_syscall_stub(struct mm_id *mm_idp)
 	int n, i;
 	int err, pid = mm_idp->u.pid;
 
-	n = ptrace_setregs(pid, syscall_regs);
-	if (n < 0) {
-		printk(UM_KERN_ERR "Registers - \n");
-		for (i = 0; i < MAX_REG_NR; i++)
-			printk(UM_KERN_ERR "\t%d\t0x%lx\n", i, syscall_regs[i]);
-		panic("%s : PTRACE_SETREGS failed, errno = %d\n",
-		      __func__, -n);
-	}
-
 	/* Inform process how much we have filled in. */
 	proc_data->syscall_data_len = mm_idp->syscall_data_len;
 
-	err = ptrace(PTRACE_CONT, pid, 0, 0);
-	if (err)
-		panic("Failed to continue stub, pid = %d, errno = %d\n", pid,
-		      errno);
+	if (using_seccomp) {
+		/* We never leave the seccomp signal handler in the child. */
+		proc_data->restart_wait = 1;
+		wait_stub_done_seccomp(pid, proc_data, 0);
+	} else {
+		n = ptrace_setregs(pid, syscall_regs);
+		if (n < 0) {
+			printk(UM_KERN_ERR "Registers -\n");
+			for (i = 0; i < MAX_REG_NR; i++)
+				printk(UM_KERN_ERR "\t%d\t0x%lx\n", i, syscall_regs[i]);
+			panic("%s : PTRACE_SETREGS failed, errno = %d\n",
+			      __func__, -n);
+		}
+
+		err = ptrace(PTRACE_CONT, pid, 0, 0);
+		if (err)
+			panic("Failed to continue stub, pid = %d, errno = %d\n",
+			      pid, errno);
 
-	wait_stub_done(pid);
+		wait_stub_done(pid);
+	}
 
 	/*
 	 * proc_data->err will be non-zero if there was an (unexpected) error.
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index a3f0dd0f47d0..ceb75ac47b5f 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -1,9 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
+ * Copyright (C) 2021 Benjamin Berg <benjamin@sipsolutions.net>
  * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
  * Copyright (C) 2002- 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  */
 
+#include <linux/kconfig.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include <sched.h>
@@ -21,7 +23,13 @@
 #include <registers.h>
 #include <skas.h>
 #include <sysdep/stub.h>
+#include <sysdep/mcontext.h>
 #include <linux/threads.h>
+#include <sys/resource.h>
+#include <sys/prctl.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+#include <linux/futex.h>
 
 int is_skas_winch(int pid, int fd, void *data)
 {
@@ -136,6 +144,58 @@ void wait_stub_done(int pid)
 	fatal_sigsegv();
 }
 
+#ifdef CONFIG_UML_SECCOMP
+void wait_stub_done_seccomp(int pid, struct stub_data *data, int running)
+{
+	int ret;
+
+	do {
+		if (!running) {
+			data->signal = 0;
+			data->futex = FUTEX_IN_CHILD;
+			CATCH_EINTR(syscall(__NR_futex, &data->futex,
+					    FUTEX_WAKE, 1, NULL, NULL, 0));
+		}
+
+		do {
+			ret = syscall(__NR_futex, &data->futex,
+				      FUTEX_WAIT, FUTEX_IN_CHILD,
+				      NULL, NULL, 0);
+		} while ((ret == -1 && errno == EINTR) || data->futex == FUTEX_IN_CHILD);
+
+		running = 0;
+
+		/* We may receive a SIGALRM, if we do, we are not done yet and need to iterate. */
+	} while (data->signal == SIGALRM);
+
+	if (ret < 0 && errno != EAGAIN) {
+		printk(UM_KERN_ERR "%s : waiting for child futex failed, errno = %d\n",
+		       __func__, errno);
+		goto out_kill;
+	}
+
+	if (data->mctx_offset > sizeof(data->sigstack) - sizeof(mcontext_t)) {
+		printk(UM_KERN_ERR "%s : invalid mcontext offset", __func__);
+		goto out_kill;
+	}
+
+	if (data->signal != SIGTRAP) {
+		printk(UM_KERN_ERR "%s : expected SIGTRAP but got %d",
+		       __func__, data->signal);
+		goto out_kill;
+	}
+
+	return;
+
+out_kill:
+	printk(UM_KERN_ERR "%s : failed to wait for SIGTRAP, pid = %d, errno = %d\n",
+	       __func__, pid, errno);
+	fatal_sigsegv();
+}
+#else
+void wait_stub_done_seccomp(int pid, struct stub_data *data, int running);
+#endif
+
 extern unsigned long current_stub_stack(void);
 
 static void get_skas_faultinfo(int pid, struct faultinfo *fi, unsigned long *aux_fp_regs)
@@ -197,16 +257,16 @@ extern char __syscall_stub_start[];
  */
 static int userspace_tramp(void *stack)
 {
-	struct sigaction sa;
 	struct stub_data *data;
 	void *addr;
 	int fd;
 	unsigned long long offset;
-	unsigned long segv_handler = STUB_CODE +
-				     (unsigned long) stub_segv_handler -
-				     (unsigned long) __syscall_stub_start;
 
-	ptrace(PTRACE_TRACEME, 0, 0, 0);
+	if (!using_seccomp)
+		ptrace(PTRACE_TRACEME, 0, 0, 0);
+
+	/* Needed for seccomp, but this is sane anyway. */
+	prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 
 	signal(SIGTERM, SIG_DFL);
 	signal(SIGWINCH, SIG_IGN);
@@ -237,17 +297,127 @@ static int userspace_tramp(void *stack)
 	 */
 
 	set_sigstack((void *) &data->sigstack, sizeof(data->sigstack));
-	sigemptyset(&sa.sa_mask);
-	sa.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO;
-	sa.sa_sigaction = (void *) segv_handler;
-	sa.sa_restorer = NULL;
-	if (sigaction(SIGSEGV, &sa, NULL) < 0) {
-		os_info("%s - setting SIGSEGV handler failed - errno = %d\n",
-			__func__, errno);
-		exit(1);
+
+	if (using_seccomp) {
+		struct rlimit lim;
+			struct sock_filter filter[] = {
+#if __BITS_PER_LONG > 32
+			/* [0] Load upper 32bit of instruction pointer from seccomp_data */
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				(offsetof(struct seccomp_data, instruction_pointer) + 4)),
+
+			/* [1] Jump to [6] if the upper address is not identical */
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, (STUB_CODE) >> 32, 0, 4),
+#endif
+			/* [2] Load lower 32bit of instruction pointer from seccomp_data */
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				(offsetof(struct seccomp_data, instruction_pointer))),
+
+			/* [3] Mask out lower bits */
+			BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0xfffff000),
+
+			/* [4] Jump to [6] if the lower bits are not on the expected page */
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, (STUB_CODE) & 0xfffff000, 0, 1),
+
+			/* [5] Permitted call, allow */
+			BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
+
+			/* [6] Restricted call, replace with SIGSYS */
+			BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRAP),
+		};
+		struct sock_fprog prog = {
+			.len = ARRAY_SIZE(filter),
+			.filter = filter,
+		};
+
+		/*
+		 * With seccomp we return normally from the signal handler, so
+		 * avoid setting things up through libc which may do its own
+		 * thing for restoring.
+		 */
+		struct sigaction_real {
+			void *sa_handler_;
+			unsigned long sa_flags;
+			void *sa_restorer;
+			sigset_t sa_mask;
+		} sa;
+
+		unsigned long v = STUB_CODE +
+				  (unsigned long) stub_signal_interrupt -
+				  (unsigned long) __syscall_stub_start;
+		unsigned long r = STUB_CODE +
+				  (unsigned long) stub_signal_restorer -
+				  (unsigned long) __syscall_stub_start;
+
+		/* Never coredump */
+		lim.rlim_cur = 0;
+		lim.rlim_max = 0;
+		if (setrlimit(RLIMIT_CORE, &lim) < 0) {
+			os_info("Could not set coredump size limit, errno = %d\n",
+				errno);
+			exit(1);
+		}
+
+		sigemptyset(&sa.sa_mask);
+		sigaddset(&sa.sa_mask, SIGALRM);
+		sigaddset(&sa.sa_mask, SIGCHLD);
+		sa.sa_flags = SA_ONSTACK | SA_SIGINFO | 0x04000000; /* SA_RESTORER */
+		sa.sa_handler_ = (void *)v;
+		sa.sa_restorer = (void *)r;
+		if (syscall(__NR_rt_sigaction, SIGSEGV, &sa, NULL, 8) < 0) {
+			os_info("%s - setting SIGSEGV handler failed - errno = %d\n",
+				__func__, errno);
+			exit(1);
+		}
+
+		if (syscall(__NR_rt_sigaction, SIGSYS, &sa, NULL, 8) < 0) {
+			os_info("%s - setting SIGSYS handler failed - errno = %d\n",
+				__func__, errno);
+			exit(1);
+		}
+
+		if (syscall(__NR_rt_sigaction, SIGALRM, &sa, NULL, 8) < 0) {
+			os_info("%s - setting SIGALRM handler failed - errno = %d\n",
+				__func__, errno);
+			exit(1);
+		}
+
+		if (syscall(__NR_rt_sigaction, SIGTRAP, &sa, NULL, 8) < 0) {
+			os_info("%s - setting SIGTRAP handler failed - errno = %d\n",
+				__func__, errno);
+			exit(1);
+		}
+
+		if (syscall(__NR_rt_sigaction, SIGFPE, &sa, NULL, 8) < 0) {
+			os_info("%s - setting SIGFPE handler failed - errno = %d\n",
+				__func__, errno);
+			exit(1);
+		}
+
+		if (syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
+			    SECCOMP_FILTER_FLAG_TSYNC, &prog) != 0)
+			exit(42);
+
+		trap_myself();
+	} else {
+		struct sigaction sa;
+		unsigned long segv_handler = STUB_CODE +
+					     (unsigned long) stub_segv_handler -
+					     (unsigned long) __syscall_stub_start;
+
+		sigemptyset(&sa.sa_mask);
+		sa.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO;
+		sa.sa_sigaction = (void *) segv_handler;
+		sa.sa_restorer = NULL;
+		if (sigaction(SIGSEGV, &sa, NULL) < 0) {
+			os_info("%s - setting SIGSEGV handler failed - errno = %d\n",
+				__func__, errno);
+			exit(1);
+		}
+
+		kill(os_getpid(), SIGSTOP);
 	}
 
-	kill(os_getpid(), SIGSTOP);
 	return 0;
 }
 
@@ -273,6 +443,7 @@ int start_userspace(struct mm_id *id)
 	void *stack;
 	unsigned long sp;
 	int status, n, flags, err;
+	struct stub_data *proc_data = (void *) id->stack;
 
 	/* setup a temporary stack page */
 	stack = mmap(NULL, UM_KERN_PAGE_SIZE,
@@ -290,6 +461,9 @@ int start_userspace(struct mm_id *id)
 
 	flags = CLONE_FILES | SIGCHLD;
 
+	if (using_seccomp)
+		proc_data->futex = FUTEX_IN_CHILD;
+
 	/* clone into new userspace process */
 	id->u.pid = clone(userspace_tramp, (void *) sp, flags, (void *) id->stack);
 	if (id->u.pid < 0) {
@@ -299,29 +473,33 @@ int start_userspace(struct mm_id *id)
 		return err;
 	}
 
-	do {
-		CATCH_EINTR(n = waitpid(id->u.pid, &status, WUNTRACED | __WALL));
-		if (n < 0) {
+	if (using_seccomp) {
+		wait_stub_done_seccomp(id->u.pid, proc_data, 1);
+	} else {
+		do {
+			CATCH_EINTR(n = waitpid(id->u.pid, &status, WUNTRACED | __WALL));
+			if (n < 0) {
+				err = -errno;
+				printk(UM_KERN_ERR "%s : wait failed, errno = %d\n",
+				       __func__, errno);
+				goto out_kill;
+			}
+		} while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGALRM));
+
+		if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) {
+			err = -EINVAL;
+			printk(UM_KERN_ERR "%s : expected SIGSTOP, got status = %d\n",
+			       __func__, status);
+			goto out_kill;
+		}
+
+		if (ptrace(PTRACE_SETOPTIONS, id->u.pid, NULL,
+			   (void *) PTRACE_O_TRACESYSGOOD) < 0) {
 			err = -errno;
-			printk(UM_KERN_ERR "%s : wait failed, errno = %d\n",
+			printk(UM_KERN_ERR "%s : PTRACE_OLDSETOPTIONS failed, errno = %d\n",
 			       __func__, errno);
 			goto out_kill;
 		}
-	} while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGALRM));
-
-	if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) {
-		err = -EINVAL;
-		printk(UM_KERN_ERR "%s : expected SIGSTOP, got status = %d\n",
-		       __func__, status);
-		goto out_kill;
-	}
-
-	if (ptrace(PTRACE_SETOPTIONS, id->u.pid, NULL,
-		   (void *) PTRACE_O_TRACESYSGOOD) < 0) {
-		err = -errno;
-		printk(UM_KERN_ERR "%s : PTRACE_OLDSETOPTIONS failed, errno = %d\n",
-		       __func__, errno);
-		goto out_kill;
 	}
 
 	if (munmap(stack, UM_KERN_PAGE_SIZE) < 0) {
@@ -341,7 +519,9 @@ int start_userspace(struct mm_id *id)
 void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
 {
 	int err, status, op, pid = userspace_pid[0];
-	siginfo_t si;
+	siginfo_t si_ptrace;
+	siginfo_t *si;
+	int sig;
 
 	/* Handle any immediate reschedules or signals */
 	interrupt_end();
@@ -350,94 +530,148 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
 		if (kill_userspace_mm[0])
 			fatal_sigsegv();
 
-		/*
-		 * This can legitimately fail if the process loads a
-		 * bogus value into a segment register.  It will
-		 * segfault and PTRACE_GETREGS will read that value
-		 * out of the process.  However, PTRACE_SETREGS will
-		 * fail.  In this case, there is nothing to do but
-		 * just kill the process.
-		 */
-		if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp)) {
-			printk(UM_KERN_ERR "%s - ptrace set regs failed, errno = %d\n",
-			       __func__, errno);
-			fatal_sigsegv();
-		}
+		if (using_seccomp) {
+			struct stub_data *proc_data = (void *)current_stub_stack();
+			int ret;
 
-		if (put_fp_registers(pid, regs->fp)) {
-			printk(UM_KERN_ERR "%s - ptrace set fp regs failed, errno = %d\n",
-			       __func__, errno);
-			fatal_sigsegv();
-		}
+			set_stub_state(regs, proc_data, singlestepping(NULL));
 
-		if (singlestepping(NULL))
-			op = PTRACE_SYSEMU_SINGLESTEP;
-		else
-			op = PTRACE_SYSEMU;
+			/* Must have been reset by the syscall caller */
+			if (proc_data->restart_wait != 0)
+				panic("Programming error: Flag to only run syscalls in child was not cleared!");
 
-		if (ptrace(op, pid, 0, 0)) {
-			printk(UM_KERN_ERR "%s - ptrace continue failed, op = %d, errno = %d\n",
-			       __func__, op, errno);
-			fatal_sigsegv();
-		}
+			proc_data->signal = 0;
+			proc_data->futex = FUTEX_IN_CHILD;
+			CATCH_EINTR(syscall(__NR_futex, &proc_data->futex,
+					    FUTEX_WAKE, 1, NULL, NULL, 0));
+			do {
+				ret = syscall(__NR_futex, &proc_data->futex,
+					      FUTEX_WAIT, FUTEX_IN_CHILD, NULL, NULL, 0);
+			} while ((ret == -1 && errno == EINTR) ||
+				 proc_data->futex == FUTEX_IN_CHILD);
 
-		CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL));
-		if (err < 0) {
-			printk(UM_KERN_ERR "%s - wait failed, errno = %d\n",
-			       __func__, errno);
-			fatal_sigsegv();
-		}
+			sig = proc_data->signal;
 
-		regs->is_user = 1;
-		if (ptrace(PTRACE_GETREGS, pid, 0, regs->gp)) {
-			printk(UM_KERN_ERR "%s - PTRACE_GETREGS failed, errno = %d\n",
-			       __func__, errno);
-			fatal_sigsegv();
-		}
+			get_stub_state(regs, proc_data);
 
-		if (get_fp_registers(pid, regs->fp)) {
-			printk(UM_KERN_ERR "%s -  get_fp_registers failed, errno = %d\n",
-			       __func__, errno);
-			fatal_sigsegv();
-		}
+			if (proc_data->si_offset > sizeof(proc_data->sigstack) - sizeof(*si))
+				panic("%s - Invalid siginfo offset from child",
+				      __func__);
+			si = (void *)&proc_data->sigstack[proc_data->si_offset];
 
-		UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */
+			regs->is_user = 1;
 
-		if (WIFSTOPPED(status)) {
-			int sig = WSTOPSIG(status);
+			/* Fill in ORIG_RAX and extract fault information */
+			PT_SYSCALL_NR(regs->gp) = si->si_syscall;
+			if (sig == SIGSEGV) {
+				mcontext_t *mcontext = (void *)&proc_data->sigstack[proc_data->mctx_offset];
 
-			/* These signal handlers need the si argument.
-			 * The SIGIO and SIGALARM handlers which constitute the
-			 * majority of invocations, do not use it.
+				GET_FAULTINFO_FROM_MC(regs->faultinfo, mcontext);
+			}
+		} else {
+			/*
+			 * This can legitimately fail if the process loads a
+			 * bogus value into a segment register.  It will
+			 * segfault and PTRACE_GETREGS will read that value
+			 * out of the process.  However, PTRACE_SETREGS will
+			 * fail.  In this case, there is nothing to do but
+			 * just kill the process.
 			 */
-			switch (sig) {
-			case SIGSEGV:
-			case SIGTRAP:
-			case SIGILL:
-			case SIGBUS:
-			case SIGFPE:
-			case SIGWINCH:
-				ptrace(PTRACE_GETSIGINFO, pid, 0, (struct siginfo *)&si);
-				break;
+			if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp)) {
+				printk(UM_KERN_ERR "%s - ptrace set regs failed, errno = %d\n",
+				       __func__, errno);
+				fatal_sigsegv();
+			}
+
+			if (put_fp_registers(pid, regs->fp)) {
+				printk(UM_KERN_ERR "%s - ptrace set fp regs failed, errno = %d\n",
+				       __func__, errno);
+				fatal_sigsegv();
+			}
+
+			if (singlestepping(NULL))
+				op = PTRACE_SYSEMU_SINGLESTEP;
+			else
+				op = PTRACE_SYSEMU;
+
+			if (ptrace(op, pid, 0, 0)) {
+				printk(UM_KERN_ERR "%s - ptrace continue failed, op = %d, errno = %d\n",
+				       __func__, op, errno);
+				fatal_sigsegv();
+			}
+
+			CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL));
+			if (err < 0) {
+				printk(UM_KERN_ERR "%s - wait failed, errno = %d\n",
+				       __func__, errno);
+				fatal_sigsegv();
+			}
+
+			regs->is_user = 1;
+			if (ptrace(PTRACE_GETREGS, pid, 0, regs->gp)) {
+				printk(UM_KERN_ERR "%s - PTRACE_GETREGS failed, errno = %d\n",
+				       __func__, errno);
+				fatal_sigsegv();
+			}
+
+			if (get_fp_registers(pid, regs->fp)) {
+				printk(UM_KERN_ERR "%s -  get_fp_registers failed, errno = %d\n",
+				       __func__, errno);
+				fatal_sigsegv();
 			}
 
+			if (WIFSTOPPED(status)) {
+				sig = WSTOPSIG(status);
+
+				/* These signal handlers need the si argument
+				 * and SIGSEGV needs the faultinfo.
+				 * The SIGIO and SIGALARM handlers which constitute the
+				 * majority of invocations, do not use it.
+				 */
+				switch (sig) {
+				case SIGSEGV:
+					get_skas_faultinfo(pid,
+							   &regs->faultinfo,
+							   aux_fp_regs);
+					fallthrough;
+				case SIGTRAP:
+				case SIGILL:
+				case SIGBUS:
+				case SIGFPE:
+				case SIGWINCH:
+					ptrace(PTRACE_GETSIGINFO, pid, 0,
+					       (struct siginfo *)&si_ptrace);
+					si = &si_ptrace;
+					break;
+				default:
+					si = NULL;
+					break;
+				}
+			} else {
+				sig = 0;
+			}
+		}
+
+		UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */
+
+		if (sig) {
 			switch (sig) {
 			case SIGSEGV:
-				get_skas_faultinfo(pid,
-						   &regs->faultinfo, aux_fp_regs);
-
-				if (PTRACE_FULL_FAULTINFO)
-					(*sig_info[SIGSEGV])(SIGSEGV, (struct siginfo *)&si,
-							     regs);
+				if (using_seccomp || PTRACE_FULL_FAULTINFO)
+					(*sig_info[SIGSEGV])(SIGSEGV, (struct siginfo *)si,
+						     regs);
 				else
 					segv(regs->faultinfo, 0, 1, NULL);
 
+				break;
+			case SIGSYS:
+				handle_syscall(regs);
 				break;
 			case SIGTRAP + 0x80:
 				handle_trap(pid, regs);
 				break;
 			case SIGTRAP:
-				relay_signal(SIGTRAP, (struct siginfo *)&si, regs);
+				relay_signal(SIGTRAP, (struct siginfo *)si, regs);
 				break;
 			case SIGALRM:
 				break;
@@ -447,7 +681,7 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
 			case SIGFPE:
 			case SIGWINCH:
 				block_signals_trace();
-				(*sig_info[sig])(sig, (struct siginfo *)&si, regs);
+				(*sig_info[sig])(sig, (struct siginfo *)si, regs);
 				unblock_signals_trace();
 				break;
 			default:
@@ -471,9 +705,15 @@ static int __init init_thread_regs(void)
 {
 	get_safe_registers(thread_regs.gp, thread_regs.fp);
 	/* Set parent's instruction pointer to start of clone-stub */
-	thread_regs.gp[REGS_IP_INDEX] = STUB_CODE +
-				(unsigned long) stub_clone_handler -
-				(unsigned long) __syscall_stub_start;
+	if (using_seccomp)
+		thread_regs.gp[REGS_IP_INDEX] = STUB_CODE +
+					(unsigned long) stub_clone_handler_seccomp -
+					(unsigned long) __syscall_stub_start;
+	else
+		thread_regs.gp[REGS_IP_INDEX] = STUB_CODE +
+					(unsigned long) stub_clone_handler -
+					(unsigned long) __syscall_stub_start;
+
 	thread_regs.gp[REGS_SP_INDEX] = STUB_DATA + UM_KERN_PAGE_SIZE -
 		sizeof(void *);
 #ifdef __SIGNAL_FRAMESIZE
@@ -497,45 +737,50 @@ int copy_context_skas0(struct mm_id *id, struct mm_id *from)
 	 * prepare offset and fd of child's stack as argument for parent's
 	 * and child's mmap2 calls
 	 */
-	*data = ((struct stub_data) {
-		.offset	= MMAP_OFFSET(new_offset),
-		.fd     = new_fd,
-		.err    = -ESRCH,
-		.child_err = 0,
-	});
-
-	*child_data = ((struct stub_data) {
-		.child_err = -ESRCH,
-	});
-
-	err = ptrace_setregs(from->u.pid, thread_regs.gp);
-	if (err < 0) {
-		err = -errno;
-		printk(UM_KERN_ERR "%s : PTRACE_SETREGS failed, pid = %d, errno = %d\n",
-		      __func__, from->u.pid, -err);
-		return err;
-	}
+	data->offset     = MMAP_OFFSET(new_offset);
+	data->fd         = new_fd;
+	data->err        = -ESRCH;
+	data->child_err  = 0;
 
-	err = put_fp_registers(from->u.pid, thread_regs.fp);
-	if (err < 0) {
-		printk(UM_KERN_ERR "%s : put_fp_registers failed, pid = %d, err = %d\n",
-		       __func__, from->u.pid, err);
-		return err;
-	}
+	child_data->child_err = -ESRCH;
 
-	/*
-	 * Wait, until parent has finished its work: read child's pid from
-	 * parent's stack, and check, if bad result.
-	 */
-	err = ptrace(PTRACE_CONT, from->u.pid, 0, 0);
-	if (err) {
-		err = -errno;
-		printk(UM_KERN_ERR "Failed to continue new process, pid = %d, errno = %d\n",
-		       from->u.pid, errno);
-		return err;
-	}
+	if (using_seccomp) {
+		set_stub_state(&thread_regs, data, 0);
+
+		child_data->futex = FUTEX_IN_CHILD;
+
+		data->restart_wait = 0;
+		wait_stub_done_seccomp(from->u.pid, data, 0);
+	} else {
+		err = ptrace_setregs(from->u.pid, thread_regs.gp);
+		if (err < 0) {
+			err = -errno;
+			printk(UM_KERN_ERR "%s : PTRACE_SETREGS failed, pid = %d, errno = %d\n",
+			      __func__, from->u.pid, -err);
+			return err;
+		}
+
+		err = put_fp_registers(from->u.pid, thread_regs.fp);
+		if (err < 0) {
+			printk(UM_KERN_ERR "%s : put_fp_registers failed, pid = %d, err = %d\n",
+			       __func__, from->u.pid, err);
+			return err;
+		}
 
-	wait_stub_done(from->u.pid);
+		/*
+		 * Wait, until parent has finished its work: read child's pid from
+		 * parent's stack, and check, if bad result.
+		 */
+		err = ptrace(PTRACE_CONT, from->u.pid, 0, 0);
+		if (err) {
+			err = -errno;
+			printk(UM_KERN_ERR "Failed to continue new process, pid = %d, errno = %d\n",
+			       from->u.pid, errno);
+			return err;
+		}
+
+		wait_stub_done(from->u.pid);
+	}
 
 	id->u.pid = data->err;
 	if (id->u.pid < 0) {
@@ -548,7 +793,11 @@ int copy_context_skas0(struct mm_id *id, struct mm_id *from)
 	 * Wait, until child has finished too: read child's result from
 	 * child's stack and check it.
 	 */
-	wait_stub_done(id->u.pid);
+	if (using_seccomp)
+		wait_stub_done_seccomp(id->u.pid, child_data, 1);
+	else
+		wait_stub_done(id->u.pid);
+
 	if (child_data->child_err != STUB_DATA) {
 		printk(UM_KERN_ERR "%s - stub-child %d reports error %ld\n",
 		       __func__, id->u.pid, data->child_err);
@@ -556,7 +805,8 @@ int copy_context_skas0(struct mm_id *id, struct mm_id *from)
 		goto out_kill;
 	}
 
-	if (ptrace(PTRACE_SETOPTIONS, id->u.pid, NULL,
+	if (!using_seccomp &&
+	    ptrace(PTRACE_SETOPTIONS, id->u.pid, NULL,
 		   (void *)PTRACE_O_TRACESYSGOOD) < 0) {
 		err = -errno;
 		printk(UM_KERN_ERR "%s : PTRACE_OLDSETOPTIONS failed, errno = %d\n",
diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c
index 2a581ea1b0ef..3f8473a58de1 100644
--- a/arch/um/os-Linux/start_up.c
+++ b/arch/um/os-Linux/start_up.c
@@ -377,12 +377,9 @@ void __init os_early_checks(void)
 	using_seccomp = 0;
 
 	if (init_seccomp()) {
-		/* Not fully implemented */
-#if 0
 		using_seccomp = 1;
 
 		return;
-#endif
 	}
 #endif
 
-- 
2.29.2
_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um
^ permalink raw reply related	[flat|nested] 30+ messages in thread
- * [PATCH 27/27] um: Delay flushing syscalls until the thread is restarted
  2021-03-03 15:54 [PATCH 00/27] Implement SECCOMP based userland Benjamin Berg
                   ` (25 preceding siblings ...)
  2021-03-03 15:55 ` [PATCH 26/27] um: Implement kernel side of SECCOMP based process handling Benjamin Berg
@ 2021-03-03 15:55 ` Benjamin Berg
  26 siblings, 0 replies; 30+ messages in thread
From: Benjamin Berg @ 2021-03-03 15:55 UTC (permalink / raw)
  To: linux-um; +Cc: Benjamin Berg
This way we can avoid doing two extra context switches when managing
processes using seccomp.
Signed-off-by: Benjamin Berg <benjamin@sipsolutions.net>
---
 arch/um/include/shared/skas/skas.h |  1 +
 arch/um/kernel/skas/process.c      |  8 ++++++++
 arch/um/kernel/tlb.c               | 10 +---------
 arch/um/os-Linux/skas/process.c    | 29 ++++++++++++++++++++++++++++-
 4 files changed, 38 insertions(+), 10 deletions(-)
diff --git a/arch/um/include/shared/skas/skas.h b/arch/um/include/shared/skas/skas.h
index f10599995d4d..bc672d607101 100644
--- a/arch/um/include/shared/skas/skas.h
+++ b/arch/um/include/shared/skas/skas.h
@@ -21,5 +21,6 @@ extern void new_thread_handler(void);
 extern void handle_syscall(struct uml_pt_regs *regs);
 extern long execute_syscall_skas(void *r);
 extern unsigned long current_stub_stack(void);
+extern struct mm_id *current_mm_id(void);
 
 #endif
diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c
index f2ac134c9752..c7345c83e07b 100644
--- a/arch/um/kernel/skas/process.c
+++ b/arch/um/kernel/skas/process.c
@@ -53,3 +53,11 @@ unsigned long current_stub_stack(void)
 
 	return current->mm->context.id.stack;
 }
+
+struct mm_id *current_mm_id(void)
+{
+	if (current->mm == NULL)
+		return NULL;
+
+	return &current->mm->context.id;
+}
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index c15cac380fcd..bda516cb1186 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -460,7 +460,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
 	pmd_t *pmd;
 	pte_t *pte;
 	struct mm_struct *mm = vma->vm_mm;
-	int r, w, x, prot, err = 0;
+	int r, w, x, prot;
 	struct mm_id *mm_id;
 
 	address &= PAGE_MASK;
@@ -508,14 +508,6 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
 	} else if (pte_newprot(*pte))
 		protect(mm_id, address, PAGE_SIZE, prot);
 
-	err = syscall_stub_flush(mm_id);
-	if (err) {
-		if (err == -ENOMEM)
-			report_enomem();
-
-		goto kill;
-	}
-
 	*pte = pte_mkuptodate(*pte);
 
 	return;
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index ceb75ac47b5f..0d3e137bd5b9 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -531,7 +531,8 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
 			fatal_sigsegv();
 
 		if (using_seccomp) {
-			struct stub_data *proc_data = (void *)current_stub_stack();
+			struct mm_id *mm_id = current_mm_id();
+			struct stub_data *proc_data = (void *) mm_id->stack;
 			int ret;
 
 			set_stub_state(regs, proc_data, singlestepping(NULL));
@@ -540,6 +541,10 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
 			if (proc_data->restart_wait != 0)
 				panic("Programming error: Flag to only run syscalls in child was not cleared!");
 
+			/* Mark pending syscalls for flushing */
+			proc_data->syscall_data_len = mm_id->syscall_data_len;
+			mm_id->syscall_data_len = 0;
+
 			proc_data->signal = 0;
 			proc_data->futex = FUTEX_IN_CHILD;
 			CATCH_EINTR(syscall(__NR_futex, &proc_data->futex,
@@ -552,6 +557,12 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
 
 			sig = proc_data->signal;
 
+			if (sig == SIGTRAP && proc_data->err != 0) {
+				printk(UM_KERN_ERR "%s - Error flushing stub syscalls",
+				       __func__);
+				fatal_sigsegv();
+			}
+
 			get_stub_state(regs, proc_data);
 
 			if (proc_data->si_offset > sizeof(proc_data->sigstack) - sizeof(*si))
@@ -569,6 +580,14 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
 				GET_FAULTINFO_FROM_MC(regs->faultinfo, mcontext);
 			}
 		} else {
+			/* With ptrace, we need to explicitly flush all pending syscalls. */
+			err = syscall_stub_flush(current_mm_id());
+			if (err) {
+				printk(UM_KERN_ERR "%s - Error flushing stub syscalls: %d",
+				       __func__, -err);
+				fatal_sigsegv();
+			}
+
 			/*
 			 * This can legitimately fail if the process loads a
 			 * bogus value into a segment register.  It will
@@ -733,6 +752,14 @@ int copy_context_skas0(struct mm_id *id, struct mm_id *from)
 	unsigned long long new_offset;
 	int new_fd = phys_mapping(to_phys((void *)id->stack), &new_offset);
 
+	/* Flush out any pending syscalls before trying to run the stub. */
+	err = syscall_stub_flush(from);
+	if (err) {
+		printk(UM_KERN_ERR "%s - Error flushing stub syscalls: %d",
+		       __func__, -err);
+		fatal_sigsegv();
+	}
+
 	/*
 	 * prepare offset and fd of child's stack as argument for parent's
 	 * and child's mmap2 calls
-- 
2.29.2
_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um
^ permalink raw reply related	[flat|nested] 30+ messages in thread