All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC] de-asmify the x86-64 system call slowpath
@ 2014-01-26 22:28 Linus Torvalds
  2014-01-27  0:22 ` Al Viro
                   ` (2 more replies)
  0 siblings, 3 replies; 32+ messages in thread
From: Linus Torvalds @ 2014-01-26 22:28 UTC (permalink / raw)
  To: Peter Anvin, Ingo Molnar, Thomas Gleixner, Peter Zijlstra
  Cc: the arch/x86 maintainers, Linux Kernel Mailing List

[-- Attachment #1: Type: text/plain, Size: 2092 bytes --]

The x86-64 (and 32-bit, for that matter) system call slowpaths are all
in C, but the *selection* of which slow-path to take is a mixture of
complicated assembler ("sysret_check -> sysret_careful ->
sysret_signal -> sysret_audit -> int_check_syscall_exit_work" etc), and
oddly named and placed C code ("schedule_user" vs
"__audit_syscall_exit" vs "do_notify_resume").

This attached patch tries to take the "do_notify_resume()" approach,
renaming it to something sane ("syscall_exit_slowpath") and calling
out to *all* the different slow cases from that one place, instead of
having some cases hardcoded in asm, and some in C. And instead of
hardcoding which cases result in an "iretq" and which cases result in a
faster sysret case, it's now simply a return value from that
syscall_exit_slowpath() function, so it's very natural and easy to say
"taking a signal will force us to do the slow iretq case, but we can
do the task exit work and still do the sysret".

I've marked this as an RFC, because I didn't bother trying to clean up
the 32-bit code similarly (no test-cases, and trust me, if you get
this wrong, it will fail spectacularly but in very subtle and
hard-to-debug ways), and I also didn't bother with the slow cases in
the "iretq" path, so that path still has the odd asm cases and calls
the old (now legacy) do_notify_resume() path.

But this is actually tested, and seems to work (including limited
testing with strace, gdb etc), and while it adds a few more lines than
it removes, the removed lines are mostly asm, and added lines are C
(and part of them are the temporary still-extant do_notify_resume()
wrapper). In particular, it should be fairly straightforward to take
this as a starting point, removing the extant
do_notify_resume/schedule/etc cases one by one, and get rid of more
asm code and finally the wrapper.

Comments? This was obviously brought on by my frustration with the
currently nasty do_notify_resume() always returning to iret for the
task_work case, and PeterZ's patch that fixed that, but made the asm
mess even *worse*.

                        Linus

[-- Attachment #2: patch.diff --]
[-- Type: text/plain, Size: 4573 bytes --]

 arch/x86/kernel/entry_64.S | 51 ++++++++++++++--------------------------------
 arch/x86/kernel/signal.c   | 37 ++++++++++++++++++++++++++++-----
 2 files changed, 47 insertions(+), 41 deletions(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 1e96c3628bf2..15b5953da958 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -658,30 +658,24 @@ sysret_check:
 	/* Handle reschedules */
 	/* edx:	work, edi: workmask */
 sysret_careful:
-	bt $TIF_NEED_RESCHED,%edx
-	jnc sysret_signal
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
-	pushq_cfi %rdi
-	SCHEDULE_USER
-	popq_cfi %rdi
-	jmp sysret_check
+	SAVE_REST
+	FIXUP_TOP_OF_STACK %r11
+	movq %rsp,%rdi
+	movl %edx,%esi
+	call syscall_exit_slowpath
+	movl $_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT,%edi
+	testl %eax,%eax
+	jne sysret_using_iret
+	addq $REST_SKIP,%rsp
+	jmp sysret_check;
 
-	/* Handle a signal */
-sysret_signal:
-	TRACE_IRQS_ON
-	ENABLE_INTERRUPTS(CLBR_NONE)
-#ifdef CONFIG_AUDITSYSCALL
-	bt $TIF_SYSCALL_AUDIT,%edx
-	jc sysret_audit
-#endif
-	/*
-	 * We have a signal, or exit tracing or single-step.
-	 * These all wind up with the iret return path anyway,
-	 * so just join that path right now.
-	 */
-	FIXUP_TOP_OF_STACK %r11, -ARGOFFSET
-	jmp int_check_syscall_exit_work
+sysret_using_iret:
+	RESTORE_REST
+	DISABLE_INTERRUPTS(CLBR_NONE)
+	TRACE_IRQS_OFF
+	jmp int_with_check
 
 badsys:
 	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
@@ -703,20 +697,6 @@ auditsys:
 	call __audit_syscall_entry
 	LOAD_ARGS 0		/* reload call-clobbered registers */
 	jmp system_call_fastpath
-
-	/*
-	 * Return fast path for syscall audit.  Call __audit_syscall_exit()
-	 * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
-	 * masked off.
-	 */
-sysret_audit:
-	movq RAX-ARGOFFSET(%rsp),%rsi	/* second arg, syscall return value */
-	cmpq $-MAX_ERRNO,%rsi	/* is it < -MAX_ERRNO? */
-	setbe %al		/* 1 if so, 0 if not */
-	movzbl %al,%edi		/* zero-extend that into %edi */
-	call __audit_syscall_exit
-	movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
-	jmp sysret_check
 #endif	/* CONFIG_AUDITSYSCALL */
 
 	/* Do syscall tracing */
@@ -786,7 +766,6 @@ int_careful:
 int_very_careful:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
-int_check_syscall_exit_work:
 	SAVE_REST
 	/* Check for syscall exit trace */
 	testl $_TIF_WORK_SYSCALL_EXIT,%edx
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 9e5de6813e1f..25d02f6a65f1 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -725,13 +725,30 @@ static void do_signal(struct pt_regs *regs)
 }
 
 /*
- * notification of userspace execution resumption
- * - triggered by the TIF_WORK_MASK flags
+ * This is the system call exit slowpath, called if any of
+ * the thread info flags are set that cause us to not just
+ * return using a 'sysret' instruction.
+ *
+ * Return zero if the user register state hasn't changed,
+ * and a 'sysret' is still ok. Return nonzero if we need
+ * to go to the slow 'iret' path due to possible register
+ * changes (signals, ptrace, whatever).
  */
-__visible void
-do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
+__visible int syscall_exit_slowpath(struct pt_regs *regs, u32 thread_info_flags)
 {
+	int retval = 0;
+
 	user_exit();
+	if (thread_info_flags & _TIF_NEED_RESCHED)
+		schedule();
+
+	if (thread_info_flags & _TIF_WORK_SYSCALL_EXIT)
+		syscall_trace_leave(regs);
+
+#ifdef CONFIG_AUDITSYSCALL
+	if (thread_info_flags & _TIF_SYSCALL_AUDIT)
+		__audit_syscall_exit(regs->ax, regs->ax < -MAX_ERRNO);
+#endif
 
 #ifdef CONFIG_X86_MCE
 	/* notify userspace of pending MCEs */
@@ -743,8 +760,10 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 		uprobe_notify_resume(regs);
 
 	/* deal with pending signal delivery */
-	if (thread_info_flags & _TIF_SIGPENDING)
+	if (thread_info_flags & _TIF_SIGPENDING) {
 		do_signal(regs);
+		retval = 1;
+	}
 
 	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
@@ -754,8 +773,16 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 		fire_user_return_notifiers();
 
 	user_enter();
+	return retval;
 }
 
+/* Get rid of this old interface some day.. */
+__visible void do_notify_resume(struct pt_regs *regs, void *unused, u32 thread_info_flags)
+{
+	syscall_exit_slowpath(regs, thread_info_flags & ~(_TIF_NEED_RESCHED|_TIF_SYSCALL_AUDIT));
+}
+
+
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
 {
 	struct task_struct *me = current;

^ permalink raw reply related	[flat|nested] 32+ messages in thread

end of thread, other threads:[~2014-02-07 18:14 UTC | newest]

Thread overview: 32+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-01-26 22:28 [RFC] de-asmify the x86-64 system call slowpath Linus Torvalds
2014-01-27  0:22 ` Al Viro
2014-01-27  4:32   ` Linus Torvalds
2014-01-27  4:48     ` H. Peter Anvin
2014-01-27  7:42     ` Al Viro
2014-01-27 22:06       ` Andy Lutomirski
2014-01-27 22:17         ` Linus Torvalds
2014-01-27 22:32           ` Al Viro
2014-01-27 22:43             ` Linus Torvalds
2014-01-27 22:46               ` Andy Lutomirski
2014-01-28  0:22                 ` H. Peter Anvin
2014-01-28  0:44                   ` Andy Lutomirski
2014-01-27 23:07               ` Al Viro
2014-01-27 10:27 ` Peter Zijlstra
2014-01-27 11:36   ` Al Viro
2014-01-27 17:39     ` Oleg Nesterov
2014-01-28  1:18       ` Al Viro
2014-01-28 16:38         ` Oleg Nesterov
2014-01-28 16:48           ` Al Viro
2014-01-28 17:19             ` Oleg Nesterov
2014-02-06  0:32 ` Linus Torvalds
2014-02-06  0:55   ` Kirill A. Shutemov
2014-02-06  2:32     ` Linus Torvalds
2014-02-06  4:33       ` Linus Torvalds
2014-02-06 21:29         ` Linus Torvalds
2014-02-06 22:24           ` Kirill A. Shutemov
2014-02-07  1:31             ` Linus Torvalds
2014-02-07 15:42               ` [RFC, PATCH] mm: map few pages around fault address if they are in page cache Kirill A. Shutemov
2014-02-07 17:32                 ` Andi Kleen
2014-02-07 17:56                   ` Kirill A. Shutemov
2014-02-07 18:11                     ` Andi Kleen
2014-02-06  5:42       ` [RFC] de-asmify the x86-64 system call slowpath Ingo Molnar

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.