[ABOMINATION] x86: Fast interrupt return to userspace

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

* [ABOMINATION] x86: Fast interrupt return to userspace
@ 2014-05-06 20:29 Andy Lutomirski
  2014-05-06 20:35 ` Linus Torvalds
  0 siblings, 1 reply; 11+ messages in thread
From: Andy Lutomirski @ 2014-05-06 20:29 UTC (permalink / raw)
  To: Linus Torvalds, Thomas Gleixner, Linux Kernel Mailing List, x86,
	Steven Rostedt, Gleb Natapov, Paolo Bonzini
  Cc: Andy Lutomirski

This could be even faster if it were written in assembler :)

The only reason it's Signed-off-by is that I agree to the DCO.
That should not be construed to mean that anyone should apply
this patch.  It's an abomination and it will do terrible,
terrible things.

It boots, though :)  I haven't tested it beyond that.

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
---
 arch/x86/include/asm/calling.h    | 10 ++++++++++
 arch/x86/kernel/entry_64.S        | 14 ++++++++++++++
 arch/x86/kernel/process_64.c      | 37 +++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/vsyscall_64.c     |  2 +-
 arch/x86/kernel/vsyscall_emu_64.S |  5 +++++
 5 files changed, 67 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
index cb4c73b..ead0345 100644
--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/include/asm/calling.h
@@ -46,7 +46,9 @@ For 32-bit we have the following conventions - kernel is built with
 
 */
 
+#ifdef __ASSEMBLY__
 #include <asm/dwarf2.h>
+#endif
 
 #ifdef CONFIG_X86_64
 
@@ -85,6 +87,8 @@ For 32-bit we have the following conventions - kernel is built with
 #define ARGOFFSET	R11
 #define SWFRAME		ORIG_RAX
 
+#ifdef __ASSEMBLY__
+
 	.macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1
 	subq  $9*8+\addskip, %rsp
 	CFI_ADJUST_CFA_OFFSET	9*8+\addskip
@@ -195,8 +199,12 @@ For 32-bit we have the following conventions - kernel is built with
 	.byte 0xf1
 	.endm
 
+#endif /* __ASSEMBLY__ */
+
 #else /* CONFIG_X86_64 */
 
+#ifdef __ASSEMBLY__
+
 /*
  * For 32bit only simplified versions of SAVE_ALL/RESTORE_ALL. These
  * are different from the entry_32.S versions in not changing the segment
@@ -240,5 +248,7 @@ For 32-bit we have the following conventions - kernel is built with
 	CFI_RESTORE eax
 	.endm
 
+#endif /* __ASSEMBLY__ */
+
 #endif /* CONFIG_X86_64 */
 
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 1e96c36..7e3eae1 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1027,6 +1027,9 @@ retint_swapgs:		/* return to user-space */
 	 */
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_IRETQ
+	call install_sysret_trampoline
+	test %rax,%rax
+	jnz iret_via_sysret
 	SWAPGS
 	jmp restore_args
 
@@ -1036,6 +1039,7 @@ retint_restore_args:	/* return to kernel space */
 	 * The iretq could re-enable interrupts:
 	 */
 	TRACE_IRQS_IRETQ
+
 restore_args:
 	RESTORE_ARGS 1,8,1
 
@@ -1043,6 +1047,16 @@ irq_return:
 	INTERRUPT_RETURN
 	_ASM_EXTABLE(irq_return, bad_iret)
 
+iret_via_sysret:
+	SWAPGS
+	RESTORE_ARGS 1,8,1
+	popq %rcx /* RIP */
+	popq %r11 /* CS */
+	popq %r11 /* RFLAGS */
+	popq %rsp /* RSP */
+	          /* ignore SS */
+	sysretq
+
 #ifdef CONFIG_PARAVIRT
 ENTRY(native_iret)
 	iretq
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 9c0280f..e48aced 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -562,3 +562,40 @@ unsigned long KSTK_ESP(struct task_struct *task)
 	return (test_tsk_thread_flag(task, TIF_IA32)) ?
 			(task_pt_regs(task)->sp) : ((task)->thread.usersp);
 }
+
+#include <asm/calling.h>
+
+unsigned long notrace install_sysret_trampoline(void)
+{
+	unsigned long *here = __builtin_frame_address(0);
+	unsigned long *asmframe = here + 2;
+	unsigned long __user * newrsp;
+
+#define FRAMEVAL(x) asmframe[((x)-ARGOFFSET) / 8]
+	newrsp =  (unsigned long __user * __force)(FRAMEVAL(RSP) - 128 - 3*8);
+
+	if (FRAMEVAL(CS) != __USER_CS)
+		return 0;
+
+	/*
+	 * A real implementation would do:
+	 * if (!access_ok(VERIFY_WRITE, newrsp, 3*8))
+	 *		return 0;
+	 */
+
+	if (__put_user(FRAMEVAL(RIP), newrsp + 2))
+		return 0;
+
+	if (__put_user(FRAMEVAL(R11), newrsp + 1))
+		return 0;
+
+	if (__put_user(FRAMEVAL(RCX), newrsp))
+		return 0;
+
+	/* Hi there, optimizer. */
+	ACCESS_ONCE(FRAMEVAL(RIP)) = 0xffffffffff600c00;
+	ACCESS_ONCE(FRAMEVAL(RSP)) = (unsigned long)newrsp;
+	return 1;
+
+#undef FRAMEVAL
+}
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 8b3b3eb..77a5ef3 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -54,7 +54,7 @@
 
 DEFINE_VVAR(int, vgetcpu_mode);
 
-static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;
+static enum { EMULATE, NATIVE, NONE } vsyscall_mode = NATIVE;
 
 static int __init vsyscall_setup(char *str)
 {
diff --git a/arch/x86/kernel/vsyscall_emu_64.S b/arch/x86/kernel/vsyscall_emu_64.S
index c9596a9..a54a780 100644
--- a/arch/x86/kernel/vsyscall_emu_64.S
+++ b/arch/x86/kernel/vsyscall_emu_64.S
@@ -32,6 +32,11 @@ __vsyscall_page:
 	syscall
 	ret
 
+	.balign 1024, 0xcc
+	popq %rcx
+	popq %r11
+	retq $128
+
 	.balign 4096, 0xcc
 
 	.size __vsyscall_page, 4096
-- 
1.9.0


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [ABOMINATION] x86: Fast interrupt return to userspace
  2014-05-06 20:29 [ABOMINATION] x86: Fast interrupt return to userspace Andy Lutomirski
@ 2014-05-06 20:35 ` Linus Torvalds
  2014-05-06 21:00   ` Linus Torvalds
  0 siblings, 1 reply; 11+ messages in thread
From: Linus Torvalds @ 2014-05-06 20:35 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Thomas Gleixner, Linux Kernel Mailing List,
	the arch/x86 maintainers, Steven Rostedt, Gleb Natapov,
	Paolo Bonzini

On Tue, May 6, 2014 at 1:29 PM, Andy Lutomirski <luto@amacapital.net> wrote:
>
> The only reason it's Signed-off-by is that I agree to the DCO.
> That should not be construed to mean that anyone should apply
> this patch.  It's an abomination and it will do terrible,
> terrible things.

Heh. That is pretty disgusting. But I guess it could be interesting
for timing. BRB.

          Linus

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [ABOMINATION] x86: Fast interrupt return to userspace
  2014-05-06 20:35 ` Linus Torvalds
@ 2014-05-06 21:00   ` Linus Torvalds
  2014-05-06 21:25     ` Linus Torvalds
  0 siblings, 1 reply; 11+ messages in thread
From: Linus Torvalds @ 2014-05-06 21:00 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Thomas Gleixner, Linux Kernel Mailing List,
	the arch/x86 maintainers, Steven Rostedt, Gleb Natapov,
	Paolo Bonzini

[-- Attachment #1: Type: text/plain, Size: 1331 bytes --]

On Tue, May 6, 2014 at 1:35 PM, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
>
> Heh. That is pretty disgusting. But I guess it could be interesting
> for timing. BRB.

Ooh. That's friggin impressive.

Guys, see if you can recreate these numbers. This is my totally
disgusting test-case, which really is just stress-testing page faults
and nothing else.

Silly C file attached, see the comment at the top of it. Then just do
"time ./a.out". It's designed to map the zero-page and access it. The
"start" thing was to make sure it's not hugepage-aligned, but that's
not actually enough with a big 1GB area, so you do need that whole
"echo never" thing since there will be tons of aligned areas that the
kernel will make noops for this case otherwise.

Anyway, on my Haswell with normal "iret", that program takes 8.4+-0.1 seconds.

With the disgusting sysret hackery, it takes 6.5+-0.1 seconds. That's
a rather impressive 23% performance improvement for page faulting.

I'll do profiles and test the kernel compile too, but the raw timings
are certainly promising. The "sysret" hack is pretty disgusting, and
it's broken too. sysret doesn't do some things iret does (like TF flag
etc), so it's not complete, but it's clearly good enough to run tests
on. It will definitely break ptrace() and friends.

                Linus

[-- Attachment #2: t.c --]
[-- Type: text/x-csrc, Size: 669 bytes --]

//
// Make sure to do
//
//     echo never >/sys/kernel/mm/transparent_hugepage/enabled
//
// to disable THP for this stupid test-case.

#include <stdio.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <unistd.h>

#define SIZE (1024*1024*1024)

/*
 * Page-fault stress test.
 *
 * Maps a SIZE-byte read-only anonymous private region 100 times, reads one
 * int at every 4096-byte step of it so that each step is touched, and
 * unmaps it again.  Nothing is ever written to the mappings.
 *
 * Returns 0 on success, or 1 (after printing the mmap error) if a mapping
 * could not be created.
 */
int main(int argc, char **argv)
{
	char *probe, *start;
	int i;

	/* Command-line arguments are not used. */
	(void)argc;
	(void)argv;

	/*
	 * Let the kernel pick some free address, then derive the hint used
	 * for the big mappings from it: 8192 bytes past the probe page, with
	 * bit 13 forced on.  Done on a char pointer because arithmetic on
	 * void pointers is a compiler extension, not standard C.
	 */
	probe = mmap(NULL, 4096, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0);
	if (probe == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	start = (char *)(8192 | (unsigned long)(probe + 8192));

	for (i = 0; i < 100; i++) {
		unsigned int j;
		/* No MAP_FIXED: 'start' is only a placement hint. */
		char *addr = mmap(start, SIZE, PROT_READ,
				  MAP_PRIVATE | MAP_ANON, -1, 0);

		/* Without this check a failed mapping would be dereferenced. */
		if (addr == MAP_FAILED) {
			perror("mmap");
			return 1;
		}

		/* The volatile read keeps the compiler from dropping the access. */
		for (j = 0; j < SIZE; j += 4096) {
			*(volatile int *)(addr + j);
		}
		munmap(addr, SIZE);
	}
	return 0;
}

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [ABOMINATION] x86: Fast interrupt return to userspace
  2014-05-06 21:00   ` Linus Torvalds
@ 2014-05-06 21:25     ` Linus Torvalds
  2014-05-06 21:34       ` Andy Lutomirski
  2014-05-06 21:40       ` Linus Torvalds
  0 siblings, 2 replies; 11+ messages in thread
From: Linus Torvalds @ 2014-05-06 21:25 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Thomas Gleixner, Linux Kernel Mailing List,
	the arch/x86 maintainers, Steven Rostedt, Gleb Natapov,
	Paolo Bonzini

On Tue, May 6, 2014 at 2:00 PM, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
>
> I'll do profiles and test the kernel compile too, but the raw timings
> are certainly promising. The "sysret" hack is pretty disgusting, and
> it's broken too. sysret doesn't do some things iret does (like TF flag
> etc), so it's not complete, but it's clearly good enough to run tests
> on. It will definitely break ptrace() and friends.

It clearly breaks other things too, and there seems to be bugs in
there. I got this, for example:

WARNING: CPU: 2 PID: 1174 at kernel/smp.c:230
smp_call_function_single+0x81/0xa0()
CPU: 2 PID: 1174 Comm: gdbus Tainted: G        W
3.15.0-rc4-00260-g38583f095c5a-dirty #2
Call Trace:
  dump_stack+0x45/0x56
  warn_slowpath_common+0x73/0x90
  warn_slowpath_null+0x15/0x20
  smp_call_function_single+0x81/0xa0
  smp_call_function_many+0x21c/0x260
  flush_tlb_page+0x6d/0xb0
  ptep_clear_flush+0x2c/0x40
  do_wp_page+0x208/0x6e0
  handle_mm_fault+0x79c/0x1060
  __do_page_fault+0x15e/0x510
  do_page_fault+0xc/0x10
  page_fault+0x22/0x30
  retint_swapgs+0x6/0x10

which is because interrupts are disabled in that
install_sysret_trampoline() path that touches user space. It's
possibly sufficient to just move the "cli" to below the call to it. I
eventually ended up with a hung machine, possibly related to this,
possibly something else.

Whatever. I got enough profile data to say that it seems to have cut
'iret' overhead by at least two thirds. So it may not *work*, but from
a "hey look, some random numbers" standpoint it is worth playing with.

             Linus

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [ABOMINATION] x86: Fast interrupt return to userspace
  2014-05-06 21:25     ` Linus Torvalds
@ 2014-05-06 21:34       ` Andy Lutomirski
  2014-05-06 21:48         ` Linus Torvalds
  2014-05-07 11:14         ` Ingo Molnar
  2014-05-06 21:40       ` Linus Torvalds
  1 sibling, 2 replies; 11+ messages in thread
From: Andy Lutomirski @ 2014-05-06 21:34 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Thomas Gleixner, Linux Kernel Mailing List,
	the arch/x86 maintainers, Steven Rostedt, Gleb Natapov,
	Paolo Bonzini

On Tue, May 6, 2014 at 2:25 PM, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
> On Tue, May 6, 2014 at 2:00 PM, Linus Torvalds
> <torvalds@linux-foundation.org> wrote:
>>
>> I'll do profiles and test the kernel compile too, but the raw timings
>> are certainly promising. The "sysret" hack is pretty disgusting, and
>> it's broken too. sysret doesn't do some things iret does (like TF flag
>> etc), so it's not complete, but it's clearly good enough to run tests
>> on. It will definitely break ptrace() and friends.
>
> It clearly breaks other things too, and there seems to be bugs in
> there. I got this, for example:
>
> WARNING: CPU: 2 PID: 1174 at kernel/smp.c:230
> smp_call_function_single+0x81/0xa0()
> CPU: 2 PID: 1174 Comm: gdbus Tainted: G        W
> 3.15.0-rc4-00260-g38583f095c5a-dirty #2
> Call Trace:
>   dump_stack+0x45/0x56
>   warn_slowpath_common+0x73/0x90
>   warn_slowpath_null+0x15/0x20
>   smp_call_function_single+0x81/0xa0
>   smp_call_function_many+0x21c/0x260
>   flush_tlb_page+0x6d/0xb0
>   ptep_clear_flush+0x2c/0x40
>   do_wp_page+0x208/0x6e0
>   handle_mm_fault+0x79c/0x1060
>   __do_page_fault+0x15e/0x510
>   do_page_fault+0xc/0x10
>   page_fault+0x22/0x30
>   retint_swapgs+0x6/0x10
>
> which is because interrupts are disabled in that
> install_sysret_trampoline() path that touches user space. It's
> possibly sufficient to just move the "cli" to below the call to it. I
> eventually ended up with a hung machine, possibly related to this,
> possibly something else.

I don't think that's enough to fix this -- interrupts may not have
been on in the first place, I think.  I wonder if __put_user_inatomic
would work here.

Also, sysexit might be better than sysret.  And I categorically refuse
to add any new feature that requires vsyscall=native, so this would
have to use the vdso instead.  Plus it's awful.

>
> Whatever. I got enough profile data to say that it seems to have cut
> 'iret' overhead by at least two thirds. So it may not *work*, but from
> a "hey look, some random numbers" standpoint it is worth playing with.
>

:)

Is there actual interest in turning something like this into a real
patch?  It would almost certainly have to default off and no one sane
would ever use it except for special-purpose machines.

--Andy

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [ABOMINATION] x86: Fast interrupt return to userspace
  2014-05-06 21:25     ` Linus Torvalds
  2014-05-06 21:34       ` Andy Lutomirski
@ 2014-05-06 21:40       ` Linus Torvalds
  1 sibling, 0 replies; 11+ messages in thread
From: Linus Torvalds @ 2014-05-06 21:40 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Thomas Gleixner, Linux Kernel Mailing List,
	the arch/x86 maintainers, Steven Rostedt, Gleb Natapov,
	Paolo Bonzini

On Tue, May 6, 2014 at 2:25 PM, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
>
> Whatever. I got enough profile data to say that it seems to have cut
> 'iret' overhead by at least two thirds. So it may not *work*, but from
> a "hey look, some random numbers" standpoint it is worth playing with.

Just to clarify: that's just the "iret" part. On both the kernel build
and my microbenchmark, the fault itself is the more expensive part.

But on my microbenchmark, it used to be 80% page fault overhead, where
the split was roughly 55/25 percentage points. So the cost of "iret"
was almost a third of the total page fault cost, and roughly a quarter
of the total cost on that microbenchmark.

And remember: that microbenchmark improved absolute performance by
about 23%. That already implied that most of iret just went away. The
(partial) profile numbers I did get before the machine hung seem to
back that up, with the page fault overhead now looking like 67% of
total (but remember: it's a smaller total, so the real improvement is
bigger than that 80% -> 67% thing), with the percentage point split
being roughly 64/3. So with that sysret trick, the 'iret' overhead is
pretty much negligible - it went from a third of the page fault cost
to 1/20th or so.

But the fault itself obviously doesn't speed up.

                  Linus

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [ABOMINATION] x86: Fast interrupt return to userspace
  2014-05-06 21:34       ` Andy Lutomirski
@ 2014-05-06 21:48         ` Linus Torvalds
  2014-05-06 22:49           ` Steven Rostedt
  2014-05-07 11:14         ` Ingo Molnar
  1 sibling, 1 reply; 11+ messages in thread
From: Linus Torvalds @ 2014-05-06 21:48 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Thomas Gleixner, Linux Kernel Mailing List,
	the arch/x86 maintainers, Steven Rostedt, Gleb Natapov,
	Paolo Bonzini

On Tue, May 6, 2014 at 2:34 PM, Andy Lutomirski <luto@amacapital.net> wrote:
>
> I don't think that's enough to fix this -- interrupts may not have
> been on in the first place, I think.  I wonder if __put_user_inatomic
> would work here.

That might be the way to go, yes.

And in addition to the CS value you should probably check that TF and
RF are clear in eflags, and possibly other things too that the sysret
doesn't get right compared to iret.

> Also, sysexit might be better than sysret.  And I categorically refuse
> to add any new feature that requires vsyscall=native, so this would
> have to use the vdso instead.  Plus it's awful.

Yeah, it's disgusting. I think it is worth fixing to get better
numbers (it *would* be very interesting to hear whether this plus the
kernel 'retq' thing actually makes real device interrupt overhead
lower), but I'd be very wary of using it in production.

> Is there actual interest in turning something like this into a real
> patch?  It would almost certainly have to default off and no one sane
> would ever use it except for special-purpose machines.

I don't think it's necessarily worth actually merging, but maybe a
couple of improvements make it go from "disgusting and unmergeable"
to just "disgusting". The "avoid iret for return to kernel mode" thing
might be more easily merged (but needs similar CS content checks to
make sure we're ok with BIOS/EFI execution environments)

            Linus

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [ABOMINATION] x86: Fast interrupt return to userspace
  2014-05-06 21:48         ` Linus Torvalds
@ 2014-05-06 22:49           ` Steven Rostedt
  2014-05-06 23:37             ` Måns Rullgård
  0 siblings, 1 reply; 11+ messages in thread
From: Steven Rostedt @ 2014-05-06 22:49 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Andy Lutomirski, Thomas Gleixner, Linux Kernel Mailing List,
	the arch/x86 maintainers, Gleb Natapov, Paolo Bonzini

On Tue, 6 May 2014 14:48:50 -0700
Linus Torvalds <torvalds@linux-foundation.org> wrote:


> Yeah, it's disgusting. I think it is worth fixing to get better
> numbers (it *would* be very interesting to hear whether this plus the
> kernel 'retq' thing actually makes real device interrupt overhead
> lower), but I'd be very wary of using it in production.

Darn it. I was actually hoping that someone else could have the
reputation of doing "I can't believe they actually did this" code in the
kernel.

-- Steve

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [ABOMINATION] x86: Fast interrupt return to userspace
  2014-05-06 22:49           ` Steven Rostedt
@ 2014-05-06 23:37             ` Måns Rullgård
  0 siblings, 0 replies; 11+ messages in thread
From: Måns Rullgård @ 2014-05-06 23:37 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Linus Torvalds, Andy Lutomirski, Thomas Gleixner,
	Linux Kernel Mailing List, the arch/x86 maintainers, Gleb Natapov,
	Paolo Bonzini

Steven Rostedt <rostedt@goodmis.org> writes:

> On Tue, 6 May 2014 14:48:50 -0700
> Linus Torvalds <torvalds@linux-foundation.org> wrote:
>
>> Yeah, it's disgusting. I think it is worth fixing to get better
>> numbers (it *would* be very interesting to hear whether this plus the
>> kernel 'retq' thing actually makes real device interrupt overhead
>> lower), but I'd be very wary of using it in production.
>
> Darn it. I was actually hoping that someone else could have the
> reputation of doing "I can't believe they actually did this" code in the
> kernel.

Hey, I tried: http://article.gmane.org/gmane.linux.kernel/1594134

-- 
Måns Rullgård
mans@mansr.com

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [ABOMINATION] x86: Fast interrupt return to userspace
  2014-05-06 21:34       ` Andy Lutomirski
  2014-05-06 21:48         ` Linus Torvalds
@ 2014-05-07 11:14         ` Ingo Molnar
  2014-05-07 15:27           ` Andy Lutomirski
  1 sibling, 1 reply; 11+ messages in thread
From: Ingo Molnar @ 2014-05-07 11:14 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Linus Torvalds, Thomas Gleixner, Linux Kernel Mailing List,
	the arch/x86 maintainers, Steven Rostedt, Gleb Natapov,
	Paolo Bonzini


* Andy Lutomirski <luto@amacapital.net> wrote:

> > Whatever. I got enough profile data to say that it seems to have 
> > cut 'iret' overhead by at least two thirds. So it may not *work*, 
> > but from a "hey look, some random numbers" standpoint it is worth 
> > playing with.
> 
> :)
> 
> Is there actual interest in turning something like this into a real 
> patch?  It would almost certainly have to default off and no one 
> sane would ever use it except for special-purpose machines.

The macro speedup looks rather impressive, and we've done ugly things 
for far smaller speedups.

But I don't think it should be a 'special mode'. It either is made to 
work unconditionally and can be a prime speedup to be proud of in a 
politely disgusted fashion, or we don't want the complexity (and 
future bitrot) of some special switch.

At minimum it can be a "look we want this speedup in hardware" 
testcase to CPU designers.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [ABOMINATION] x86: Fast interrupt return to userspace
  2014-05-07 11:14         ` Ingo Molnar
@ 2014-05-07 15:27           ` Andy Lutomirski
  0 siblings, 0 replies; 11+ messages in thread
From: Andy Lutomirski @ 2014-05-07 15:27 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, Thomas Gleixner, Linux Kernel Mailing List,
	the arch/x86 maintainers, Steven Rostedt, Gleb Natapov,
	Paolo Bonzini

On Wed, May 7, 2014 at 4:14 AM, Ingo Molnar <mingo@kernel.org> wrote:
>
> * Andy Lutomirski <luto@amacapital.net> wrote:
>
>> > Whatever. I got enough profile data to say that it seems to have
>> > cut 'iret' overhead by at least two thirds. So it may not *work*,
>> > but from a "hey look, some random numbers" standpoint it is worth
>> > playing with.
>>
>> :)
>>
>> Is there actual interest in turning something like this into a real
>> patch?  It would almost certainly have to default off and no one
>> sane would ever use it except for special-purpose machines.
>
> The macro speedup looks rather impressive, and we've done ugly things
> for far smaller speedups.
>
> But I don't think it should be a 'special mode'. It either is made to
> work unconditionally and can be a prime speedup to be proud of in a
> politely disgusted fashion, or we don't want the complexity (and
> future bitrot) of some special switch.

The problem is that we'll break anything that expects to be able to
use more than the specified 128-byte redzone.  Doing that is currently
safe as long as no signals are delivered.

I could imagine something like Go blowing up badly.

We might be able to get away with automatically disabling this thing
if sigaltstack is enabled.

--Andy

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2014-05-07 15:27 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-05-06 20:29 [ABOMINATION] x86: Fast interrupt return to userspace Andy Lutomirski
2014-05-06 20:35 ` Linus Torvalds
2014-05-06 21:00   ` Linus Torvalds
2014-05-06 21:25     ` Linus Torvalds
2014-05-06 21:34       ` Andy Lutomirski
2014-05-06 21:48         ` Linus Torvalds
2014-05-06 22:49           ` Steven Rostedt
2014-05-06 23:37             ` Måns Rullgård
2014-05-07 11:14         ` Ingo Molnar
2014-05-07 15:27           ` Andy Lutomirski
2014-05-06 21:40       ` Linus Torvalds

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox