All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>,
	Borislav Petkov <bp@alien8.de>, Ingo Molnar <mingo@kernel.org>,
	Steven Rostedt <rostedt@goodmis.org>,
	Andrew Lutomirski <luto@kernel.org>, Peter Anvin <hpa@zytor.com>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	Juergen Gross <jgross@suse.com>,
	Linux List Kernel Mailing <linux-kernel@vger.kernel.org>,
	He Zhe <zhe.he@windriver.com>,
	Joel Fernandes <joel@joelfernandes.org>,
	devel@etsukata.com
Subject: Re: [PATCH v2 5/7] x86/mm, tracing: Fix CR2 corruption
Date: Fri, 5 Jul 2019 15:49:16 +0200	[thread overview]
Message-ID: <20190705134916.GU3402@hirez.programming.kicks-ass.net> (raw)
In-Reply-To: <CAHk-=wiJ4no+TW-8KTfpO-Q5+aaTGVoBJzrnFTvj_zGpVbrGfA@mail.gmail.com>

On Fri, Jul 05, 2019 at 11:18:51AM +0900, Linus Torvalds wrote:
> On Fri, Jul 5, 2019 at 5:03 AM Peter Zijlstra <peterz@infradead.org> wrote:
> >
> > Despire the current efforts to read CR2 before tracing happens there
> > still exist a number of possible holes:
> 
> So this whole series disturbs me for the simple reason that I thought
> tracing was supposed to save/restore cr2 and make it unnecessary to
> worry about this in non-tracing code.
> 
> That is very much what the NMI code explicitly does. Why shouldn't all
> the other tracing code do the same thing in case they can take page
> faults?
> 
> So I don't think the patches are wrong per se, but this seems to solve
> it at the wrong level.

My thinking is that that results in far too many sites which we have to
fix and a possibly fragility of interface. Invariably we'll get multiple
interface for the same thing, one which preserves CR2 and one which
doesn't -- in the name of performance. And then someone uses the wrong
one, and we're back where we started.

Conversely, this way we get to fix it in one place.

Also; all previous attempts at fixing this have been about pushing the
read_cr2() earlier; notably:

  0ac09f9f8cd1 ("x86, trace: Fix CR2 corruption when tracing page faults")
  d4078e232267 ("x86, trace: Further robustify CR2 handling vs tracing")

And I'm thinking that with exception of this patch, the rest are
worthwhile cleanups regardless.

Also; while looking at this, if we do continue with the C wrappers from
the very last patch, we can do horrible things like this on top and move
the read_cr2() back into C code.


--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -826,7 +826,7 @@ apicinterrupt IRQ_WORK_VECTOR			irq_work
  */
 #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + (x) * 8)
 
-.macro idtentry_part do_sym, has_error_code:req, read_cr2:req, paranoid:req, shift_ist=-1, ist_offset=0
+.macro idtentry_part do_sym, has_error_code:req, paranoid:req, shift_ist=-1, ist_offset=0
 
 	.if \paranoid
 	call	paranoid_entry
@@ -836,10 +836,6 @@ apicinterrupt IRQ_WORK_VECTOR			irq_work
 	.endif
 	UNWIND_HINT_REGS
 
-	.if \read_cr2
-	GET_CR2_INTO(%rdx);			/* can clobber %rax */
-	.endif
-
 	.if \has_error_code
 	movq	ORIG_RAX(%rsp), %rsi		/* get error code */
 	movq	$-1, ORIG_RAX(%rsp)		/* no syscall to restart */
@@ -885,7 +881,6 @@ apicinterrupt IRQ_WORK_VECTOR			irq_work
  *			fresh stack.  (This is for #DB, which has a nasty habit
  *			of recursing.)
  * @create_gap:		create a 6-word stack gap when coming from kernel mode.
- * @read_cr2:		load CR2 into the 3rd argument; done before calling any C code
  *
  * idtentry generates an IDT stub that sets up a usable kernel context,
  * creates struct pt_regs, and calls @do_sym.  The stub has the following
@@ -910,7 +905,7 @@ apicinterrupt IRQ_WORK_VECTOR			irq_work
  * @paranoid == 2 is special: the stub will never switch stacks.  This is for
  * #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS.
  */
-.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0 create_gap=0 read_cr2=0
+.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0 create_gap=0
 ENTRY(\sym)
 	UNWIND_HINT_IRET_REGS offset=\has_error_code*8
 
@@ -948,7 +943,7 @@ ENTRY(\sym)
 .Lfrom_usermode_no_gap_\@:
 	.endif
 
-	idtentry_part \do_sym, \has_error_code, \read_cr2, \paranoid, \shift_ist, \ist_offset
+	idtentry_part \do_sym, \has_error_code, \paranoid, \shift_ist, \ist_offset
 
 	.if \paranoid == 1
 	/*
@@ -957,7 +952,7 @@ ENTRY(\sym)
 	 * run in real process context if user_mode(regs).
 	 */
 .Lfrom_usermode_switch_stack_\@:
-	idtentry_part \do_sym, \has_error_code, \read_cr2, 0
+	idtentry_part \do_sym, \has_error_code, paranoid=0
 	.endif
 
 _ASM_NOKPROBE(\sym)
@@ -969,7 +964,7 @@ idtentry overflow			do_overflow			has_er
 idtentry bounds				do_bounds			has_error_code=0
 idtentry invalid_op			do_invalid_op			has_error_code=0
 idtentry device_not_available		do_device_not_available		has_error_code=0
-idtentry double_fault			do_double_fault			has_error_code=1 paranoid=2 read_cr2=1
+idtentry double_fault			do_double_fault			has_error_code=1 paranoid=2
 idtentry coprocessor_segment_overrun	do_coprocessor_segment_overrun	has_error_code=0
 idtentry invalid_TSS			do_invalid_TSS			has_error_code=1
 idtentry segment_not_present		do_segment_not_present		has_error_code=1
@@ -1142,10 +1137,10 @@ idtentry xenint3		do_int3			has_error_co
 #endif
 
 idtentry general_protection	do_general_protection	has_error_code=1
-idtentry page_fault		do_page_fault		has_error_code=1	read_cr2=1
+idtentry page_fault		do_page_fault		has_error_code=1
 
 #ifdef CONFIG_KVM_GUEST
-idtentry async_page_fault	do_async_page_fault	has_error_code=1	read_cr2=1
+idtentry async_page_fault	do_async_page_fault	has_error_code=1
 #endif
 
 #ifdef CONFIG_X86_MCE
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -22,20 +22,34 @@
 #define CALL_enter_from_user_mode(_regs)
 #endif
 
+#define __IDT_NR1 1
+#define __IDT_NR2 2
+#define __IDT_NR3 2
+
+#define IDT_NR(n) __IDT_NR##n
+
+#define __IDT_TRAP1(t1,a1)
+#define __IDT_TRAP2(t1,a1,t2,a2)
+#define __IDT_TRAP3(t1,a1,t2,a2,t3,a3)	t3 a3 = read_cr2()
+
+#define IDT_TRAP(n,...) __IDT_TRAP##n(__VA_ARGS__)
+
 #define IDTENTRYx(n, name, ...)	\
 	static notrace void __idt_##name(__IDT_MAP(n, __IDT_DECL, __VA_ARGS__)); \
 	NOKPROBE_SYMBOL(__idt_##name); \
-	dotraplinkage notrace void name(__IDT_MAP(n, __IDT_DECL, __VA_ARGS__)) \
+	dotraplinkage notrace void name(__IDT_MAP(__IDT_NR(n), __IDT_DECL, __VA_ARGS__)) \
 	{ \
 		__IDT_MAP(n, __IDT_TEST, __VA_ARGS__); \
+		__IDT_TRAP(n, __VA_ARGS__); \
 		trace_hardirqs_off(); \
 		CALL_enter_from_user_mode(regs); \
 		__idt_##name(__IDT_MAP(n, __IDT_ARGS, __VA_ARGS__)); \
 	} \
 	NOKPROBE_SYMBOL(name); \
-	dotraplinkage notrace void name##_paranoid(__IDT_MAP(n, __IDT_DECL, __VA_ARGS__)) \
+	dotraplinkage notrace void name##_paranoid(__IDT_MAP(__IDT_NR(n), __IDT_DECL, __VA_ARGS__)) \
 	{ \
 		__IDT_MAP(n, __IDT_TEST, __VA_ARGS__); \
+		__IDT_TRAP(n, __VA_ARGS__); \
 		trace_hardirqs_off(); \
 		__idt_##name(__IDT_MAP(n, __IDT_ARGS, __VA_ARGS__)); \
 	} \

  parent reply	other threads:[~2019-07-05 13:50 UTC|newest]

Thread overview: 42+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-07-04 19:55 [PATCH v2 0/7] Tracing vs CR2 (and cleanups) Peter Zijlstra
2019-07-04 19:55 ` [PATCH v2 1/7] x86/paravirt: Make read_cr2() CALLEE_SAVE Peter Zijlstra
2019-07-04 21:49   ` Andy Lutomirski
2019-07-10 19:53   ` Steven Rostedt
2019-07-04 19:55 ` [PATCH v2 2/7] x86/entry/32: Simplify common_exception Peter Zijlstra
2019-07-04 21:51   ` Andy Lutomirski
2019-07-10 20:11   ` Steven Rostedt
2019-07-10 20:14     ` Peter Zijlstra
2019-07-04 19:55 ` [PATCH v2 3/7] x86/entry/64: Simplify idtentry a little Peter Zijlstra
2019-07-04 21:54   ` Andy Lutomirski
2019-07-10 20:23   ` Steven Rostedt
2019-07-04 19:55 ` [PATCH v2 4/7] x86/entry/64: Update comments and sanity tests for create_gap Peter Zijlstra
2019-07-04 21:55   ` Andy Lutomirski
2019-07-10 20:24   ` Steven Rostedt
2019-07-04 19:56 ` [PATCH v2 5/7] x86/mm, tracing: Fix CR2 corruption Peter Zijlstra
2019-07-05  2:18   ` Linus Torvalds
2019-07-05  3:16     ` Andy Lutomirski
2019-07-05  3:27       ` Linus Torvalds
2019-07-05 13:49     ` Peter Zijlstra [this message]
2019-07-06 21:41       ` Linus Torvalds
2019-07-06 22:27         ` Steven Rostedt
2019-07-06 22:41           ` Linus Torvalds
2019-07-07  0:08             ` Linus Torvalds
2019-07-07  0:36               ` Andy Lutomirski
2019-07-06 23:50           ` Andy Lutomirski
2019-07-07  3:44           ` Eiichi Tsukata
2019-07-06 11:07     ` Eiichi Tsukata
2019-07-08  7:48       ` Peter Zijlstra
2019-07-08  8:58         ` Eiichi Tsukata
2019-07-08  9:42           ` Eiichi Tsukata
2019-07-09  5:17             ` Eiichi Tsukata
2019-07-07 15:10   ` Andy Lutomirski
2019-07-07 15:11     ` Andy Lutomirski
2019-07-07 18:17       ` Linus Torvalds
2019-07-10 20:27         ` Steven Rostedt
2019-07-11  6:45           ` Greg Kroah-Hartman
2019-07-11 12:12           ` Sasha Levin
2019-07-11 12:21           ` Peter Zijlstra
2019-07-04 19:56 ` [PATCH v2 6/7] x86/entry/64: Remove TRACE_IRQS_*_DEBUG Peter Zijlstra
2019-07-11  3:24   ` Steven Rostedt
2019-07-11  8:05     ` Peter Zijlstra
2019-07-04 19:56 ` [RFC][PATCH v2 7/7] x86/entry/64: Pull bits into C Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190705134916.GU3402@hirez.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=bp@alien8.de \
    --cc=dave.hansen@linux.intel.com \
    --cc=devel@etsukata.com \
    --cc=hpa@zytor.com \
    --cc=jgross@suse.com \
    --cc=joel@joelfernandes.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mingo@kernel.org \
    --cc=rostedt@goodmis.org \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    --cc=zhe.he@windriver.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.