From: Peter Zijlstra <peterz@infradead.org>
To: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>,
Borislav Petkov <bp@alien8.de>, Ingo Molnar <mingo@kernel.org>,
Steven Rostedt <rostedt@goodmis.org>,
Andrew Lutomirski <luto@kernel.org>, Peter Anvin <hpa@zytor.com>,
Dave Hansen <dave.hansen@linux.intel.com>,
Juergen Gross <jgross@suse.com>,
Linux List Kernel Mailing <linux-kernel@vger.kernel.org>,
He Zhe <zhe.he@windriver.com>,
Joel Fernandes <joel@joelfernandes.org>,
devel@etsukata.com
Subject: Re: [PATCH v2 5/7] x86/mm, tracing: Fix CR2 corruption
Date: Fri, 5 Jul 2019 15:49:16 +0200 [thread overview]
Message-ID: <20190705134916.GU3402@hirez.programming.kicks-ass.net> (raw)
In-Reply-To: <CAHk-=wiJ4no+TW-8KTfpO-Q5+aaTGVoBJzrnFTvj_zGpVbrGfA@mail.gmail.com>
On Fri, Jul 05, 2019 at 11:18:51AM +0900, Linus Torvalds wrote:
> On Fri, Jul 5, 2019 at 5:03 AM Peter Zijlstra <peterz@infradead.org> wrote:
> >
> > Despire the current efforts to read CR2 before tracing happens there
> > still exist a number of possible holes:
>
> So this whole series disturbs me for the simple reason that I thought
> tracing was supposed to save/restore cr2 and make it unnecessary to
> worry about this in non-tracing code.
>
> That is very much what the NMI code explicitly does. Why shouldn't all
> the other tracing code do the same thing in case they can take page
> faults?
>
> So I don't think the patches are wrong per se, but this seems to solve
> it at the wrong level.
My thinking is that that results in far too many sites which we have to
fix and a possibly fragility of interface. Invariably we'll get multiple
interface for the same thing, one which preserves CR2 and one which
doesn't -- in the name of performance. And then someone uses the wrong
one, and we're back where we started.
Conversely, this way we get to fix it in one place.
Also; all previous attempts at fixing this have been about pushing the
read_cr2() earlier; notably:
0ac09f9f8cd1 ("x86, trace: Fix CR2 corruption when tracing page faults")
d4078e232267 ("x86, trace: Further robustify CR2 handling vs tracing")
And I'm thinking that with exception of this patch, the rest are
worthwhile cleanups regardless.
Also; while looking at this, if we do continue with the C wrappers from
the very last patch, we can do horrible things like this on top and move
the read_cr2() back into C code.
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -826,7 +826,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work
*/
#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + (x) * 8)
-.macro idtentry_part do_sym, has_error_code:req, read_cr2:req, paranoid:req, shift_ist=-1, ist_offset=0
+.macro idtentry_part do_sym, has_error_code:req, paranoid:req, shift_ist=-1, ist_offset=0
.if \paranoid
call paranoid_entry
@@ -836,10 +836,6 @@ apicinterrupt IRQ_WORK_VECTOR irq_work
.endif
UNWIND_HINT_REGS
- .if \read_cr2
- GET_CR2_INTO(%rdx); /* can clobber %rax */
- .endif
-
.if \has_error_code
movq ORIG_RAX(%rsp), %rsi /* get error code */
movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */
@@ -885,7 +881,6 @@ apicinterrupt IRQ_WORK_VECTOR irq_work
* fresh stack. (This is for #DB, which has a nasty habit
* of recursing.)
* @create_gap: create a 6-word stack gap when coming from kernel mode.
- * @read_cr2: load CR2 into the 3rd argument; done before calling any C code
*
* idtentry generates an IDT stub that sets up a usable kernel context,
* creates struct pt_regs, and calls @do_sym. The stub has the following
@@ -910,7 +905,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work
* @paranoid == 2 is special: the stub will never switch stacks. This is for
* #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS.
*/
-.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0 create_gap=0 read_cr2=0
+.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0 create_gap=0
ENTRY(\sym)
UNWIND_HINT_IRET_REGS offset=\has_error_code*8
@@ -948,7 +943,7 @@ ENTRY(\sym)
.Lfrom_usermode_no_gap_\@:
.endif
- idtentry_part \do_sym, \has_error_code, \read_cr2, \paranoid, \shift_ist, \ist_offset
+ idtentry_part \do_sym, \has_error_code, \paranoid, \shift_ist, \ist_offset
.if \paranoid == 1
/*
@@ -957,7 +952,7 @@ ENTRY(\sym)
* run in real process context if user_mode(regs).
*/
.Lfrom_usermode_switch_stack_\@:
- idtentry_part \do_sym, \has_error_code, \read_cr2, 0
+ idtentry_part \do_sym, \has_error_code, paranoid=0
.endif
_ASM_NOKPROBE(\sym)
@@ -969,7 +964,7 @@ idtentry overflow do_overflow has_er
idtentry bounds do_bounds has_error_code=0
idtentry invalid_op do_invalid_op has_error_code=0
idtentry device_not_available do_device_not_available has_error_code=0
-idtentry double_fault do_double_fault has_error_code=1 paranoid=2 read_cr2=1
+idtentry double_fault do_double_fault has_error_code=1 paranoid=2
idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0
idtentry invalid_TSS do_invalid_TSS has_error_code=1
idtentry segment_not_present do_segment_not_present has_error_code=1
@@ -1142,10 +1137,10 @@ idtentry xenint3 do_int3 has_error_co
#endif
idtentry general_protection do_general_protection has_error_code=1
-idtentry page_fault do_page_fault has_error_code=1 read_cr2=1
+idtentry page_fault do_page_fault has_error_code=1
#ifdef CONFIG_KVM_GUEST
-idtentry async_page_fault do_async_page_fault has_error_code=1 read_cr2=1
+idtentry async_page_fault do_async_page_fault has_error_code=1
#endif
#ifdef CONFIG_X86_MCE
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -22,20 +22,34 @@
#define CALL_enter_from_user_mode(_regs)
#endif
+#define __IDT_NR1 1
+#define __IDT_NR2 2
+#define __IDT_NR3 2
+
+#define IDT_NR(n) __IDT_NR##n
+
+#define __IDT_TRAP1(t1,a1)
+#define __IDT_TRAP2(t1,a1,t2,a2)
+#define __IDT_TRAP3(t1,a1,t2,a2,t3,a3) t3 a3 = read_cr2()
+
+#define IDT_TRAP(n,...) __IDT_TRAP##n(__VA_ARGS__)
+
#define IDTENTRYx(n, name, ...) \
static notrace void __idt_##name(__IDT_MAP(n, __IDT_DECL, __VA_ARGS__)); \
NOKPROBE_SYMBOL(__idt_##name); \
- dotraplinkage notrace void name(__IDT_MAP(n, __IDT_DECL, __VA_ARGS__)) \
+ dotraplinkage notrace void name(__IDT_MAP(__IDT_NR(n), __IDT_DECL, __VA_ARGS__)) \
{ \
__IDT_MAP(n, __IDT_TEST, __VA_ARGS__); \
+ __IDT_TRAP(n, __VA_ARGS__); \
trace_hardirqs_off(); \
CALL_enter_from_user_mode(regs); \
__idt_##name(__IDT_MAP(n, __IDT_ARGS, __VA_ARGS__)); \
} \
NOKPROBE_SYMBOL(name); \
- dotraplinkage notrace void name##_paranoid(__IDT_MAP(n, __IDT_DECL, __VA_ARGS__)) \
+ dotraplinkage notrace void name##_paranoid(__IDT_MAP(__IDT_NR(n), __IDT_DECL, __VA_ARGS__)) \
{ \
__IDT_MAP(n, __IDT_TEST, __VA_ARGS__); \
+ __IDT_TRAP(n, __VA_ARGS__); \
trace_hardirqs_off(); \
__idt_##name(__IDT_MAP(n, __IDT_ARGS, __VA_ARGS__)); \
} \
next prev parent reply other threads:[~2019-07-05 13:50 UTC|newest]
Thread overview: 42+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-07-04 19:55 [PATCH v2 0/7] Tracing vs CR2 (and cleanups) Peter Zijlstra
2019-07-04 19:55 ` [PATCH v2 1/7] x86/paravirt: Make read_cr2() CALLEE_SAVE Peter Zijlstra
2019-07-04 21:49 ` Andy Lutomirski
2019-07-10 19:53 ` Steven Rostedt
2019-07-04 19:55 ` [PATCH v2 2/7] x86/entry/32: Simplify common_exception Peter Zijlstra
2019-07-04 21:51 ` Andy Lutomirski
2019-07-10 20:11 ` Steven Rostedt
2019-07-10 20:14 ` Peter Zijlstra
2019-07-04 19:55 ` [PATCH v2 3/7] x86/entry/64: Simplify idtentry a little Peter Zijlstra
2019-07-04 21:54 ` Andy Lutomirski
2019-07-10 20:23 ` Steven Rostedt
2019-07-04 19:55 ` [PATCH v2 4/7] x86/entry/64: Update comments and sanity tests for create_gap Peter Zijlstra
2019-07-04 21:55 ` Andy Lutomirski
2019-07-10 20:24 ` Steven Rostedt
2019-07-04 19:56 ` [PATCH v2 5/7] x86/mm, tracing: Fix CR2 corruption Peter Zijlstra
2019-07-05 2:18 ` Linus Torvalds
2019-07-05 3:16 ` Andy Lutomirski
2019-07-05 3:27 ` Linus Torvalds
2019-07-05 13:49 ` Peter Zijlstra [this message]
2019-07-06 21:41 ` Linus Torvalds
2019-07-06 22:27 ` Steven Rostedt
2019-07-06 22:41 ` Linus Torvalds
2019-07-07 0:08 ` Linus Torvalds
2019-07-07 0:36 ` Andy Lutomirski
2019-07-06 23:50 ` Andy Lutomirski
2019-07-07 3:44 ` Eiichi Tsukata
2019-07-06 11:07 ` Eiichi Tsukata
2019-07-08 7:48 ` Peter Zijlstra
2019-07-08 8:58 ` Eiichi Tsukata
2019-07-08 9:42 ` Eiichi Tsukata
2019-07-09 5:17 ` Eiichi Tsukata
2019-07-07 15:10 ` Andy Lutomirski
2019-07-07 15:11 ` Andy Lutomirski
2019-07-07 18:17 ` Linus Torvalds
2019-07-10 20:27 ` Steven Rostedt
2019-07-11 6:45 ` Greg Kroah-Hartman
2019-07-11 12:12 ` Sasha Levin
2019-07-11 12:21 ` Peter Zijlstra
2019-07-04 19:56 ` [PATCH v2 6/7] x86/entry/64: Remove TRACE_IRQS_*_DEBUG Peter Zijlstra
2019-07-11 3:24 ` Steven Rostedt
2019-07-11 8:05 ` Peter Zijlstra
2019-07-04 19:56 ` [RFC][PATCH v2 7/7] x86/entry/64: Pull bits into C Peter Zijlstra
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190705134916.GU3402@hirez.programming.kicks-ass.net \
--to=peterz@infradead.org \
--cc=bp@alien8.de \
--cc=dave.hansen@linux.intel.com \
--cc=devel@etsukata.com \
--cc=hpa@zytor.com \
--cc=jgross@suse.com \
--cc=joel@joelfernandes.org \
--cc=linux-kernel@vger.kernel.org \
--cc=luto@kernel.org \
--cc=mingo@kernel.org \
--cc=rostedt@goodmis.org \
--cc=tglx@linutronix.de \
--cc=torvalds@linux-foundation.org \
--cc=zhe.he@windriver.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.