From: Mike Rapoport <rppt@linux.ibm.com>
To: linux-kernel@vger.kernel.org
Cc: Alexandre Chartre <alexandre.chartre@oracle.com>,
Andy Lutomirski <luto@kernel.org>, Borislav Petkov <bp@alien8.de>,
Dave Hansen <dave.hansen@linux.intel.com>,
"H. Peter Anvin" <hpa@zytor.com>, Ingo Molnar <mingo@redhat.com>,
James Bottomley <James.Bottomley@hansenpartnership.com>,
Jonathan Adams <jwadams@google.com>,
Kees Cook <keescook@chromium.org>, Paul Turner <pjt@google.com>,
Peter Zijlstra <peterz@infradead.org>,
Thomas Gleixner <tglx@linutronix.de>,
linux-mm@kvack.org, linux-security-module@vger.kernel.org,
x86@kernel.org, Mike Rapoport <rppt@linux.ibm.com>
Subject: [RFC PATCH 3/7] x86/entry/64: add infrastructure for switching to isolated syscall context
Date: Fri, 26 Apr 2019 00:45:50 +0300 [thread overview]
Message-ID: <1556228754-12996-4-git-send-email-rppt@linux.ibm.com> (raw)
In-Reply-To: <1556228754-12996-1-git-send-email-rppt@linux.ibm.com>
The isolated system calls will use a separate page table that does not map
the entire kernel. Exception and interrupt entries must switch the
context to the full kernel page tables and then restore the isolated
context before resuming execution of the isolated system call.
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
---
arch/x86/entry/calling.h | 65 ++++++++++++++++++++++++++++++++++
arch/x86/entry/entry_64.S | 13 +++++--
arch/x86/include/asm/processor-flags.h | 8 +++++
arch/x86/include/asm/tlbflush.h | 8 ++++-
arch/x86/kernel/asm-offsets.c | 7 ++++
5 files changed, 98 insertions(+), 3 deletions(-)
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index efb0d1b..766e74e 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -187,6 +187,56 @@ For 32-bit we have the following conventions - kernel is built with
#endif
.endm
+#ifdef CONFIG_SYSCALL_ISOLATION
+
+#define SCI_PCID_BIT X86_CR3_SCI_PCID_BIT
+
+#define THIS_CPU_sci_syscall \
+ PER_CPU_VAR(cpu_sci) + SCI_SYSCALL
+
+#define THIS_CPU_sci_cr3_offset \
+ PER_CPU_VAR(cpu_sci) + SCI_CR3_OFFSET
+
+/*
+ * SAVE_AND_SWITCH_SCI_TO_KERNEL_CR3 - leave the SCI page tables if they
+ * are the ones currently loaded in CR3.
+ *
+ * \scratch_reg: clobbered.
+ * \save_reg:    once the sci_syscall check below passes, receives the CR3
+ *               value that was live on entry when its SCI PCID bit was
+ *               set, or 0 when the bit was clear. Left untouched when the
+ *               macro bails out early.
+ *
+ * The whole body is jumped over when X86_FEATURE_SCI is not set (via
+ * ALTERNATIVE) or when the per-CPU sci_syscall field reads as zero.
+ * Clobbers flags.
+ */
+.macro SAVE_AND_SWITCH_SCI_TO_KERNEL_CR3 scratch_reg:req save_reg:req
+ ALTERNATIVE "jmp .Ldone_\@", "", X86_FEATURE_SCI
+ movq THIS_CPU_sci_syscall, \scratch_reg
+ testq \scratch_reg, \scratch_reg
+ jz .Ldone_\@
+ movq %cr3, \scratch_reg
+ bt $SCI_PCID_BIT, \scratch_reg
+ jc .Lsci_context_\@
+ /* SCI PCID bit clear: record 0 so RESTORE_SCI_CR3 leaves CR3 alone. */
+ xorq \save_reg, \save_reg
+ jmp .Ldone_\@
+.Lsci_context_\@:
+ /* Keep the original CR3, then load CR3 + per-CPU sci_cr3_offset. */
+ movq \scratch_reg, \save_reg
+ addq THIS_CPU_sci_cr3_offset, \scratch_reg
+ movq \scratch_reg, %cr3
+.Ldone_\@:
+.endm
+
+/*
+ * RESTORE_SCI_CR3 - reload CR3 from \save_reg when it holds a saved value.
+ *
+ * \scratch_reg: clobbered.
+ * \save_reg:    CR3 value to write back; 0 means there is nothing to
+ *               restore. Cleared to 0 just before CR3 is written.
+ *
+ * The whole body is jumped over when X86_FEATURE_SCI is not set (via
+ * ALTERNATIVE) or when the per-CPU sci_syscall field reads as zero.
+ * Clobbers flags.
+ */
+.macro RESTORE_SCI_CR3 scratch_reg:req save_reg:req
+ ALTERNATIVE "jmp .Ldone_\@", "", X86_FEATURE_SCI
+ movq THIS_CPU_sci_syscall, \scratch_reg
+ testq \scratch_reg, \scratch_reg
+ jz .Ldone_\@
+ movq \save_reg, \scratch_reg
+ testq \scratch_reg, \scratch_reg
+ jz .Ldone_\@
+ /* Consume the saved value so it is not restored a second time. */
+ xorq \save_reg, \save_reg
+ movq \scratch_reg, %cr3
+.Ldone_\@:
+.endm
+
+#else /* CONFIG_SYSCALL_ISOLATION */
+
+.macro SAVE_AND_SWITCH_SCI_TO_KERNEL_CR3 scratch_reg:req save_reg:req /* empty stub: expands to nothing without CONFIG_SYSCALL_ISOLATION */
+.endm
+
+.macro RESTORE_SCI_CR3 scratch_reg:req save_reg:req /* empty stub: expands to nothing without CONFIG_SYSCALL_ISOLATION */
+.endm
+
+#endif /* CONFIG_SYSCALL_ISOLATION */
+
#ifdef CONFIG_PAGE_TABLE_ISOLATION
/*
@@ -264,6 +314,21 @@ For 32-bit we have the following conventions - kernel is built with
ALTERNATIVE "jmp .Ldone_\@", "", X86_FEATURE_PTI
movq %cr3, \scratch_reg
movq \scratch_reg, \save_reg
+
+#ifdef CONFIG_SYSCALL_ISOLATION
+ /*
+ * Test the SCI PCID bit. If set, then the SCI page tables are
+ * active. If clear CR3 has either the kernel or user page
+ * table active.
+ */
+ ALTERNATIVE "jmp .Lcheck_user_pt_\@", "", X86_FEATURE_SCI
+ bt $SCI_PCID_BIT, \scratch_reg
+ jnc .Lcheck_user_pt_\@
+ addq THIS_CPU_sci_cr3_offset, \scratch_reg
+ movq \scratch_reg, %cr3
+ jmp .Ldone_\@
+.Lcheck_user_pt_\@:
+#endif
/*
* Test the user pagetable bit. If set, then the user page tables
* are active. If clear CR3 already has the kernel page table
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 1f0efdb..3cef67b 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -543,7 +543,7 @@ ENTRY(interrupt_entry)
ENCODE_FRAME_POINTER 8
testb $3, CS+8(%rsp)
- jz 1f
+ jz .Linterrupt_entry_kernel
/*
* IRQ from user mode.
@@ -559,12 +559,17 @@ ENTRY(interrupt_entry)
CALL_enter_from_user_mode
-1:
+.Linterrupt_entry_done:
ENTER_IRQ_STACK old_rsp=%rdi save_ret=1
/* We entered an interrupt context - irqs are off: */
TRACE_IRQS_OFF
ret
+
+.Linterrupt_entry_kernel:
+ SAVE_AND_SWITCH_SCI_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
+ jmp .Linterrupt_entry_done
+
END(interrupt_entry)
_ASM_NOKPROBE(interrupt_entry)
@@ -656,6 +661,8 @@ retint_kernel:
*/
TRACE_IRQS_IRETQ
+ RESTORE_SCI_CR3 scratch_reg=%rax save_reg=%r14
+
GLOBAL(restore_regs_and_return_to_kernel)
#ifdef CONFIG_DEBUG_ENTRY
/* Assert that pt_regs indicates kernel mode. */
@@ -1263,6 +1270,8 @@ ENTRY(error_entry)
* for these here too.
*/
.Lerror_kernelspace:
+ SAVE_AND_SWITCH_SCI_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
+
leaq native_irq_return_iret(%rip), %rcx
cmpq %rcx, RIP+8(%rsp)
je .Lerror_bad_iret
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index 02c2cbd..eca9e17 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -53,4 +53,12 @@
# define X86_CR3_PTI_PCID_USER_BIT 11
#endif
+#ifdef CONFIG_SYSCALL_ISOLATION
+# if defined(X86_CR3_PTI_PCID_USER_BIT)
+# define X86_CR3_SCI_PCID_BIT (X86_CR3_PTI_PCID_USER_BIT - 1) /* the bit directly below the PTI user PCID bit */
+# else
+# define X86_CR3_SCI_PCID_BIT 11 /* PTI user bit is not defined, so use bit 11 */
+# endif
+#endif
+
#endif /* _ASM_X86_PROCESSOR_FLAGS_H */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index f4204bf..dc69cc4 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -54,7 +54,13 @@
# define PTI_CONSUMED_PCID_BITS 0
#endif
-#define CR3_AVAIL_PCID_BITS (X86_CR3_PCID_BITS - PTI_CONSUMED_PCID_BITS)
+#ifdef CONFIG_SYSCALL_ISOLATION
+# define SCI_CONSUMED_PCID_BITS 1 /* subtracted from CR3_AVAIL_PCID_BITS below */
+#else
+# define SCI_CONSUMED_PCID_BITS 0 /* nothing is subtracted in this configuration */
+#endif
+
+#define CR3_AVAIL_PCID_BITS (X86_CR3_PCID_BITS - PTI_CONSUMED_PCID_BITS - SCI_CONSUMED_PCID_BITS)
/*
* ASIDs are zero-based: 0->MAX_AVAIL_ASID are valid. -1 below to account
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 168543d..f2c9cd3f 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -18,6 +18,7 @@
#include <asm/bootparam.h>
#include <asm/suspend.h>
#include <asm/tlbflush.h>
+#include <asm/sci.h>
#ifdef CONFIG_XEN
#include <xen/interface/xen.h>
@@ -105,4 +106,10 @@ static void __used common(void)
OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
OFFSET(TSS_sp2, tss_struct, x86_tss.sp2);
+
+#ifdef CONFIG_SYSCALL_ISOLATION
+ /* system calls isolation */
+ OFFSET(SCI_SYSCALL, sci_percpu_data, sci_syscall);
+ OFFSET(SCI_CR3_OFFSET, sci_percpu_data, sci_cr3_offset);
+#endif
}
--
2.7.4
next prev parent reply other threads:[~2019-04-25 21:46 UTC|newest]
Thread overview: 41+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-04-25 21:45 [RFC PATCH 0/7] x86: introduce system calls address space isolation Mike Rapoport
2019-04-25 21:45 ` [RFC PATCH 1/7] x86/cpufeatures: add X86_FEATURE_SCI Mike Rapoport
2019-04-25 21:45 ` [RFC PATCH 2/7] x86/sci: add core implementation for system call isolation Mike Rapoport
2019-04-26 7:49 ` Peter Zijlstra
2019-04-28 5:45 ` Mike Rapoport
2019-04-26 8:31 ` Ingo Molnar
2019-04-26 9:58 ` Ingo Molnar
2019-04-26 21:26 ` Andy Lutomirski
2019-04-27 8:47 ` Ingo Molnar
2019-04-27 10:46 ` Ingo Molnar
2019-04-29 18:26 ` James Morris
2019-04-29 18:43 ` Andy Lutomirski
2019-04-29 18:46 ` Andy Lutomirski
2019-04-30 5:03 ` Ingo Molnar
2019-04-30 9:38 ` Peter Zijlstra
2019-04-30 11:05 ` Ingo Molnar
2019-05-02 11:35 ` Robert O'Callahan
2019-05-02 15:20 ` Ingo Molnar
2019-05-02 21:07 ` Robert O'Callahan
2019-04-26 14:44 ` James Bottomley
2019-04-26 14:46 ` Dave Hansen
2019-04-26 14:57 ` James Bottomley
2019-04-26 15:07 ` Andy Lutomirski
2019-04-26 15:19 ` James Bottomley
2019-04-26 17:40 ` Andy Lutomirski
2019-04-26 18:49 ` James Bottomley
2019-04-26 19:22 ` Andy Lutomirski
2019-04-25 21:45 ` Mike Rapoport [this message]
2019-04-25 21:45 ` [RFC PATCH 4/7] x86/sci: hook up isolated system call entry and exit Mike Rapoport
2019-04-25 21:45 ` [RFC PATCH 5/7] x86/mm/fault: hook up SCI verification Mike Rapoport
2019-04-26 7:42 ` Peter Zijlstra
2019-04-28 5:47 ` Mike Rapoport
2019-04-30 16:44 ` Andy Lutomirski
2019-05-01 5:39 ` Mike Rapoport
2019-04-25 21:45 ` [RFC PATCH 6/7] security: enable system call isolation in kernel config Mike Rapoport
2019-04-25 21:45 ` [RFC PATCH 7/7] sci: add example system calls to exercise SCI Mike Rapoport
2019-04-26 0:30 ` [RFC PATCH 0/7] x86: introduce system calls address space isolation Andy Lutomirski
2019-04-26 8:07 ` Jiri Kosina
2019-04-28 6:01 ` Mike Rapoport
2019-04-26 14:41 ` Dave Hansen
2019-04-28 6:08 ` Mike Rapoport
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1556228754-12996-4-git-send-email-rppt@linux.ibm.com \
--to=rppt@linux.ibm.com \
--cc=James.Bottomley@hansenpartnership.com \
--cc=alexandre.chartre@oracle.com \
--cc=bp@alien8.de \
--cc=dave.hansen@linux.intel.com \
--cc=hpa@zytor.com \
--cc=jwadams@google.com \
--cc=keescook@chromium.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linux-security-module@vger.kernel.org \
--cc=luto@kernel.org \
--cc=mingo@redhat.com \
--cc=peterz@infradead.org \
--cc=pjt@google.com \
--cc=tglx@linutronix.de \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).