From mboxrd@z Thu Jan 1 00:00:00 1970 From: Yu-cheng Yu Subject: [PATCH v10 25/26] x86/cet/shstk: Handle thread shadow stack Date: Wed, 29 Apr 2020 15:07:31 -0700 Message-ID: <20200429220732.31602-26-yu-cheng.yu@intel.com> References: <20200429220732.31602-1-yu-cheng.yu@intel.com> Mime-Version: 1.0 Content-Transfer-Encoding: 8bit Return-path: In-Reply-To: <20200429220732.31602-1-yu-cheng.yu-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org> Sender: linux-api-owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org To: x86-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org, "H. Peter Anvin" , Thomas Gleixner , Ingo Molnar , linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org, linux-doc-u79uwXL29TY76Z2rM5mHXA@public.gmane.org, linux-mm-Bw31MaZKKs3YtjvyW6yDsg@public.gmane.org, linux-arch-u79uwXL29TY76Z2rM5mHXA@public.gmane.org, linux-api-u79uwXL29TY76Z2rM5mHXA@public.gmane.org, Arnd Bergmann , Andy Lutomirski , Balbir Singh , Borislav Petkov , Cyrill Gorcunov , Dave Hansen , Eugene Syromiatnikov , Florian Weimer , "H.J. Lu" , Jann Horn , Jonathan Corbet , Kees Cook , Mike Kravetz , Nadav Amit Cc: Yu-cheng Yu List-Id: linux-arch.vger.kernel.org The kernel allocates (and frees on thread exit) a new shadow stack for a pthread child. It is possible for the kernel to complete the clone syscall and set the child's shadow stack pointer to NULL and let the child thread allocate a shadow stack for itself. There are two issues in this approach: It is not compatible with existing code that does inline syscall and it cannot handle signals before the child can successfully allocate a shadow stack. A 64-bit shadow stack has a size of min(RLIMIT_STACK, 4 GB). A compat-mode thread shadow stack has a size of 1/4 min(RLIMIT_STACK, 4 GB). This allows more threads to run in a 32-bit address space. Signed-off-by: Yu-cheng Yu --- v10: - Limit shadow stack size to 4 GB. arch/x86/include/asm/cet.h | 2 ++ arch/x86/include/asm/mmu_context.h | 3 +++ arch/x86/kernel/cet.c | 41 ++++++++++++++++++++++++++++++ arch/x86/kernel/process.c | 7 +++++ 4 files changed, 53 insertions(+) diff --git a/arch/x86/include/asm/cet.h b/arch/x86/include/asm/cet.h index 56fe08eebae6..71dc92acd2f2 100644 --- a/arch/x86/include/asm/cet.h +++ b/arch/x86/include/asm/cet.h @@ -18,11 +18,13 @@ struct cet_status { #ifdef CONFIG_X86_INTEL_CET int cet_setup_shstk(void); +int cet_setup_thread_shstk(struct task_struct *p); void cet_disable_free_shstk(struct task_struct *p); int cet_verify_rstor_token(bool ia32, unsigned long ssp, unsigned long *new_ssp); void cet_restore_signal(struct sc_ext *sc); int cet_setup_signal(bool ia32, unsigned long rstor, struct sc_ext *sc); #else +static inline int cet_setup_thread_shstk(struct task_struct *p) { return 0; } static inline void cet_disable_free_shstk(struct task_struct *p) {} static inline void cet_restore_signal(struct sc_ext *sc) { return; } static inline int cet_setup_signal(bool ia32, unsigned long rstor, diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 4e55370e48e8..bb7a4a2d6923 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -12,6 +12,7 @@ #include #include #include +#include #include extern atomic64_t last_mm_ctx_id; @@ -155,6 +156,8 @@ do { \ #else #define deactivate_mm(tsk, mm) \ do { \ + if (!tsk->vfork_done) \ + cet_disable_free_shstk(tsk); \ load_gs_index(0); \ loadsegment(fs, 0); \ } while (0) diff --git a/arch/x86/kernel/cet.c b/arch/x86/kernel/cet.c index 274fecdd9669..121552047b86 100644 --- a/arch/x86/kernel/cet.c +++ b/arch/x86/kernel/cet.c @@ -169,6 +169,47 @@ int cet_setup_shstk(void) return 0; } +int cet_setup_thread_shstk(struct task_struct *tsk) +{ + unsigned long addr, size; + struct cet_user_state *state; + struct cet_status *cet = &tsk->thread.cet; + + if (!cet->shstk_size) + return 0; + + state = get_xsave_addr(&tsk->thread.fpu.state.xsave, + XFEATURE_CET_USER); + + if (!state) + return -EINVAL; + + /* Cap shadow stack size to 4 GB */ + size = min(rlimit(RLIMIT_STACK), 1UL << 32); + + /* + * Compat-mode pthreads share a limited address space. + * If each function call takes an average of four slots + * stack space, we need 1/4 of stack size for shadow stack. + */ + if (in_compat_syscall()) + size /= 4; + size = round_up(size, PAGE_SIZE); + addr = alloc_shstk(size); + + if (IS_ERR((void *)addr)) { + cet->shstk_base = 0; + cet->shstk_size = 0; + return PTR_ERR((void *)addr); + } + + fpu__prepare_write(&tsk->thread.fpu); + state->user_ssp = (u64)(addr + size); + cet->shstk_base = addr; + cet->shstk_size = size; + return 0; +} + void cet_disable_free_shstk(struct task_struct *tsk) { struct cet_status *cet = &tsk->thread.cet; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 9d9cff2c1018..ef1c2b8086a2 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -109,6 +109,7 @@ void exit_thread(struct task_struct *tsk) free_vm86(t); + cet_disable_free_shstk(tsk); fpu__drop(fpu); } @@ -179,6 +180,12 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, if (clone_flags & CLONE_SETTLS) ret = set_new_tls(p, tls); +#ifdef CONFIG_X86_64 + /* Allocate a new shadow stack for pthread */ + if (!ret && (clone_flags & (CLONE_VFORK | CLONE_VM)) == CLONE_VM) + ret = cet_setup_thread_shstk(p); +#endif + if (!ret && unlikely(test_tsk_thread_flag(current, TIF_IO_BITMAP))) io_bitmap_share(p); -- 2.21.0 From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Yu-cheng Yu Subject: [PATCH v10 25/26] x86/cet/shstk: Handle thread shadow stack Date: Wed, 29 Apr 2020 15:07:31 -0700 Message-ID: <20200429220732.31602-26-yu-cheng.yu@intel.com> In-Reply-To: <20200429220732.31602-1-yu-cheng.yu@intel.com> References: <20200429220732.31602-1-yu-cheng.yu@intel.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Sender: linux-doc-owner@vger.kernel.org To: x86@kernel.org, "H. Peter Anvin" , Thomas Gleixner , Ingo Molnar , linux-kernel@vger.kernel.org, linux-doc@vger.kernel.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, linux-api@vger.kernel.org, Arnd Bergmann , Andy Lutomirski , Balbir Singh , Borislav Petkov , Cyrill Gorcunov , Dave Hansen , Eugene Syromiatnikov , Florian Weimer , "H.J. Lu" , Jann Horn , Jonathan Corbet , Kees Cook , Mike Kravetz , Nadav Amit , Oleg Nesterov , Pavel Machek , Peter Zijlstra , Randy Dunlap , "Ravi V. Shankar" , Vedvyas Shanbhogue , Dave Martin , Weijiang Yang Cc: Yu-cheng Yu List-ID: Message-ID: <20200429220731.yj2LG6LqKskz4QBKDbZgDNek-Mt3sEf3tukaUjur1sU@z> The kernel allocates (and frees on thread exit) a new shadow stack for a pthread child. It is possible for the kernel to complete the clone syscall and set the child's shadow stack pointer to NULL and let the child thread allocate a shadow stack for itself. There are two issues in this approach: It is not compatible with existing code that does inline syscall and it cannot handle signals before the child can successfully allocate a shadow stack. A 64-bit shadow stack has a size of min(RLIMIT_STACK, 4 GB). A compat-mode thread shadow stack has a size of 1/4 min(RLIMIT_STACK, 4 GB). This allows more threads to run in a 32-bit address space. Signed-off-by: Yu-cheng Yu --- v10: - Limit shadow stack size to 4 GB. arch/x86/include/asm/cet.h | 2 ++ arch/x86/include/asm/mmu_context.h | 3 +++ arch/x86/kernel/cet.c | 41 ++++++++++++++++++++++++++++++ arch/x86/kernel/process.c | 7 +++++ 4 files changed, 53 insertions(+) diff --git a/arch/x86/include/asm/cet.h b/arch/x86/include/asm/cet.h index 56fe08eebae6..71dc92acd2f2 100644 --- a/arch/x86/include/asm/cet.h +++ b/arch/x86/include/asm/cet.h @@ -18,11 +18,13 @@ struct cet_status { #ifdef CONFIG_X86_INTEL_CET int cet_setup_shstk(void); +int cet_setup_thread_shstk(struct task_struct *p); void cet_disable_free_shstk(struct task_struct *p); int cet_verify_rstor_token(bool ia32, unsigned long ssp, unsigned long *new_ssp); void cet_restore_signal(struct sc_ext *sc); int cet_setup_signal(bool ia32, unsigned long rstor, struct sc_ext *sc); #else +static inline int cet_setup_thread_shstk(struct task_struct *p) { return 0; } static inline void cet_disable_free_shstk(struct task_struct *p) {} static inline void cet_restore_signal(struct sc_ext *sc) { return; } static inline int cet_setup_signal(bool ia32, unsigned long rstor, diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 4e55370e48e8..bb7a4a2d6923 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -12,6 +12,7 @@ #include #include #include +#include #include extern atomic64_t last_mm_ctx_id; @@ -155,6 +156,8 @@ do { \ #else #define deactivate_mm(tsk, mm) \ do { \ + if (!tsk->vfork_done) \ + cet_disable_free_shstk(tsk); \ load_gs_index(0); \ loadsegment(fs, 0); \ } while (0) diff --git a/arch/x86/kernel/cet.c b/arch/x86/kernel/cet.c index 274fecdd9669..121552047b86 100644 --- a/arch/x86/kernel/cet.c +++ b/arch/x86/kernel/cet.c @@ -169,6 +169,47 @@ int cet_setup_shstk(void) return 0; } +int cet_setup_thread_shstk(struct task_struct *tsk) +{ + unsigned long addr, size; + struct cet_user_state *state; + struct cet_status *cet = &tsk->thread.cet; + + if (!cet->shstk_size) + return 0; + + state = get_xsave_addr(&tsk->thread.fpu.state.xsave, + XFEATURE_CET_USER); + + if (!state) + return -EINVAL; + + /* Cap shadow stack size to 4 GB */ + size = min(rlimit(RLIMIT_STACK), 1UL << 32); + + /* + * Compat-mode pthreads share a limited address space. + * If each function call takes an average of four slots + * stack space, we need 1/4 of stack size for shadow stack. + */ + if (in_compat_syscall()) + size /= 4; + size = round_up(size, PAGE_SIZE); + addr = alloc_shstk(size); + + if (IS_ERR((void *)addr)) { + cet->shstk_base = 0; + cet->shstk_size = 0; + return PTR_ERR((void *)addr); + } + + fpu__prepare_write(&tsk->thread.fpu); + state->user_ssp = (u64)(addr + size); + cet->shstk_base = addr; + cet->shstk_size = size; + return 0; +} + void cet_disable_free_shstk(struct task_struct *tsk) { struct cet_status *cet = &tsk->thread.cet; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 9d9cff2c1018..ef1c2b8086a2 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -109,6 +109,7 @@ void exit_thread(struct task_struct *tsk) free_vm86(t); + cet_disable_free_shstk(tsk); fpu__drop(fpu); } @@ -179,6 +180,12 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, if (clone_flags & CLONE_SETTLS) ret = set_new_tls(p, tls); +#ifdef CONFIG_X86_64 + /* Allocate a new shadow stack for pthread */ + if (!ret && (clone_flags & (CLONE_VFORK | CLONE_VM)) == CLONE_VM) + ret = cet_setup_thread_shstk(p); +#endif + if (!ret && unlikely(test_tsk_thread_flag(current, TIF_IO_BITMAP))) io_bitmap_share(p); -- 2.21.0