From: Ingo Molnar <mingo@kernel.org>
To: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org, Borislav Petkov <bp@alien8.de>,
Thomas Gleixner <tglx@linutronix.de>,
"H. Peter Anvin" <hpa@zytor.com>,
Andrew Morton <akpm@linux-foundation.org>,
Peter Zijlstra <a.p.zijlstra@chello.nl>,
Oleg Nesterov <oleg@redhat.com>
Subject: [GIT PULL] x86/fpu changes for v4.1
Date: Mon, 13 Apr 2015 10:40:07 +0200 [thread overview]
Message-ID: <20150413084007.GA28062@gmail.com> (raw)
Linus,
Please pull the latest x86-fpu-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-fpu-for-linus
# HEAD: 7fc253e277ecf1ea57c2d670bdbcda3dffd19453 x86/fpu: Kill eager_fpu_init_bp()
Various x86 FPU handling cleanups, refactorings and fixes. (Borislav
Petkov, Oleg Nesterov, Rik van Riel)
Thanks,
Ingo
------------------>
Borislav Petkov (2):
x86/fpu: Fold __drop_fpu() into its sole user
x86/fpu: Rename drop_init_fpu() to fpu_reset_state()
Oleg Nesterov (14):
x86/fpu: Don't reset thread.fpu_counter
x86/fpu: Don't do __thread_fpu_end() if use_eager_fpu()
x86/fpu: Change math_error() to use unlazy_fpu(), kill (now) unused save_init_fpu()
x86/fpu: __kernel_fpu_begin() should clear fpu_owner_task even if use_eager_fpu()
x86/fpu: Always allow FPU in interrupt if use_eager_fpu()
x86/fpu: Don't abuse FPU in kernel threads if use_eager_fpu()
x86/fpu: Change xstateregs_get()/set() to use ->xsave.i387 rather than ->fxsave
x86/fpu: Factor out memset(xstate, 0) in fpu_finit() paths
x86/fpu: Document user_fpu_begin()
x86/fpu: Introduce restore_init_xstate()
x86/fpu: Use restore_init_xstate() instead of math_state_restore() on kthread exec
x86/fpu: Don't abuse drop_init_fpu() in flush_thread()
x86/fpu: Don't allocate fpu->state for swapper/0
x86/fpu: Kill eager_fpu_init_bp()
Rik van Riel (5):
x86/fpu: Move lazy restore functions up a few lines
x86/fpu: Introduce task_disable_lazy_fpu_restore() helper
x86/fpu: Use an explicit if/else in switch_fpu_prepare()
x86/fpu: Use task_disable_lazy_fpu_restore() helper
x86/fpu: Also check fpu_lazy_restore() when use_eager_fpu()
arch/x86/include/asm/fpu-internal.h | 130 ++++++++++++++++++------------------
arch/x86/kernel/i387.c | 54 ++++++++-------
arch/x86/kernel/process.c | 19 ++++--
arch/x86/kernel/signal.c | 2 +-
arch/x86/kernel/traps.c | 4 +-
arch/x86/kernel/xsave.c | 39 +++--------
6 files changed, 117 insertions(+), 131 deletions(-)
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 72ba21a8b5fc..da5e96756570 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -67,6 +67,34 @@ extern void finit_soft_fpu(struct i387_soft_struct *soft);
static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
#endif
+/*
+ * Must be run with preemption disabled: this clears the fpu_owner_task,
+ * on this CPU.
+ *
+ * This will disable any lazy FPU state restore of the current FPU state,
+ * but if the current thread owns the FPU, it will still be saved by.
+ */
+static inline void __cpu_disable_lazy_restore(unsigned int cpu)
+{
+ per_cpu(fpu_owner_task, cpu) = NULL;
+}
+
+/*
+ * Used to indicate that the FPU state in memory is newer than the FPU
+ * state in registers, and the FPU state should be reloaded next time the
+ * task is run. Only safe on the current task, or non-running tasks.
+ */
+static inline void task_disable_lazy_fpu_restore(struct task_struct *tsk)
+{
+ tsk->thread.fpu.last_cpu = ~0;
+}
+
+static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
+{
+ return new == this_cpu_read_stable(fpu_owner_task) &&
+ cpu == new->thread.fpu.last_cpu;
+}
+
static inline int is_ia32_compat_frame(void)
{
return config_enabled(CONFIG_IA32_EMULATION) &&
@@ -107,7 +135,6 @@ static __always_inline __pure bool use_fxsr(void)
static inline void fx_finit(struct i387_fxsave_struct *fx)
{
- memset(fx, 0, xstate_size);
fx->cwd = 0x37f;
fx->mxcsr = MXCSR_DEFAULT;
}
@@ -351,8 +378,14 @@ static inline void __thread_fpu_begin(struct task_struct *tsk)
__thread_set_has_fpu(tsk);
}
-static inline void __drop_fpu(struct task_struct *tsk)
+static inline void drop_fpu(struct task_struct *tsk)
{
+ /*
+ * Forget coprocessor state..
+ */
+ preempt_disable();
+ tsk->thread.fpu_counter = 0;
+
if (__thread_has_fpu(tsk)) {
/* Ignore delayed exceptions from user space */
asm volatile("1: fwait\n"
@@ -360,30 +393,29 @@ static inline void __drop_fpu(struct task_struct *tsk)
_ASM_EXTABLE(1b, 2b));
__thread_fpu_end(tsk);
}
-}
-static inline void drop_fpu(struct task_struct *tsk)
-{
- /*
- * Forget coprocessor state..
- */
- preempt_disable();
- tsk->thread.fpu_counter = 0;
- __drop_fpu(tsk);
clear_stopped_child_used_math(tsk);
preempt_enable();
}
-static inline void drop_init_fpu(struct task_struct *tsk)
+static inline void restore_init_xstate(void)
+{
+ if (use_xsave())
+ xrstor_state(init_xstate_buf, -1);
+ else
+ fxrstor_checking(&init_xstate_buf->i387);
+}
+
+/*
+ * Reset the FPU state in the eager case and drop it in the lazy case (later use
+ * will reinit it).
+ */
+static inline void fpu_reset_state(struct task_struct *tsk)
{
if (!use_eager_fpu())
drop_fpu(tsk);
- else {
- if (use_xsave())
- xrstor_state(init_xstate_buf, -1);
- else
- fxrstor_checking(&init_xstate_buf->i387);
- }
+ else
+ restore_init_xstate();
}
/*
@@ -400,24 +432,6 @@ static inline void drop_init_fpu(struct task_struct *tsk)
*/
typedef struct { int preload; } fpu_switch_t;
-/*
- * Must be run with preemption disabled: this clears the fpu_owner_task,
- * on this CPU.
- *
- * This will disable any lazy FPU state restore of the current FPU state,
- * but if the current thread owns the FPU, it will still be saved by.
- */
-static inline void __cpu_disable_lazy_restore(unsigned int cpu)
-{
- per_cpu(fpu_owner_task, cpu) = NULL;
-}
-
-static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
-{
- return new == this_cpu_read_stable(fpu_owner_task) &&
- cpu == new->thread.fpu.last_cpu;
-}
-
static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu)
{
fpu_switch_t fpu;
@@ -426,13 +440,17 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
* If the task has used the math, pre-load the FPU on xsave processors
* or if the past 5 consecutive context-switches used math.
*/
- fpu.preload = tsk_used_math(new) && (use_eager_fpu() ||
- new->thread.fpu_counter > 5);
+ fpu.preload = tsk_used_math(new) &&
+ (use_eager_fpu() || new->thread.fpu_counter > 5);
+
if (__thread_has_fpu(old)) {
if (!__save_init_fpu(old))
- cpu = ~0;
- old->thread.fpu.last_cpu = cpu;
- old->thread.fpu.has_fpu = 0; /* But leave fpu_owner_task! */
+ task_disable_lazy_fpu_restore(old);
+ else
+ old->thread.fpu.last_cpu = cpu;
+
+ /* But leave fpu_owner_task! */
+ old->thread.fpu.has_fpu = 0;
/* Don't change CR0.TS if we just switch! */
if (fpu.preload) {
@@ -443,10 +461,10 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
stts();
} else {
old->thread.fpu_counter = 0;
- old->thread.fpu.last_cpu = ~0;
+ task_disable_lazy_fpu_restore(old);
if (fpu.preload) {
new->thread.fpu_counter++;
- if (!use_eager_fpu() && fpu_lazy_restore(new, cpu))
+ if (fpu_lazy_restore(new, cpu))
fpu.preload = 0;
else
prefetch(new->thread.fpu.state);
@@ -466,7 +484,7 @@ static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
{
if (fpu.preload) {
if (unlikely(restore_fpu_checking(new)))
- drop_init_fpu(new);
+ fpu_reset_state(new);
}
}
@@ -495,10 +513,12 @@ static inline int restore_xstate_sig(void __user *buf, int ia32_frame)
}
/*
- * Need to be preemption-safe.
+ * Needs to be preemption-safe.
*
* NOTE! user_fpu_begin() must be used only immediately before restoring
- * it. This function does not do any save/restore on their own.
+ * the save state. It does not do any saving/restoring on its own. In
+ * lazy FPU mode, it is just an optimization to avoid a #NM exception,
+ * the task can lose the FPU right after preempt_enable().
*/
static inline void user_fpu_begin(void)
{
@@ -520,24 +540,6 @@ static inline void __save_fpu(struct task_struct *tsk)
}
/*
- * These disable preemption on their own and are safe
- */
-static inline void save_init_fpu(struct task_struct *tsk)
-{
- WARN_ON_ONCE(!__thread_has_fpu(tsk));
-
- if (use_eager_fpu()) {
- __save_fpu(tsk);
- return;
- }
-
- preempt_disable();
- __save_init_fpu(tsk);
- __thread_fpu_end(tsk);
- preempt_enable();
-}
-
-/*
* i387 state interaction
*/
static inline unsigned short get_fpu_cwd(struct task_struct *tsk)
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index d5651fce0b71..41575b9b1021 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -42,8 +42,8 @@ void kernel_fpu_enable(void)
* be set (so that the clts/stts pair does nothing that is
* visible in the interrupted kernel thread).
*
- * Except for the eagerfpu case when we return 1 unless we've already
- * been eager and saved the state in kernel_fpu_begin().
+ * Except for the eagerfpu case when we return true; in the likely case
+ * the thread has FPU but we are not going to set/clear TS.
*/
static inline bool interrupted_kernel_fpu_idle(void)
{
@@ -51,7 +51,7 @@ static inline bool interrupted_kernel_fpu_idle(void)
return false;
if (use_eager_fpu())
- return __thread_has_fpu(current);
+ return true;
return !__thread_has_fpu(current) &&
(read_cr0() & X86_CR0_TS);
@@ -94,9 +94,10 @@ void __kernel_fpu_begin(void)
if (__thread_has_fpu(me)) {
__save_init_fpu(me);
- } else if (!use_eager_fpu()) {
+ } else {
this_cpu_write(fpu_owner_task, NULL);
- clts();
+ if (!use_eager_fpu())
+ clts();
}
}
EXPORT_SYMBOL(__kernel_fpu_begin);
@@ -107,7 +108,7 @@ void __kernel_fpu_end(void)
if (__thread_has_fpu(me)) {
if (WARN_ON(restore_fpu_checking(me)))
- drop_init_fpu(me);
+ fpu_reset_state(me);
} else if (!use_eager_fpu()) {
stts();
}
@@ -120,10 +121,13 @@ void unlazy_fpu(struct task_struct *tsk)
{
preempt_disable();
if (__thread_has_fpu(tsk)) {
- __save_init_fpu(tsk);
- __thread_fpu_end(tsk);
- } else
- tsk->thread.fpu_counter = 0;
+ if (use_eager_fpu()) {
+ __save_fpu(tsk);
+ } else {
+ __save_init_fpu(tsk);
+ __thread_fpu_end(tsk);
+ }
+ }
preempt_enable();
}
EXPORT_SYMBOL(unlazy_fpu);
@@ -221,11 +225,12 @@ void fpu_finit(struct fpu *fpu)
return;
}
+ memset(fpu->state, 0, xstate_size);
+
if (cpu_has_fxsr) {
fx_finit(&fpu->state->fxsave);
} else {
struct i387_fsave_struct *fp = &fpu->state->fsave;
- memset(fp, 0, xstate_size);
fp->cwd = 0xffff037fu;
fp->swd = 0xffff0000u;
fp->twd = 0xffffffffu;
@@ -247,7 +252,7 @@ int init_fpu(struct task_struct *tsk)
if (tsk_used_math(tsk)) {
if (cpu_has_fpu && tsk == current)
unlazy_fpu(tsk);
- tsk->thread.fpu.last_cpu = ~0;
+ task_disable_lazy_fpu_restore(tsk);
return 0;
}
@@ -336,6 +341,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
+ struct xsave_struct *xsave = &target->thread.fpu.state->xsave;
int ret;
if (!cpu_has_xsave)
@@ -350,14 +356,12 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
* memory layout in the thread struct, so that we can copy the entire
* xstateregs to the user using one user_regset_copyout().
*/
- memcpy(&target->thread.fpu.state->fxsave.sw_reserved,
- xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes));
-
+ memcpy(&xsave->i387.sw_reserved,
+ xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes));
/*
* Copy the xstate memory layout.
*/
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.fpu.state->xsave, 0, -1);
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
return ret;
}
@@ -365,8 +369,8 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
+ struct xsave_struct *xsave = &target->thread.fpu.state->xsave;
int ret;
- struct xsave_hdr_struct *xsave_hdr;
if (!cpu_has_xsave)
return -ENODEV;
@@ -375,22 +379,16 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
if (ret)
return ret;
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.fpu.state->xsave, 0, -1);
-
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
/*
* mxcsr reserved bits must be masked to zero for security reasons.
*/
- target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask;
-
- xsave_hdr = &target->thread.fpu.state->xsave.xsave_hdr;
-
- xsave_hdr->xstate_bv &= pcntxt_mask;
+ xsave->i387.mxcsr &= mxcsr_feature_mask;
+ xsave->xsave_hdr.xstate_bv &= pcntxt_mask;
/*
* These bits must be zero.
*/
- memset(xsave_hdr->reserved, 0, 48);
-
+ memset(&xsave->xsave_hdr.reserved, 0, 48);
return ret;
}
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 046e2d620bbe..1d2ebadba7ac 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -69,8 +69,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
dst->thread.fpu_counter = 0;
dst->thread.fpu.has_fpu = 0;
- dst->thread.fpu.last_cpu = ~0;
dst->thread.fpu.state = NULL;
+ task_disable_lazy_fpu_restore(dst);
if (tsk_used_math(src)) {
int err = fpu_alloc(&dst->thread.fpu);
if (err)
@@ -131,13 +131,18 @@ void flush_thread(void)
flush_ptrace_hw_breakpoint(tsk);
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
- drop_init_fpu(tsk);
- /*
- * Free the FPU state for non xsave platforms. They get reallocated
- * lazily at the first use.
- */
- if (!use_eager_fpu())
+
+ if (!use_eager_fpu()) {
+ /* FPU state will be reallocated lazily at the first use. */
+ drop_fpu(tsk);
free_thread_xstate(tsk);
+ } else if (!used_math()) {
+ /* kthread execs. TODO: cleanup this horror. */
+ if (WARN_ON(init_fpu(tsk)))
+ force_sig(SIGKILL, tsk);
+ user_fpu_begin();
+ restore_init_xstate();
+ }
}
static void hard_disable_TSC(void)
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index e5042463c1bc..59eaae6185e2 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -679,7 +679,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
* Ensure the signal handler starts with the new fpu state.
*/
if (used_math())
- drop_init_fpu(current);
+ fpu_reset_state(current);
}
signal_setup_done(failed, ksig, test_thread_flag(TIF_SINGLESTEP));
}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 4ff5d162ff9f..edf66c066da9 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -734,7 +734,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
/*
* Save the info for the exception handler and clear the error.
*/
- save_init_fpu(task);
+ unlazy_fpu(task);
task->thread.trap_nr = trapnr;
task->thread.error_code = error_code;
info.si_signo = SIGFPE;
@@ -863,7 +863,7 @@ void math_state_restore(void)
kernel_fpu_disable();
__thread_fpu_begin(tsk);
if (unlikely(restore_fpu_checking(tsk))) {
- drop_init_fpu(tsk);
+ fpu_reset_state(tsk);
force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
} else {
tsk->thread.fpu_counter++;
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index cdc6cf903078..87a815b85f3e 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -342,7 +342,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
config_enabled(CONFIG_IA32_EMULATION));
if (!buf) {
- drop_init_fpu(tsk);
+ fpu_reset_state(tsk);
return 0;
}
@@ -416,7 +416,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
*/
user_fpu_begin();
if (restore_user_xstate(buf_fx, xstate_bv, fx_only)) {
- drop_init_fpu(tsk);
+ fpu_reset_state(tsk);
return -1;
}
}
@@ -678,19 +678,13 @@ void xsave_init(void)
this_func();
}
-static inline void __init eager_fpu_init_bp(void)
-{
- current->thread.fpu.state =
- alloc_bootmem_align(xstate_size, __alignof__(struct xsave_struct));
- if (!init_xstate_buf)
- setup_init_fpu_buf();
-}
-
-void eager_fpu_init(void)
+/*
+ * setup_init_fpu_buf() is __init and it is OK to call it here because
+ * init_xstate_buf will be unset only once during boot.
+ */
+void __init_refok eager_fpu_init(void)
{
- static __refdata void (*boot_func)(void) = eager_fpu_init_bp;
-
- clear_used_math();
+ WARN_ON(used_math());
current_thread_info()->status = 0;
if (eagerfpu == ENABLE)
@@ -701,21 +695,8 @@ void eager_fpu_init(void)
return;
}
- if (boot_func) {
- boot_func();
- boot_func = NULL;
- }
-
- /*
- * This is same as math_state_restore(). But use_xsave() is
- * not yet patched to use math_state_restore().
- */
- init_fpu(current);
- __thread_fpu_begin(current);
- if (cpu_has_xsave)
- xrstor_state(init_xstate_buf, -1);
- else
- fxrstor_checking(&init_xstate_buf->i387);
+ if (!init_xstate_buf)
+ setup_init_fpu_buf();
}
/*
reply other threads:[~2015-04-13 8:40 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20150413084007.GA28062@gmail.com \
--to=mingo@kernel.org \
--cc=a.p.zijlstra@chello.nl \
--cc=akpm@linux-foundation.org \
--cc=bp@alien8.de \
--cc=hpa@zytor.com \
--cc=linux-kernel@vger.kernel.org \
--cc=oleg@redhat.com \
--cc=tglx@linutronix.de \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.