public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Suresh Siddha <suresh.b.siddha@intel.com>
To: mingo@elte.hu, hpa@zytor.com, tglx@linutronix.de,
	torvalds@linux-foundation.org, akpm@linux-foundation.org,
	arjan@linux.intel.com, roland@redhat.com, drepper@redhat.com,
	mikpe@it.uu.se, chrisw@sous-sol.org, andi@firstfloor.org
Cc: linux-kernel@vger.kernel.org, suresh.b.siddha@intel.com
Subject: [patch 3/9] x86, xsave: context switch support using xsave/xrstor
Date: Tue, 29 Jul 2008 10:29:20 -0700	[thread overview]
Message-ID: <20080729173157.670824000@linux-os.sc.intel.com> (raw)
In-Reply-To: 20080729172917.185593000@linux-os.sc.intel.com

[-- Attachment #1: xsave_in_context_switch.patch --]
[-- Type: text/plain, Size: 8740 bytes --]

Uses xsave/xrstor (instead of traditional fxsave/fxrstor) in context switch
when available.

Introduces the TS_XSAVE flag, which determines the need to use xsave/xrstor
instructions during context switch instead of the legacy fxsave/fxrstor
instructions. The thread-synchronous status word is already in L1 cache during
this code path and thus minimizes the performance penalty compared to
(cpu_has_xsave) checks.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
---

Index: tip-0728/arch/x86/kernel/cpu/common.c
===================================================================
--- tip-0728.orig/arch/x86/kernel/cpu/common.c	2008-07-28 18:23:14.000000000 -0700
+++ tip-0728/arch/x86/kernel/cpu/common.c	2008-07-28 18:27:28.000000000 -0700
@@ -709,7 +709,10 @@
 	/*
 	 * Force FPU initialization:
 	 */
-	current_thread_info()->status = 0;
+	if (cpu_has_xsave)
+		current_thread_info()->status = TS_XSAVE;
+	else
+		current_thread_info()->status = 0;
 	clear_used_math();
 	mxcsr_feature_mask_init();
 
Index: tip-0728/arch/x86/kernel/i387.c
===================================================================
--- tip-0728.orig/arch/x86/kernel/i387.c	2008-07-28 18:23:45.000000000 -0700
+++ tip-0728/arch/x86/kernel/i387.c	2008-07-28 18:27:28.000000000 -0700
@@ -97,7 +97,10 @@
 
 	mxcsr_feature_mask_init();
 	/* clean state in init */
-	current_thread_info()->status = 0;
+	if (cpu_has_xsave)
+		current_thread_info()->status = TS_XSAVE;
+	else
+		current_thread_info()->status = 0;
 	clear_used_math();
 }
 #endif	/* CONFIG_X86_64 */
Index: tip-0728/include/asm-x86/i387.h
===================================================================
--- tip-0728.orig/include/asm-x86/i387.h	2008-07-28 18:23:14.000000000 -0700
+++ tip-0728/include/asm-x86/i387.h	2008-07-28 18:27:28.000000000 -0700
@@ -37,6 +37,8 @@
 extern int restore_i387_ia32(struct _fpstate_ia32 __user *buf);
 #endif
 
+#define X87_FSW_ES (1 << 7)	/* Exception Summary */
+
 #ifdef CONFIG_X86_64
 
 /* Ignore delayed exceptions from user space */
@@ -47,7 +49,7 @@
 		     _ASM_EXTABLE(1b, 2b));
 }
 
-static inline int restore_fpu_checking(struct i387_fxsave_struct *fx)
+static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
 {
 	int err;
 
@@ -67,15 +69,31 @@
 	return err;
 }
 
-#define X87_FSW_ES (1 << 7)	/* Exception Summary */
+static inline int restore_fpu_checking(struct task_struct *tsk)
+{
+	if (task_thread_info(tsk)->status & TS_XSAVE)
+		return xrstor_checking(&tsk->thread.xstate->xsave);
+	else
+		return fxrstor_checking(&tsk->thread.xstate->fxsave);
+}
 
 /* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
    is pending. Clear the x87 state here by setting it to fixed
    values. The kernel data segment can be sometimes 0 and sometimes
    new user value. Both should be ok.
    Use the PDA as safe address because it should be already in L1. */
-static inline void clear_fpu_state(struct i387_fxsave_struct *fx)
+static inline void clear_fpu_state(struct task_struct *tsk)
 {
+	struct xsave_struct *xstate = &tsk->thread.xstate->xsave;
+	struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave;
+
+	/*
+	 * xsave header may indicate the init state of the FP.
+	 */
+	if ((task_thread_info(tsk)->status & TS_XSAVE) &&
+	    !(xstate->xsave_hdr.xstate_bv & XSTATE_FP))
+		return;
+
 	if (unlikely(fx->swd & X87_FSW_ES))
 		asm volatile("fnclex");
 	alternative_input(ASM_NOP8 ASM_NOP2,
@@ -108,7 +126,7 @@
 	return err;
 }
 
-static inline void __save_init_fpu(struct task_struct *tsk)
+static inline void fxsave(struct task_struct *tsk)
 {
 	/* Using "rex64; fxsave %0" is broken because, if the memory operand
 	   uses any extended registers for addressing, a second REX prefix
@@ -133,7 +151,16 @@
 			     : "=m" (tsk->thread.xstate->fxsave)
 			     : "cdaSDb" (&tsk->thread.xstate->fxsave));
 #endif
-	clear_fpu_state(&tsk->thread.xstate->fxsave);
+}
+
+static inline void __save_init_fpu(struct task_struct *tsk)
+{
+	if (task_thread_info(tsk)->status & TS_XSAVE)
+		xsave(tsk);
+	else
+		fxsave(tsk);
+
+	clear_fpu_state(tsk);
 	task_thread_info(tsk)->status &= ~TS_USEDFPU;
 }
 
@@ -148,6 +175,10 @@
 
 static inline void restore_fpu(struct task_struct *tsk)
 {
+	if (task_thread_info(tsk)->status & TS_XSAVE) {
+		xrstor_checking(&tsk->thread.xstate->xsave);
+		return;
+	}
 	/*
 	 * The "nop" is needed to make the instructions the same
 	 * length.
@@ -173,6 +204,27 @@
  */
 static inline void __save_init_fpu(struct task_struct *tsk)
 {
+	if (task_thread_info(tsk)->status & TS_XSAVE) {
+		struct xsave_struct *xstate = &tsk->thread.xstate->xsave;
+		struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave;
+
+		xsave(tsk);
+
+		/*
+	 	 * xsave header may indicate the init state of the FP.
+	 	 */
+		if (!(xstate->xsave_hdr.xstate_bv & XSTATE_FP))
+			goto end;
+
+		if (unlikely(fx->swd & X87_FSW_ES))
+			asm volatile("fnclex");
+
+		/*
+		 * we can do a simple return here or be paranoid :)
+		 */
+		goto clear_state;
+	}
+
 	/* Use more nops than strictly needed in case the compiler
 	   varies code */
 	alternative_input(
@@ -182,6 +234,7 @@
 		X86_FEATURE_FXSR,
 		[fx] "m" (tsk->thread.xstate->fxsave),
 		[fsw] "m" (tsk->thread.xstate->fxsave.swd) : "memory");
+clear_state:
 	/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
 	   is pending.  Clear the x87 state here by setting it to fixed
 	   values. safe_address is a random variable that should be in L1 */
@@ -191,6 +244,7 @@
 		"fildl %[addr]", 	/* set F?P to defined value */
 		X86_FEATURE_FXSAVE_LEAK,
 		[addr] "m" (safe_address));
+end:
 	task_thread_info(tsk)->status &= ~TS_USEDFPU;
 }
 
Index: tip-0728/include/asm-x86/processor.h
===================================================================
--- tip-0728.orig/include/asm-x86/processor.h	2008-07-28 18:23:14.000000000 -0700
+++ tip-0728/include/asm-x86/processor.h	2008-07-28 18:27:28.000000000 -0700
@@ -367,6 +367,7 @@
 	struct i387_fsave_struct	fsave;
 	struct i387_fxsave_struct	fxsave;
 	struct i387_soft_struct		soft;
+	struct xsave_struct		xsave;
 };
 
 #ifdef CONFIG_X86_64
Index: tip-0728/include/asm-x86/thread_info.h
===================================================================
--- tip-0728.orig/include/asm-x86/thread_info.h	2008-07-28 18:20:15.000000000 -0700
+++ tip-0728/include/asm-x86/thread_info.h	2008-07-28 18:27:28.000000000 -0700
@@ -241,6 +241,7 @@
 #define TS_POLLING		0x0004	/* true if in idle loop
 					   and not sleeping */
 #define TS_RESTORE_SIGMASK	0x0008	/* restore signal mask in do_signal() */
+#define TS_XSAVE		0x0010	/* Use xsave/xrstor */
 
 #define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING)
 
Index: tip-0728/include/asm-x86/xsave.h
===================================================================
--- tip-0728.orig/include/asm-x86/xsave.h	2008-07-28 18:23:14.000000000 -0700
+++ tip-0728/include/asm-x86/xsave.h	2008-07-28 18:27:28.000000000 -0700
@@ -17,10 +17,43 @@
 #define XCNTXT_LMASK	(XSTATE_FP | XSTATE_SSE)
 #define XCNTXT_HMASK	0x0
 
+#ifdef CONFIG_X86_64
+#define REX_PREFIX	"0x48, "
+#else
+#define REX_PREFIX
+#endif
+
 extern unsigned int xstate_size, pcntxt_hmask, pcntxt_lmask;
 extern struct xsave_struct *init_xstate_buf;
 
 extern void xsave_cntxt_init(void);
 extern void xsave_init(void);
+extern int init_fpu(struct task_struct *child);
+
+static inline int xrstor_checking(struct xsave_struct *fx)
+{
+	int err;
+
+	asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t"
+                     "2:\n"
+                     ".section .fixup,\"ax\"\n"
+                     "3:  movl $-1,%[err]\n"
+                     "    jmp  2b\n"
+                     ".previous\n"
+                     _ASM_EXTABLE(1b, 3b)
+                     : [err] "=r" (err)
+		     : "D" (fx), "m" (*fx), "a" (-1), "d" (-1), "0" (0)
+		     : "memory");
+
+	return err;
+}
 
+static inline void xsave(struct task_struct *tsk)
+{
+	/* This, however, we can work around by forcing the compiler to select
+	   an addressing mode that doesn't require extended registers. */
+	__asm__ __volatile__(".byte " REX_PREFIX "0x0f,0xae,0x27"
+			     ::"D" (&(tsk->thread.xstate->xsave)),
+			       "a" (-1), "d"(-1) : "memory");
+}
 #endif
Index: tip-0728/arch/x86/kernel/traps_64.c
===================================================================
--- tip-0728.orig/arch/x86/kernel/traps_64.c	2008-07-28 18:23:14.000000000 -0700
+++ tip-0728/arch/x86/kernel/traps_64.c	2008-07-28 18:27:28.000000000 -0700
@@ -1147,7 +1147,7 @@
 	/*
 	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
 	 */
-	if (unlikely(restore_fpu_checking(&me->thread.xstate->fxsave))) {
+	if (unlikely(restore_fpu_checking(me))) {
 		stts();
 		force_sig(SIGSEGV, me);
 		return;

-- 


  parent reply	other threads:[~2008-07-29 17:40 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-07-29 17:29 [patch 0/9] x86, xsave: xsave/xrstor support Suresh Siddha
2008-07-29 17:29 ` [patch 1/9] x86, xsave: xsave cpuid feature bits Suresh Siddha
2008-07-29 17:29 ` [patch 2/9] x86, xsave: enable xsave/xrstor on cpus with xsave support Suresh Siddha
2008-07-29 17:29 ` Suresh Siddha [this message]
2008-07-29 17:29 ` [patch 4/9] x86, xsave: dynamically allocate sigframes fpstate instead of static allocation Suresh Siddha
2008-07-29 17:29 ` [patch 5/9] x86, xsave: reorganization of signal save/restore fpstate code layout Suresh Siddha
2008-07-29 17:29 ` [patch 6/9] x86, xsave: xsave/xrstor specific routines Suresh Siddha
2008-07-29 17:29 ` [patch 7/9] x86, xsave: struct _fpstate extensions to include extended state information Suresh Siddha
2008-07-29 17:29 ` [patch 8/9] x86, xsave: save/restore the extended state context in sigframe Suresh Siddha
2008-07-29 17:29 ` [patch 9/9] x86, xsave: update xsave header bits during ptrace fpregs set Suresh Siddha
2008-07-29 23:09 ` [patch 0/9] x86, xsave: xsave/xrstor support H. Peter Anvin
2008-07-29 23:29   ` Suresh Siddha
2008-07-29 23:43     ` H. Peter Anvin
2008-07-30 10:03       ` Ingo Molnar
2008-07-30 16:31         ` H. Peter Anvin
2008-07-30 17:08           ` Suresh Siddha
2008-07-30 17:14             ` H. Peter Anvin
2008-07-30 18:25         ` Ingo Molnar
2008-07-30 21:46           ` Suresh Siddha
2008-07-30 23:41           ` Suresh Siddha
2008-07-31 21:29             ` Ingo Molnar
2008-07-31 21:58               ` Suresh Siddha
2008-07-31 22:14                 ` Andi Kleen
2008-07-31 22:19                   ` Suresh Siddha
2008-07-31 22:36                     ` Andi Kleen
2008-07-31 22:38                     ` Linus Torvalds
2008-07-31 22:50                       ` Ingo Molnar
2008-08-01  2:06                         ` Rene Herman
2008-08-01  9:51                           ` Ingo Molnar
2008-08-01 14:27                             ` Rene Herman
2008-08-01 14:49                               ` Andi Kleen
2008-08-01 15:19                                 ` Rene Herman
2008-08-01 15:44                                   ` Andi Kleen
2008-08-01 16:03                                     ` Rene Herman
2008-07-31 22:48                     ` Alan Cox
2008-07-31 22:17                 ` Ingo Molnar
2008-08-13 11:00                   ` Ingo Molnar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080729173157.670824000@linux-os.sc.intel.com \
    --to=suresh.b.siddha@intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=andi@firstfloor.org \
    --cc=arjan@linux.intel.com \
    --cc=chrisw@sous-sol.org \
    --cc=drepper@redhat.com \
    --cc=hpa@zytor.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mikpe@it.uu.se \
    --cc=mingo@elte.hu \
    --cc=roland@redhat.com \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox