xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] FPU LWP 5/8: add a mask option to xsave() and xrstor()
@ 2011-05-03 20:17 Wei Huang
  2011-05-04  7:02 ` Jan Beulich
  0 siblings, 1 reply; 4+ messages in thread
From: Wei Huang @ 2011-05-03 20:17 UTC (permalink / raw)
  To: 'xen-devel@lists.xensource.com', Keir Fraser, Jan Beulich

[-- Attachment #1: Type: text/plain, Size: 219 bytes --]

XSAVE: add a mask option to xsave() and xrstor()

Xen currently sets mask bits of xsave() and xrstor() to all 1's. This 
patch adds a mask option to xsave() and xrstor().

Signed-off-by: Wei Huang <wei.huang2@amd.com>


[-- Attachment #2: lwp5.txt --]
[-- Type: text/plain, Size: 4229 bytes --]

# HG changeset patch
# User Wei Huang <wei.huang2@amd.com>
# Date 1304448567 18000
# Node ID 83db82b67f65bee91f35e9caaad700a78ac0a3fc
# Parent  63208cfe3c558cebc5149fc569702785f6d8e73b
XSAVE: add a mask option to xsave() and xrstor()

Xen currently sets mask bits of xsave() and xrstor() to all 1's. This patch adds a mask option to xsave() and xrstor().

Signed-off-by: Wei Huang <wei.huang2@amd.com>

diff -r 63208cfe3c55 -r 83db82b67f65 xen/arch/x86/i387.c
--- a/xen/arch/x86/i387.c	Tue May 03 13:45:26 2011 -0500
+++ b/xen/arch/x86/i387.c	Tue May 03 13:49:27 2011 -0500
@@ -35,14 +35,14 @@
 /*     FPU Restore Functions   */
 /*******************************/
 /* Restore x87 extended state */
-static inline void fpu_xrstor(struct vcpu *v)
+static inline void fpu_xrstor(struct vcpu *v, uint64_t mask)
 {
     /*
      * XCR0 normally represents what guest OS set. In case of Xen itself, 
      * we set all supported feature mask before doing save/restore.
      */
     set_xcr0(v->arch.xcr0_accum);
-    xrstor(v);
+    xrstor(v, mask);
     set_xcr0(v->arch.xcr0);
 }
 
@@ -98,13 +98,13 @@
 /*      FPU Save Functions     */
 /*******************************/
 /* Save x87 extended state */
-static inline void fpu_xsave(struct vcpu *v)
+static inline void fpu_xsave(struct vcpu *v, uint64_t mask)
 {
     /* XCR0 normally represents what guest OS set. In case of Xen itself,
      * we set all accumulated feature mask before doing save/restore.
      */
     set_xcr0(v->arch.xcr0_accum);
-    xsave(v);
+    xsave(v, mask);
     set_xcr0(v->arch.xcr0);    
 }
 
@@ -174,7 +174,7 @@
         return;
 
     if ( xsave_enabled(v) )
-        fpu_xrstor(v);
+        fpu_xrstor(v, XSTATE_ALL);
     else if ( v->fpu_initialised )
     {
         if ( cpu_has_fxsr )
@@ -204,7 +204,7 @@
     clts();
 
     if ( xsave_enabled(v) )
-        fpu_xsave(v);
+        fpu_xsave(v, XSTATE_ALL);
     else if ( cpu_has_fxsr )
         fpu_fxsave(v);
     else
diff -r 63208cfe3c55 -r 83db82b67f65 xen/arch/x86/xstate.c
--- a/xen/arch/x86/xstate.c	Tue May 03 13:45:26 2011 -0500
+++ b/xen/arch/x86/xstate.c	Tue May 03 13:49:27 2011 -0500
@@ -51,32 +51,37 @@
     return this_cpu(xcr0);
 }
 
-void xsave(struct vcpu *v)
+void xsave(struct vcpu *v, uint64_t mask)
 {
     struct xsave_struct *ptr = v->arch.xsave_area;
+    uint32_t hmask = mask >> 32;
+    uint32_t lmask = mask;
 
     if ( cpu_has_xsaveopt )
         asm volatile (
             ".byte " REX_PREFIX "0x0f,0xae,0x37"
             :
-            : "a" (-1), "d" (-1), "D"(ptr)
+            : "a" (lmask), "d" (hmask), "D"(ptr)
             : "memory" );
     else
         asm volatile (
             ".byte " REX_PREFIX "0x0f,0xae,0x27"
             :
-            : "a" (-1), "d" (-1), "D"(ptr)
+            : "a" (lmask), "d" (hmask), "D"(ptr)
             : "memory" );
 }
 
-void xrstor(struct vcpu *v)
+void xrstor(struct vcpu *v, uint64_t mask)
 {
+    uint32_t hmask = mask >> 32;
+    uint32_t lmask = mask;
+
     struct xsave_struct *ptr = v->arch.xsave_area;
 
     asm volatile (
         ".byte " REX_PREFIX "0x0f,0xae,0x2f"
         :
-        : "m" (*ptr), "a" (-1), "d" (-1), "D"(ptr) );
+        : "m" (*ptr), "a" (lmask), "d" (hmask), "D"(ptr) );
 }
 
 bool_t xsave_enabled(const struct vcpu *v)
diff -r 63208cfe3c55 -r 83db82b67f65 xen/include/asm-x86/xstate.h
--- a/xen/include/asm-x86/xstate.h	Tue May 03 13:45:26 2011 -0500
+++ b/xen/include/asm-x86/xstate.h	Tue May 03 13:49:27 2011 -0500
@@ -26,6 +26,10 @@
 #define XSTATE_LWP     (1ULL << 62) /* AMD lightweight profiling */
 #define XSTATE_FP_SSE  (XSTATE_FP | XSTATE_SSE)
 #define XCNTXT_MASK    (XSTATE_FP | XSTATE_SSE | XSTATE_YMM | XSTATE_LWP)
+
+#define XSTATE_LAZY    (XSTATE_FP | XSTATE_SSE | XSTATE_YMM)
+#define XSTATE_NONLAZY (XSTATE_LWP)
+#define XSTATE_ALL     (~0)
 
 #ifdef CONFIG_X86_64
 #define REX_PREFIX     "0x48, "
@@ -56,8 +60,8 @@
 /* extended state operations */
 void set_xcr0(u64 xfeatures);
 uint64_t get_xcr0(void);
-void xsave(struct vcpu *v);
-void xrstor(struct vcpu *v);
+void xsave(struct vcpu *v, uint64_t mask);
+void xrstor(struct vcpu *v, uint64_t mask);
 bool_t xsave_enabled(const struct vcpu *v);
 
 /* extended state init and cleanup functions */

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] FPU LWP 5/8: add a mask option to xsave() and xrstor()
  2011-05-03 20:17 [PATCH] FPU LWP 5/8: add a mask option to xsave() and xrstor() Wei Huang
@ 2011-05-04  7:02 ` Jan Beulich
  2011-05-04 16:03   ` Wei Huang
  0 siblings, 1 reply; 4+ messages in thread
From: Jan Beulich @ 2011-05-04  7:02 UTC (permalink / raw)
  To: Wei Huang; +Cc: xen-devel@lists.xensource.com, Keir Fraser

>>> On 03.05.11 at 22:17, Wei Huang <wei.huang2@amd.com> wrote:
>+#define XSTATE_LAZY    (XSTATE_FP | XSTATE_SSE | XSTATE_YMM)
>+#define XSTATE_NONLAZY (XSTATE_LWP)
>+#define XSTATE_ALL     (~0)

As said before, this isn't forward compatible. New bits added in
future hardware should explicitly *not* require changes to the OS
(or hypervisor in our case). If you're certain LWP will remain the
only piece not controlled via CR0.TS, then you'll want

#define XSTATE_LAZY    (XSTATE_ALL & ~XSTATE_NONLAZY)

If you aren't (and I'm afraid you can't), then you'll have to ask
your hardware guys to provide a means to detect which of the
bits cover state not controlled by CR0.TS, and set these masks
dynamically.

Jan

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] FPU LWP 5/8: add a mask option to xsave() and xrstor()
  2011-05-04  7:02 ` Jan Beulich
@ 2011-05-04 16:03   ` Wei Huang
  2011-05-05  7:09     ` Jan Beulich
  0 siblings, 1 reply; 4+ messages in thread
From: Wei Huang @ 2011-05-04 16:03 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Keir Fraser, xen-devel@lists.xensource.com

Hi Jan,

That is a good point. So far there isn't a way to decide which bits are 
guarded by CR0.TS. I will bring it up to our design team. I guess LWP 
will be the only exception for a long while. Is the first approach 
sufficient/acceptable to you for now?

#define XSTATE_LAZY (XSTATE_ALL & ~XSTATE_NONLAZY)

Thanks,
-Wei


On 05/04/2011 02:02 AM, Jan Beulich wrote:
>>>> On 03.05.11 at 22:17, Wei Huang <wei.huang2@amd.com> wrote:
>> +#define XSTATE_LAZY    (XSTATE_FP | XSTATE_SSE | XSTATE_YMM)
>> +#define XSTATE_NONLAZY (XSTATE_LWP)
>> +#define XSTATE_ALL     (~0)
> As said before, this isn't forward compatible. New bits added in
> future hardware should explicitly *not* require changes to the OS
> (or hypervisor in our case). If you're certain LWP will remain the
> only piece not controlled via CR0.TS, then you'll want
>
> #define XSTATE_LAZY    (XSTATE_ALL & ~XSTATE_NONLAZY)
>
> If you aren't (and I'm afraid you can't), then you'll have to ask
> your hardware guys to provide a means to detect which of the
> bits cover state not controlled by CR0.TS, and set these masks
> dynamically.
>
> Jan
>
>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] FPU LWP 5/8: add a mask option to xsave() and xrstor()
  2011-05-04 16:03   ` Wei Huang
@ 2011-05-05  7:09     ` Jan Beulich
  0 siblings, 0 replies; 4+ messages in thread
From: Jan Beulich @ 2011-05-05  7:09 UTC (permalink / raw)
  To: Wei Huang; +Cc: xen-devel@lists.xensource.com, Keir Fraser

>>> On 04.05.11 at 18:03, Wei Huang <wei.huang2@amd.com> wrote:
> Hi Jan,
> 
> That is a good point. So far there isn't a way to decide which bits are 
> guarded by CR0.TS. I will bring it up to our design team. I guess LWP 
> will be the only exception for a long while. Is the first approach 
> sufficient/acceptable to you for now?
> 
> #define XSTATE_LAZY (XSTATE_ALL & ~XSTATE_NONLAZY)

Yes.

Jan

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2011-05-05  7:09 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-05-03 20:17 [PATCH] FPU LWP 5/8: add a mask option to xsave() and xrstor() Wei Huang
2011-05-04  7:02 ` Jan Beulich
2011-05-04 16:03   ` Wei Huang
2011-05-05  7:09     ` Jan Beulich

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).