All of lore.kernel.org
 help / color / mirror / Atom feed
From: Wei Huang <wei.huang2@amd.com>
To: "'xen-devel@lists.xensource.com'" <xen-devel@lists.xensource.com>
Subject: [PATCH][RFC] FPU LWP 1/5: clean up the FPU code
Date: Thu, 14 Apr 2011 15:37:54 -0500	[thread overview]
Message-ID: <4DA75B22.1010702@amd.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 503 bytes --]

This patch reorganizes and cleans up the FPU code. Main changes include:

1. Related functions are re-arranged together.
2. FPU save/restore code are extracted out as independent functions 
(fsave/frstor, fxsave/fxrstor, xsave/xrstor).
3. Two FPU entry functions, fpu_save() and fpu_restore(), are defined. 
They utilize xsave/xrstor to save/restore FPU state if CPU supports 
extended state. Otherwise they fall back to fxsave/fxrstor or fsave/frstor.

Signed-off-by: Wei Huang <wei.huang2@amd.com>




[-- Attachment #2: lwp1.txt --]
[-- Type: text/plain, Size: 16359 bytes --]

# HG changeset patch
# User Wei Huang <wei.huang2@amd.com>
# Date 1302811927 18000
# Branch lwp3
# Node ID 4ed4e3d9106e691dddf3bbf54afd935ef0a9000f
# Parent  476b4ba2c3b55f95b6e80bbe94a24bab3b3779d9
FPU: clean up the FPU code

This patch reorganizes and cleans up the FPU code. Main changes include:

1. Related functions are re-arranged together.
2. FPU save/restore code are extracted out as independent functions (fsave/frstor, fxsave/fxrstor, xsave/xrstor).
3. Two FPU entry functions, fpu_save() and fpu_restore(), are defined. They utilize xsave/xrstor to save/restore FPU state if CPU supports extended state. Otherwise they fall back to fxsave/fxrstor or fsave/frstor.

Signed-off-by: Wei Huang <wei.huang2@amd.com>

diff -r 476b4ba2c3b5 -r 4ed4e3d9106e xen/arch/x86/acpi/suspend.c
--- a/xen/arch/x86/acpi/suspend.c	Thu Apr 14 15:03:04 2011 -0500
+++ b/xen/arch/x86/acpi/suspend.c	Thu Apr 14 15:12:07 2011 -0500
@@ -24,7 +24,7 @@
 
 void save_rest_processor_state(void)
 {
-    save_init_fpu(current);
+    fpu_save(current);
 
 #if defined(CONFIG_X86_64)
     asm volatile (
diff -r 476b4ba2c3b5 -r 4ed4e3d9106e xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c	Thu Apr 14 15:03:04 2011 -0500
+++ b/xen/arch/x86/domain.c	Thu Apr 14 15:12:07 2011 -0500
@@ -1576,7 +1576,7 @@
     if ( !is_idle_vcpu(p) )
     {
         memcpy(&p->arch.user_regs, stack_regs, CTXT_SWITCH_STACK_BYTES);
-        save_init_fpu(p);
+        fpu_save(p);
         p->arch.ctxt_switch_from(p);
     }
 
diff -r 476b4ba2c3b5 -r 4ed4e3d9106e xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c	Thu Apr 14 15:03:04 2011 -0500
+++ b/xen/arch/x86/hvm/svm/svm.c	Thu Apr 14 15:12:07 2011 -0500
@@ -348,7 +348,7 @@
 {
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
 
-    setup_fpu(v);
+    fpu_restore(v);
     vmcb_set_exception_intercepts(
         vmcb, vmcb_get_exception_intercepts(vmcb) & ~(1U << TRAP_no_device));
 }
diff -r 476b4ba2c3b5 -r 4ed4e3d9106e xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c	Thu Apr 14 15:03:04 2011 -0500
+++ b/xen/arch/x86/hvm/vmx/vmx.c	Thu Apr 14 15:12:07 2011 -0500
@@ -612,7 +612,7 @@
 
 static void vmx_fpu_enter(struct vcpu *v)
 {
-    setup_fpu(v);
+    fpu_restore(v);
     v->arch.hvm_vmx.exception_bitmap &= ~(1u << TRAP_no_device);
     vmx_update_exception_bitmap(v);
     v->arch.hvm_vmx.host_cr0 &= ~X86_CR0_TS;
diff -r 476b4ba2c3b5 -r 4ed4e3d9106e xen/arch/x86/i387.c
--- a/xen/arch/x86/i387.c	Thu Apr 14 15:03:04 2011 -0500
+++ b/xen/arch/x86/i387.c	Thu Apr 14 15:12:07 2011 -0500
@@ -16,212 +16,12 @@
 #include <asm/i387.h>
 #include <asm/asm_defns.h>
 
+/******************************/
+/*      x86 XSAVE/XRSTOR      */
+/******************************/
+#define XSTATE_CPUID 0x0000000d
+
 static bool_t __read_mostly cpu_has_xsaveopt;
-
-static void xsave(struct vcpu *v)
-{
-    struct xsave_struct *ptr = v->arch.xsave_area;
-
-    asm volatile (
-        ".byte " REX_PREFIX "0x0f,0xae,0x27"
-        :
-        : "a" (-1), "d" (-1), "D"(ptr)
-        : "memory" );
-}
-
-static void xsaveopt(struct vcpu *v)
-{
-    struct xsave_struct *ptr = v->arch.xsave_area;
-
-    asm volatile (
-        ".byte " REX_PREFIX "0x0f,0xae,0x37"
-        :
-        : "a" (-1), "d" (-1), "D"(ptr)
-        : "memory" );
-}
-
-static void xrstor(struct vcpu *v)
-{
-    struct xsave_struct *ptr = v->arch.xsave_area;
-
-    asm volatile (
-        ".byte " REX_PREFIX "0x0f,0xae,0x2f"
-        :
-        : "m" (*ptr), "a" (-1), "d" (-1), "D"(ptr) );
-}
-
-static void load_mxcsr(unsigned long val)
-{
-    val &= 0xffbf;
-    asm volatile ( "ldmxcsr %0" : : "m" (val) );
-}
-
-static void init_fpu(void);
-static void restore_fpu(struct vcpu *v);
-
-void setup_fpu(struct vcpu *v)
-{
-    ASSERT(!is_idle_vcpu(v));
-
-    /* Avoid recursion. */
-    clts();
-
-    if ( v->fpu_dirtied )
-        return;
-
-    if ( xsave_enabled(v) )
-    {
-        /*
-         * XCR0 normally represents what guest OS set. In case of Xen itself, 
-         * we set all supported feature mask before doing save/restore.
-         */
-        set_xcr0(v->arch.xcr0_accum);
-        xrstor(v);
-        set_xcr0(v->arch.xcr0);
-    }
-    else if ( v->fpu_initialised )
-    {
-        restore_fpu(v);
-    }
-    else
-    {
-        init_fpu();
-    }
-
-    v->fpu_initialised = 1;
-    v->fpu_dirtied = 1;
-}
-
-static void init_fpu(void)
-{
-    asm volatile ( "fninit" );
-    if ( cpu_has_xmm )
-        load_mxcsr(0x1f80);
-}
-
-void save_init_fpu(struct vcpu *v)
-{
-    unsigned long cr0;
-    char *fpu_ctxt;
-
-    if ( !v->fpu_dirtied )
-        return;
-
-    ASSERT(!is_idle_vcpu(v));
-
-    cr0 = read_cr0();
-    fpu_ctxt = v->arch.fpu_ctxt;
-
-    /* This can happen, if a paravirtualised guest OS has set its CR0.TS. */
-    if ( cr0 & X86_CR0_TS )
-        clts();
-
-    if ( xsave_enabled(v) )
-    {
-        /* XCR0 normally represents what guest OS set. In case of Xen itself,
-         * we set all accumulated feature mask before doing save/restore.
-         */
-        set_xcr0(v->arch.xcr0_accum);
-        if ( cpu_has_xsaveopt )
-            xsaveopt(v);
-        else
-            xsave(v);
-        set_xcr0(v->arch.xcr0);
-    }
-    else if ( cpu_has_fxsr )
-    {
-#ifdef __i386__
-        asm volatile (
-            "fxsave %0"
-            : "=m" (*fpu_ctxt) );
-#else /* __x86_64__ */
-        /*
-         * The only way to force fxsaveq on a wide range of gas versions. On 
-         * older versions the rex64 prefix works only if we force an
-         * addressing mode that doesn't require extended registers.
-         */
-        asm volatile (
-            REX64_PREFIX "fxsave (%1)"
-            : "=m" (*fpu_ctxt) : "cdaSDb" (fpu_ctxt) );
-#endif
-
-        /* Clear exception flags if FSW.ES is set. */
-        if ( unlikely(fpu_ctxt[2] & 0x80) )
-            asm volatile ("fnclex");
-
-        /*
-         * AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
-         * is pending. Clear the x87 state here by setting it to fixed
-         * values. The hypervisor data segment can be sometimes 0 and
-         * sometimes new user value. Both should be ok. Use the FPU saved
-         * data block as a safe address because it should be in L1.
-         */
-        if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
-        {
-            asm volatile (
-                "emms\n\t"  /* clear stack tags */
-                "fildl %0"  /* load to clear state */
-                : : "m" (*fpu_ctxt) );
-        }
-    }
-    else
-    {
-        /* FWAIT is required to make FNSAVE synchronous. */
-        asm volatile ( "fnsave %0 ; fwait" : "=m" (*fpu_ctxt) );
-    }
-
-    v->fpu_dirtied = 0;
-    write_cr0(cr0|X86_CR0_TS);
-}
-
-static void restore_fpu(struct vcpu *v)
-{
-    const char *fpu_ctxt = v->arch.fpu_ctxt;
-
-    /*
-     * FXRSTOR can fault if passed a corrupted data block. We handle this
-     * possibility, which may occur if the block was passed to us by control
-     * tools, by silently clearing the block.
-     */
-    if ( cpu_has_fxsr )
-    {
-        asm volatile (
-#ifdef __i386__
-            "1: fxrstor %0            \n"
-#else /* __x86_64__ */
-            /* See above for why the operands/constraints are this way. */
-            "1: " REX64_PREFIX "fxrstor (%2)\n"
-#endif
-            ".section .fixup,\"ax\"   \n"
-            "2: push %%"__OP"ax       \n"
-            "   push %%"__OP"cx       \n"
-            "   push %%"__OP"di       \n"
-            "   lea  %0,%%"__OP"di    \n"
-            "   mov  %1,%%ecx         \n"
-            "   xor  %%eax,%%eax      \n"
-            "   rep ; stosl           \n"
-            "   pop  %%"__OP"di       \n"
-            "   pop  %%"__OP"cx       \n"
-            "   pop  %%"__OP"ax       \n"
-            "   jmp  1b               \n"
-            ".previous                \n"
-            _ASM_EXTABLE(1b, 2b)
-            : 
-            : "m" (*fpu_ctxt),
-              "i" (sizeof(v->arch.xsave_area->fpu_sse)/4)
-#ifdef __x86_64__
-             ,"cdaSDb" (fpu_ctxt)
-#endif
-            );
-    }
-    else
-    {
-        asm volatile ( "frstor %0" : : "m" (*fpu_ctxt) );
-    }
-}
-
-#define XSTATE_CPUID 0xd
-
 /*
  * Maximum size (in byte) of the XSAVE/XRSTOR save area required by all
  * the supported and enabled features on the processor, including the
@@ -327,6 +127,229 @@
     return cpu_has_xsave;	
 }
 
+static void xsave(struct vcpu *v)
+{
+    struct xsave_struct *ptr = v->arch.xsave_area;
+
+    asm volatile (
+        ".byte " REX_PREFIX "0x0f,0xae,0x27"
+        :
+        : "a" (-1), "d" (-1), "D"(ptr)
+        : "memory" );
+}
+
+static void xsaveopt(struct vcpu *v)
+{
+    struct xsave_struct *ptr = v->arch.xsave_area;
+
+    asm volatile (
+        ".byte " REX_PREFIX "0x0f,0xae,0x37"
+        :
+        : "a" (-1), "d" (-1), "D"(ptr)
+        : "memory" );
+}
+
+static void xrstor(struct vcpu *v)
+{
+    struct xsave_struct *ptr = v->arch.xsave_area;
+
+    asm volatile (
+        ".byte " REX_PREFIX "0x0f,0xae,0x2f"
+        :
+        : "m" (*ptr), "a" (-1), "d" (-1), "D"(ptr) );
+}
+
+/******************************/
+/*          x86 FPU           */
+/******************************/
+#define MXCSR_DEFAULT 0x1f80
+
+static void fpu_init(struct vcpu *v)
+{
+    unsigned long val;
+
+    asm volatile ( "fninit" );
+    if ( cpu_has_xmm )
+    {
+        /* load default value into MXCSR control/status register */
+        val = MXCSR_DEFAULT;
+        asm volatile ( "ldmxcsr %0" : : "m" (val) );
+    }
+}
+
+/* Restore extended state */
+static inline void fpu_xrstor(struct vcpu *v)
+{
+    /*
+     * XCR0 normally represents what guest OS set. In case of Xen itself, 
+     * we set all supported feature mask before doing save/restore.
+     */
+    set_xcr0(v->arch.xcr0_accum);
+    xrstor(v);
+    set_xcr0(v->arch.xcr0);
+}
+
+/* Restore x87 FPU, MMX, XMM and MXCSR state */
+static inline void fpu_fxrstor(struct vcpu *v)
+{
+    const char *fpu_ctxt = v->arch.fpu_ctxt;
+
+    asm volatile (
+#ifdef __i386__
+        "1: fxrstor %0            \n"
+#else /* __x86_64__ */
+        /* See above for why the operands/constraints are this way. */
+        "1: " REX64_PREFIX "fxrstor (%2)\n"
+#endif
+        ".section .fixup,\"ax\"   \n"
+        "2: push %%"__OP"ax       \n"
+        "   push %%"__OP"cx       \n"
+        "   push %%"__OP"di       \n"
+        "   lea  %0,%%"__OP"di    \n"
+        "   mov  %1,%%ecx         \n"
+        "   xor  %%eax,%%eax      \n"
+        "   rep ; stosl           \n"
+        "   pop  %%"__OP"di       \n"
+        "   pop  %%"__OP"cx       \n"
+        "   pop  %%"__OP"ax       \n"
+        "   jmp  1b               \n"
+        ".previous                \n"
+        _ASM_EXTABLE(1b, 2b)
+        : 
+        : "m" (*fpu_ctxt),
+          "i" (sizeof(v->arch.xsave_area->fpu_sse)/4)
+#ifdef __x86_64__
+          ,"cdaSDb" (fpu_ctxt)
+#endif
+        );    
+}
+
+/* Restore x87 state */
+static inline void fpu_frstor(struct vcpu *v)
+{ 
+    const char *fpu_ctxt = v->arch.fpu_ctxt;
+
+    asm volatile ( "frstor %0" : : "m" (*fpu_ctxt) );
+}
+
+void fpu_restore(struct vcpu *v)
+{
+    ASSERT(!is_idle_vcpu(v));
+
+    /* Avoid recursion. */
+    clts();
+
+    if ( v->fpu_dirtied )
+        return;
+
+    if ( xsave_enabled(v) )
+    {
+        fpu_xrstor(v);
+    }
+    else if ( v->fpu_initialised )
+    {
+        if ( cpu_has_fxsr )
+            fpu_fxrstor(v);
+        else
+            fpu_frstor(v);
+    }
+    else
+    {
+        fpu_init(v);
+    }
+
+    v->fpu_initialised = 1;
+    v->fpu_dirtied = 1;
+}
+
+/* Save x87 extended state */
+static inline void fpu_xsave(struct vcpu *v)
+{
+    /* XCR0 normally represents what guest OS set. In case of Xen itself,
+     * we set all accumulated feature mask before doing save/restore.
+     */
+    set_xcr0(v->arch.xcr0_accum);
+    if ( cpu_has_xsaveopt )
+        xsaveopt(v);
+    else
+        xsave(v);
+    set_xcr0(v->arch.xcr0);    
+}
+
+/* Save x87 FPU, MMX, SSE and SSE2 state */
+static inline void fpu_fxsave(struct vcpu *v)
+{
+    char *fpu_ctxt = v->arch.fpu_ctxt;
+
+#ifdef __i386__
+    asm volatile (
+        "fxsave %0"
+        : "=m" (*fpu_ctxt) );
+#else /* __x86_64__ */
+    /*
+     * The only way to force fxsaveq on a wide range of gas versions. On 
+     * older versions the rex64 prefix works only if we force an
+     * addressing mode that doesn't require extended registers.
+     */
+    asm volatile (
+        REX64_PREFIX "fxsave (%1)"
+        : "=m" (*fpu_ctxt) : "cdaSDb" (fpu_ctxt) );
+#endif
+    
+    /* Clear exception flags if FSW.ES is set. */
+    if ( unlikely(fpu_ctxt[2] & 0x80) )
+        asm volatile ("fnclex");
+    
+    /*
+     * AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
+     * is pending. Clear the x87 state here by setting it to fixed
+     * values. The hypervisor data segment can be sometimes 0 and
+     * sometimes new user value. Both should be ok. Use the FPU saved
+     * data block as a safe address because it should be in L1.
+     */
+    if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
+    {
+        asm volatile (
+            "emms\n\t"  /* clear stack tags */
+            "fildl %0"  /* load to clear state */
+            : : "m" (*fpu_ctxt) );
+    }
+}
+
+/* Save x87 FPU state */
+static inline void fpu_fsave(struct vcpu *v)
+{
+    char *fpu_ctxt = v->arch.fpu_ctxt;
+
+    /* FWAIT is required to make FNSAVE synchronous. */
+    asm volatile ( "fnsave %0 ; fwait" : "=m" (*fpu_ctxt) );
+}
+
+void fpu_save(struct vcpu *v)
+{
+    unsigned long cr0;
+
+    if ( !v->fpu_dirtied )
+        return;
+
+    ASSERT(!is_idle_vcpu(v));
+
+    /* This can happen, if a paravirtualised guest OS has set its CR0.TS. */
+    cr0 = read_cr0();
+    if ( cr0 & X86_CR0_TS )
+        clts();
+
+    if ( xsave_enabled(v) )
+        fpu_xsave(v);
+    else if ( cpu_has_fxsr )
+        fpu_fxsave(v);
+    else
+        fpu_fsave(v);
+
+    v->fpu_dirtied = 0;
+    write_cr0(cr0|X86_CR0_TS);
+}
+
 /*
  * Local variables:
  * mode: C
diff -r 476b4ba2c3b5 -r 4ed4e3d9106e xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c	Thu Apr 14 15:03:04 2011 -0500
+++ b/xen/arch/x86/traps.c	Thu Apr 14 15:12:07 2011 -0500
@@ -3167,7 +3167,7 @@
 
     BUG_ON(!guest_mode(regs));
 
-    setup_fpu(curr);
+    fpu_restore(curr);
 
     if ( curr->arch.pv_vcpu.ctrlreg[0] & X86_CR0_TS )
     {
diff -r 476b4ba2c3b5 -r 4ed4e3d9106e xen/include/asm-x86/i387.h
--- a/xen/include/asm-x86/i387.h	Thu Apr 14 15:03:04 2011 -0500
+++ b/xen/include/asm-x86/i387.h	Thu Apr 14 15:12:07 2011 -0500
@@ -14,15 +14,13 @@
 #include <xen/types.h>
 #include <xen/percpu.h>
 
+/******************************/
+/*      x86 XSAVE/XRSTOR      */
+/******************************/
 struct vcpu;
 
 extern unsigned int xsave_cntxt_size;
 extern u64 xfeature_mask;
-
-void xsave_init(void);
-int xsave_alloc_save_area(struct vcpu *v);
-void xsave_free_save_area(struct vcpu *v);
-bool_t xsave_enabled(const struct vcpu *v);
 
 #define XSAVE_AREA_MIN_SIZE (512 + 64) /* FP/SSE + XSAVE.HEADER */
 #define XSTATE_FP       (1ULL << 0)
@@ -34,6 +32,16 @@
 #define XSTATE_YMM_OFFSET  XSAVE_AREA_MIN_SIZE
 #define XSTATE_YMM_SIZE    256
 #define XSAVEOPT        (1 << 0)
+
+#define XCR_XFEATURE_ENABLED_MASK   0
+
+#ifdef CONFIG_X86_64
+#define REX_PREFIX "0x48, "
+#else
+#define REX_PREFIX
+#endif
+
+DECLARE_PER_CPU(uint64_t, xcr0);
 
 struct xsave_struct
 {
@@ -48,15 +56,10 @@
     char   data[];                           /* Future new states */
 } __attribute__ ((packed, aligned (64)));
 
-#define XCR_XFEATURE_ENABLED_MASK   0
-
-#ifdef CONFIG_X86_64
-#define REX_PREFIX "0x48, "
-#else
-#define REX_PREFIX
-#endif
-
-DECLARE_PER_CPU(uint64_t, xcr0);
+void xsave_init(void);
+int xsave_alloc_save_area(struct vcpu *v);
+void xsave_free_save_area(struct vcpu *v);
+bool_t xsave_enabled(const struct vcpu *v);
 
 static inline void xsetbv(u32 index, u64 xfeatures)
 {
@@ -78,7 +81,10 @@
     return this_cpu(xcr0);
 }
 
-extern void setup_fpu(struct vcpu *v);
-extern void save_init_fpu(struct vcpu *v);
+/******************************/
+/*          x86 FPU           */
+/******************************/
+extern void fpu_save(struct vcpu *v);
+extern void fpu_restore(struct vcpu *v);
 
 #endif /* __ASM_I386_I387_H */

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

             reply	other threads:[~2011-04-14 20:37 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-04-14 20:37 Wei Huang [this message]
2011-04-15  9:20 ` [PATCH][RFC] FPU LWP 1/5: clean up the FPU code Keir Fraser
2011-04-15 16:22   ` Huang2, Wei

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4DA75B22.1010702@amd.com \
    --to=wei.huang2@amd.com \
    --cc=xen-devel@lists.xensource.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.