From: Andrew Cooper <andrew.cooper3@citrix.com>
Subject: Re: [PATCH 3/4] x86: use {rd,wr}{fs,gs}base when available
Date: Thu, 10 Oct 2013 15:15:10 +0100
Message-ID: <5256B66E.50906@citrix.com>
In-Reply-To: <5256CDCB02000078000FA3EA@nat28.tlf.novell.com>
To: Jan Beulich <jbeulich@suse.com>
Cc: xen-devel <xen-devel@lists.xenproject.org>, Keir Fraser

On 10/10/13 14:54, Jan Beulich wrote:
... as being intended to be faster than MSR reads/writes.

In the case of emulate_privileged_op() also use these in favor of the
cached (but possibly stale) addresses from arch.pv_vcpu. This allows
entirely removing the code that was the subject of XSA-67.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
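
As an aside, for anyone wanting to sanity-check the feature detection:
cpu_has_fsgsbase keys off CPUID.(EAX=7,ECX=0):EBX bit 0.  A throwaway
userspace sketch (mine, not part of the patch; assumes GCC/clang's
<cpuid.h>):

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
    unsigned int eax, ebx, ecx, edx;

    /* Leaf 0 reports the highest supported standard leaf. */
    if ( !__get_cpuid(0, &eax, &ebx, &ecx, &edx) || eax < 7 )
    {
        puts("CPUID leaf 7 not supported");
        return 1;
    }

    /* Structured extended feature flags: leaf 7, subleaf 0. */
    __cpuid_count(7, 0, eax, ebx, ecx, edx);
    printf("FSGSBASE: %s\n", (ebx & 1) ? "present" : "absent");
    return 0;
}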


--- a/xen/arch/x86/Rules.mk
+++ b/xen/arch/x86/Rules.mk
@@ -31,6 +31,7 @@ $(call cc-options-add,CFLAGS,CC,$(EMBEDD
 $(call cc-option-add,CFLAGS,CC,-Wnested-externs)
 $(call as-insn-check,CFLAGS,CC,"vmcall",-DHAVE_GAS_VMX)
 $(call as-insn-check,CFLAGS,CC,"invept (%rax)$$(comma)%rax",-DHAVE_GAS_EPT)
+$(call as-insn-check,CFLAGS,CC,"rdfsbase %rax",-DHAVE_GAS_FSGSBASE)
 
 ifeq ($(supervisor_mode_kernel),y)
 CFLAGS += -DCONFIG_X86_SUPERVISOR_MODE_KERNEL=1
--- a/xen/arch/x86/acpi/suspend.c
+++ b/xen/arch/x86/acpi/suspend.c
@@ -27,8 +27,8 @@ void save_rest_processor_state(void)
     asm volatile (
         "movw %%ds,(%0); movw %%es,2(%0); movw %%fs,4(%0); movw %%gs,6(%0)"
         : : "r" (saved_segs) : "memory" );
-    rdmsrl(MSR_FS_BASE, saved_fs_base);
-    rdmsrl(MSR_GS_BASE, saved_gs_base);
+    saved_fs_base = rdfsbase();
+    saved_gs_base = rdgsbase();
     rdmsrl(MSR_SHADOW_GS_BASE, saved_kernel_gs_base);
     rdmsrl(MSR_CSTAR, saved_cstar);
     rdmsrl(MSR_LSTAR, saved_lstar);
@@ -56,8 +56,8 @@ void restore_rest_processor_state(void)
           X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_TF,
           0U);
 
-    wrmsrl(MSR_FS_BASE, saved_fs_base);
-    wrmsrl(MSR_GS_BASE, saved_gs_base);
+    wrfsbase(saved_fs_base);
+    wrgsbase(saved_gs_base);
     wrmsrl(MSR_SHADOW_GS_BASE, saved_kernel_gs_base);
 
     if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ||
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1092,7 +1092,7 @@ static void load_segments(struct vcpu *n
     {
         /* This can only be non-zero if selector is NULL. */
         if ( n->arch.pv_vcpu.fs_base )
-            wrmsrl(MSR_FS_BASE, n->arch.pv_vcpu.fs_base);
+            wrfsbase(n->arch.pv_vcpu.fs_base);
 
         /* Most kernels have non-zero GS base, so don't bother testing. */
         /* (This is also a serialising instruction, avoiding AMD erratum #88.) */
@@ -1100,7 +1100,7 @@ static void load_segments(struct vcpu *n
 
         /* This can only be non-zero if selector is NULL. */
         if ( n->arch.pv_vcpu.gs_base_user )
-            wrmsrl(MSR_GS_BASE, n->arch.pv_vcpu.gs_base_user);
+            wrgsbase(n->arch.pv_vcpu.gs_base_user);
 
         /* If in kernel mode then switch the GS bases around. */
         if ( (n->arch.flags & TF_kernel_mode) )
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -1264,6 +1264,9 @@ void __init __start_xen(unsigned long mb
     if ( cpu_has_smep )
         set_in_cr4(X86_CR4_SMEP);
 
+    if ( cpu_has_fsgsbase )
+        set_in_cr4(X86_CR4_FSGSBASE);
+
     local_irq_enable();
 
     pt_pci_init();
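
(Worth noting: this CR4 bit is what actually gates the instructions.
With CR4.FSGSBASE clear they raise #UD even on capable hardware, which
is why the helpers need the cpu_has_fsgsbase check.  A self-contained
probe, mine rather than anything from Xen, demonstrating the gating
from userspace by catching the resulting SIGILL:)

#include <setjmp.h>
#include <signal.h>
#include <stdio.h>

static sigjmp_buf env;

static void on_sigill(int sig)
{
    (void)sig;
    siglongjmp(env, 1);
}

int main(void)
{
    signal(SIGILL, on_sigill);

    if ( sigsetjmp(env, 1) == 0 )
    {
        unsigned long base;

        /* Same encoding as the rdfsbase fallback below: rdfsbase %rax. */
        asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xc0" : "=a" (base) );
        printf("rdfsbase executed, fs base = %#lx\n", base);
    }
    else
        puts("rdfsbase => SIGILL (no hardware support, or CR4.FSGSBASE clear)");

    return 0;
}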
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -1985,28 +1985,18 @@ static int emulate_privileged_op(struct 
         }
         else
         {
-            if ( lm_ovr == lm_seg_none || data_sel < 4 )
+            switch ( lm_ovr )
             {
-                switch ( lm_ovr )
-                {
-                case lm_seg_none:
-                    data_base = 0UL;
-                    break;
-                case lm_seg_fs:
-                    data_base = v->arch.pv_vcpu.fs_base;
-                    break;
-                case lm_seg_gs:
-                    if ( guest_kernel_mode(v, regs) )
-                        data_base = v->arch.pv_vcpu.gs_base_kernel;
-                    else
-                        data_base = v->arch.pv_vcpu.gs_base_user;
-                    break;
-                }
+            default:
+                data_base = 0UL;
+                break;
+            case lm_seg_fs:
+                data_base = rdfsbase();
+                break;
+            case lm_seg_gs:
+                data_base = rdgsbase();
+                break;
             }
-            else if ( !read_descriptor(data_sel, v, regs,
-                                       &data_base, &data_limit, &ar, 0) ||
-                      !(ar & _SEGMENT_S) || !(ar & _SEGMENT_P) )
-                goto fail;
             data_limit = ~0UL;
             ar = _SEGMENT_WR|_SEGMENT_S|_SEGMENT_DPL|_SEGMENT_P;
         }
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -457,12 +457,12 @@ unsigned long pv_guest_cr4_fixup(const s
     (((v)->arch.pv_vcpu.ctrlreg[4]                          \
       | (mmu_cr4_features                                   \
          & (X86_CR4_PGE | X86_CR4_PSE | X86_CR4_SMEP |      \
-            X86_CR4_OSXSAVE))                               \
+            X86_CR4_OSXSAVE | X86_CR4_FSGSBASE))            \
       | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0))         \
      & ~X86_CR4_DE)
 #define real_cr4_to_pv_guest_cr4(c)                         \
-    ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD        \
-             | X86_CR4_OSXSAVE | X86_CR4_SMEP))
+    ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD |      \
+             X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE))
 
 void domain_cpuid(struct domain *d,
                   unsigned int  input,
--- a/xen/include/asm-x86/msr.h
+++ b/xen/include/asm-x86/msr.h
@@ -9,6 +9,7 @@
 #include <xen/percpu.h>
 #include <xen/errno.h>
 #include <asm/asm_defns.h>
+#include <asm/cpufeature.h>
 
 #define rdmsr(msr,val1,val2) \
      __asm__ __volatile__("rdmsr" \
@@ -97,6 +98,61 @@ static inline int wrmsr_safe(unsigned in
 			  : "=a" (low), "=d" (high) \
 			  : "c" (counter))
 
+static inline unsigned long rdfsbase(void)
+{
+    unsigned long base;
+
+    if ( cpu_has_fsgsbase )
+#ifdef HAVE_GAS_FSGSBASE
+        asm volatile ( "rdfsbase %0" : "=r" (base) );
+#else
+        asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xc0" : "=a" (base) );
+#endif
+    else
+        rdmsrl(MSR_FS_BASE, base);
+
+    return base;
+}
+
+static inline unsigned long rdgsbase(void)
+{
+    unsigned long base;
+
+    if ( cpu_has_fsgsbase )
+#ifdef HAVE_GAS_FSGSBASE
+        asm volatile ( "rdgsbase %0" : "=r" (base) );
+#else
+        asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xc8" : "=a" (base) );
+#endif
+    else
+        rdmsrl(MSR_GS_BASE, base);
+
+    return base;
+}
+
+static inline void wrfsbase(unsigned long base)
+{
+    if ( cpu_has_fsgsbase )
+#ifdef HAVE_GAS_FSGSBASE
+        asm volatile ( "wrfsbase %0" :: "r" (base) );
+#else
+        asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xd0" :: "a" (base) );
+#endif
+    else
+        wrmsrl(MSR_FS_BASE, base);
+}
+
+static inline void wrgsbase(unsigned long base)
+{
+    if ( cpu_has_fsgsbase )
+#ifdef HAVE_GAS_FSGSBASE
+        asm volatile ( "wrgsbase %0" :: "r" (base) );
+#else
+        asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xd8" :: "a" (base) );
+#endif
+    else
+        wrmsrl(MSR_GS_BASE, base);
+}
 
 DECLARE_PER_CPU(u64, efer);
 u64 read_efer(void);
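
For anyone stuck on a binutils without fsgsbase support, a comment
along these lines above the helpers might save the next reader a trip
to the SDM (my decode of F3 REX.W 0F AE /0../3, worth double-checking
against objdump):

/*
 *   f3 48 0f ae c0   rdfsbase %rax   (ModRM 0xc0: mod=11, reg=/0, rm=rax)
 *   f3 48 0f ae c8   rdgsbase %rax   (ModRM 0xc8: mod=11, reg=/1, rm=rax)
 *   f3 48 0f ae d0   wrfsbase %rax   (ModRM 0xd0: mod=11, reg=/2, rm=rax)
 *   f3 48 0f ae d8   wrgsbase %rax   (ModRM 0xd8: mod=11, reg=/3, rm=rax)
 *
 * The reg field carries the opcode extension and rm is fixed at 000
 * (%rax), hence the "a"/"=a" constraints rather than a plain "r".
 */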




_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
