* [PATCH 1/4] x86: correct LDT checks
2013-10-10 13:49 [PATCH 0/4] x86: XSA-67 follow-up Jan Beulich
@ 2013-10-10 13:53 ` Jan Beulich
2013-10-10 13:54 ` [PATCH 2/4] x86: add address validity check to guest_map_l1e() Jan Beulich
` (3 subsequent siblings)
4 siblings, 0 replies; 11+ messages in thread
From: Jan Beulich @ 2013-10-10 13:53 UTC (permalink / raw)
To: xen-devel; +Cc: Andrew Cooper, Keir Fraser
[-- Attachment #1: Type: text/plain, Size: 6681 bytes --]
- MMUEXT_SET_LDT should behave as similarly to the LLDT instruction as
possible: fail only if the base address is non-canonical
- instead, LDT descriptor accesses should fault if the descriptor
  address ends up being non-canonical (by ensuring this we at once
  avoid reading an entry from the mach-to-phys table and considering
  it a page table entry)
- fault propagation on using LDT selectors must distinguish #PF and #GP
(the latter must be raised for a non-canonical descriptor address,
which also applies to several other uses of propagate_page_fault(),
and hence the problem is being fixed there)
- map_ldt_shadow_page() should properly wrap addresses for 32-bit VMs
At once remove the odd invocation of map_ldt_shadow_page() from the
MMUEXT_SET_LDT handler: There's nothing really telling us that the
first LDT page is going to be preferred over others.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
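
As a point of reference, here is a minimal, illustrative sketch of the two
checks this change relies on, assuming a 48-bit virtual address width. The
helper names below are made up for illustration; the authoritative
definitions are Xen's __addr_ok() / is_canonical_address() macros and the
error code actually built in handle_gdt_ldt_mapping_fault():

    /* Illustrative only: a VA is canonical when bits 63..47 are all equal
     * to bit 47, i.e. the address is sign-extended from bit 47. */
    static inline int canonical_va(unsigned long addr)
    {
        return ((long)addr >> 47) == ((long)addr >> 63);
    }

    /* Illustrative only: #GP error code for an LDT descriptor access.
     * Layout: bits 15:3 = selector index, bit 2 = TI (set => LDT),
     * bits 1:0 (IDT, EXT) clear.  "offset" is the byte offset of the
     * faulting access within the LDT, i.e. index * 8 (+4 for the upper
     * half of a descriptor). */
    static inline unsigned short ldt_gp_error_code(unsigned int offset)
    {
        return (unsigned short)((offset & ~3u) | 4u);
    }
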
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -674,12 +674,7 @@ int arch_set_info_guest(
fixup_guest_code_selector(d, c.nat->trap_ctxt[i].cs);
}
- /* LDT safety checks. */
- if ( ((c.nat->ldt_base & (PAGE_SIZE-1)) != 0) ||
- (c.nat->ldt_ents > 8192) ||
- !array_access_ok(c.nat->ldt_base,
- c.nat->ldt_ents,
- LDT_ENTRY_SIZE) )
+ if ( !__addr_ok(c.nat->ldt_base) )
return -EINVAL;
}
else
@@ -692,15 +687,12 @@ int arch_set_info_guest(
for ( i = 0; i < ARRAY_SIZE(c.cmp->trap_ctxt); i++ )
fixup_guest_code_selector(d, c.cmp->trap_ctxt[i].cs);
-
- /* LDT safety checks. */
- if ( ((c.cmp->ldt_base & (PAGE_SIZE-1)) != 0) ||
- (c.cmp->ldt_ents > 8192) ||
- !compat_array_access_ok(c.cmp->ldt_base,
- c.cmp->ldt_ents,
- LDT_ENTRY_SIZE) )
- return -EINVAL;
}
+
+ /* LDT safety checks. */
+ if ( ((c(ldt_base) & (PAGE_SIZE - 1)) != 0) ||
+ (c(ldt_ents) > 8192) )
+ return -EINVAL;
}
v->fpu_initialised = !!(flags & VGCF_I387_VALID);
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -582,6 +582,8 @@ int map_ldt_shadow_page(unsigned int off
BUG_ON(unlikely(in_irq()));
+ if ( is_pv_32bit_domain(d) )
+ gva = (u32)gva;
guest_get_eff_kern_l1e(v, gva, &l1e);
if ( unlikely(!(l1e_get_flags(l1e) & _PAGE_PRESENT)) )
return 0;
@@ -3229,9 +3231,8 @@ long do_mmuext_op(
MEM_LOG("ignoring SET_LDT hypercall from external domain");
okay = 0;
}
- else if ( ((ptr & (PAGE_SIZE-1)) != 0) ||
- (ents > 8192) ||
- !array_access_ok(ptr, ents, LDT_ENTRY_SIZE) )
+ else if ( ((ptr & (PAGE_SIZE - 1)) != 0) || !__addr_ok(ptr) ||
+ (ents > 8192) )
{
okay = 0;
MEM_LOG("Bad args to SET_LDT: ptr=%lx, ents=%lx", ptr, ents);
@@ -3244,8 +3245,6 @@ long do_mmuext_op(
curr->arch.pv_vcpu.ldt_base = ptr;
curr->arch.pv_vcpu.ldt_ents = ents;
load_LDT(curr);
- if ( ents != 0 )
- (void)map_ldt_shadow_page(0);
}
break;
}
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -1065,12 +1065,24 @@ static void reserved_bit_page_fault(
show_execution_state(regs);
}
-void propagate_page_fault(unsigned long addr, u16 error_code)
+struct trap_bounce *propagate_page_fault(unsigned long addr, u16 error_code)
{
struct trap_info *ti;
struct vcpu *v = current;
struct trap_bounce *tb = &v->arch.pv_vcpu.trap_bounce;
+ if ( unlikely(!is_canonical_address(addr)) )
+ {
+ ti = &v->arch.pv_vcpu.trap_ctxt[TRAP_gp_fault];
+ tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
+ tb->error_code = 0;
+ tb->cs = ti->cs;
+ tb->eip = ti->address;
+ if ( TI_GET_IF(ti) )
+ tb->flags |= TBF_INTERRUPT;
+ return tb;
+ }
+
v->arch.pv_vcpu.ctrlreg[2] = addr;
arch_set_cr2(v, addr);
@@ -1097,6 +1109,8 @@ void propagate_page_fault(unsigned long
if ( unlikely(error_code & PFEC_reserved_bit) )
reserved_bit_page_fault(addr, guest_cpu_user_regs());
+
+ return NULL;
}
static int handle_gdt_ldt_mapping_fault(
@@ -1130,13 +1144,16 @@ static int handle_gdt_ldt_mapping_fault(
}
else
{
+ struct trap_bounce *tb;
+
/* In hypervisor mode? Leave it to the #PF handler to fix up. */
if ( !guest_mode(regs) )
return 0;
- /* In guest mode? Propagate #PF to guest, with adjusted %cr2. */
- propagate_page_fault(
- curr->arch.pv_vcpu.ldt_base + offset,
- regs->error_code);
+ /* In guest mode? Propagate fault to guest, with adjusted %cr2. */
+ tb = propagate_page_fault(curr->arch.pv_vcpu.ldt_base + offset,
+ regs->error_code);
+ if ( tb )
+ tb->error_code = ((u16)offset & ~3) | 4;
}
}
else
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -555,7 +555,7 @@ int new_guest_cr3(unsigned long pfn);
void make_cr3(struct vcpu *v, unsigned long mfn);
void update_cr3(struct vcpu *v);
int vcpu_destroy_pagetables(struct vcpu *);
-void propagate_page_fault(unsigned long addr, u16 error_code);
+struct trap_bounce *propagate_page_fault(unsigned long addr, u16 error_code);
void *do_page_walk(struct vcpu *v, unsigned long addr);
int __sync_local_execstate(void);
--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -386,7 +386,8 @@ guest_get_eff_l1e(struct vcpu *v, unsign
if ( likely(!paging_mode_translate(v->domain)) )
{
ASSERT(!paging_mode_external(v->domain));
- if ( __copy_from_user(eff_l1e,
+ if ( !__addr_ok(addr) ||
+ __copy_from_user(eff_l1e,
&__linear_l1_table[l1_linear_offset(addr)],
sizeof(l1_pgentry_t)) != 0 )
*(l1_pgentry_t *)eff_l1e = l1e_empty();
* [PATCH 3/4] x86: use {rd,wr}{fs,gs}base when available
2013-10-10 13:49 [PATCH 0/4] x86: XSA-67 follow-up Jan Beulich
2013-10-10 13:53 ` [PATCH 1/4] x86: correct LDT checks Jan Beulich
2013-10-10 13:54 ` [PATCH 2/4] x86: add address validity check to guest_map_l1e() Jan Beulich
@ 2013-10-10 13:54 ` Jan Beulich
2013-10-10 14:15 ` Andrew Cooper
2013-10-10 13:55 ` [PATCH 4/4] x86: check for canonical address before doing page walks Jan Beulich
2013-10-10 15:13 ` [PATCH 0/4] x86: XSA-67 follow-up Keir Fraser
4 siblings, 1 reply; 11+ messages in thread
From: Jan Beulich @ 2013-10-10 13:54 UTC (permalink / raw)
To: xen-devel; +Cc: Keir Fraser
[-- Attachment #1: Type: text/plain, Size: 7197 bytes --]
... as these are intended to be faster than MSR reads/writes.
In the case of emulate_privileged_op() also use these in favor of the
cached (but possibly stale) addresses from arch.pv_vcpu. This allows
entirely removing the code that was the subject of XSA-67.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
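
For background, a minimal detection sketch (illustrative only, not the Xen
code; the real interfaces are cpu_has_fsgsbase and set_in_cr4() in the hunks
below): FSGSBASE support is advertised in CPUID.(EAX=7,ECX=0):EBX bit 0, and
the RD/WR{FS,GS}BASE instructions raise #UD unless CR4.FSGSBASE (bit 16) has
also been enabled, which is what the setup.c change does.

    /* Illustrative only: assumes the maximum basic CPUID leaf is >= 7. */
    #define CR4_FSGSBASE (1UL << 16)   /* must be set before using rdfsbase etc. */

    static inline int cpu_supports_fsgsbase(void)
    {
        unsigned int eax = 7, ebx, ecx = 0, edx;

        asm volatile ( "cpuid"
                       : "+a" (eax), "=b" (ebx), "+c" (ecx), "=d" (edx) );
        return ebx & 1;                /* CPUID.7.0:EBX[0] = FSGSBASE */
    }
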
--- a/xen/arch/x86/Rules.mk
+++ b/xen/arch/x86/Rules.mk
@@ -31,6 +31,7 @@ $(call cc-options-add,CFLAGS,CC,$(EMBEDD
$(call cc-option-add,CFLAGS,CC,-Wnested-externs)
$(call as-insn-check,CFLAGS,CC,"vmcall",-DHAVE_GAS_VMX)
$(call as-insn-check,CFLAGS,CC,"invept (%rax)$$(comma)%rax",-DHAVE_GAS_EPT)
+$(call as-insn-check,CFLAGS,CC,"rdfsbase %rax",-DHAVE_GAS_FSGSBASE)
ifeq ($(supervisor_mode_kernel),y)
CFLAGS += -DCONFIG_X86_SUPERVISOR_MODE_KERNEL=1
--- a/xen/arch/x86/acpi/suspend.c
+++ b/xen/arch/x86/acpi/suspend.c
@@ -27,8 +27,8 @@ void save_rest_processor_state(void)
asm volatile (
"movw %%ds,(%0); movw %%es,2(%0); movw %%fs,4(%0); movw %%gs,6(%0)"
: : "r" (saved_segs) : "memory" );
- rdmsrl(MSR_FS_BASE, saved_fs_base);
- rdmsrl(MSR_GS_BASE, saved_gs_base);
+ saved_fs_base = rdfsbase();
+ saved_gs_base = rdgsbase();
rdmsrl(MSR_SHADOW_GS_BASE, saved_kernel_gs_base);
rdmsrl(MSR_CSTAR, saved_cstar);
rdmsrl(MSR_LSTAR, saved_lstar);
@@ -56,8 +56,8 @@ void restore_rest_processor_state(void)
X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_TF,
0U);
- wrmsrl(MSR_FS_BASE, saved_fs_base);
- wrmsrl(MSR_GS_BASE, saved_gs_base);
+ wrfsbase(saved_fs_base);
+ wrgsbase(saved_gs_base);
wrmsrl(MSR_SHADOW_GS_BASE, saved_kernel_gs_base);
if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ||
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1092,7 +1092,7 @@ static void load_segments(struct vcpu *n
{
/* This can only be non-zero if selector is NULL. */
if ( n->arch.pv_vcpu.fs_base )
- wrmsrl(MSR_FS_BASE, n->arch.pv_vcpu.fs_base);
+ wrfsbase(n->arch.pv_vcpu.fs_base);
/* Most kernels have non-zero GS base, so don't bother testing. */
/* (This is also a serialising instruction, avoiding AMD erratum #88.) */
@@ -1100,7 +1100,7 @@ static void load_segments(struct vcpu *n
/* This can only be non-zero if selector is NULL. */
if ( n->arch.pv_vcpu.gs_base_user )
- wrmsrl(MSR_GS_BASE, n->arch.pv_vcpu.gs_base_user);
+ wrgsbase(n->arch.pv_vcpu.gs_base_user);
/* If in kernel mode then switch the GS bases around. */
if ( (n->arch.flags & TF_kernel_mode) )
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -1264,6 +1264,9 @@ void __init __start_xen(unsigned long mb
if ( cpu_has_smep )
set_in_cr4(X86_CR4_SMEP);
+ if ( cpu_has_fsgsbase )
+ set_in_cr4(X86_CR4_FSGSBASE);
+
local_irq_enable();
pt_pci_init();
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -1985,28 +1985,18 @@ static int emulate_privileged_op(struct
}
else
{
- if ( lm_ovr == lm_seg_none || data_sel < 4 )
+ switch ( lm_ovr )
{
- switch ( lm_ovr )
- {
- case lm_seg_none:
- data_base = 0UL;
- break;
- case lm_seg_fs:
- data_base = v->arch.pv_vcpu.fs_base;
- break;
- case lm_seg_gs:
- if ( guest_kernel_mode(v, regs) )
- data_base = v->arch.pv_vcpu.gs_base_kernel;
- else
- data_base = v->arch.pv_vcpu.gs_base_user;
- break;
- }
+ default:
+ data_base = 0UL;
+ break;
+ case lm_seg_fs:
+ data_base = rdfsbase();
+ break;
+ case lm_seg_gs:
+ data_base = rdgsbase();
+ break;
}
- else if ( !read_descriptor(data_sel, v, regs,
- &data_base, &data_limit, &ar, 0) ||
- !(ar & _SEGMENT_S) || !(ar & _SEGMENT_P) )
- goto fail;
data_limit = ~0UL;
ar = _SEGMENT_WR|_SEGMENT_S|_SEGMENT_DPL|_SEGMENT_P;
}
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -457,12 +457,12 @@ unsigned long pv_guest_cr4_fixup(const s
(((v)->arch.pv_vcpu.ctrlreg[4] \
| (mmu_cr4_features \
& (X86_CR4_PGE | X86_CR4_PSE | X86_CR4_SMEP | \
- X86_CR4_OSXSAVE)) \
+ X86_CR4_OSXSAVE | X86_CR4_FSGSBASE)) \
| ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0)) \
& ~X86_CR4_DE)
#define real_cr4_to_pv_guest_cr4(c) \
- ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD \
- | X86_CR4_OSXSAVE | X86_CR4_SMEP))
+ ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD | \
+ X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE))
void domain_cpuid(struct domain *d,
unsigned int input,
--- a/xen/include/asm-x86/msr.h
+++ b/xen/include/asm-x86/msr.h
@@ -9,6 +9,7 @@
#include <xen/percpu.h>
#include <xen/errno.h>
#include <asm/asm_defns.h>
+#include <asm/cpufeature.h>
#define rdmsr(msr,val1,val2) \
__asm__ __volatile__("rdmsr" \
@@ -97,6 +98,61 @@ static inline int wrmsr_safe(unsigned in
: "=a" (low), "=d" (high) \
: "c" (counter))
+static inline unsigned long rdfsbase(void)
+{
+ unsigned long base;
+
+ if ( cpu_has_fsgsbase )
+#ifdef HAVE_GAS_FSGSBASE
+ asm volatile ( "rdfsbase %0" : "=r" (base) );
+#else
+ asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xc0" : "=a" (base) );
+#endif
+ else
+ rdmsrl(MSR_FS_BASE, base);
+
+ return base;
+}
+
+static inline unsigned long rdgsbase(void)
+{
+ unsigned long base;
+
+ if ( cpu_has_fsgsbase )
+#ifdef HAVE_GAS_FSGSBASE
+ asm volatile ( "rdgsbase %0" : "=r" (base) );
+#else
+ asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xc8" : "=a" (base) );
+#endif
+ else
+ rdmsrl(MSR_GS_BASE, base);
+
+ return base;
+}
+
+static inline void wrfsbase(unsigned long base)
+{
+ if ( cpu_has_fsgsbase )
+#ifdef HAVE_GAS_FSGSBASE
+ asm volatile ( "wrfsbase %0" :: "r" (base) );
+#else
+ asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xd0" :: "a" (base) );
+#endif
+ else
+ wrmsrl(MSR_FS_BASE, base);
+}
+
+static inline void wrgsbase(unsigned long base)
+{
+ if ( cpu_has_fsgsbase )
+#ifdef HAVE_GAS_FSGSBASE
+ asm volatile ( "wrgsbase %0" :: "r" (base) );
+#else
+ asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xd8" :: "a" (base) );
+#endif
+ else
+ wrmsrl(MSR_GS_BASE, base);
+}
DECLARE_PER_CPU(u64, efer);
u64 read_efer(void);