* [PATCH 0/3] guest context management adjustments
@ 2014-02-05 14:48 Jan Beulich
2014-02-05 14:52 ` [PATCH 1/3] x86: fix FS/GS base handling when using the fsgsbase feature Jan Beulich
` (4 more replies)
0 siblings, 5 replies; 12+ messages in thread
From: Jan Beulich @ 2014-02-05 14:48 UTC (permalink / raw)
To: xen-devel; +Cc: George Dunlap, Keir Fraser
1: x86: fix FS/GS base handling when using the fsgsbase feature
2: domctl: also pause domain for extended context updates
3: domctl: pause vCPU for context reads
Signed-off-by: Jan Beulich <jbeulich@suse.com>
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH 1/3] x86: fix FS/GS base handling when using the fsgsbase feature
2014-02-05 14:48 [PATCH 0/3] guest context management adjustments Jan Beulich
@ 2014-02-05 14:52 ` Jan Beulich
2014-02-05 15:50 ` Andrew Cooper
2014-02-05 14:54 ` [PATCH 2/3] domctl: also pause domain for extended context updates Jan Beulich
` (3 subsequent siblings)
4 siblings, 1 reply; 12+ messages in thread
From: Jan Beulich @ 2014-02-05 14:52 UTC (permalink / raw)
To: xen-devel; +Cc: George Dunlap, Keir Fraser
[-- Attachment #1: Type: text/plain, Size: 5075 bytes --]
In that case, due to the respective instructions not being privileged,
we can't rely on our in-memory data to always be correct: While the
guest is running, it may change without us knowing about it. Therefore
we need to
- read the correct values from hardware during context switch out
(save_segments())
- read the correct values from hardware during RDMSR emulation
- update in-memory values during guest mode change
(toggle_guest_mode())
For completeness/consistency, WRMSR emulation is also being switched
to use wr[fg]sbase().
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1251,6 +1251,15 @@ static void save_segments(struct vcpu *v
regs->fs = read_segment_register(fs);
regs->gs = read_segment_register(gs);
+ if ( cpu_has_fsgsbase && !is_pv_32bit_vcpu(v) )
+ {
+ v->arch.pv_vcpu.fs_base = __rdfsbase();
+ if ( v->arch.flags & TF_kernel_mode )
+ v->arch.pv_vcpu.gs_base_kernel = __rdgsbase();
+ else
+ v->arch.pv_vcpu.gs_base_user = __rdgsbase();
+ }
+
if ( regs->ds )
dirty_segment_mask |= DIRTY_DS;
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -2382,15 +2382,13 @@ static int emulate_privileged_op(struct
case MSR_FS_BASE:
if ( is_pv_32on64_vcpu(v) )
goto fail;
- if ( wrmsr_safe(MSR_FS_BASE, msr_content) )
- goto fail;
+ wrfsbase(msr_content);
v->arch.pv_vcpu.fs_base = msr_content;
break;
case MSR_GS_BASE:
if ( is_pv_32on64_vcpu(v) )
goto fail;
- if ( wrmsr_safe(MSR_GS_BASE, msr_content) )
- goto fail;
+ wrgsbase(msr_content);
v->arch.pv_vcpu.gs_base_kernel = msr_content;
break;
case MSR_SHADOW_GS_BASE:
@@ -2535,15 +2533,14 @@ static int emulate_privileged_op(struct
case MSR_FS_BASE:
if ( is_pv_32on64_vcpu(v) )
goto fail;
- regs->eax = v->arch.pv_vcpu.fs_base & 0xFFFFFFFFUL;
- regs->edx = v->arch.pv_vcpu.fs_base >> 32;
- break;
+ val = cpu_has_fsgsbase ? __rdfsbase() : v->arch.pv_vcpu.fs_base;
+ goto rdmsr_writeback;
case MSR_GS_BASE:
if ( is_pv_32on64_vcpu(v) )
goto fail;
- regs->eax = v->arch.pv_vcpu.gs_base_kernel & 0xFFFFFFFFUL;
- regs->edx = v->arch.pv_vcpu.gs_base_kernel >> 32;
- break;
+ val = cpu_has_fsgsbase ? __rdgsbase()
+ : v->arch.pv_vcpu.gs_base_kernel;
+ goto rdmsr_writeback;
case MSR_SHADOW_GS_BASE:
if ( is_pv_32on64_vcpu(v) )
goto fail;
--- a/xen/arch/x86/x86_64/traps.c
+++ b/xen/arch/x86/x86_64/traps.c
@@ -257,6 +257,13 @@ void toggle_guest_mode(struct vcpu *v)
{
if ( is_pv_32bit_vcpu(v) )
return;
+ if ( cpu_has_fsgsbase )
+ {
+ if ( v->arch.flags & TF_kernel_mode )
+ v->arch.pv_vcpu.gs_base_kernel = __rdgsbase();
+ else
+ v->arch.pv_vcpu.gs_base_user = __rdgsbase();
+ }
v->arch.flags ^= TF_kernel_mode;
asm volatile ( "swapgs" );
update_cr3(v);
--- a/xen/include/asm-x86/msr.h
+++ b/xen/include/asm-x86/msr.h
@@ -98,34 +98,52 @@ static inline int wrmsr_safe(unsigned in
: "=a" (low), "=d" (high) \
: "c" (counter))
-static inline unsigned long rdfsbase(void)
+static inline unsigned long __rdfsbase(void)
{
unsigned long base;
- if ( cpu_has_fsgsbase )
#ifdef HAVE_GAS_FSGSBASE
- asm volatile ( "rdfsbase %0" : "=r" (base) );
+ asm volatile ( "rdfsbase %0" : "=r" (base) );
#else
- asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xc0" : "=a" (base) );
+ asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xc0" : "=a" (base) );
#endif
- else
- rdmsrl(MSR_FS_BASE, base);
return base;
}
-static inline unsigned long rdgsbase(void)
+static inline unsigned long __rdgsbase(void)
{
unsigned long base;
- if ( cpu_has_fsgsbase )
#ifdef HAVE_GAS_FSGSBASE
- asm volatile ( "rdgsbase %0" : "=r" (base) );
+ asm volatile ( "rdgsbase %0" : "=r" (base) );
#else
- asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xc8" : "=a" (base) );
+ asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xc8" : "=a" (base) );
#endif
- else
- rdmsrl(MSR_GS_BASE, base);
+
+ return base;
+}
+
+static inline unsigned long rdfsbase(void)
+{
+ unsigned long base;
+
+ if ( cpu_has_fsgsbase )
+ return __rdfsbase();
+
+ rdmsrl(MSR_FS_BASE, base);
+
+ return base;
+}
+
+static inline unsigned long rdgsbase(void)
+{
+ unsigned long base;
+
+ if ( cpu_has_fsgsbase )
+ return __rdgsbase();
+
+ rdmsrl(MSR_GS_BASE, base);
return base;
}
[-- Attachment #2: x86-pv-fsgsbase-rw.patch --]
[-- Type: text/plain, Size: 5135 bytes --]
x86: fix FS/GS base handling when using the fsgsbase feature
In that case, due to the respective instructions not being privileged,
we can't rely on our in-memory data to always be correct: While the
guest is running, it may change without us knowing about it. Therefore
we need to
- read the correct values from hardware during context switch out
(save_segments())
- read the correct values from hardware during RDMSR emulation
- update in-memory values during guest mode change
(toggle_guest_mode())
For completeness/consistency, WRMSR emulation is also being switched
to use wr[fg]sbase().
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1251,6 +1251,15 @@ static void save_segments(struct vcpu *v
regs->fs = read_segment_register(fs);
regs->gs = read_segment_register(gs);
+ if ( cpu_has_fsgsbase && !is_pv_32bit_vcpu(v) )
+ {
+ v->arch.pv_vcpu.fs_base = __rdfsbase();
+ if ( v->arch.flags & TF_kernel_mode )
+ v->arch.pv_vcpu.gs_base_kernel = __rdgsbase();
+ else
+ v->arch.pv_vcpu.gs_base_user = __rdgsbase();
+ }
+
if ( regs->ds )
dirty_segment_mask |= DIRTY_DS;
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -2382,15 +2382,13 @@ static int emulate_privileged_op(struct
case MSR_FS_BASE:
if ( is_pv_32on64_vcpu(v) )
goto fail;
- if ( wrmsr_safe(MSR_FS_BASE, msr_content) )
- goto fail;
+ wrfsbase(msr_content);
v->arch.pv_vcpu.fs_base = msr_content;
break;
case MSR_GS_BASE:
if ( is_pv_32on64_vcpu(v) )
goto fail;
- if ( wrmsr_safe(MSR_GS_BASE, msr_content) )
- goto fail;
+ wrgsbase(msr_content);
v->arch.pv_vcpu.gs_base_kernel = msr_content;
break;
case MSR_SHADOW_GS_BASE:
@@ -2535,15 +2533,14 @@ static int emulate_privileged_op(struct
case MSR_FS_BASE:
if ( is_pv_32on64_vcpu(v) )
goto fail;
- regs->eax = v->arch.pv_vcpu.fs_base & 0xFFFFFFFFUL;
- regs->edx = v->arch.pv_vcpu.fs_base >> 32;
- break;
+ val = cpu_has_fsgsbase ? __rdfsbase() : v->arch.pv_vcpu.fs_base;
+ goto rdmsr_writeback;
case MSR_GS_BASE:
if ( is_pv_32on64_vcpu(v) )
goto fail;
- regs->eax = v->arch.pv_vcpu.gs_base_kernel & 0xFFFFFFFFUL;
- regs->edx = v->arch.pv_vcpu.gs_base_kernel >> 32;
- break;
+ val = cpu_has_fsgsbase ? __rdgsbase()
+ : v->arch.pv_vcpu.gs_base_kernel;
+ goto rdmsr_writeback;
case MSR_SHADOW_GS_BASE:
if ( is_pv_32on64_vcpu(v) )
goto fail;
--- a/xen/arch/x86/x86_64/traps.c
+++ b/xen/arch/x86/x86_64/traps.c
@@ -257,6 +257,13 @@ void toggle_guest_mode(struct vcpu *v)
{
if ( is_pv_32bit_vcpu(v) )
return;
+ if ( cpu_has_fsgsbase )
+ {
+ if ( v->arch.flags & TF_kernel_mode )
+ v->arch.pv_vcpu.gs_base_kernel = __rdgsbase();
+ else
+ v->arch.pv_vcpu.gs_base_user = __rdgsbase();
+ }
v->arch.flags ^= TF_kernel_mode;
asm volatile ( "swapgs" );
update_cr3(v);
--- a/xen/include/asm-x86/msr.h
+++ b/xen/include/asm-x86/msr.h
@@ -98,34 +98,52 @@ static inline int wrmsr_safe(unsigned in
: "=a" (low), "=d" (high) \
: "c" (counter))
-static inline unsigned long rdfsbase(void)
+static inline unsigned long __rdfsbase(void)
{
unsigned long base;
- if ( cpu_has_fsgsbase )
#ifdef HAVE_GAS_FSGSBASE
- asm volatile ( "rdfsbase %0" : "=r" (base) );
+ asm volatile ( "rdfsbase %0" : "=r" (base) );
#else
- asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xc0" : "=a" (base) );
+ asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xc0" : "=a" (base) );
#endif
- else
- rdmsrl(MSR_FS_BASE, base);
return base;
}
-static inline unsigned long rdgsbase(void)
+static inline unsigned long __rdgsbase(void)
{
unsigned long base;
- if ( cpu_has_fsgsbase )
#ifdef HAVE_GAS_FSGSBASE
- asm volatile ( "rdgsbase %0" : "=r" (base) );
+ asm volatile ( "rdgsbase %0" : "=r" (base) );
#else
- asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xc8" : "=a" (base) );
+ asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xc8" : "=a" (base) );
#endif
- else
- rdmsrl(MSR_GS_BASE, base);
+
+ return base;
+}
+
+static inline unsigned long rdfsbase(void)
+{
+ unsigned long base;
+
+ if ( cpu_has_fsgsbase )
+ return __rdfsbase();
+
+ rdmsrl(MSR_FS_BASE, base);
+
+ return base;
+}
+
+static inline unsigned long rdgsbase(void)
+{
+ unsigned long base;
+
+ if ( cpu_has_fsgsbase )
+ return __rdgsbase();
+
+ rdmsrl(MSR_GS_BASE, base);
return base;
}
[-- Attachment #3: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH 2/3] domctl: also pause domain for extended context updates
2014-02-05 14:48 [PATCH 0/3] guest context management adjustments Jan Beulich
2014-02-05 14:52 ` [PATCH 1/3] x86: fix FS/GS base handling when using the fsgsbase feature Jan Beulich
@ 2014-02-05 14:54 ` Jan Beulich
2014-02-05 15:05 ` Andrew Cooper
2014-02-05 14:54 ` [PATCH 3/3] domctl: pause vCPU for context reads Jan Beulich
` (2 subsequent siblings)
4 siblings, 1 reply; 12+ messages in thread
From: Jan Beulich @ 2014-02-05 14:54 UTC (permalink / raw)
To: xen-devel; +Cc: George Dunlap, Keir Fraser
[-- Attachment #1: Type: text/plain, Size: 2216 bytes --]
This is not just for consistency with "base" context updates, but
actually needed so that guest side accesses can't race with control
domain side updates.
This would have been a security issue if XSA-77 hadn't waived them on
the affected domctl operation.
While looking at the code I also spotted a redundant NULL check in the
"base" context update handling code, which is being removed.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -853,6 +853,8 @@ long arch_do_domctl(
}
else
{
+ if ( d == current->domain ) /* no domain_pause() */
+ break;
ret = -EINVAL;
if ( evc->size < offsetof(typeof(*evc), vmce) )
break;
@@ -861,6 +863,7 @@ long arch_do_domctl(
if ( !is_canonical_address(evc->sysenter_callback_eip) ||
!is_canonical_address(evc->syscall32_callback_eip) )
break;
+ domain_pause(d);
fixup_guest_code_selector(d, evc->sysenter_callback_cs);
v->arch.pv_vcpu.sysenter_callback_cs =
evc->sysenter_callback_cs;
@@ -881,6 +884,8 @@ long arch_do_domctl(
(evc->syscall32_callback_cs & ~3) ||
evc->syscall32_callback_eip )
break;
+ else
+ domain_pause(d);
BUILD_BUG_ON(offsetof(struct xen_domctl_ext_vcpucontext,
mcg_cap) !=
@@ -899,6 +904,8 @@ long arch_do_domctl(
}
else
ret = 0;
+
+ domain_unpause(d);
}
}
break;
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -334,10 +334,6 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xe
unsigned int vcpu = op->u.vcpucontext.vcpu;
struct vcpu *v;
- ret = -ESRCH;
- if ( d == NULL )
- break;
-
ret = -EINVAL;
if ( (d == current->domain) || /* no domain_pause() */
(vcpu >= d->max_vcpus) || ((v = d->vcpu[vcpu]) == NULL) )
[-- Attachment #2: vcpu-set-context-pause.patch --]
[-- Type: text/plain, Size: 2270 bytes --]
domctl: also pause domain for "extended" context updates
This is not just for consistency with "base" context updates, but
actually needed so that guest side accesses can't race with control
domain side updates.
This would have been a security issue if XSA-77 hadn't waived them on
the affected domctl operation.
While looking at the code I also spotted a redundant NULL check in the
"base" context update handling code, which is being removed.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -853,6 +853,8 @@ long arch_do_domctl(
}
else
{
+ if ( d == current->domain ) /* no domain_pause() */
+ break;
ret = -EINVAL;
if ( evc->size < offsetof(typeof(*evc), vmce) )
break;
@@ -861,6 +863,7 @@ long arch_do_domctl(
if ( !is_canonical_address(evc->sysenter_callback_eip) ||
!is_canonical_address(evc->syscall32_callback_eip) )
break;
+ domain_pause(d);
fixup_guest_code_selector(d, evc->sysenter_callback_cs);
v->arch.pv_vcpu.sysenter_callback_cs =
evc->sysenter_callback_cs;
@@ -881,6 +884,8 @@ long arch_do_domctl(
(evc->syscall32_callback_cs & ~3) ||
evc->syscall32_callback_eip )
break;
+ else
+ domain_pause(d);
BUILD_BUG_ON(offsetof(struct xen_domctl_ext_vcpucontext,
mcg_cap) !=
@@ -899,6 +904,8 @@ long arch_do_domctl(
}
else
ret = 0;
+
+ domain_unpause(d);
}
}
break;
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -334,10 +334,6 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xe
unsigned int vcpu = op->u.vcpucontext.vcpu;
struct vcpu *v;
- ret = -ESRCH;
- if ( d == NULL )
- break;
-
ret = -EINVAL;
if ( (d == current->domain) || /* no domain_pause() */
(vcpu >= d->max_vcpus) || ((v = d->vcpu[vcpu]) == NULL) )
[-- Attachment #3: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH 3/3] domctl: pause vCPU for context reads
2014-02-05 14:48 [PATCH 0/3] guest context management adjustments Jan Beulich
2014-02-05 14:52 ` [PATCH 1/3] x86: fix FS/GS base handling when using the fsgsbase feature Jan Beulich
2014-02-05 14:54 ` [PATCH 2/3] domctl: also pause domain for extended context updates Jan Beulich
@ 2014-02-05 14:54 ` Jan Beulich
2014-02-05 15:29 ` Andrew Cooper
2014-02-05 14:55 ` [PATCH 0/3] guest context management adjustments Jan Beulich
2014-02-05 15:35 ` Keir Fraser
4 siblings, 1 reply; 12+ messages in thread
From: Jan Beulich @ 2014-02-05 14:54 UTC (permalink / raw)
To: xen-devel; +Cc: George Dunlap, Keir Fraser
[-- Attachment #1: Type: text/plain, Size: 2238 bytes --]
"Base" context reads already paused the subject vCPU when being the
current one, but that special case isn't being properly dealt with
anyway (at the very least when x86's fsgsbase feature is in use), so
just disallow it.
"Extended" context reads so far didn't do any pausing.
While we can't avoid the reported data being stale by the time it
arrives at the caller, this way we at least guarantee that it is
consistent.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -819,7 +819,13 @@ long arch_do_domctl(
if ( domctl->cmd == XEN_DOMCTL_get_ext_vcpucontext )
{
+ if ( v == current ) /* no vcpu_pause() */
+ break;
+
evc->size = sizeof(*evc);
+
+ vcpu_pause(v);
+
if ( is_pv_domain(d) )
{
evc->sysenter_callback_cs =
@@ -849,6 +855,7 @@ long arch_do_domctl(
evc->vmce.mci_ctl2_bank1 = v->arch.vmce.bank[1].mci_ctl2;
ret = 0;
+ vcpu_unpause(v);
copyback = 1;
}
else
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -675,11 +675,9 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xe
struct vcpu *v;
ret = -EINVAL;
- if ( op->u.vcpucontext.vcpu >= d->max_vcpus )
- goto getvcpucontext_out;
-
- ret = -ESRCH;
- if ( (v = d->vcpu[op->u.vcpucontext.vcpu]) == NULL )
+ if ( op->u.vcpucontext.vcpu >= d->max_vcpus ||
+ (v = d->vcpu[op->u.vcpucontext.vcpu]) == NULL ||
+ v == current ) /* no vcpu_pause() */
goto getvcpucontext_out;
ret = -ENODATA;
@@ -694,14 +692,12 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xe
if ( (c.nat = xmalloc(struct vcpu_guest_context)) == NULL )
goto getvcpucontext_out;
- if ( v != current )
- vcpu_pause(v);
+ vcpu_pause(v);
arch_get_info_guest(v, c);
ret = 0;
- if ( v != current )
- vcpu_unpause(v);
+ vcpu_unpause(v);
#ifdef CONFIG_COMPAT
if ( !is_pv_32on64_vcpu(v) )
[-- Attachment #2: vcpu-get-context-pause.patch --]
[-- Type: text/plain, Size: 2272 bytes --]
domctl: pause vCPU for context reads
"Base" context reads already paused the subject vCPU when being the
current one, but that special case isn't being properly dealt with
anyway (at the very least when x86's fsgsbase feature is in use), so
just disallow it.
"Extended" context reads so far didn't do any pausing.
While we can't avoid the reported data being stale by the time it
arrives at the caller, this way we at least guarantee that it is
consistent.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -819,7 +819,13 @@ long arch_do_domctl(
if ( domctl->cmd == XEN_DOMCTL_get_ext_vcpucontext )
{
+ if ( v == current ) /* no vcpu_pause() */
+ break;
+
evc->size = sizeof(*evc);
+
+ vcpu_pause(v);
+
if ( is_pv_domain(d) )
{
evc->sysenter_callback_cs =
@@ -849,6 +855,7 @@ long arch_do_domctl(
evc->vmce.mci_ctl2_bank1 = v->arch.vmce.bank[1].mci_ctl2;
ret = 0;
+ vcpu_unpause(v);
copyback = 1;
}
else
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -675,11 +675,9 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xe
struct vcpu *v;
ret = -EINVAL;
- if ( op->u.vcpucontext.vcpu >= d->max_vcpus )
- goto getvcpucontext_out;
-
- ret = -ESRCH;
- if ( (v = d->vcpu[op->u.vcpucontext.vcpu]) == NULL )
+ if ( op->u.vcpucontext.vcpu >= d->max_vcpus ||
+ (v = d->vcpu[op->u.vcpucontext.vcpu]) == NULL ||
+ v == current ) /* no vcpu_pause() */
goto getvcpucontext_out;
ret = -ENODATA;
@@ -694,14 +692,12 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xe
if ( (c.nat = xmalloc(struct vcpu_guest_context)) == NULL )
goto getvcpucontext_out;
- if ( v != current )
- vcpu_pause(v);
+ vcpu_pause(v);
arch_get_info_guest(v, c);
ret = 0;
- if ( v != current )
- vcpu_unpause(v);
+ vcpu_unpause(v);
#ifdef CONFIG_COMPAT
if ( !is_pv_32on64_vcpu(v) )
[-- Attachment #3: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 0/3] guest context management adjustments
2014-02-05 14:48 [PATCH 0/3] guest context management adjustments Jan Beulich
` (2 preceding siblings ...)
2014-02-05 14:54 ` [PATCH 3/3] domctl: pause vCPU for context reads Jan Beulich
@ 2014-02-05 14:55 ` Jan Beulich
2014-02-05 16:02 ` George Dunlap
2014-02-05 15:35 ` Keir Fraser
4 siblings, 1 reply; 12+ messages in thread
From: Jan Beulich @ 2014-02-05 14:55 UTC (permalink / raw)
To: xen-devel; +Cc: George Dunlap, Keir Fraser
>>> On 05.02.14 at 15:48, "Jan Beulich" <JBeulich@suse.com> wrote:
> 1: x86: fix FS/GS base handling when using the fsgsbase feature
> 2: domctl: also pause domain for extended context updates
> 3: domctl: pause vCPU for context reads
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
And I think is goes without saying that this being bug fixes I expect
them to be suitable for 4.4.
Jan
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 2/3] domctl: also pause domain for extended context updates
2014-02-05 14:54 ` [PATCH 2/3] domctl: also pause domain for extended context updates Jan Beulich
@ 2014-02-05 15:05 ` Andrew Cooper
0 siblings, 0 replies; 12+ messages in thread
From: Andrew Cooper @ 2014-02-05 15:05 UTC (permalink / raw)
To: Jan Beulich; +Cc: George Dunlap, xen-devel, Keir Fraser
[-- Attachment #1.1: Type: text/plain, Size: 2508 bytes --]
On 05/02/14 14:54, Jan Beulich wrote:
> This is not just for consistency with "base" context updates, but
> actually needed so that guest side accesses can't race with control
> domain side updates.
>
> This would have been a security issue if XSA-77 hadn't waived them on
> the affected domctl operation.
>
> While looking at the code I also spotted a redundant NULL check in the
> "base" context update handling code, which is being removed.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
>
> --- a/xen/arch/x86/domctl.c
> +++ b/xen/arch/x86/domctl.c
> @@ -853,6 +853,8 @@ long arch_do_domctl(
> }
> else
> {
> + if ( d == current->domain ) /* no domain_pause() */
> + break;
> ret = -EINVAL;
> if ( evc->size < offsetof(typeof(*evc), vmce) )
> break;
> @@ -861,6 +863,7 @@ long arch_do_domctl(
> if ( !is_canonical_address(evc->sysenter_callback_eip) ||
> !is_canonical_address(evc->syscall32_callback_eip) )
> break;
> + domain_pause(d);
> fixup_guest_code_selector(d, evc->sysenter_callback_cs);
> v->arch.pv_vcpu.sysenter_callback_cs =
> evc->sysenter_callback_cs;
> @@ -881,6 +884,8 @@ long arch_do_domctl(
> (evc->syscall32_callback_cs & ~3) ||
> evc->syscall32_callback_eip )
> break;
> + else
> + domain_pause(d);
>
> BUILD_BUG_ON(offsetof(struct xen_domctl_ext_vcpucontext,
> mcg_cap) !=
> @@ -899,6 +904,8 @@ long arch_do_domctl(
> }
> else
> ret = 0;
> +
> + domain_unpause(d);
> }
> }
> break;
> --- a/xen/common/domctl.c
> +++ b/xen/common/domctl.c
> @@ -334,10 +334,6 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xe
> unsigned int vcpu = op->u.vcpucontext.vcpu;
> struct vcpu *v;
>
> - ret = -ESRCH;
> - if ( d == NULL )
> - break;
> -
> ret = -EINVAL;
> if ( (d == current->domain) || /* no domain_pause() */
> (vcpu >= d->max_vcpus) || ((v = d->vcpu[vcpu]) == NULL) )
>
>
>
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel
[-- Attachment #1.2: Type: text/html, Size: 3359 bytes --]
[-- Attachment #2: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 3/3] domctl: pause vCPU for context reads
2014-02-05 14:54 ` [PATCH 3/3] domctl: pause vCPU for context reads Jan Beulich
@ 2014-02-05 15:29 ` Andrew Cooper
2014-02-05 15:39 ` Jan Beulich
0 siblings, 1 reply; 12+ messages in thread
From: Andrew Cooper @ 2014-02-05 15:29 UTC (permalink / raw)
To: Jan Beulich; +Cc: George Dunlap, xen-devel, Keir Fraser
[-- Attachment #1.1: Type: text/plain, Size: 2626 bytes --]
On 05/02/14 14:54, Jan Beulich wrote:
> "Base" context reads already paused the subject vCPU when being the
> current one, but that special case isn't being properly dealt with
> anyway (at the very least when x86's fsgsbase feature is in use), so
> just disallow it.
>
> "Extended" context reads so far didn't do any pausing.
>
> While we can't avoid the reported data being stale by the time it
> arrives at the caller, this way we at least guarantee that it is
> consistent.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
Now I come to think about it, is this an ABI change, as we are now
disallowing a control domain to issue these hypercalls on itself?
~Andrew
>
> --- a/xen/arch/x86/domctl.c
> +++ b/xen/arch/x86/domctl.c
> @@ -819,7 +819,13 @@ long arch_do_domctl(
>
> if ( domctl->cmd == XEN_DOMCTL_get_ext_vcpucontext )
> {
> + if ( v == current ) /* no vcpu_pause() */
> + break;
> +
> evc->size = sizeof(*evc);
> +
> + vcpu_pause(v);
> +
> if ( is_pv_domain(d) )
> {
> evc->sysenter_callback_cs =
> @@ -849,6 +855,7 @@ long arch_do_domctl(
> evc->vmce.mci_ctl2_bank1 = v->arch.vmce.bank[1].mci_ctl2;
>
> ret = 0;
> + vcpu_unpause(v);
> copyback = 1;
> }
> else
> --- a/xen/common/domctl.c
> +++ b/xen/common/domctl.c
> @@ -675,11 +675,9 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xe
> struct vcpu *v;
>
> ret = -EINVAL;
> - if ( op->u.vcpucontext.vcpu >= d->max_vcpus )
> - goto getvcpucontext_out;
> -
> - ret = -ESRCH;
> - if ( (v = d->vcpu[op->u.vcpucontext.vcpu]) == NULL )
> + if ( op->u.vcpucontext.vcpu >= d->max_vcpus ||
> + (v = d->vcpu[op->u.vcpucontext.vcpu]) == NULL ||
> + v == current ) /* no vcpu_pause() */
> goto getvcpucontext_out;
>
> ret = -ENODATA;
> @@ -694,14 +692,12 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xe
> if ( (c.nat = xmalloc(struct vcpu_guest_context)) == NULL )
> goto getvcpucontext_out;
>
> - if ( v != current )
> - vcpu_pause(v);
> + vcpu_pause(v);
>
> arch_get_info_guest(v, c);
> ret = 0;
>
> - if ( v != current )
> - vcpu_unpause(v);
> + vcpu_unpause(v);
>
> #ifdef CONFIG_COMPAT
> if ( !is_pv_32on64_vcpu(v) )
>
>
>
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel
[-- Attachment #1.2: Type: text/html, Size: 3392 bytes --]
[-- Attachment #2: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 0/3] guest context management adjustments
2014-02-05 14:48 [PATCH 0/3] guest context management adjustments Jan Beulich
` (3 preceding siblings ...)
2014-02-05 14:55 ` [PATCH 0/3] guest context management adjustments Jan Beulich
@ 2014-02-05 15:35 ` Keir Fraser
4 siblings, 0 replies; 12+ messages in thread
From: Keir Fraser @ 2014-02-05 15:35 UTC (permalink / raw)
To: Jan Beulich, xen-devel; +Cc: George Dunlap
On 05/02/2014 14:48, "Jan Beulich" <JBeulich@suse.com> wrote:
> 1: x86: fix FS/GS base handling when using the fsgsbase feature
> 2: domctl: also pause domain for extended context updates
> 3: domctl: pause vCPU for context reads
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 3/3] domctl: pause vCPU for context reads
2014-02-05 15:29 ` Andrew Cooper
@ 2014-02-05 15:39 ` Jan Beulich
2014-02-05 15:43 ` Andrew Cooper
0 siblings, 1 reply; 12+ messages in thread
From: Jan Beulich @ 2014-02-05 15:39 UTC (permalink / raw)
To: Andrew Cooper; +Cc: George Dunlap, xen-devel, Keir Fraser
>>> On 05.02.14 at 16:29, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> On 05/02/14 14:54, Jan Beulich wrote:
>> "Base" context reads already paused the subject vCPU when being the
>> current one, but that special case isn't being properly dealt with
>> anyway (at the very least when x86's fsgsbase feature is in use), so
>> just disallow it.
>>
>> "Extended" context reads so far didn't do any pausing.
>>
>> While we can't avoid the reported data being stale by the time it
>> arrives at the caller, this way we at least guarantee that it is
>> consistent.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> Now I come to think about it, is this an ABI change, as we are now
> disallowing a control domain to issue these hypercalls on itself?
Of course it is, and intentionally so. As was patch 2. And imo it
was never correct to allow this.
Jan
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 3/3] domctl: pause vCPU for context reads
2014-02-05 15:39 ` Jan Beulich
@ 2014-02-05 15:43 ` Andrew Cooper
0 siblings, 0 replies; 12+ messages in thread
From: Andrew Cooper @ 2014-02-05 15:43 UTC (permalink / raw)
To: Jan Beulich; +Cc: George Dunlap, xen-devel, Keir Fraser
On 05/02/14 15:39, Jan Beulich wrote:
>>>> On 05.02.14 at 16:29, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>> On 05/02/14 14:54, Jan Beulich wrote:
>>> "Base" context reads already paused the subject vCPU when being the
>>> current one, but that special case isn't being properly dealt with
>>> anyway (at the very least when x86's fsgsbase feature is in use), so
>>> just disallow it.
>>>
>>> "Extended" context reads so far didn't do any pausing.
>>>
>>> While we can't avoid the reported data being stale by the time it
>>> arrives at the caller, this way we at least guarantee that it is
>>> consistent.
>>>
>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> Now I come to think about it, is this an ABI change, as we are now
>> disallowing a control domain to issue these hypercalls on itself?
> Of course it is, and intentionally so. As was patch 2. And imo it
> was never correct to allow this.
>
> Jan
>
It is certainly possible to get libxc to do this, but I would agree that
it has no valid use.
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 1/3] x86: fix FS/GS base handling when using the fsgsbase feature
2014-02-05 14:52 ` [PATCH 1/3] x86: fix FS/GS base handling when using the fsgsbase feature Jan Beulich
@ 2014-02-05 15:50 ` Andrew Cooper
0 siblings, 0 replies; 12+ messages in thread
From: Andrew Cooper @ 2014-02-05 15:50 UTC (permalink / raw)
To: Jan Beulich; +Cc: George Dunlap, xen-devel, Keir Fraser
[-- Attachment #1.1: Type: text/plain, Size: 5463 bytes --]
On 05/02/14 14:52, Jan Beulich wrote:
> In that case, due to the respective instructions not being privileged,
> we can't rely on our in-memory data to always be correct: While the
> guest is running, it may change without us knowing about it. Therefore
> we need to
> - read the correct values from hardware during context switch out
> (save_segments())
> - read the correct values from hardware during RDMSR emulation
> - update in-memory values during guest mode change
> (toggle_guest_mode())
>
> For completeness/consistency, WRMSR emulation is also being switched
> to use wr[fg]sbase().
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
>
> --- a/xen/arch/x86/domain.c
> +++ b/xen/arch/x86/domain.c
> @@ -1251,6 +1251,15 @@ static void save_segments(struct vcpu *v
> regs->fs = read_segment_register(fs);
> regs->gs = read_segment_register(gs);
>
> + if ( cpu_has_fsgsbase && !is_pv_32bit_vcpu(v) )
> + {
> + v->arch.pv_vcpu.fs_base = __rdfsbase();
> + if ( v->arch.flags & TF_kernel_mode )
> + v->arch.pv_vcpu.gs_base_kernel = __rdgsbase();
> + else
> + v->arch.pv_vcpu.gs_base_user = __rdgsbase();
> + }
> +
> if ( regs->ds )
> dirty_segment_mask |= DIRTY_DS;
>
> --- a/xen/arch/x86/traps.c
> +++ b/xen/arch/x86/traps.c
> @@ -2382,15 +2382,13 @@ static int emulate_privileged_op(struct
> case MSR_FS_BASE:
> if ( is_pv_32on64_vcpu(v) )
> goto fail;
> - if ( wrmsr_safe(MSR_FS_BASE, msr_content) )
> - goto fail;
> + wrfsbase(msr_content);
> v->arch.pv_vcpu.fs_base = msr_content;
> break;
> case MSR_GS_BASE:
> if ( is_pv_32on64_vcpu(v) )
> goto fail;
> - if ( wrmsr_safe(MSR_GS_BASE, msr_content) )
> - goto fail;
> + wrgsbase(msr_content);
> v->arch.pv_vcpu.gs_base_kernel = msr_content;
> break;
> case MSR_SHADOW_GS_BASE:
> @@ -2535,15 +2533,14 @@ static int emulate_privileged_op(struct
> case MSR_FS_BASE:
> if ( is_pv_32on64_vcpu(v) )
> goto fail;
> - regs->eax = v->arch.pv_vcpu.fs_base & 0xFFFFFFFFUL;
> - regs->edx = v->arch.pv_vcpu.fs_base >> 32;
> - break;
> + val = cpu_has_fsgsbase ? __rdfsbase() : v->arch.pv_vcpu.fs_base;
> + goto rdmsr_writeback;
> case MSR_GS_BASE:
> if ( is_pv_32on64_vcpu(v) )
> goto fail;
> - regs->eax = v->arch.pv_vcpu.gs_base_kernel & 0xFFFFFFFFUL;
> - regs->edx = v->arch.pv_vcpu.gs_base_kernel >> 32;
> - break;
> + val = cpu_has_fsgsbase ? __rdgsbase()
> + : v->arch.pv_vcpu.gs_base_kernel;
> + goto rdmsr_writeback;
> case MSR_SHADOW_GS_BASE:
> if ( is_pv_32on64_vcpu(v) )
> goto fail;
> --- a/xen/arch/x86/x86_64/traps.c
> +++ b/xen/arch/x86/x86_64/traps.c
> @@ -257,6 +257,13 @@ void toggle_guest_mode(struct vcpu *v)
> {
> if ( is_pv_32bit_vcpu(v) )
> return;
> + if ( cpu_has_fsgsbase )
> + {
> + if ( v->arch.flags & TF_kernel_mode )
> + v->arch.pv_vcpu.gs_base_kernel = __rdgsbase();
> + else
> + v->arch.pv_vcpu.gs_base_user = __rdgsbase();
> + }
> v->arch.flags ^= TF_kernel_mode;
> asm volatile ( "swapgs" );
> update_cr3(v);
> --- a/xen/include/asm-x86/msr.h
> +++ b/xen/include/asm-x86/msr.h
> @@ -98,34 +98,52 @@ static inline int wrmsr_safe(unsigned in
> : "=a" (low), "=d" (high) \
> : "c" (counter))
>
> -static inline unsigned long rdfsbase(void)
> +static inline unsigned long __rdfsbase(void)
> {
> unsigned long base;
>
> - if ( cpu_has_fsgsbase )
> #ifdef HAVE_GAS_FSGSBASE
> - asm volatile ( "rdfsbase %0" : "=r" (base) );
> + asm volatile ( "rdfsbase %0" : "=r" (base) );
> #else
> - asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xc0" : "=a" (base) );
> + asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xc0" : "=a" (base) );
> #endif
> - else
> - rdmsrl(MSR_FS_BASE, base);
>
> return base;
> }
>
> -static inline unsigned long rdgsbase(void)
> +static inline unsigned long __rdgsbase(void)
> {
> unsigned long base;
>
> - if ( cpu_has_fsgsbase )
> #ifdef HAVE_GAS_FSGSBASE
> - asm volatile ( "rdgsbase %0" : "=r" (base) );
> + asm volatile ( "rdgsbase %0" : "=r" (base) );
> #else
> - asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xc8" : "=a" (base) );
> + asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xc8" : "=a" (base) );
> #endif
> - else
> - rdmsrl(MSR_GS_BASE, base);
> +
> + return base;
> +}
> +
> +static inline unsigned long rdfsbase(void)
> +{
> + unsigned long base;
> +
> + if ( cpu_has_fsgsbase )
> + return __rdfsbase();
> +
> + rdmsrl(MSR_FS_BASE, base);
> +
> + return base;
> +}
> +
> +static inline unsigned long rdgsbase(void)
> +{
> + unsigned long base;
> +
> + if ( cpu_has_fsgsbase )
> + return __rdgsbase();
> +
> + rdmsrl(MSR_GS_BASE, base);
>
> return base;
> }
>
>
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel
[-- Attachment #1.2: Type: text/html, Size: 6180 bytes --]
[-- Attachment #2: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 0/3] guest context management adjustments
2014-02-05 14:55 ` [PATCH 0/3] guest context management adjustments Jan Beulich
@ 2014-02-05 16:02 ` George Dunlap
0 siblings, 0 replies; 12+ messages in thread
From: George Dunlap @ 2014-02-05 16:02 UTC (permalink / raw)
To: Jan Beulich, xen-devel; +Cc: Keir Fraser
On 02/05/2014 02:55 PM, Jan Beulich wrote:
>>>> On 05.02.14 at 15:48, "Jan Beulich" <JBeulich@suse.com> wrote:
>> 1: x86: fix FS/GS base handling when using the fsgsbase feature
>> 2: domctl: also pause domain for extended context updates
>> 3: domctl: pause vCPU for context reads
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> And I think is goes without saying that this being bug fixes I expect
> them to be suitable for 4.4.
Release-acked-by: George Dunlap <george.dunlap@eu.citrix.com>
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2014-02-05 16:03 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-02-05 14:48 [PATCH 0/3] guest context management adjustments Jan Beulich
2014-02-05 14:52 ` [PATCH 1/3] x86: fix FS/GS base handling when using the fsgsbase feature Jan Beulich
2014-02-05 15:50 ` Andrew Cooper
2014-02-05 14:54 ` [PATCH 2/3] domctl: also pause domain for extended context updates Jan Beulich
2014-02-05 15:05 ` Andrew Cooper
2014-02-05 14:54 ` [PATCH 3/3] domctl: pause vCPU for context reads Jan Beulich
2014-02-05 15:29 ` Andrew Cooper
2014-02-05 15:39 ` Jan Beulich
2014-02-05 15:43 ` Andrew Cooper
2014-02-05 14:55 ` [PATCH 0/3] guest context management adjustments Jan Beulich
2014-02-05 16:02 ` George Dunlap
2014-02-05 15:35 ` Keir Fraser
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).