* [PATCH 3/4] KVM: Fix segment descriptor loading.
2010-02-18 10:14 [PATCH 0/4] More emulator correctness fixes Gleb Natapov
2010-02-18 10:14 ` [PATCH 1/4] KVM: Forbid modifying CS segment register by mov instruction Gleb Natapov
2010-02-18 10:15 ` [PATCH 2/4] KVM: Fix load_guest_segment_descriptor() to inject page fault Gleb Natapov
@ 2010-02-18 10:15 ` Gleb Natapov
2010-02-18 10:15 ` [PATCH 4/4] KVM: Fix emulate_sys[call, enter, exit]()'s fault handling Gleb Natapov
` (2 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Gleb Natapov @ 2010-02-18 10:15 UTC (permalink / raw)
To: avi, mtosatti; +Cc: kvm
Add proper error and permission checking. This patch also change task
switching code to load segment selectors before segment descriptors, like
SDM requires, otherwise permission checking during segment descriptor
loading will be incorrect.
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
arch/x86/include/asm/kvm_host.h | 3 +-
arch/x86/kvm/emulate.c | 30 ++-----
arch/x86/kvm/x86.c | 177 +++++++++++++++++++++++++++++++--------
3 files changed, 151 insertions(+), 59 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 96b1e6e..d46e791 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -609,8 +609,7 @@ int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
unsigned long value);
void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
-int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
- int type_bits, int seg);
+int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg);
int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 08ac9cf..8d315ab 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1304,7 +1304,7 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt,
if (rc != X86EMUL_CONTINUE)
return rc;
- rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, 1, seg);
+ rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, seg);
return rc;
}
@@ -1482,7 +1482,7 @@ static int emulate_ret_far(struct x86_emulate_ctxt *ctxt,
rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
if (rc != X86EMUL_CONTINUE)
return rc;
- rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, 1, VCPU_SREG_CS);
+ rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, VCPU_SREG_CS);
return rc;
}
@@ -2118,12 +2118,11 @@ special_insn:
break;
case 0x8e: { /* mov seg, r/m16 */
uint16_t sel;
- int type_bits;
- int err;
sel = c->src.val;
- if (c->modrm_reg == VCPU_SREG_CS) {
+ if (c->modrm_reg == VCPU_SREG_CS ||
+ c->modrm_reg > VCPU_SREG_GS) {
kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
goto done;
}
@@ -2131,18 +2130,7 @@ special_insn:
if (c->modrm_reg == VCPU_SREG_SS)
toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS);
- if (c->modrm_reg <= 5) {
- type_bits = (c->modrm_reg == 1) ? 9 : 1;
- err = kvm_load_segment_descriptor(ctxt->vcpu, sel,
- type_bits, c->modrm_reg);
- } else {
- printk(KERN_INFO "Invalid segreg in modrm byte 0x%02x\n",
- c->modrm);
- goto cannot_emulate;
- }
-
- if (err < 0)
- goto cannot_emulate;
+ rc = kvm_load_segment_descriptor(ctxt->vcpu, sel, c->modrm_reg);
c->dst.type = OP_NONE; /* Disable writeback. */
break;
@@ -2316,11 +2304,9 @@ special_insn:
case 0xe9: /* jmp rel */
goto jmp;
case 0xea: /* jmp far */
- if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val, 9,
- VCPU_SREG_CS) < 0) {
- DPRINTF("jmp far: Failed to load CS descriptor\n");
- goto cannot_emulate;
- }
+ if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val,
+ VCPU_SREG_CS))
+ goto done;
c->eip = c->src.val;
break;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0273980..c43d73d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4787,7 +4787,7 @@ static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int se
.unusable = 0,
};
kvm_x86_ops->set_segment(vcpu, &segvar, seg);
- return 0;
+ return X86EMUL_CONTINUE;
}
static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg)
@@ -4797,43 +4797,112 @@ static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg)
(kvm_get_rflags(vcpu) & X86_EFLAGS_VM);
}
-static void kvm_check_segment_descriptor(struct kvm_vcpu *vcpu, int seg,
- u16 selector)
-{
- /* NULL selector is not valid for CS and SS */
- if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS)
- if (!selector)
- kvm_queue_exception_e(vcpu, TS_VECTOR, selector >> 3);
-}
-
-int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
- int type_bits, int seg)
+int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg)
{
struct kvm_segment kvm_seg;
struct desc_struct seg_desc;
+ u8 dpl, rpl, cpl;
+ unsigned err_vec = GP_VECTOR;
+ u32 err_code = 0;
+ bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
+ int ret;
if (is_vm86_segment(vcpu, seg) || !is_protmode(vcpu))
return kvm_load_realmode_segment(vcpu, selector, seg);
- if (load_guest_segment_descriptor(vcpu, selector, &seg_desc))
- return 1;
+ /* NULL selector is not valid for TR, CS and SS */
+ if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR)
+ && null_selector)
+ goto exception;
+
+ /* TR should be in GDT only */
+ if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
+ goto exception;
+
+ ret = load_guest_segment_descriptor(vcpu, selector, &seg_desc);
+ if (ret)
+ return ret;
+
seg_desct_to_kvm_desct(&seg_desc, selector, &kvm_seg);
- kvm_check_segment_descriptor(vcpu, seg, selector);
- kvm_seg.type |= type_bits;
+ if (null_selector) { /* for NULL selector skip all following checks */
+ kvm_seg.unusable = 1;
+ goto load;
+ }
- if (seg != VCPU_SREG_SS && seg != VCPU_SREG_CS &&
- seg != VCPU_SREG_LDTR)
- if (!kvm_seg.s)
- kvm_seg.unusable = 1;
+ err_code = selector & 0xfffc;
+ err_vec = GP_VECTOR;
- kvm_set_segment(vcpu, &kvm_seg, seg);
- if (selector && !kvm_seg.unusable && kvm_seg.s) {
+ /* can't load system descriptor into segment selecor */
+ if (seg <= VCPU_SREG_GS && !kvm_seg.s)
+ goto exception;
+
+ if (!kvm_seg.present) {
+ err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
+ goto exception;
+ }
+
+ rpl = selector & 3;
+ dpl = kvm_seg.dpl;
+ cpl = kvm_x86_ops->get_cpl(vcpu);
+
+ switch (seg) {
+ case VCPU_SREG_SS:
+ /*
+ * segment is not a writable data segment or segment
+ * selector's RPL != CPL or segment selector's RPL != CPL
+ */
+ if (rpl != cpl || (kvm_seg.type & 0xa) != 0x2 || dpl != cpl)
+ goto exception;
+ break;
+ case VCPU_SREG_CS:
+ if (!(kvm_seg.type & 8))
+ goto exception;
+
+ if (kvm_seg.type & 4) {
+ /* conforming */
+ if (dpl > cpl)
+ goto exception;
+ } else {
+ /* nonconforming */
+ if (rpl > cpl || dpl != cpl)
+ goto exception;
+ }
+ /* CS(RPL) <- CPL */
+ selector = (selector & 0xfffc) | cpl;
+ break;
+ case VCPU_SREG_TR:
+ if (kvm_seg.s || (kvm_seg.type != 1 && kvm_seg.type != 9))
+ goto exception;
+ break;
+ case VCPU_SREG_LDTR:
+ if (kvm_seg.s || kvm_seg.type != 2)
+ goto exception;
+ break;
+ default: /* DS, ES, FS, or GS */
+ /*
+ * segment is not a data or readable code segment or
+ * ((segment is a data or nonconforming code segment)
+ * and (both RPL and CPL > DPL))
+ */
+ if ((kvm_seg.type & 0xa) == 0x8 ||
+ (((kvm_seg.type & 0xc) != 0xc) && (rpl > dpl && cpl > dpl)))
+ goto exception;
+ break;
+ }
+
+ if (!kvm_seg.unusable && kvm_seg.s) {
/* mark segment as accessed */
+ kvm_seg.type |= 1;
seg_desc.type |= 1;
save_guest_segment_descriptor(vcpu, selector, &seg_desc);
}
- return 0;
+load:
+ kvm_set_segment(vcpu, &kvm_seg, seg);
+ return X86EMUL_CONTINUE;
+exception:
+ kvm_queue_exception_e(vcpu, err_vec, err_code);
+ return X86EMUL_PROPAGATE_FAULT;
}
static void save_state_to_tss32(struct kvm_vcpu *vcpu,
@@ -4859,6 +4928,14 @@ static void save_state_to_tss32(struct kvm_vcpu *vcpu,
tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR);
}
+static void kvm_load_segment_selector(struct kvm_vcpu *vcpu, u16 sel, int seg)
+{
+ struct kvm_segment kvm_seg;
+ kvm_get_segment(vcpu, &kvm_seg, seg);
+ kvm_seg.selector = sel;
+ kvm_set_segment(vcpu, &kvm_seg, seg);
+}
+
static int load_state_from_tss32(struct kvm_vcpu *vcpu,
struct tss_segment_32 *tss)
{
@@ -4876,25 +4953,41 @@ static int load_state_from_tss32(struct kvm_vcpu *vcpu,
kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi);
kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi);
- if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR))
+ /*
+ * SDM says that segment selectors are loaded before segment
+ * descriptors
+ */
+ kvm_load_segment_selector(vcpu, tss->ldt_selector, VCPU_SREG_LDTR);
+ kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES);
+ kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS);
+ kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS);
+ kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS);
+ kvm_load_segment_selector(vcpu, tss->fs, VCPU_SREG_FS);
+ kvm_load_segment_selector(vcpu, tss->gs, VCPU_SREG_GS);
+
+ /*
+ * Now load segment descriptors. If fault happenes at this stage
+ * it is handled in a context of new task
+ */
+ if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, VCPU_SREG_LDTR))
return 1;
- if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES))
+ if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES))
return 1;
- if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS))
+ if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS))
return 1;
- if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS))
+ if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS))
return 1;
- if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS))
+ if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS))
return 1;
- if (kvm_load_segment_descriptor(vcpu, tss->fs, 1, VCPU_SREG_FS))
+ if (kvm_load_segment_descriptor(vcpu, tss->fs, VCPU_SREG_FS))
return 1;
- if (kvm_load_segment_descriptor(vcpu, tss->gs, 1, VCPU_SREG_GS))
+ if (kvm_load_segment_descriptor(vcpu, tss->gs, VCPU_SREG_GS))
return 1;
return 0;
}
@@ -4934,19 +5027,33 @@ static int load_state_from_tss16(struct kvm_vcpu *vcpu,
kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si);
kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di);
- if (kvm_load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR))
+ /*
+ * SDM says that segment selectors are loaded before segment
+ * descriptors
+ */
+ kvm_load_segment_selector(vcpu, tss->ldt, VCPU_SREG_LDTR);
+ kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES);
+ kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS);
+ kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS);
+ kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS);
+
+ /*
+ * Now load segment descriptors. If fault happenes at this stage
+ * it is handled in a context of new task
+ */
+ if (kvm_load_segment_descriptor(vcpu, tss->ldt, VCPU_SREG_LDTR))
return 1;
- if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES))
+ if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES))
return 1;
- if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS))
+ if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS))
return 1;
- if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS))
+ if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS))
return 1;
- if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS))
+ if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS))
return 1;
return 0;
}
--
1.6.5
^ permalink raw reply related [flat|nested] 7+ messages in thread* [PATCH 4/4] KVM: Fix emulate_sys[call, enter, exit]()'s fault handling
2010-02-18 10:14 [PATCH 0/4] More emulator correctness fixes Gleb Natapov
` (2 preceding siblings ...)
2010-02-18 10:15 ` [PATCH 3/4] KVM: Fix segment descriptor loading Gleb Natapov
@ 2010-02-18 10:15 ` Gleb Natapov
2010-02-18 10:38 ` [PATCH 0/4] More emulator correctness fixes Avi Kivity
2010-02-18 10:55 ` Takuya Yoshikawa
5 siblings, 0 replies; 7+ messages in thread
From: Gleb Natapov @ 2010-02-18 10:15 UTC (permalink / raw)
To: avi, mtosatti; +Cc: kvm
From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
This patch fixes emulate_syscall(), emulate_sysenter() and
emulate_sysexit() to handle injected faults properly.
Even though original code injects faults in these functions,
we cannot handle these unless we use the different return
value from the UNHANDLEABLE case. So this patch use X86EMUL_*
codes instead of -1 and 0 and makes x86_emulate_insn() to
handle these propagated faults.
Be sure that, in x86_emulate_insn(), goto cannot_emulate and
goto done with rc equals X86EMUL_UNHANDLEABLE have same effect.
Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
arch/x86/kvm/emulate.c | 37 ++++++++++++++++++++-----------------
1 files changed, 20 insertions(+), 17 deletions(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 8d315ab..35f7acd 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1590,7 +1590,7 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt)
/* syscall is not available in real mode */
if (ctxt->mode == X86EMUL_MODE_REAL || ctxt->mode == X86EMUL_MODE_VM86)
- return -1;
+ return X86EMUL_UNHANDLEABLE;
setup_syscalls_segments(ctxt, &cs, &ss);
@@ -1627,7 +1627,7 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt)
ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
}
- return 0;
+ return X86EMUL_CONTINUE;
}
static int
@@ -1640,14 +1640,14 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt)
/* inject #GP if in real mode */
if (ctxt->mode == X86EMUL_MODE_REAL) {
kvm_inject_gp(ctxt->vcpu, 0);
- return -1;
+ return X86EMUL_UNHANDLEABLE;
}
/* XXX sysenter/sysexit have not been tested in 64bit mode.
* Therefore, we inject an #UD.
*/
if (ctxt->mode == X86EMUL_MODE_PROT64)
- return -1;
+ return X86EMUL_UNHANDLEABLE;
setup_syscalls_segments(ctxt, &cs, &ss);
@@ -1656,13 +1656,13 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt)
case X86EMUL_MODE_PROT32:
if ((msr_data & 0xfffc) == 0x0) {
kvm_inject_gp(ctxt->vcpu, 0);
- return -1;
+ return X86EMUL_PROPAGATE_FAULT;
}
break;
case X86EMUL_MODE_PROT64:
if (msr_data == 0x0) {
kvm_inject_gp(ctxt->vcpu, 0);
- return -1;
+ return X86EMUL_PROPAGATE_FAULT;
}
break;
}
@@ -1687,7 +1687,7 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt)
kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data);
c->regs[VCPU_REGS_RSP] = msr_data;
- return 0;
+ return X86EMUL_CONTINUE;
}
static int
@@ -1702,7 +1702,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt)
if (ctxt->mode == X86EMUL_MODE_REAL ||
ctxt->mode == X86EMUL_MODE_VM86) {
kvm_inject_gp(ctxt->vcpu, 0);
- return -1;
+ return X86EMUL_UNHANDLEABLE;
}
setup_syscalls_segments(ctxt, &cs, &ss);
@@ -1720,7 +1720,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt)
cs.selector = (u16)(msr_data + 16);
if ((msr_data & 0xfffc) == 0x0) {
kvm_inject_gp(ctxt->vcpu, 0);
- return -1;
+ return X86EMUL_PROPAGATE_FAULT;
}
ss.selector = (u16)(msr_data + 24);
break;
@@ -1728,7 +1728,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt)
cs.selector = (u16)(msr_data + 32);
if (msr_data == 0x0) {
kvm_inject_gp(ctxt->vcpu, 0);
- return -1;
+ return X86EMUL_PROPAGATE_FAULT;
}
ss.selector = cs.selector + 8;
cs.db = 0;
@@ -1744,7 +1744,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt)
c->eip = ctxt->vcpu->arch.regs[VCPU_REGS_RDX];
c->regs[VCPU_REGS_RSP] = ctxt->vcpu->arch.regs[VCPU_REGS_RCX];
- return 0;
+ return X86EMUL_CONTINUE;
}
static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
@@ -2472,8 +2472,9 @@ twobyte_insn:
}
break;
case 0x05: /* syscall */
- if (emulate_syscall(ctxt) == -1)
- goto cannot_emulate;
+ rc = emulate_syscall(ctxt);
+ if (rc != X86EMUL_CONTINUE)
+ goto done;
else
goto writeback;
break;
@@ -2541,14 +2542,16 @@ twobyte_insn:
c->dst.type = OP_NONE;
break;
case 0x34: /* sysenter */
- if (emulate_sysenter(ctxt) == -1)
- goto cannot_emulate;
+ rc = emulate_sysenter(ctxt);
+ if (rc != X86EMUL_CONTINUE)
+ goto done;
else
goto writeback;
break;
case 0x35: /* sysexit */
- if (emulate_sysexit(ctxt) == -1)
- goto cannot_emulate;
+ rc = emulate_sysexit(ctxt);
+ if (rc != X86EMUL_CONTINUE)
+ goto done;
else
goto writeback;
break;
--
1.6.5
^ permalink raw reply related [flat|nested] 7+ messages in thread