* [PATCH 1/4] Fix handling of a fault during NMI unblocked due to IRET
@ 2009-03-29 14:12 Gleb Natapov
2009-03-29 14:12 ` [PATCH 2/4] Rewrite twisted maze of if() statements with more straightforward switch() Gleb Natapov
` (2 more replies)
0 siblings, 3 replies; 18+ messages in thread
From: Gleb Natapov @ 2009-03-29 14:12 UTC (permalink / raw)
To: avi; +Cc: kvm
Bit 12 is undefined in any of the following cases:
If the VM exit sets the valid bit in the IDT-vectoring information field.
If the VM exit is due to a double fault.
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
arch/x86/kvm/vmx.c | 17 +++++++++++------
1 files changed, 11 insertions(+), 6 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 37ae13d..14e3f48 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3259,36 +3259,41 @@ static void update_tpr_threshold(struct kvm_vcpu *vcpu)
static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
{
u32 exit_intr_info;
- u32 idt_vectoring_info;
+ u32 idt_vectoring_info = vmx->idt_vectoring_info;
bool unblock_nmi;
u8 vector;
int type;
bool idtv_info_valid;
u32 error;
+ idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
if (cpu_has_virtual_nmis()) {
unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
/*
- * SDM 3: 25.7.1.2
+ * SDM 3: 27.7.1.2 (September 2008)
* Re-set bit "block by NMI" before VM entry if vmexit caused by
* a guest IRET fault.
+ * SDM 3: 23.2.2 (September 2008)
+ * Bit 12 is undefined in any of the following cases:
+ * If the VM exit sets the valid bit in the IDT-vectoring
+ * information field.
+ * If the VM exit is due to a double fault.
*/
- if (unblock_nmi && vector != DF_VECTOR)
+ if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
+ vector != DF_VECTOR && !idtv_info_valid)
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
} else if (unlikely(vmx->soft_vnmi_blocked))
vmx->vnmi_blocked_time +=
ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
- idt_vectoring_info = vmx->idt_vectoring_info;
- idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK;
type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK;
if (vmx->vcpu.arch.nmi_injected) {
/*
- * SDM 3: 25.7.1.2
+ * SDM 3: 27.7.1.2 (September 2008)
* Clear bit "block by NMI" before VM entry if a NMI delivery
* faulted.
*/
^ permalink raw reply related [flat|nested] 18+ messages in thread* [PATCH 2/4] Rewrite twisted maze of if() statements with more straightforward switch() 2009-03-29 14:12 [PATCH 1/4] Fix handling of a fault during NMI unblocked due to IRET Gleb Natapov @ 2009-03-29 14:12 ` Gleb Natapov 2009-03-30 7:35 ` Avi Kivity 2009-03-29 14:12 ` [PATCH 3/4] Do not zero idt_vectoring_info in vmx_complete_interrupts() Gleb Natapov 2009-03-29 14:12 ` [PATCH 4/4] Fix task switching Gleb Natapov 2 siblings, 1 reply; 18+ messages in thread From: Gleb Natapov @ 2009-03-29 14:12 UTC (permalink / raw) To: avi; +Cc: kvm Signed-off-by: Gleb Natapov <gleb@redhat.com> --- arch/x86/kvm/vmx.c | 43 +++++++++++++++++++++++++------------------ 1 files changed, 25 insertions(+), 18 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 14e3f48..1017544 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3264,7 +3264,6 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx) u8 vector; int type; bool idtv_info_valid; - u32 error; idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); @@ -3289,34 +3288,42 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx) vmx->vnmi_blocked_time += ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); + vmx->vcpu.arch.nmi_injected = false; + kvm_clear_exception_queue(&vmx->vcpu); + kvm_clear_interrupt_queue(&vmx->vcpu); + + if (!idtv_info_valid) + return; + vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK; type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK; - if (vmx->vcpu.arch.nmi_injected) { + + switch(type) { + case INTR_TYPE_NMI_INTR: + vmx->vcpu.arch.nmi_injected = true; /* * SDM 3: 27.7.1.2 (September 2008) - * Clear bit "block by NMI" before VM entry if a NMI delivery - * faulted. + * Clear bit "block by NMI" before VM entry if a NMI + * delivery faulted. */ - if (idtv_info_valid && type == INTR_TYPE_NMI_INTR) - vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, - GUEST_INTR_STATE_NMI); - else - vmx->vcpu.arch.nmi_injected = false; - } - kvm_clear_exception_queue(&vmx->vcpu); - if (idtv_info_valid && (type == INTR_TYPE_HARD_EXCEPTION || - type == INTR_TYPE_SOFT_EXCEPTION)) { + vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, + GUEST_INTR_STATE_NMI); + break; + case INTR_TYPE_HARD_EXCEPTION: + case INTR_TYPE_SOFT_EXCEPTION: if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { - error = vmcs_read32(IDT_VECTORING_ERROR_CODE); - kvm_queue_exception_e(&vmx->vcpu, vector, error); + u32 err = vmcs_read32(IDT_VECTORING_ERROR_CODE); + kvm_queue_exception_e(&vmx->vcpu, vector, err); } else kvm_queue_exception(&vmx->vcpu, vector); vmx->idt_vectoring_info = 0; - } - kvm_clear_interrupt_queue(&vmx->vcpu); - if (idtv_info_valid && type == INTR_TYPE_EXT_INTR) { + break; + case INTR_TYPE_EXT_INTR: kvm_queue_interrupt(&vmx->vcpu, vector); vmx->idt_vectoring_info = 0; + break; + default: + break; } } ^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: [PATCH 2/4] Rewrite twisted maze of if() statements with more straightforward switch() 2009-03-29 14:12 ` [PATCH 2/4] Rewrite twisted maze of if() statements with more straightforward switch() Gleb Natapov @ 2009-03-30 7:35 ` Avi Kivity 2009-03-30 16:03 ` Jan Kiszka 0 siblings, 1 reply; 18+ messages in thread From: Avi Kivity @ 2009-03-30 7:35 UTC (permalink / raw) To: Gleb Natapov; +Cc: kvm Gleb Natapov wrote: > Signed-off-by: Gleb Natapov <gleb@redhat.com> > This is actually not just a rewrite, but also a bugfix: > INTR_INFO); > @@ -3289,34 +3288,42 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx) > vmx->vnmi_blocked_time += > ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); > > + vmx->vcpu.arch.nmi_injected = false; > + kvm_clear_exception_queue(&vmx->vcpu); > + kvm_clear_interrupt_queue(&vmx->vcpu); > + > + if (!idtv_info_valid) > + return; > + > vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK; > type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK; > - if (vmx->vcpu.arch.nmi_injected) { > + > + switch(type) { > + case INTR_TYPE_NMI_INTR: > + vmx->vcpu.arch.nmi_injected = true; > /* > The existing code would leave nmi_injected == false if we exit on NMI_INTR, so we drop an NMI here. -- error compiling committee.c: too many arguments to function ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 2/4] Rewrite twisted maze of if() statements with more straightforward switch() 2009-03-30 7:35 ` Avi Kivity @ 2009-03-30 16:03 ` Jan Kiszka 0 siblings, 0 replies; 18+ messages in thread From: Jan Kiszka @ 2009-03-30 16:03 UTC (permalink / raw) To: Avi Kivity; +Cc: Gleb Natapov, kvm [-- Attachment #1: Type: text/plain, Size: 1231 bytes --] Avi Kivity wrote: > Gleb Natapov wrote: >> Signed-off-by: Gleb Natapov <gleb@redhat.com> >> > > This is actually not just a rewrite, but also a bugfix: > >> INTR_INFO); >> @@ -3289,34 +3288,42 @@ static void vmx_complete_interrupts(struct >> vcpu_vmx *vmx) >> vmx->vnmi_blocked_time += >> ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); >> >> + vmx->vcpu.arch.nmi_injected = false; >> + kvm_clear_exception_queue(&vmx->vcpu); >> + kvm_clear_interrupt_queue(&vmx->vcpu); >> + >> + if (!idtv_info_valid) >> + return; >> + >> vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK; >> type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK; >> - if (vmx->vcpu.arch.nmi_injected) { >> + >> + switch(type) { >> + case INTR_TYPE_NMI_INTR: >> + vmx->vcpu.arch.nmi_injected = true; >> /* >> > > The existing code would leave nmi_injected == false if we exit on > NMI_INTR, so we drop an NMI here. > I think NMI_INTR and nmi_injected always go together. However, the rework looks good and more logical to me, too. Will see that I can give this (more precisely -v2) a try with our scenarios ASAP. Jan [-- Attachment #2: OpenPGP digital signature --] [-- Type: application/pgp-signature, Size: 257 bytes --] ^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH 3/4] Do not zero idt_vectoring_info in vmx_complete_interrupts(). 2009-03-29 14:12 [PATCH 1/4] Fix handling of a fault during NMI unblocked due to IRET Gleb Natapov 2009-03-29 14:12 ` [PATCH 2/4] Rewrite twisted maze of if() statements with more straightforward switch() Gleb Natapov @ 2009-03-29 14:12 ` Gleb Natapov 2009-03-29 14:12 ` [PATCH 4/4] Fix task switching Gleb Natapov 2 siblings, 0 replies; 18+ messages in thread From: Gleb Natapov @ 2009-03-29 14:12 UTC (permalink / raw) To: avi; +Cc: kvm We will need it later in task_switch(). Code in handle_exception() is dead. is_external_interrupt(vect_info) will always be false since idt_vectoring_info is zeroed in vmx_complete_interrupts(). Signed-off-by: Gleb Natapov <gleb@redhat.com> --- arch/x86/kvm/vmx.c | 7 ------- 1 files changed, 0 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1017544..0da7a9e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2613,11 +2613,6 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) printk(KERN_ERR "%s: unexpected, vectoring info 0x%x " "intr info 0x%x\n", __func__, vect_info, intr_info); - if (!irqchip_in_kernel(vcpu->kvm) && is_external_interrupt(vect_info)) { - int irq = vect_info & VECTORING_INFO_VECTOR_MASK; - kvm_push_irq(vcpu, irq); - } - if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR) return 1; /* already handled by vmx_vcpu_run() */ @@ -3316,11 +3311,9 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx) kvm_queue_exception_e(&vmx->vcpu, vector, err); } else kvm_queue_exception(&vmx->vcpu, vector); - vmx->idt_vectoring_info = 0; break; case INTR_TYPE_EXT_INTR: kvm_queue_interrupt(&vmx->vcpu, vector); - vmx->idt_vectoring_info = 0; break; default: break; ^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH 4/4] Fix task switching. 2009-03-29 14:12 [PATCH 1/4] Fix handling of a fault during NMI unblocked due to IRET Gleb Natapov 2009-03-29 14:12 ` [PATCH 2/4] Rewrite twisted maze of if() statements with more straightforward switch() Gleb Natapov 2009-03-29 14:12 ` [PATCH 3/4] Do not zero idt_vectoring_info in vmx_complete_interrupts() Gleb Natapov @ 2009-03-29 14:12 ` Gleb Natapov 2009-03-30 7:39 ` Avi Kivity 2009-03-30 16:04 ` Jan Kiszka 2 siblings, 2 replies; 18+ messages in thread From: Gleb Natapov @ 2009-03-29 14:12 UTC (permalink / raw) To: avi; +Cc: kvm The patch fixes two problems with task switching. 1. Back link is written to a wrong TSS. 2. Instruction emulation is not needed if the reason for task switch is a task gate in IDT and access to it is caused by an external even. 2 is currently solved only for VMX since there is not reliable way to skip an instruction in SVM. We should emulate it instead. Signed-off-by: Gleb Natapov <gleb@redhat.com> --- arch/x86/include/asm/svm.h | 1 + arch/x86/kvm/svm.c | 25 ++++++++++++++++++------- arch/x86/kvm/vmx.c | 40 +++++++++++++++++++++++++++++----------- arch/x86/kvm/x86.c | 40 +++++++++++++++++++++++++++++++--------- 4 files changed, 79 insertions(+), 27 deletions(-) diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 82ada75..85574b7 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -225,6 +225,7 @@ struct __attribute__ ((__packed__)) vmcb { #define SVM_EVTINJ_VALID_ERR (1 << 11) #define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK +#define SVM_EXITINTINFO_TYPE_MASK SVM_EVTINJ_TYPE_MASK #define SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR #define SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1fcbc17..3ffb695 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1823,17 +1823,28 @@ static int task_switch_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { u16 tss_selector; + int reason; + int int_type = svm->vmcb->control.exit_int_info & + SVM_EXITINTINFO_TYPE_MASK; tss_selector = (u16)svm->vmcb->control.exit_info_1; + if (svm->vmcb->control.exit_info_2 & (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET)) - return kvm_task_switch(&svm->vcpu, tss_selector, - TASK_SWITCH_IRET); - if (svm->vmcb->control.exit_info_2 & - (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP)) - return kvm_task_switch(&svm->vcpu, tss_selector, - TASK_SWITCH_JMP); - return kvm_task_switch(&svm->vcpu, tss_selector, TASK_SWITCH_CALL); + reason = TASK_SWITCH_IRET; + else if (svm->vmcb->control.exit_info_2 & + (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP)) + reason = TASK_SWITCH_JMP; + else if (svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID) + reason = TASK_SWITCH_GATE; + else + reason = TASK_SWITCH_CALL; + + + if (reason != TASK_SWITCH_GATE || int_type == SVM_EXITINTINFO_TYPE_SOFT) + skip_emulated_instruction(&svm->vcpu); + + return kvm_task_switch(&svm->vcpu, tss_selector, reason); } static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0da7a9e..01db958 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3025,22 +3025,40 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long exit_qualification; u16 tss_selector; - int reason; + int reason, type, idt_v; + + idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK); + type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK); exit_qualification = vmcs_readl(EXIT_QUALIFICATION); reason = (u32)exit_qualification >> 30; - if (reason == TASK_SWITCH_GATE && vmx->vcpu.arch.nmi_injected && - (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && - (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK) - == INTR_TYPE_NMI_INTR) { - vcpu->arch.nmi_injected = false; - if (cpu_has_virtual_nmis()) - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, - GUEST_INTR_STATE_NMI); + if (reason == TASK_SWITCH_GATE && idt_v) { + switch (type) { + case INTR_TYPE_NMI_INTR: + vcpu->arch.nmi_injected = false; + if (cpu_has_virtual_nmis()) + vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, + GUEST_INTR_STATE_NMI); + break; + case INTR_TYPE_EXT_INTR: + kvm_clear_interrupt_queue(vcpu); + break; + case INTR_TYPE_HARD_EXCEPTION: + case INTR_TYPE_SOFT_EXCEPTION: + kvm_clear_exception_queue(vcpu); + break; + default: + break; + } } tss_selector = exit_qualification; + if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION && + type != INTR_TYPE_EXT_INTR && + type != INTR_TYPE_NMI_INTR)) + skip_emulated_instruction(vcpu); + if (!kvm_task_switch(vcpu, tss_selector, reason)) return 0; @@ -3292,8 +3310,8 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx) vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK; type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK; - - switch(type) { + + switch (type) { case INTR_TYPE_NMI_INTR: vmx->vcpu.arch.nmi_injected = true; /* diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ae4918c..573bb3f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3697,7 +3697,6 @@ static void save_state_to_tss32(struct kvm_vcpu *vcpu, tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS); tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS); tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR); - tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR); } static int load_state_from_tss32(struct kvm_vcpu *vcpu, @@ -3794,8 +3793,8 @@ static int load_state_from_tss16(struct kvm_vcpu *vcpu, } static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, - u32 old_tss_base, - struct desc_struct *nseg_desc) + u16 old_tss_sel, u32 old_tss_base, + struct desc_struct *nseg_desc) { struct tss_segment_16 tss_segment_16; int ret = 0; @@ -3814,6 +3813,16 @@ static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, &tss_segment_16, sizeof tss_segment_16)) goto out; + if (old_tss_sel != 0xffff) { + tss_segment_16.prev_task_link = old_tss_sel; + + if (kvm_write_guest(vcpu->kvm, + get_tss_base_addr(vcpu, nseg_desc), + &tss_segment_16.prev_task_link, + sizeof tss_segment_16.prev_task_link)) + goto out; + } + if (load_state_from_tss16(vcpu, &tss_segment_16)) goto out; @@ -3823,7 +3832,7 @@ out: } static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, - u32 old_tss_base, + u16 old_tss_sel, u32 old_tss_base, struct desc_struct *nseg_desc) { struct tss_segment_32 tss_segment_32; @@ -3843,6 +3852,16 @@ static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, &tss_segment_32, sizeof tss_segment_32)) goto out; + if (old_tss_sel != 0xffff) { + tss_segment_32.prev_task_link = old_tss_sel; + + if (kvm_write_guest(vcpu->kvm, + get_tss_base_addr(vcpu, nseg_desc), + &tss_segment_32.prev_task_link, + sizeof tss_segment_32.prev_task_link)) + goto out; + } + if (load_state_from_tss32(vcpu, &tss_segment_32)) goto out; @@ -3896,14 +3915,17 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT); } - kvm_x86_ops->skip_emulated_instruction(vcpu); + /* set back link to prev task only if NT bit is set in eflags + note that old_tss_sel is not used afetr this point */ + if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) + old_tss_sel = 0xffff; if (nseg_desc.type & 8) - ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_base, - &nseg_desc); + ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel, + old_tss_base, &nseg_desc); else - ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_base, - &nseg_desc); + ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_sel, + old_tss_base, &nseg_desc); if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { u32 eflags = kvm_x86_ops->get_rflags(vcpu); ^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: [PATCH 4/4] Fix task switching. 2009-03-29 14:12 ` [PATCH 4/4] Fix task switching Gleb Natapov @ 2009-03-30 7:39 ` Avi Kivity 2009-03-30 13:06 ` Gleb Natapov 2009-03-30 16:04 ` Jan Kiszka 1 sibling, 1 reply; 18+ messages in thread From: Avi Kivity @ 2009-03-30 7:39 UTC (permalink / raw) To: Gleb Natapov; +Cc: kvm Gleb Natapov wrote: > The patch fixes two problems with task switching. > 1. Back link is written to a wrong TSS. > 2. Instruction emulation is not needed if the reason for task switch > is a task gate in IDT and access to it is caused by an external even. > > 2 is currently solved only for VMX since there is not reliable way to > skip an instruction in SVM. We should emulate it instead. > > Looks good, but please split into (at least) two patches. Also please provide a test case so we don't regress again. -- error compiling committee.c: too many arguments to function ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 4/4] Fix task switching. 2009-03-30 7:39 ` Avi Kivity @ 2009-03-30 13:06 ` Gleb Natapov 0 siblings, 0 replies; 18+ messages in thread From: Gleb Natapov @ 2009-03-30 13:06 UTC (permalink / raw) To: Avi Kivity; +Cc: kvm On Mon, Mar 30, 2009 at 10:39:21AM +0300, Avi Kivity wrote: > Gleb Natapov wrote: >> The patch fixes two problems with task switching. >> 1. Back link is written to a wrong TSS. >> 2. Instruction emulation is not needed if the reason for task switch >> is a task gate in IDT and access to it is caused by an external even. >> >> 2 is currently solved only for VMX since there is not reliable way to >> skip an instruction in SVM. We should emulate it instead. >> >> > > Looks good, but please split into (at least) two patches. Also please > provide a test case so we don't regress again. > This what I am using for testing. After running make you should get kernel.bin that can be booted from grub. Runs on real HW too. I am planing to add more test. Signed-off-by: Gleb Natapov <gleb@redhat.com> diff --git a/user/test/x86/kvmtest/Makefile b/user/test/x86/kvmtest/Makefile new file mode 100644 index 0000000..b93935f --- /dev/null +++ b/user/test/x86/kvmtest/Makefile @@ -0,0 +1,33 @@ +CC=gcc +AS=gcc +CFLAGS=-m32 -I. -O2 -Wall +ASFLAGS=-m32 -I. +OBJS=kernel.o lib.o boot.o memory.o gdt.o idt.o isrs.o tss.o uart.o +ALLOBJS=$(OBJS) tests/tests.o + +PHONY := all +all: kernel.bin + $(MAKE) -C tests + +kernel.bin: $(ALLOBJS) kernel.ld + ld -T kernel.ld $(ALLOBJS) -o $@ + +install: kernel.bin + cp $< /boot/ + +tests/tests.o: + $(MAKE) -C tests + +-include $(OBJS:.o=.d) + +# compile and generate dependency info +%.o: %.c + gcc -c $(CFLAGS) $*.c -o $*.o + gcc -MM $(CFLAGS) $*.c > $*.d + +PHONY += clean +clean: + $(MAKE) -C tests + -rm *.o *~ *.d kernel.bin + +.PHONY: $(PHONY) diff --git a/user/test/x86/kvmtest/boot.S b/user/test/x86/kvmtest/boot.S new file mode 100644 index 0000000..f74015c --- /dev/null +++ b/user/test/x86/kvmtest/boot.S @@ -0,0 +1,357 @@ +/* boot.S - bootstrap the kernel */ +/* Copyright (C) 1999, 2001 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#define ASM 1 +#include <multiboot.h> +#include <kernel.h> + +.text + +.globl start, _start +start: +_start: +jmp multiboot_entry + +/* Align 32 bits boundary. */ +.align 4 + +/* Multiboot header. */ +multiboot_header: +/* magic */ +.long MULTIBOOT_HEADER_MAGIC +/* flags */ +.long MULTIBOOT_HEADER_FLAGS +/* checksum */ +.long -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS) +#ifndef __ELF__ + /* header_addr */ + .long multiboot_header + /* load_addr */ + .long _start + /* load_end_addr */ + .long _edata + /* bss_end_addr */ + .long _end + /* entry_addr */ + .long multiboot_entry +#endif /* ! __ELF__ */ + + multiboot_entry: + /* Initialize the stack pointer. */ + movl $(STACK_START), %esp + + /* Reset EFLAGS. */ + pushl $0 + popf + + /* Push the pointer to the Multiboot information structure. */ + pushl %ebx + /* Push the magic value. */ + pushl %eax + + /* Now enter the C main function... */ + call cmain + + /* Halt. */ + pushl $halt_message + pushl $0 + call printk + + loop: hlt + jmp loop + +.globl isr0 +.globl isr1 +.globl isr2 +.globl isr3 +.globl isr4 +.globl isr5 +.globl isr6 +.globl isr7 +.globl isr8 +.globl isr9 +.globl isr10 +.globl isr11 +.globl isr12 +.globl isr13 +.globl isr14 +.globl isr15 +.globl isr16 +.globl isr17 +.globl isr18 +.globl isr19 +.globl isr20 +.globl isr21 +.globl isr22 +.globl isr23 +.globl isr24 +.globl isr25 +.globl isr26 +.globl isr27 +.globl isr28 +.globl isr29 +.globl isr30 +.globl isr31 + +/* 0: Divide By Zero Exception */ +isr0: + cli + pushl $0 + pushl $0 + jmp isr_common_stub + +/* 1: Debug Exception */ +isr1: + cli + pushl $0 + pushl $1 + jmp isr_common_stub + +/* 2: Non Maskable Interrupt Exception */ +isr2: + cli + pushl $0 + pushl $2 + jmp isr_common_stub + +/* 3: Int 3 Exception */ +isr3: + cli + pushl $0 + pushl $3 + jmp isr_common_stub + +/* 4: INTO Exception */ +isr4: + cli + pushl $0 + pushl $4 + jmp isr_common_stub + +/* 5: Out of Bounds Exception */ +isr5: + cli + pushl $0 + pushl $5 + jmp isr_common_stub + +/* 6: Invalid Opcode Exception */ +isr6: + cli + pushl $0 + pushl $6 + jmp isr_common_stub + +/* 7: Coprocessor Not Available Exception */ +isr7: + cli + pushl $0 + pushl $7 + jmp isr_common_stub + +/* 8: Double Fault Exception (With Error Code!) */ +isr8: + cli + pushl $8 + jmp isr_common_stub + +/* 9: Coprocessor Segment Overrun Exception */ +isr9: + cli + pushl $0 + pushl $9 + jmp isr_common_stub + +/* 10: Bad TSS Exception (With Error Code!) */ +isr10: + cli + pushl $10 + jmp isr_common_stub + +/* 11: Segment Not Present Exception (With Error Code!) */ +isr11: + cli + pushl $11 + jmp isr_common_stub + +/* 12: Stack Fault Exception (With Error Code!) */ +isr12: + cli + pushl $12 + jmp isr_common_stub + +/* 13: General Protection Fault Exception (With Error Code!) */ +isr13: + cli + pushl $13 + jmp isr_common_stub + +/* 14: Page Fault Exception (With Error Code!) */ +isr14: + cli + pushl $14 + jmp isr_common_stub + +/* 15: Reserved Exception */ +isr15: + cli + pushl $0 + pushl $15 + jmp isr_common_stub + +/* 16: Floating Point Exception */ +isr16: + cli + pushl $0 + pushl $16 + jmp isr_common_stub + +/* 17: Alignment Check Exception */ +isr17: + cli + pushl $0 + pushl $17 + jmp isr_common_stub + +/* 18: Machine Check Exception */ +isr18: + cli + pushl $0 + pushl $18 + jmp isr_common_stub + +/* 19: Reserved */ +isr19: + cli + pushl $0 + pushl $19 + jmp isr_common_stub + +/* 20: Reserved */ +isr20: + cli + pushl $0 + pushl $20 + jmp isr_common_stub + +/* 21: Reserved */ +isr21: + cli + pushl $0 + pushl $21 + jmp isr_common_stub + +/* 22: Reserved */ +isr22: + cli + pushl $0 + pushl $22 + jmp isr_common_stub + +/* 23: Reserved */ +isr23: + cli + pushl $0 + pushl $23 + jmp isr_common_stub + +/* 24: Reserved */ +isr24: + cli + pushl $0 + pushl $24 + jmp isr_common_stub + +/* 25: Reserved */ +isr25: + cli + pushl $0 + pushl $25 + jmp isr_common_stub + +/* 26: Reserved */ +isr26: + cli + pushl $0 + pushl $26 + jmp isr_common_stub + +/* 27: Reserved */ +isr27: + cli + pushl $0 + pushl $27 + jmp isr_common_stub + +/* 28: Reserved */ +isr28: + cli + pushl $0 + pushl $28 + jmp isr_common_stub + +/* 29: Reserved */ +isr29: + cli + pushl $0 + pushl $29 + jmp isr_common_stub + +/* 30: Reserved */ +isr30: + cli + pushl $0 + pushl $30 + jmp isr_common_stub + +/* 31: Reserved */ +isr31: + cli + pushl $0 + pushl $31 + jmp isr_common_stub + + +/* This is our common ISR stub. It saves the processor state, sets + up for kernel mode segments, calls the C-level fault handler, + and finally restores the stack frame. */ +isr_common_stub: + pusha + pushl %ds + pushl %es + pushl %fs + pushl %gs + mov $0x10, %ax + mov %ax, %ds + mov %ax, %es + mov %ax, %fs + mov %ax, %gs + mov %esp, %eax + pushl %eax + call fault_handler + popl %eax + popl %gs + popl %fs + popl %es + popl %ds + popa + add $8, %esp + iret + +.data + halt_message: + .asciz "Halted.\n" diff --git a/user/test/x86/kvmtest/gdt.c b/user/test/x86/kvmtest/gdt.c new file mode 100644 index 0000000..38e5735 --- /dev/null +++ b/user/test/x86/kvmtest/gdt.c @@ -0,0 +1,84 @@ +#include <kernel.h> +#include <lib.h> + +/* Defines a GDT entry */ +struct gdt_entry +{ + uint16_t limit_low; + uint16_t base_low; + uint8_t base_middle; + uint8_t access; + uint8_t granularity; + uint8_t base_high; +} __attribute__((packed)); + +struct gdt_ptr +{ + uint16_t limit; + uint32_t base; +} __attribute__((packed)); + +/* GDT, with 5 entries: + * 0x00 - NULL descriptor + * 0x08 - Code segment + * 0x10 - Data segment + * 0x18 - Primery task + * 0x20 - Interrupt task */ +static struct gdt_entry gdt[3 + TSS_COUNT]; +static struct gdt_ptr gp; + +static void gdt_flush(const struct gdt_ptr *gp_ptr) +{ + asm volatile ("lgdt %0\n\t" + "mov $0x10, %%ax\n\t" + "mov %%ax, %%ds\n\t" + "mov %%ax, %%es\n\t" + "mov %%ax, %%fs\n\t" + "mov %%ax, %%gs\n\t" + "mov %%ax, %%ss\n\t" + "jmp $0x08, $.Lflush2\n\t" + ".Lflush2: "::"m"(*gp_ptr)); +} + +void gdt_set_gate(int num, uint32_t base, uint32_t limit, uint8_t access, uint8_t gran) +{ + /* Setup the descriptor base address */ + gdt[num].base_low = (base & 0xFFFF); + gdt[num].base_middle = (base >> 16) & 0xFF; + gdt[num].base_high = (base >> 24) & 0xFF; + + /* Setup the descriptor limits */ + gdt[num].limit_low = (limit & 0xFFFF); + gdt[num].granularity = ((limit >> 16) & 0x0F); + + /* Finally, set up the granularity and access flags */ + gdt[num].granularity |= (gran & 0xF0); + gdt[num].access = access; +} + +void gdt_install(void) +{ + /* Setup the GDT pointer and limit */ + gp.limit = sizeof(gdt) - 1; + gp.base = (uint32_t)&gdt; + + memset(gdt, 0, sizeof(gdt)); + + /* Our NULL descriptor */ + gdt_set_gate(0, 0, 0, 0, 0); + + /* The second entry is our Code Segment. The base address + * is 0, the limit is 4GBytes, it uses 4KByte granularity, + * uses 32-bit opcodes, and is a Code Segment descriptor. + * Please check the table above in the tutorial in order + * to see exactly what each value means */ + gdt_set_gate(1, 0, 0xFFFFFFFF, 0x9A, 0xcf); + + /* The third entry is our Data Segment. It's EXACTLY the + * same as our code segment, but the descriptor type in + * this entry's access byte says it's a Data Segment */ + gdt_set_gate(2, 0, 0xFFFFFFFF, 0x92, 0xcf); + + /* Flush out the old GDT and install the new changes! */ + gdt_flush(&gp); +} diff --git a/user/test/x86/kvmtest/idt.c b/user/test/x86/kvmtest/idt.c new file mode 100644 index 0000000..f2d9de7 --- /dev/null +++ b/user/test/x86/kvmtest/idt.c @@ -0,0 +1,47 @@ +#include <kernel.h> +#include <lib.h> + +/* Defines an IDT entry */ +struct idt_entry { + uint16_t base_lo; + uint16_t sel; + uint8_t always0; + uint8_t flags; + uint16_t base_hi; +} __attribute__((packed)); + +struct idt_ptr +{ + uint16_t limit; + uint32_t base; +} __attribute__((packed)); + +struct idt_entry idt[256]; +struct idt_ptr idtp; + + +static void idt_load(const struct idt_ptr *idtptr) +{ + asm volatile("lidt %0"::"m" (*idtptr)); +} + +void idt_set_gate(uint8_t num, uint32_t base, uint16_t sel, uint8_t flags) +{ + /* The interrupt routine's base address */ + idt[num].base_lo = (base & 0xFFFF); + idt[num].base_hi = (base >> 16) & 0xFFFF; + + idt[num].sel = sel; + idt[num].always0 = 0; + idt[num].flags = flags; +} + +void idt_install() +{ + idtp.limit = sizeof(idt) - 1; + idtp.base = (uint32_t)&idt; + + memset(&idt, 0, sizeof(idt)); + + idt_load(&idtp); +} diff --git a/user/test/x86/kvmtest/isrs.c b/user/test/x86/kvmtest/isrs.c new file mode 100644 index 0000000..3dcfe59 --- /dev/null +++ b/user/test/x86/kvmtest/isrs.c @@ -0,0 +1,112 @@ +#include <kernel.h> +#include <lib.h> + +/* This defines what the stack looks like after an ISR was running */ +struct regs +{ + unsigned int gs, fs, es, ds; /* pushed the segs last */ + unsigned int edi, esi, ebp, esp, ebx, edx, ecx, eax; /* pushed by 'pusha' */ + unsigned int int_no, err_code; /* our 'push byte #' and ecodes do this */ + unsigned int eip, cs, eflags, useresp, ss; /* pushed by the processor automatically */ +}; + +extern void isr0(); +extern void isr1(); +extern void isr2(); +extern void isr3(); +extern void isr4(); +extern void isr5(); +extern void isr6(); +extern void isr7(); +extern void isr8(); +extern void isr9(); +extern void isr10(); +extern void isr11(); +extern void isr12(); +extern void isr13(); +extern void isr14(); +extern void isr15(); +extern void isr16(); +extern void isr17(); +extern void isr18(); +extern void isr19(); +extern void isr20(); +extern void isr21(); +extern void isr22(); +extern void isr23(); +extern void isr24(); +extern void isr25(); +extern void isr26(); +extern void isr27(); +extern void isr28(); +extern void isr29(); +extern void isr30(); +extern void isr31(); + +void (*isrs[32])() = {isr0, isr1, isr2, isr3, isr4, isr5, isr6, isr7, isr8, + isr9, isr10, isr11, isr12, isr13, isr14, isr15, isr16, + isr17, isr18, isr19, isr20, isr21, isr22, isr23, isr24, + isr25, isr26, isr27, isr28, isr29, isr30, isr31}; + +void isrs_install_one(uint8_t isr) +{ + idt_set_gate(isr, (uint32_t)isrs[isr], 0x08, 0x8E); +} + +void isrs_install(void) +{ + int i; + + for (i = 0; i < 32; i++) + isrs_install_one(i); +} + +char *exception_messages[] = +{ + "Division By Zero", + "Debug", + "Non Maskable Interrupt", + "Breakpoint", + "Into Detected Overflow", + "Out of Bounds", + "Invalid Opcode", + "No Coprocessor", + + "Double Fault", + "Coprocessor Segment Overrun", + "Bad TSS", + "Segment Not Present", + "Stack Fault", + "General Protection Fault", + "Page Fault", + "Unknown Interrupt", + + "Coprocessor Fault", + "Alignment Check", + "Machine Check", + "Reserved", + "Reserved", + "Reserved", + "Reserved", + "Reserved", + + "Reserved", + "Reserved", + "Reserved", + "Reserved", + "Reserved", + "Reserved", + "Reserved", + "Reserved" +}; + +void fault_handler(struct regs *r) +{ + if (r->int_no < 32) + { + printk(0, exception_messages[r->int_no]); + printk(0, "Exception. System Halted!\n"); + for (;;) + asm volatile ("hlt"); + } +} diff --git a/user/test/x86/kvmtest/kernel.c b/user/test/x86/kvmtest/kernel.c new file mode 100644 index 0000000..1920af7 --- /dev/null +++ b/user/test/x86/kvmtest/kernel.c @@ -0,0 +1,194 @@ +/* kernel.c - the C part of the kernel */ +/* Copyright (C) 1999 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include <kernel.h> +#include <multiboot.h> +#include <lib.h> +#include <memory.h> + +/* Macros. */ + +/* Check if the bit BIT in FLAGS is set. */ +#define CHECK_FLAG(flags,bit) ((flags) & (1 << (bit))) + +/* Forward declarations. */ +void cmain (unsigned long magic, unsigned long addr); + +#if 0 +/* Check if MAGIC is valid and print the Multiboot information structure + pointed by ADDR. */ +static void print_boot_info(unsigned long magic, unsigned long addr) +{ + multiboot_info_t *mbi; + + /* Am I booted by a Multiboot-compliant boot loader? */ + if (magic != MULTIBOOT_BOOTLOADER_MAGIC) + { + printk ("Invalid magic number: 0x%x\n", (unsigned) magic); + return; + } + + /* Set MBI to the address of the Multiboot information structure. */ + mbi = (multiboot_info_t *) addr; + + /* Print out the flags. */ + printk ("flags = 0x%x\n", (unsigned) mbi->flags); + + /* Are mem_* valid? */ + if (CHECK_FLAG (mbi->flags, 0)) + printk ("mem_lower = %uKB, mem_upper = %uKB\n", + (unsigned) mbi->mem_lower, (unsigned) mbi->mem_upper); + + /* Is boot_device valid? */ + if (CHECK_FLAG (mbi->flags, 1)) + printk ("boot_device = 0x%x\n", (unsigned) mbi->boot_device); + + /* Is the command line passed? */ + if (CHECK_FLAG (mbi->flags, 2)) + printk ("cmdline = %s\n", (char *) mbi->cmdline); + + /* Are mods_* valid? */ + if (CHECK_FLAG (mbi->flags, 3)) + { + module_t *mod; + int i; + + printk ("mods_count = %d, mods_addr = 0x%x\n", + (int) mbi->mods_count, (int) mbi->mods_addr); + for (i = 0, mod = (module_t *) mbi->mods_addr; + i < mbi->mods_count; + i++, mod++) + printk (" mod_start = 0x%x, mod_end = 0x%x, string = %s\n", + (unsigned) mod->mod_start, + (unsigned) mod->mod_end, + (char *) mod->string); + } + + /* Bits 4 and 5 are mutually exclusive! */ + if (CHECK_FLAG (mbi->flags, 4) && CHECK_FLAG (mbi->flags, 5)) + { + printk ("Both bits 4 and 5 are set.\n"); + return; + } + + /* Is the symbol table of a.out valid? */ + if (CHECK_FLAG (mbi->flags, 4)) + { + aout_symbol_table_t *aout_sym = &(mbi->u.aout_sym); + + printk ("aout_symbol_table: tabsize = 0x%0x, " + "strsize = 0x%x, addr = 0x%x\n", + (unsigned) aout_sym->tabsize, + (unsigned) aout_sym->strsize, + (unsigned) aout_sym->addr); + } + + /* Is the section header table of ELF valid? */ + if (CHECK_FLAG (mbi->flags, 5)) + { + elf_section_header_table_t *elf_sec = &(mbi->u.elf_sec); + + printk ("elf_sec: num = %u, size = 0x%x," + " addr = 0x%x, shndx = 0x%x\n", + (unsigned) elf_sec->num, (unsigned) elf_sec->size, + (unsigned) elf_sec->addr, (unsigned) elf_sec->shndx); + } + + /* Are mmap_* valid? */ + if (CHECK_FLAG (mbi->flags, 6)) + { + memory_map_t *mmap; + + printk ("mmap_addr = 0x%x, mmap_length = 0x%x\n", + (unsigned) mbi->mmap_addr, (unsigned) mbi->mmap_length); + for (mmap = (memory_map_t *) mbi->mmap_addr; + (unsigned long) mmap < mbi->mmap_addr + mbi->mmap_length; + mmap = (memory_map_t *) ((unsigned long) mmap + + mmap->size + sizeof (mmap->size))) + printk (" size = 0x%x, base_addr = 0x%x%x," + " length = 0x%x%x, type = 0x%x\n", + (unsigned) mmap->size, + (unsigned) mmap->base_addr_high, + (unsigned) mmap->base_addr_low, + (unsigned) mmap->length_high, + (unsigned) mmap->length_low, + (unsigned) mmap->type); + } +} +#endif + +uint16_t verbosity; + +static void cmd_parse(multiboot_info_t *mbi) +{ + char *v; + + if (!CHECK_FLAG (mbi->flags, 2)) + return; + + v = strstr((char*)mbi->cmdline, "v="); + + if (!v) + return; + + verbosity = atoi(v + 2); + printk(3, "cmdline = %s\n", (char *) mbi->cmdline); +} + +extern struct test_desc __tests_start[], __tests_end[]; + +static void run_tests(void) +{ + struct test_desc *test; + + for (test = __tests_start; test < __tests_end; test++) { + int r; + printk(0, "Start test: %s\n", test->name); + r = test->fn(); + if (r < 0) { + printk(0, "Critical failure. Exiting.\n"); + return; + } else if (r == TEST_FAIL) + printk(0, "Test fails\n"); + else + printk(0, "Test Succeeds\n"); + } +} + +void cmain(unsigned long magic, unsigned long addr) +{ + cmd_parse((multiboot_info_t*)addr); + + gdt_install(); + tss_install(); + uart_init(); + + kalloc_init(_kernel_end, KALLOC_SIZE); + + /* Clear the screen. */ + cls (); + + printk(3, "kernel start=0x%x end=0x%x\n", _kernel_start, _kernel_end); + printk(3, "kalloc_size=%d\n", KALLOC_SIZE); + + idt_install(); + isrs_install(); + + run_tests(); +/* print_boot_info(magic, addr); */ +} + diff --git a/user/test/x86/kvmtest/kernel.h b/user/test/x86/kvmtest/kernel.h new file mode 100644 index 0000000..a2df4a2 --- /dev/null +++ b/user/test/x86/kvmtest/kernel.h @@ -0,0 +1,68 @@ +#ifndef _KERNEL_H +#define _KERNEL_H +#ifndef ASM +#include <stdint.h> + + +typedef uint32_t size_t; +#define NULL ((void*)0) + +/* 1M for dynamic memory management */ +#define KALLOC_SIZE (1024*1024 - (_kernel_end - _kernel_start)) +#define TSS_COUNT 4 +#define TSS_GDT_OFFSET 3 + +extern char _kernel_start[], _kernel_end[]; + +static inline void outb(int addr, int val) +{ + asm volatile ("outb %b1, %w0" : : "d" (addr), "a" (val)); +} + +static inline uint8_t inb(int addr) +{ + uint8_t val; + asm volatile ("inb %w1, %b0" : "=a" (val) : "d" (addr)); + return val; +} + +void gdt_install(void); +void gdt_set_gate(int num, uint32_t base, uint32_t limit, uint8_t access, + uint8_t gran); +void idt_install(void); +void idt_set_gate(uint8_t num, uint32_t base, uint16_t sel, uint8_t flags); + +void isrs_install(void); +void isrs_install_one(uint8_t isr); + +void tss_install(void); +void tss_setup(uint8_t gate, uint8_t desc, void (*fn)(void)); +void tss_info(void); + +void uart_init(void); +void serial_outch(char c); +char serial_inch(void); + +#define TEST_FAIL 0 +#define TEST_SUCCEED 1 +#define TEST_FAIL_EXIT -1 +typedef int (*testfn_t)(void); + +struct test_desc { + char *name; + testfn_t fn; +}; + +#define define_test(_fn, _name) \ + static struct test_desc __test_##fn __attribute__((__used__)) \ + __attribute__((__section__(".tests"))) = {.name = _name, .fn = _fn} + +extern uint16_t verbosity; + +#endif /* ASM */ + +/* put stack somewhere in low memory */ +#define STACK_START 0x80000 +#define INT_STACK_START 0x70000 + +#endif diff --git a/user/test/x86/kvmtest/kernel.ld b/user/test/x86/kvmtest/kernel.ld new file mode 100644 index 0000000..4be48e3 --- /dev/null +++ b/user/test/x86/kvmtest/kernel.ld @@ -0,0 +1,28 @@ +OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") +OUTPUT_ARCH(i386) +ENTRY(start) +SECTIONS +{ + . = 0x100000; + _kernel_start = .; + .text : { + *(.text) + } + .rodata : { + *(.rodata) + } + .data : { + *(.data) + } + .bss : { + *(.bss) + } + __tests_start = .; + .tests : { + *(.tests) + } + __tests_end = .; + . = ALIGN(4096); + _kernel_end = .; + /DISCARD/ : { *(.comment) } +} \ No newline at end of file diff --git a/user/test/x86/kvmtest/lib.c b/user/test/x86/kvmtest/lib.c new file mode 100644 index 0000000..e9a86ca --- /dev/null +++ b/user/test/x86/kvmtest/lib.c @@ -0,0 +1,296 @@ +#include <kernel.h> +#include <lib.h> + +/* Some screen stuff. */ +/* The number of columns. */ +#define COLUMNS 80 +/* The number of lines. */ +#define LINES 24 +/* The attribute of an character. */ +#define ATTRIBUTE 7 +/* The video memory address. */ +#define VIDEO 0xB8000 + +/* Variables. */ +/* Save the X position. */ +static int xpos; +/* Save the Y position. */ +static int ypos; +/* Point to the video memory. */ +static volatile unsigned char *video; + +/* Clear the screen and initialize VIDEO, XPOS and YPOS. */ +void cls (void) +{ + int i; + + video = (unsigned char *) VIDEO; + + for (i = 0; i < COLUMNS * LINES * 2; i++) + *(video + i) = 0; + + xpos = 0; + ypos = 0; +} + +/* Convert the integer D to a string and save the string in BUF. If + BASE is equal to 'd', interpret that D is decimal, and if BASE is + equal to 'x', interpret that D is hexadecimal. */ +void itoa (char *buf, int base, int d) +{ + char *p = buf; + char *p1, *p2; + unsigned long ud = d; + int divisor = 10; + + /* If %d is specified and D is minus, put `-' in the head. */ + if (base == 'd' && d < 0) + { + *p++ = '-'; + buf++; + ud = -d; + } + else if (base == 'x') + divisor = 16; + + /* Divide UD by DIVISOR until UD == 0. */ + do + { + int remainder = ud % divisor; + + *p++ = (remainder < 10) ? remainder + '0' : remainder + 'a' - 10; + } + while (ud /= divisor); + + /* Terminate BUF. */ + *p = 0; + + /* Reverse BUF. */ + p1 = buf; + p2 = p - 1; + while (p1 < p2) + { + char tmp = *p1; + *p1 = *p2; + *p2 = tmp; + p1++; + p2--; + } +} + +int isdigit(int c) +{ + return c >= '0' && c <='9'; +} + +int isupper(int c) +{ + return c >= 'A' && c <= 'Z'; +} + +int islower(int c) +{ + return c >= 'a' && c <= 'z'; +} + +int atoi_base(const char *buf, int base) +{ + int minus,val,digit,base_1; + char c; + + base_1 = base - 1; + + if((minus = *buf == '-')) + buf++; + + val = 0; + while ((c = *buf++)) { + if (isdigit(c)) + digit = c - 48; + else if (isupper(c)) + digit = c - 'A' + 10; + else if (islower(c)) + digit = c - 'a' + 10; + else + break; + + if (digit < 0 || digit > base_1) + break; + + val = base*val + digit; + } + + return minus ? -val : val; +} + +/* Put the character C on the screen. */ +static void kputchar (int c) +{ +#ifdef QEMU + outb(0x504, c); +#endif + serial_outch(c); + + if (c == '\n' || c == '\r') + { +newline: + xpos = 0; + ypos++; + if (ypos >= LINES) + ypos = 0; + return; + } + + *(video + (xpos + ypos * COLUMNS) * 2) = c & 0xFF; + *(video + (xpos + ypos * COLUMNS) * 2 + 1) = ATTRIBUTE; + + xpos++; + if (xpos >= COLUMNS) + goto newline; +} + +/* Format a string and print it on the screen, just like the libc + function printf. */ +void printk (uint16_t level, const char *format, ...) +{ + char **arg = (char **) &format; + int c; + char buf[20]; + + if (level > verbosity) + return; + + arg++; + + while ((c = *format++) != 0) + { + if (c != '%') + kputchar (c); + else + { + char *p; + + c = *format++; + switch (c) + { + case 'd': + case 'u': + case 'x': + itoa (buf, c, *((int *) arg++)); + p = buf; + goto string; + break; + + case 's': + p = *arg++; + if (! p) + p = "(null)"; + +string: + while (*p) + kputchar (*p++); + break; + + default: + kputchar (*((int *) arg++)); + break; + } + } + } +} + +void *memset(void *s, int c, size_t n) +{ + char *ss = s; + int i; + + for (i = 0; i < n; i++) + ss[i] = c; + + return s; +} + +char *strstr(const char *phaystack, const char *pneedle) +{ + const unsigned char *haystack, *needle; + char b, c; + + haystack = (const unsigned char *) phaystack; + needle = (const unsigned char *) pneedle; + + b = *needle; + if (b != '\0') + { + haystack--; /* possible ANSI violation */ + do + { + c = *++haystack; + if (c == '\0') + goto ret0; + } + while (c != b); + + c = *++needle; + if (c == '\0') + goto foundneedle; + ++needle; + goto jin; + + for (;;) + { + char a; + const unsigned char *rhaystack, *rneedle; + + do + { + a = *++haystack; + if (a == '\0') + goto ret0; + if (a == b) + break; + a = *++haystack; + if (a == '\0') + goto ret0; + shloop:; + } + while (a != b); + + jin: + a = *++haystack; + if (a == '\0') + goto ret0; + + if (a != c) + goto shloop; + + rhaystack = haystack-- + 1; + rneedle = needle; + a = *rneedle; + + if (*rhaystack == a) + do + { + if (a == '\0') + goto foundneedle; + ++rhaystack; + a = *++needle; + if (*rhaystack != a) + break; + if (a == '\0') + goto foundneedle; + ++rhaystack; + a = *++needle; + } + while (*rhaystack == a); + + needle = rneedle; /* took the register-poor approach */ + + if (a == '\0') + break; + } + } +foundneedle: + return (char*) haystack; +ret0: + return 0; +} diff --git a/user/test/x86/kvmtest/lib.h b/user/test/x86/kvmtest/lib.h new file mode 100644 index 0000000..b00849c --- /dev/null +++ b/user/test/x86/kvmtest/lib.h @@ -0,0 +1,13 @@ +#ifndef _LIB_H +#define _LIB_H +void cls(void); +void itoa(char *buf, int base, int d); +void printk(uint16_t level, const char *format, ...); +void *memset(void *s, int c, size_t n); +char *strstr(const char *phaystack, const char *pneedle); +int atoi_base(const char *buf, int base); +static inline int atoi(const char *buf) +{ + return atoi_base(buf, 10); +} +#endif diff --git a/user/test/x86/kvmtest/memory.c b/user/test/x86/kvmtest/memory.c new file mode 100644 index 0000000..1f0e7f2 --- /dev/null +++ b/user/test/x86/kvmtest/memory.c @@ -0,0 +1,100 @@ +#include <kernel.h> +#include <memory.h> + +#define ALIGNMASK 1U +#define ALIGN(s) (((s) + ALIGNMASK) & ~ALIGNMASK) +#define POFF ALIGN(sizeof(size_t)) +#define MINSIZ ALIGN(sizeof(struct cell *)) +#define C2P(c) ((char *)(c) + POFF) +#define P2C(p) ((struct cell *)((char *)(p) - POFF)) +#define ISADJ(c1,c2) ((struct cell *)(C2P(c1) + (c1)->size) == (struct cell *)(c2)) + +struct cell { + size_t size; + struct cell *next; +}; + +static struct mem_pool { + struct cell *tail; +} *kmem; + +void kalloc_init(void *mem, size_t size) +{ + kmem->tail = mem; + kmem->tail->size = size - POFF; + kmem->tail->next = kmem->tail; +} + +void *kalloc(size_t size) +{ + struct cell *c1, *c2, *c3; + + size = size < MINSIZ ? MINSIZ : ALIGN(size); + + c1 = kmem->tail; + while (c1->next->size < size) { + if (c1->next == kmem->tail) + return NULL; + c1 = c1->next; + } + c2 = c1->next; + if (c2->size > (POFF + size)) { /* split new cell */ + c3 = (struct cell *)(C2P(c2) + size); + c3->size = c2->size - (size + POFF); + c3->next = c2->next; + c2->size = size; + c1->next = c3; + } else { /* use the entire cell */ + c1->next = c2->next; + if (c2 == kmem->tail) { + kmem->tail = c1; + } + } + + return C2P(c2); +} + +void kfree(void *ptr) +{ + struct cell *c1, *c2, *c3; + int j1, j2; + +/* splice the cell back into the list */ + c1 = kmem->tail; + c2 = P2C(ptr); + + if (c2 > c1) { /* append to end of list */ + if (ISADJ(c1,c2)) { /* join with last cell */ + c1->size += POFF + c2->size; + return; + } + c2->next = c1->next; + c1->next = c2; + kmem->tail = c2; + return; + } + + while (c1->next < c2) { /* find insertion point */ + c1 = c1->next; + } + c3 = c1->next; + + j1 = ISADJ(c1,c2); /* c1 and c2 need to be joined */ + j2 = ISADJ(c2,c3); /* c2 and c3 need to be joined */ + + if (j1) { + if (j2) { /* splice all three cells together */ + c1->next = c3->next; + c1->size += POFF + c3->size; + } + c1->size += POFF + c2->size; + } else { + c1->next = c2; + if (j2) { + c2->next = c3->next; + c2->size += POFF + c3->size; + } else { + c2->next = c3; + } + } +} diff --git a/user/test/x86/kvmtest/memory.h b/user/test/x86/kvmtest/memory.h new file mode 100644 index 0000000..9a0c984 --- /dev/null +++ b/user/test/x86/kvmtest/memory.h @@ -0,0 +1,6 @@ +#ifndef _MEMORY_H +#define _MEMORY_H +void kalloc_init(void *mem, size_t size); +void *kalloc(size_t size); +void kfree(void *ptr); +#endif diff --git a/user/test/x86/kvmtest/multiboot.h b/user/test/x86/kvmtest/multiboot.h new file mode 100644 index 0000000..85a4605 --- /dev/null +++ b/user/test/x86/kvmtest/multiboot.h @@ -0,0 +1,109 @@ +/* multiboot.h - the header for Multiboot */ +/* Copyright (C) 1999, 2001 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* Macros. */ + +/* The magic number for the Multiboot header. */ +#define MULTIBOOT_HEADER_MAGIC 0x1BADB002 + +/* The flags for the Multiboot header. */ +#ifdef __ELF__ +# define MULTIBOOT_HEADER_FLAGS 0x00000003 +#else +# define MULTIBOOT_HEADER_FLAGS 0x00010003 +#endif + +/* The magic number passed by a Multiboot-compliant boot loader. */ +#define MULTIBOOT_BOOTLOADER_MAGIC 0x2BADB002 + +#ifndef ASM +/* Do not include here in boot.S. */ + +/* Types. */ + +/* The Multiboot header. */ +typedef struct multiboot_header +{ + unsigned long magic; + unsigned long flags; + unsigned long checksum; + unsigned long header_addr; + unsigned long load_addr; + unsigned long load_end_addr; + unsigned long bss_end_addr; + unsigned long entry_addr; +} multiboot_header_t; + +/* The symbol table for a.out. */ +typedef struct aout_symbol_table +{ + unsigned long tabsize; + unsigned long strsize; + unsigned long addr; + unsigned long reserved; +} aout_symbol_table_t; + +/* The section header table for ELF. */ +typedef struct elf_section_header_table +{ + unsigned long num; + unsigned long size; + unsigned long addr; + unsigned long shndx; +} elf_section_header_table_t; + +/* The Multiboot information. */ +typedef struct multiboot_info +{ + unsigned long flags; + unsigned long mem_lower; + unsigned long mem_upper; + unsigned long boot_device; + unsigned long cmdline; + unsigned long mods_count; + unsigned long mods_addr; + union + { + aout_symbol_table_t aout_sym; + elf_section_header_table_t elf_sec; + } u; + unsigned long mmap_length; + unsigned long mmap_addr; +} multiboot_info_t; + +/* The module structure. */ +typedef struct module +{ + unsigned long mod_start; + unsigned long mod_end; + unsigned long string; + unsigned long reserved; +} module_t; + +/* The memory map. Be careful that the offset 0 is base_addr_low + but no size. */ +typedef struct memory_map +{ + unsigned long size; + unsigned long base_addr_low; + unsigned long base_addr_high; + unsigned long length_low; + unsigned long length_high; + unsigned long type; +} memory_map_t; + +#endif /* ! ASM */ diff --git a/user/test/x86/kvmtest/tests/Makefile b/user/test/x86/kvmtest/tests/Makefile new file mode 100644 index 0000000..751d352 --- /dev/null +++ b/user/test/x86/kvmtest/tests/Makefile @@ -0,0 +1,22 @@ +CC=gcc +AS=gcc +CFLAGS=-m32 -I. -I.. -O2 -Wall +ASFLAGS=-m32 -I. -I.. +OBJS=tss_test.o + +PHONY := all +all: tests.o + +tests.o: $(OBJS) + ld -m elf_i386 -r $(OBJS) -o $@ + +-include $(OBJS:.o=.d) + +# compile and generate dependency info +%.o: %.c + gcc -c $(CFLAGS) $*.c -o $*.o + gcc -MM $(CFLAGS) $*.c > $*.d + +PHONY += clean +clean: + -rm *.o *~ *.d diff --git a/user/test/x86/kvmtest/tests/tss_test.c b/user/test/x86/kvmtest/tests/tss_test.c new file mode 100644 index 0000000..835dc0b --- /dev/null +++ b/user/test/x86/kvmtest/tests/tss_test.c @@ -0,0 +1,74 @@ +#include <kernel.h> +#include <lib.h> + + +static void nmi_tss(void) +{ +start: + printk(0, "NMI task is running\n"); + tss_info(); + asm volatile ("iret"); + printk(0, "NMI task restart after iret.\n"); + goto start; +} + +static int test_divider; + +static void de_tss(void) +{ +start: + printk(0, "DE task is running\n"); + tss_info(); + test_divider = 10; + asm volatile ("iret"); + goto start; +} + +static void of_tss(void) +{ +start: + printk(0, "OF task is running\n"); + tss_info(); + asm volatile ("iret"); + goto start; +} + +static int tss_test(void) +{ + int ret = TEST_SUCCEED, res; + + tss_setup(2, 1, nmi_tss); + tss_setup(0, 2, de_tss); + tss_setup(4, 3, of_tss); + + printk(0, "Triggering nmi\n"); + asm volatile ("int $2"); + printk(0, "Return from nmi 1\n"); + asm volatile ("int $2"); + printk(0, "Return from nmi 2\n"); + printk(0, "Try to devide by 0\n"); + res = 1500 / test_divider; + printk(0, "Result is %d\n", res); + if (res != 150) { + ret = TEST_FAIL; + goto restore_isrs; + } + printk(0, "Call int 0\n"); + asm volatile ("int $0"); + printk(0, "Return from int 0\n"); + printk(0, "Call into\n"); + asm volatile ("addb $127, %b0\ninto"::"a"(127)); + printk(0, "Return from into\n"); + printk(0, "Calling nmi task by lcall\n"); + asm volatile("lcall $0x20, $0"); + printk(0, "Return from call\n"); + +restore_isrs: + isrs_install_one(0); + isrs_install_one(2); + isrs_install_one(4); + + return ret; +} + +define_test(tss_test, "TSS test"); diff --git a/user/test/x86/kvmtest/tss.c b/user/test/x86/kvmtest/tss.c new file mode 100644 index 0000000..1d73531 --- /dev/null +++ b/user/test/x86/kvmtest/tss.c @@ -0,0 +1,108 @@ +#include <kernel.h> +#include <lib.h> + +struct tss_desc { + uint16_t link; + uint16_t link_h; + + uint32_t esp0; + uint16_t ss0; + uint16_t ss0_h; + + uint32_t esp1; + uint16_t ss1; + uint16_t ss1_h; + + uint32_t esp2; + uint16_t ss2; + uint16_t ss2_h; + + uint32_t cr3; + uint32_t eip; + uint32_t eflags; + + uint32_t eax; + uint32_t ecx; + uint32_t edx; + uint32_t ebx; + + uint32_t esp; + uint32_t ebp; + + uint32_t esi; + uint32_t edi; + + uint16_t es; + uint16_t es_h; + + uint16_t cs; + uint16_t cs_h; + + uint16_t ss; + uint16_t ss_h; + + uint16_t ds; + uint16_t ds_h; + + uint16_t fs; + uint16_t fs_h; + + uint16_t gs; + uint16_t gs_h; + + uint16_t ldt; + uint16_t ldt_h; + + uint16_t trap; + uint16_t iomap; +} __attribute__ ((packed)); + +static struct tss_desc tss[TSS_COUNT]; + +void tss_install(void) +{ + uint16_t desc_size = sizeof(struct tss_desc); + int i; + + for (i = 0; i < TSS_COUNT; i++) { + tss[i].ss0 = tss[i].ss1 = tss[i].ss2 = 0x10; + tss[i].esp0 = tss[i].esp1 = tss[i].esp2 = INT_STACK_START; + tss[i].cs = 0x08; + tss[i].ds = tss[i].es = tss[i].fs = tss[i].gs = tss[i].ss =0x10; + tss[i].esp = INT_STACK_START; + tss[i].iomap = (uint16_t)desc_size; + gdt_set_gate(TSS_GDT_OFFSET + i, (uint32_t)&tss[i], + desc_size - 1, 0x89, 0x0f); + } + + tss[0].esp0 = tss[0].esp1 = tss[0].esp2 = tss[0].esp = STACK_START; + + asm volatile ( "ltr %%ax" : : "a" ( 0x18 ) ); +} + +void tss_setup(uint8_t gate, uint8_t i, void (*fn)(void)) +{ + if (i >= TSS_COUNT) { + printk(0, "Try to setup TSS out if bound %d\n", tss); + return; + } + tss[i].eip = (uint32_t)fn; + printk(2, "TSS set gate %d to TSS %x\n", gate, + (i + TSS_GDT_OFFSET) << 3); + idt_set_gate(gate, 0, (i + TSS_GDT_OFFSET) << 3, 0x85); +} + +void tss_info(void) +{ + uint16_t tr, i; + + asm volatile ("str %0":"=r"(tr)); + + i = (tr >> 3) - TSS_GDT_OFFSET; + + if (i >= TSS_COUNT) + printk(0, "Current TR %x is wrong!\n", tr); + + printk(0, "TR=%x Main TSS back link %x. Current TSS back link %x\n", + tr, tss[0]. link, tss[i].link); +} diff --git a/user/test/x86/kvmtest/uart.c b/user/test/x86/kvmtest/uart.c new file mode 100644 index 0000000..d9661d1 --- /dev/null +++ b/user/test/x86/kvmtest/uart.c @@ -0,0 +1,43 @@ +#include <kernel.h> + +#define PORT1 0x3F8 +#define PORT2 0x2F8 +#define PORT3 0x3E8 +#define PORT4 0x2E8 + +#define BAUD38400 0x03 +#define BAUD115200 0x01 +#define BAUD57600 0x02 +#define BAUD19200 0x06 +#define BAUD9600 0x0C +#define BAUD4800 0x18 +#define BAUD2400 0x30 + +void uart_init(void) +{ + outb(PORT1 + 1 , 0); /* Turn off interrupts */ + + outb(PORT1 + 3 , 0x80); /* SET DLAB ON */ + /* Set Baud rate - Divisor Latch Low Byte */ + outb(PORT1 + 0 , BAUD38400); + /* Set Baud rate - Divisor Latch High Byte */ + outb(PORT1 + 1 , 0x00); + outb(PORT1 + 3 , 0x03); /* 8 Bits, No Parity, 1 Stop Bit */ + outb(PORT1 + 2 , 0xC7); /* FIFO Control Register */ + outb(PORT1 + 4 , 0x0B); /* Turn on DTR, RTS, and OUT2 */ +} + +void serial_outch(char c) +{ + outb(PORT1, c); +} + +char serial_inch(void) +{ + char c; + do { + c = inb(PORT1 + 5); + if (c & 1) + return inb(PORT1); + } while(1); +} -- Gleb. ^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: [PATCH 4/4] Fix task switching. 2009-03-29 14:12 ` [PATCH 4/4] Fix task switching Gleb Natapov 2009-03-30 7:39 ` Avi Kivity @ 2009-03-30 16:04 ` Jan Kiszka 2009-03-30 16:21 ` Gleb Natapov 2009-03-31 9:03 ` Kohl, Bernhard (NSN - DE/Munich) 1 sibling, 2 replies; 18+ messages in thread From: Jan Kiszka @ 2009-03-30 16:04 UTC (permalink / raw) To: Gleb Natapov Cc: avi, kvm, Bernhard Kohl, Ostler, Thomas (NSN - DE/Munich), bliitz [-- Attachment #1: Type: text/plain, Size: 8674 bytes --] Gleb Natapov wrote: > The patch fixes two problems with task switching. > 1. Back link is written to a wrong TSS. > 2. Instruction emulation is not needed if the reason for task switch > is a task gate in IDT and access to it is caused by an external even. > > 2 is currently solved only for VMX since there is not reliable way to > skip an instruction in SVM. We should emulate it instead. Does this series fix all issues Bernhard, Thomas and Julian stumbled over? Jan > > Signed-off-by: Gleb Natapov <gleb@redhat.com> > --- > > arch/x86/include/asm/svm.h | 1 + > arch/x86/kvm/svm.c | 25 ++++++++++++++++++------- > arch/x86/kvm/vmx.c | 40 +++++++++++++++++++++++++++++----------- > arch/x86/kvm/x86.c | 40 +++++++++++++++++++++++++++++++--------- > 4 files changed, 79 insertions(+), 27 deletions(-) > > diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h > index 82ada75..85574b7 100644 > --- a/arch/x86/include/asm/svm.h > +++ b/arch/x86/include/asm/svm.h > @@ -225,6 +225,7 @@ struct __attribute__ ((__packed__)) vmcb { > #define SVM_EVTINJ_VALID_ERR (1 << 11) > > #define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK > +#define SVM_EXITINTINFO_TYPE_MASK SVM_EVTINJ_TYPE_MASK > > #define SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR > #define SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c > index 1fcbc17..3ffb695 100644 > --- a/arch/x86/kvm/svm.c > +++ b/arch/x86/kvm/svm.c > @@ -1823,17 +1823,28 @@ static int task_switch_interception(struct vcpu_svm *svm, > struct kvm_run *kvm_run) > { > u16 tss_selector; > + int reason; > + int int_type = svm->vmcb->control.exit_int_info & > + SVM_EXITINTINFO_TYPE_MASK; > > tss_selector = (u16)svm->vmcb->control.exit_info_1; > + > if (svm->vmcb->control.exit_info_2 & > (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET)) > - return kvm_task_switch(&svm->vcpu, tss_selector, > - TASK_SWITCH_IRET); > - if (svm->vmcb->control.exit_info_2 & > - (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP)) > - return kvm_task_switch(&svm->vcpu, tss_selector, > - TASK_SWITCH_JMP); > - return kvm_task_switch(&svm->vcpu, tss_selector, TASK_SWITCH_CALL); > + reason = TASK_SWITCH_IRET; > + else if (svm->vmcb->control.exit_info_2 & > + (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP)) > + reason = TASK_SWITCH_JMP; > + else if (svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID) > + reason = TASK_SWITCH_GATE; > + else > + reason = TASK_SWITCH_CALL; > + > + > + if (reason != TASK_SWITCH_GATE || int_type == SVM_EXITINTINFO_TYPE_SOFT) > + skip_emulated_instruction(&svm->vcpu); > + > + return kvm_task_switch(&svm->vcpu, tss_selector, reason); > } > > static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index 0da7a9e..01db958 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -3025,22 +3025,40 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) > struct vcpu_vmx *vmx = to_vmx(vcpu); > unsigned long exit_qualification; > u16 tss_selector; > - int reason; > + int reason, type, idt_v; > + > + idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK); > + type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK); > > exit_qualification = vmcs_readl(EXIT_QUALIFICATION); > > reason = (u32)exit_qualification >> 30; > - if (reason == TASK_SWITCH_GATE && vmx->vcpu.arch.nmi_injected && > - (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && > - (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK) > - == INTR_TYPE_NMI_INTR) { > - vcpu->arch.nmi_injected = false; > - if (cpu_has_virtual_nmis()) > - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, > - GUEST_INTR_STATE_NMI); > + if (reason == TASK_SWITCH_GATE && idt_v) { > + switch (type) { > + case INTR_TYPE_NMI_INTR: > + vcpu->arch.nmi_injected = false; > + if (cpu_has_virtual_nmis()) > + vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, > + GUEST_INTR_STATE_NMI); > + break; > + case INTR_TYPE_EXT_INTR: > + kvm_clear_interrupt_queue(vcpu); > + break; > + case INTR_TYPE_HARD_EXCEPTION: > + case INTR_TYPE_SOFT_EXCEPTION: > + kvm_clear_exception_queue(vcpu); > + break; > + default: > + break; > + } > } > tss_selector = exit_qualification; > > + if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION && > + type != INTR_TYPE_EXT_INTR && > + type != INTR_TYPE_NMI_INTR)) > + skip_emulated_instruction(vcpu); > + > if (!kvm_task_switch(vcpu, tss_selector, reason)) > return 0; > > @@ -3292,8 +3310,8 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx) > > vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK; > type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK; > - > - switch(type) { > + > + switch (type) { > case INTR_TYPE_NMI_INTR: > vmx->vcpu.arch.nmi_injected = true; > /* > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index ae4918c..573bb3f 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -3697,7 +3697,6 @@ static void save_state_to_tss32(struct kvm_vcpu *vcpu, > tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS); > tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS); > tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR); > - tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR); > } > > static int load_state_from_tss32(struct kvm_vcpu *vcpu, > @@ -3794,8 +3793,8 @@ static int load_state_from_tss16(struct kvm_vcpu *vcpu, > } > > static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, > - u32 old_tss_base, > - struct desc_struct *nseg_desc) > + u16 old_tss_sel, u32 old_tss_base, > + struct desc_struct *nseg_desc) > { > struct tss_segment_16 tss_segment_16; > int ret = 0; > @@ -3814,6 +3813,16 @@ static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, > &tss_segment_16, sizeof tss_segment_16)) > goto out; > > + if (old_tss_sel != 0xffff) { > + tss_segment_16.prev_task_link = old_tss_sel; > + > + if (kvm_write_guest(vcpu->kvm, > + get_tss_base_addr(vcpu, nseg_desc), > + &tss_segment_16.prev_task_link, > + sizeof tss_segment_16.prev_task_link)) > + goto out; > + } > + > if (load_state_from_tss16(vcpu, &tss_segment_16)) > goto out; > > @@ -3823,7 +3832,7 @@ out: > } > > static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, > - u32 old_tss_base, > + u16 old_tss_sel, u32 old_tss_base, > struct desc_struct *nseg_desc) > { > struct tss_segment_32 tss_segment_32; > @@ -3843,6 +3852,16 @@ static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, > &tss_segment_32, sizeof tss_segment_32)) > goto out; > > + if (old_tss_sel != 0xffff) { > + tss_segment_32.prev_task_link = old_tss_sel; > + > + if (kvm_write_guest(vcpu->kvm, > + get_tss_base_addr(vcpu, nseg_desc), > + &tss_segment_32.prev_task_link, > + sizeof tss_segment_32.prev_task_link)) > + goto out; > + } > + > if (load_state_from_tss32(vcpu, &tss_segment_32)) > goto out; > > @@ -3896,14 +3915,17 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) > kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT); > } > > - kvm_x86_ops->skip_emulated_instruction(vcpu); > + /* set back link to prev task only if NT bit is set in eflags > + note that old_tss_sel is not used afetr this point */ > + if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) > + old_tss_sel = 0xffff; > > if (nseg_desc.type & 8) > - ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_base, > - &nseg_desc); > + ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel, > + old_tss_base, &nseg_desc); > else > - ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_base, > - &nseg_desc); > + ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_sel, > + old_tss_base, &nseg_desc); > > if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { > u32 eflags = kvm_x86_ops->get_rflags(vcpu); > > -- > To unsubscribe from this list: send the line "unsubscribe kvm" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > [-- Attachment #2: OpenPGP digital signature --] [-- Type: application/pgp-signature, Size: 257 bytes --] ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 4/4] Fix task switching. 2009-03-30 16:04 ` Jan Kiszka @ 2009-03-30 16:21 ` Gleb Natapov 2009-03-30 16:35 ` Jan Kiszka 2009-03-31 9:03 ` Kohl, Bernhard (NSN - DE/Munich) 1 sibling, 1 reply; 18+ messages in thread From: Gleb Natapov @ 2009-03-30 16:21 UTC (permalink / raw) To: Jan Kiszka Cc: avi, kvm, Bernhard Kohl, Ostler, Thomas (NSN - DE/Munich), bliitz On Mon, Mar 30, 2009 at 06:04:45PM +0200, Jan Kiszka wrote: > Gleb Natapov wrote: > > The patch fixes two problems with task switching. > > 1. Back link is written to a wrong TSS. > > 2. Instruction emulation is not needed if the reason for task switch > > is a task gate in IDT and access to it is caused by an external even. > > > > 2 is currently solved only for VMX since there is not reliable way to > > skip an instruction in SVM. We should emulate it instead. > > Does this series fix all issues Bernhard, Thomas and Julian stumbled over? > Haven't tried. I wrote my own tests for task switching. How can I check it? -- Gleb. ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 4/4] Fix task switching. 2009-03-30 16:21 ` Gleb Natapov @ 2009-03-30 16:35 ` Jan Kiszka 2009-03-30 16:39 ` Gleb Natapov 2009-03-30 16:46 ` Gleb Natapov 0 siblings, 2 replies; 18+ messages in thread From: Jan Kiszka @ 2009-03-30 16:35 UTC (permalink / raw) To: Gleb Natapov Cc: avi, kvm, Bernhard Kohl, Ostler, Thomas (NSN - DE/Munich), bliitz [-- Attachment #1: Type: text/plain, Size: 1318 bytes --] Gleb Natapov wrote: > On Mon, Mar 30, 2009 at 06:04:45PM +0200, Jan Kiszka wrote: >> Gleb Natapov wrote: >>> The patch fixes two problems with task switching. >>> 1. Back link is written to a wrong TSS. >>> 2. Instruction emulation is not needed if the reason for task switch >>> is a task gate in IDT and access to it is caused by an external even. >>> >>> 2 is currently solved only for VMX since there is not reliable way to >>> skip an instruction in SVM. We should emulate it instead. >> Does this series fix all issues Bernhard, Thomas and Julian stumbled over? >> > Haven't tried. I wrote my own tests for task switching. How can I check it? > There is a test case attached to Julian's sourceforge-reported bug: https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2681442&group_id=180599 And I guess Thomas or Bernhard will be happy to give it a try, too... :) There was one issue, the IRQ injection bug [1] which was related to IRQ tasks IIRC. Thomas and I finally suspected after a private chat that there is actually a different reason behind it, something like interrupt.pending should be cleared when the injection took place via an (emulated) task switch. Any news on this, Thomas? Jan [1] http://permalink.gmane.org/gmane.comp.emulators.kvm.devel/29288 [-- Attachment #2: OpenPGP digital signature --] [-- Type: application/pgp-signature, Size: 257 bytes --] ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 4/4] Fix task switching. 2009-03-30 16:35 ` Jan Kiszka @ 2009-03-30 16:39 ` Gleb Natapov 2009-03-30 16:46 ` Gleb Natapov 1 sibling, 0 replies; 18+ messages in thread From: Gleb Natapov @ 2009-03-30 16:39 UTC (permalink / raw) To: Jan Kiszka Cc: avi, kvm, Bernhard Kohl, Ostler, Thomas (NSN - DE/Munich), bliitz On Mon, Mar 30, 2009 at 06:35:05PM +0200, Jan Kiszka wrote: > Gleb Natapov wrote: > > On Mon, Mar 30, 2009 at 06:04:45PM +0200, Jan Kiszka wrote: > >> Gleb Natapov wrote: > >>> The patch fixes two problems with task switching. > >>> 1. Back link is written to a wrong TSS. > >>> 2. Instruction emulation is not needed if the reason for task switch > >>> is a task gate in IDT and access to it is caused by an external even. > >>> > >>> 2 is currently solved only for VMX since there is not reliable way to > >>> skip an instruction in SVM. We should emulate it instead. > >> Does this series fix all issues Bernhard, Thomas and Julian stumbled over? > >> > > Haven't tried. I wrote my own tests for task switching. How can I check it? > > > > There is a test case attached to Julian's sourceforge-reported bug: > > https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2681442&group_id=180599 > I'll try that. > And I guess Thomas or Bernhard will be happy to give it a try, too... :) > > There was one issue, the IRQ injection bug [1] which was related to IRQ > tasks IIRC. Thomas and I finally suspected after a private chat that > there is actually a different reason behind it, something like > interrupt.pending should be cleared when the injection took place via an > (emulated) task switch. Any news on this, Thomas? > If this is the case then the patch series should fix it. > Jan > > [1] http://permalink.gmane.org/gmane.comp.emulators.kvm.devel/29288 > -- Gleb. ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 4/4] Fix task switching. 2009-03-30 16:35 ` Jan Kiszka 2009-03-30 16:39 ` Gleb Natapov @ 2009-03-30 16:46 ` Gleb Natapov 2009-03-30 23:54 ` Julian Stecklina 1 sibling, 1 reply; 18+ messages in thread From: Gleb Natapov @ 2009-03-30 16:46 UTC (permalink / raw) To: Jan Kiszka Cc: avi, kvm, Bernhard Kohl, Ostler, Thomas (NSN - DE/Munich), bliitz On Mon, Mar 30, 2009 at 06:35:05PM +0200, Jan Kiszka wrote: > > Haven't tried. I wrote my own tests for task switching. How can I check it? > > > > There is a test case attached to Julian's sourceforge-reported bug: > > https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2681442&group_id=180599 > Works for me. -- Gleb. ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 4/4] Fix task switching. 2009-03-30 16:46 ` Gleb Natapov @ 2009-03-30 23:54 ` Julian Stecklina 0 siblings, 0 replies; 18+ messages in thread From: Julian Stecklina @ 2009-03-30 23:54 UTC (permalink / raw) To: kvm-u79uwXL29TY76Z2rM5mHXA Gleb Natapov <gleb-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org> writes: > On Mon, Mar 30, 2009 at 06:35:05PM +0200, Jan Kiszka wrote: >> > Haven't tried. I wrote my own tests for task switching. How can I check it? >> > >> >> There is a test case attached to Julian's sourceforge-reported bug: >> >> https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2681442&group_id=180599 >> > Works for me. Then the patches should be fine (at least for me *g*). Regards, -- Julian Stecklina The day Microsoft makes something that doesn't suck is probably the day they start making vacuum cleaners - Ernst Jan Plugge -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply [flat|nested] 18+ messages in thread
* RE: [PATCH 4/4] Fix task switching. 2009-03-30 16:04 ` Jan Kiszka 2009-03-30 16:21 ` Gleb Natapov @ 2009-03-31 9:03 ` Kohl, Bernhard (NSN - DE/Munich) 2009-03-31 15:21 ` Kohl, Bernhard (NSN - DE/Munich) 1 sibling, 1 reply; 18+ messages in thread From: Kohl, Bernhard (NSN - DE/Munich) @ 2009-03-31 9:03 UTC (permalink / raw) To: jan.kiszka, Gleb Natapov Cc: avi, kvm, Ostler, Thomas (NSN - DE/Munich), bliitz Jan Kiszka wrote: > > Gleb Natapov wrote: > > The patch fixes two problems with task switching. > > 1. Back link is written to a wrong TSS. > > 2. Instruction emulation is not needed if the reason for task switch > > is a task gate in IDT and access to it is caused by an > external even. > > > > 2 is currently solved only for VMX since there is not > reliable way to > > skip an instruction in SVM. We should emulate it instead. > > Does this series fix all issues Bernhard, Thomas and Julian > stumbled over? > > Jan I will try this today. Thanks. Bernhard ^ permalink raw reply [flat|nested] 18+ messages in thread
* RE: [PATCH 4/4] Fix task switching. 2009-03-31 9:03 ` Kohl, Bernhard (NSN - DE/Munich) @ 2009-03-31 15:21 ` Kohl, Bernhard (NSN - DE/Munich) 2009-03-31 15:22 ` Gleb Natapov 0 siblings, 1 reply; 18+ messages in thread From: Kohl, Bernhard (NSN - DE/Munich) @ 2009-03-31 15:21 UTC (permalink / raw) To: Kohl, Bernhard (NSN - DE/Munich), jan.kiszka, Gleb Natapov Cc: avi, kvm, Ostler, Thomas (NSN - DE/Munich), bliitz Bernhard Kohl wrote: > > Jan Kiszka wrote: > > > > Gleb Natapov wrote: > > > The patch fixes two problems with task switching. > > > 1. Back link is written to a wrong TSS. > > > 2. Instruction emulation is not needed if the reason for > task switch > > > is a task gate in IDT and access to it is caused by an > > external even. > > > > > > 2 is currently solved only for VMX since there is not > > reliable way to > > > skip an instruction in SVM. We should emulate it instead. > > > > Does this series fix all issues Bernhard, Thomas and Julian > > stumbled over? > > > > Jan > > I will try this today. Thanks. > Yes, it works for us (Thomas + Bernhard). Bernhard ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 4/4] Fix task switching. 2009-03-31 15:21 ` Kohl, Bernhard (NSN - DE/Munich) @ 2009-03-31 15:22 ` Gleb Natapov 2009-04-01 7:21 ` Jan Kiszka 0 siblings, 1 reply; 18+ messages in thread From: Gleb Natapov @ 2009-03-31 15:22 UTC (permalink / raw) To: Kohl, Bernhard (NSN - DE/Munich) Cc: jan.kiszka, avi, kvm, Ostler, Thomas (NSN - DE/Munich), bliitz On Tue, Mar 31, 2009 at 05:21:16PM +0200, Kohl, Bernhard (NSN - DE/Munich) wrote: > Bernhard Kohl wrote: > > > > Jan Kiszka wrote: > > > > > > Gleb Natapov wrote: > > > > The patch fixes two problems with task switching. > > > > 1. Back link is written to a wrong TSS. > > > > 2. Instruction emulation is not needed if the reason for > > task switch > > > > is a task gate in IDT and access to it is caused by an > > > external even. > > > > > > > > 2 is currently solved only for VMX since there is not > > > reliable way to > > > > skip an instruction in SVM. We should emulate it instead. > > > > > > Does this series fix all issues Bernhard, Thomas and Julian > > > stumbled over? > > > > > > Jan > > > > I will try this today. Thanks. > > > Yes, it works for us (Thomas + Bernhard). > Great. Thanks for testing. -- Gleb. ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 4/4] Fix task switching. 2009-03-31 15:22 ` Gleb Natapov @ 2009-04-01 7:21 ` Jan Kiszka 0 siblings, 0 replies; 18+ messages in thread From: Jan Kiszka @ 2009-04-01 7:21 UTC (permalink / raw) To: Gleb Natapov Cc: Kohl, Bernhard (NSN - DE/Munich), avi, kvm, Ostler, Thomas (NSN - DE/Munich), bliitz [-- Attachment #1: Type: text/plain, Size: 920 bytes --] Gleb Natapov wrote: > On Tue, Mar 31, 2009 at 05:21:16PM +0200, Kohl, Bernhard (NSN - DE/Munich) wrote: >> Bernhard Kohl wrote: >>> Jan Kiszka wrote: >>>> Gleb Natapov wrote: >>>>> The patch fixes two problems with task switching. >>>>> 1. Back link is written to a wrong TSS. >>>>> 2. Instruction emulation is not needed if the reason for >>> task switch >>>>> is a task gate in IDT and access to it is caused by an >>>> external even. >>>>> 2 is currently solved only for VMX since there is not >>>> reliable way to >>>>> skip an instruction in SVM. We should emulate it instead. >>>> Does this series fix all issues Bernhard, Thomas and Julian >>>> stumbled over? >>>> >>>> Jan >>> I will try this today. Thanks. >>> >> Yes, it works for us (Thomas + Bernhard). >> > Great. Thanks for testing. > Same here: No obvious regressions found while running various NMI/IRQ tests. Jan [-- Attachment #2: OpenPGP digital signature --] [-- Type: application/pgp-signature, Size: 257 bytes --] ^ permalink raw reply [flat|nested] 18+ messages in thread
end of thread, other threads:[~2009-04-01 7:21 UTC | newest] Thread overview: 18+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2009-03-29 14:12 [PATCH 1/4] Fix handling of a fault during NMI unblocked due to IRET Gleb Natapov 2009-03-29 14:12 ` [PATCH 2/4] Rewrite twisted maze of if() statements with more straightforward switch() Gleb Natapov 2009-03-30 7:35 ` Avi Kivity 2009-03-30 16:03 ` Jan Kiszka 2009-03-29 14:12 ` [PATCH 3/4] Do not zero idt_vectoring_info in vmx_complete_interrupts() Gleb Natapov 2009-03-29 14:12 ` [PATCH 4/4] Fix task switching Gleb Natapov 2009-03-30 7:39 ` Avi Kivity 2009-03-30 13:06 ` Gleb Natapov 2009-03-30 16:04 ` Jan Kiszka 2009-03-30 16:21 ` Gleb Natapov 2009-03-30 16:35 ` Jan Kiszka 2009-03-30 16:39 ` Gleb Natapov 2009-03-30 16:46 ` Gleb Natapov 2009-03-30 23:54 ` Julian Stecklina 2009-03-31 9:03 ` Kohl, Bernhard (NSN - DE/Munich) 2009-03-31 15:21 ` Kohl, Bernhard (NSN - DE/Munich) 2009-03-31 15:22 ` Gleb Natapov 2009-04-01 7:21 ` Jan Kiszka
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox