* Implement generic double fault generation mechanism
@ 2009-04-30 7:24 Dong, Eddie
2009-05-03 10:53 ` Gleb Natapov
2009-05-08 8:19 ` Dong, Eddie
0 siblings, 2 replies; 33+ messages in thread
From: Dong, Eddie @ 2009-04-30 7:24 UTC (permalink / raw)
To: kvm@vger.kernel.org; +Cc: Avi Kivity, Dong, Eddie
[-- Attachment #1: Type: text/plain, Size: 3351 bytes --]
Move Double-Fault generation logic out of page fault
exception generating function to cover more generic case.
Signed-off-by: Eddie Dong <eddie.dong@intel.com>
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ab1fdac..51a8dad 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -162,12 +162,59 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
}
EXPORT_SYMBOL_GPL(kvm_set_apic_base);
+#define EXCPT_BENIGN 0
+#define EXCPT_CONTRIBUTORY 1
+#define EXCPT_PF 2
+
+static int exception_class(int vector)
+{
+ if (vector == 14)
+ return EXCPT_PF;
+ else if (vector == 0 || (vector >= 10 && vector <= 13))
+ return EXCPT_CONTRIBUTORY;
+ else
+ return EXCPT_BENIGN;
+}
+
+static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
+ unsigned nr, bool has_error, u32 error_code)
+{
+ u32 prev_nr;
+ int class1, class2;
+
+ if (!vcpu->arch.exception.pending) {
+ vcpu->arch.exception.pending = true;
+ vcpu->arch.exception.has_error_code = has_error;
+ vcpu->arch.exception.nr = nr;
+ vcpu->arch.exception.error_code = error_code;
+ return;
+ }
+
+ /* to check exception */
+ prev_nr = vcpu->arch.exception.nr;
+ class2 = exception_class(nr);
+ class1 = exception_class(prev_nr);
+ if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
+ || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
+ /* generate double fault per SDM Table 5-5 */
+ printk(KERN_DEBUG "kvm: double fault 0x%x on 0x%x\n",
+ prev_nr, nr);
+ vcpu->arch.exception.pending = true;
+ vcpu->arch.exception.has_error_code = 1;
+ vcpu->arch.exception.nr = DF_VECTOR;
+ vcpu->arch.exception.error_code = 0;
+ if (prev_nr == DF_VECTOR) {
+ /* triple fault -> shutdown */
+ set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
+ }
+ } else
+ printk(KERN_ERR "Exception 0x%x on 0x%x happens serially\n",
+ prev_nr, nr);
+}
+
void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
{
- WARN_ON(vcpu->arch.exception.pending);
- vcpu->arch.exception.pending = true;
- vcpu->arch.exception.has_error_code = false;
- vcpu->arch.exception.nr = nr;
+ kvm_multiple_exception(vcpu, nr, false, 0);
}
EXPORT_SYMBOL_GPL(kvm_queue_exception);
@@ -176,18 +223,6 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
{
++vcpu->stat.pf_guest;
- if (vcpu->arch.exception.pending) {
- if (vcpu->arch.exception.nr == PF_VECTOR) {
- printk(KERN_DEBUG "kvm: inject_page_fault:"
- " double fault 0x%lx\n", addr);
- vcpu->arch.exception.nr = DF_VECTOR;
- vcpu->arch.exception.error_code = 0;
- } else if (vcpu->arch.exception.nr == DF_VECTOR) {
- /* triple fault -> shutdown */
- set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
- }
- return;
- }
vcpu->arch.cr2 = addr;
kvm_queue_exception_e(vcpu, PF_VECTOR, error_code);
}
@@ -200,11 +235,7 @@ EXPORT_SYMBOL_GPL(kvm_inject_nmi);
void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
{
- WARN_ON(vcpu->arch.exception.pending);
- vcpu->arch.exception.pending = true;
- vcpu->arch.exception.has_error_code = true;
- vcpu->arch.exception.nr = nr;
- vcpu->arch.exception.error_code = error_code;
+ kvm_multiple_exception(vcpu, nr, true, error_code);
}
EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
[-- Attachment #2: irq3.patch --]
[-- Type: application/octet-stream, Size: 3373 bytes --]
commit 67f7029b428ff508c840dca41c67a7ee3a0e156e
Author: root <root@eddie-wb.localdomain>
Date: Thu Apr 30 15:59:37 2009 +0800
Move Double-Fault generation logic out of page fault
exception generating function to cover more generic case.
Signed-off-by: Eddie Dong <eddie.dong@intel.com>
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ab1fdac..51a8dad 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -162,12 +162,59 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
}
EXPORT_SYMBOL_GPL(kvm_set_apic_base);
+#define EXCPT_BENIGN 0
+#define EXCPT_CONTRIBUTORY 1
+#define EXCPT_PF 2
+
+static int exception_class(int vector)
+{
+ if (vector == 14)
+ return EXCPT_PF;
+ else if (vector == 0 || (vector >= 10 && vector <= 13))
+ return EXCPT_CONTRIBUTORY;
+ else
+ return EXCPT_BENIGN;
+}
+
+static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
+ unsigned nr, bool has_error, u32 error_code)
+{
+ u32 prev_nr;
+ int class1, class2;
+
+ if (!vcpu->arch.exception.pending) {
+ vcpu->arch.exception.pending = true;
+ vcpu->arch.exception.has_error_code = has_error;
+ vcpu->arch.exception.nr = nr;
+ vcpu->arch.exception.error_code = error_code;
+ return;
+ }
+
+ /* to check exception */
+ prev_nr = vcpu->arch.exception.nr;
+ class2 = exception_class(nr);
+ class1 = exception_class(prev_nr);
+ if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
+ || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
+ /* generate double fault per SDM Table 5-5 */
+ printk(KERN_DEBUG "kvm: double fault 0x%x on 0x%x\n",
+ prev_nr, nr);
+ vcpu->arch.exception.pending = true;
+ vcpu->arch.exception.has_error_code = 1;
+ vcpu->arch.exception.nr = DF_VECTOR;
+ vcpu->arch.exception.error_code = 0;
+ if (prev_nr == DF_VECTOR) {
+ /* triple fault -> shutdown */
+ set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
+ }
+ } else
+ printk(KERN_ERR "Exception 0x%x on 0x%x happens serially\n",
+ prev_nr, nr);
+}
+
void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
{
- WARN_ON(vcpu->arch.exception.pending);
- vcpu->arch.exception.pending = true;
- vcpu->arch.exception.has_error_code = false;
- vcpu->arch.exception.nr = nr;
+ kvm_multiple_exception(vcpu, nr, false, 0);
}
EXPORT_SYMBOL_GPL(kvm_queue_exception);
@@ -176,18 +223,6 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
{
++vcpu->stat.pf_guest;
- if (vcpu->arch.exception.pending) {
- if (vcpu->arch.exception.nr == PF_VECTOR) {
- printk(KERN_DEBUG "kvm: inject_page_fault:"
- " double fault 0x%lx\n", addr);
- vcpu->arch.exception.nr = DF_VECTOR;
- vcpu->arch.exception.error_code = 0;
- } else if (vcpu->arch.exception.nr == DF_VECTOR) {
- /* triple fault -> shutdown */
- set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
- }
- return;
- }
vcpu->arch.cr2 = addr;
kvm_queue_exception_e(vcpu, PF_VECTOR, error_code);
}
@@ -200,11 +235,7 @@ EXPORT_SYMBOL_GPL(kvm_inject_nmi);
void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
{
- WARN_ON(vcpu->arch.exception.pending);
- vcpu->arch.exception.pending = true;
- vcpu->arch.exception.has_error_code = true;
- vcpu->arch.exception.nr = nr;
- vcpu->arch.exception.error_code = error_code;
+ kvm_multiple_exception(vcpu, nr, true, error_code);
}
EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
^ permalink raw reply related [flat|nested] 33+ messages in thread
* Re: Implement generic double fault generation mechanism
2009-04-30 7:24 Implement generic double fault generation mechanism Dong, Eddie
@ 2009-05-03 10:53 ` Gleb Natapov
2009-05-08 8:27 ` Dong, Eddie
2009-05-08 8:19 ` Dong, Eddie
1 sibling, 1 reply; 33+ messages in thread
From: Gleb Natapov @ 2009-05-03 10:53 UTC (permalink / raw)
To: Dong, Eddie; +Cc: kvm@vger.kernel.org, Avi Kivity
On Thu, Apr 30, 2009 at 03:24:07PM +0800, Dong, Eddie wrote:
>
>
> Move Double-Fault generation logic out of page fault
> exception generating function to cover more generic case.
>
> Signed-off-by: Eddie Dong <eddie.dong@intel.com>
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index ab1fdac..51a8dad 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -162,12 +162,59 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
> }
> EXPORT_SYMBOL_GPL(kvm_set_apic_base);
>
> +#define EXCPT_BENIGN 0
> +#define EXCPT_CONTRIBUTORY 1
> +#define EXCPT_PF 2
> +
> +static int exception_class(int vector)
> +{
> + if (vector == 14)
> + return EXCPT_PF;
> + else if (vector == 0 || (vector >= 10 && vector <= 13))
> + return EXCPT_CONTRIBUTORY;
> + else
> + return EXCPT_BENIGN;
> +}
> +
This makes double fault (8) benign exception. Surely not what you want.
> +static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
> + unsigned nr, bool has_error, u32 error_code)
> +{
> + u32 prev_nr;
> + int class1, class2;
> +
> + if (!vcpu->arch.exception.pending) {
> + vcpu->arch.exception.pending = true;
> + vcpu->arch.exception.has_error_code = has_error;
> + vcpu->arch.exception.nr = nr;
> + vcpu->arch.exception.error_code = error_code;
> + return;
> + }
> +
> + /* to check exception */
> + prev_nr = vcpu->arch.exception.nr;
> + class2 = exception_class(nr);
> + class1 = exception_class(prev_nr);
> + if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
> + || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
> + /* generate double fault per SDM Table 5-5 */
> + printk(KERN_DEBUG "kvm: double fault 0x%x on 0x%x\n",
> + prev_nr, nr);
> + vcpu->arch.exception.pending = true;
> + vcpu->arch.exception.has_error_code = 1;
> + vcpu->arch.exception.nr = DF_VECTOR;
> + vcpu->arch.exception.error_code = 0;
> + if (prev_nr == DF_VECTOR) {
> + /* triple fault -> shutdown */
> + set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
> + }
> + } else
> + printk(KERN_ERR "Exception 0x%x on 0x%x happens serially\n",
> + prev_nr, nr);
> +}
When two exceptions happens serially is is better to replace pending exception
with a new one. This way the first exception (that is lost) will be regenerated
when instruction will be re-executed.
--
Gleb.
^ permalink raw reply [flat|nested] 33+ messages in thread
* RE: Implement generic double fault generation mechanism
2009-04-30 7:24 Implement generic double fault generation mechanism Dong, Eddie
2009-05-03 10:53 ` Gleb Natapov
@ 2009-05-08 8:19 ` Dong, Eddie
2009-05-08 8:28 ` Avi Kivity
1 sibling, 1 reply; 33+ messages in thread
From: Dong, Eddie @ 2009-05-08 8:19 UTC (permalink / raw)
To: Dong, Eddie, kvm@vger.kernel.org; +Cc: Avi Kivity, Dong, Eddie
Dong, Eddie wrote:
> Move Double-Fault generation logic out of page fault
> exception generating function to cover more generic case.
>
> Signed-off-by: Eddie Dong <eddie.dong@intel.com>
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index ab1fdac..51a8dad 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -162,12 +162,59 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu,
> u64 data) }
> EXPORT_SYMBOL_GPL(kvm_set_apic_base);
>
> +#define EXCPT_BENIGN 0
> +#define EXCPT_CONTRIBUTORY 1
> +#define EXCPT_PF 2
> +
> +static int exception_class(int vector)
> +{
> + if (vector == 14)
> + return EXCPT_PF;
> + else if (vector == 0 || (vector >= 10 && vector <= 13))
> + return EXCPT_CONTRIBUTORY;
> + else
> + return EXCPT_BENIGN;
> +}
> +
> +static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
> + unsigned nr, bool has_error, u32 error_code)
> +{
> + u32 prev_nr;
> + int class1, class2;
> +
> + if (!vcpu->arch.exception.pending) {
> + vcpu->arch.exception.pending = true;
> + vcpu->arch.exception.has_error_code = has_error;
> + vcpu->arch.exception.nr = nr;
> + vcpu->arch.exception.error_code = error_code;
> + return;
> + }
> +
> + /* to check exception */
> + prev_nr = vcpu->arch.exception.nr;
> + class2 = exception_class(nr);
> + class1 = exception_class(prev_nr);
> + if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
> + || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
> + /* generate double fault per SDM Table 5-5 */
> + printk(KERN_DEBUG "kvm: double fault 0x%x on 0x%x\n",
> + prev_nr, nr);
> + vcpu->arch.exception.pending = true;
> + vcpu->arch.exception.has_error_code = 1;
> + vcpu->arch.exception.nr = DF_VECTOR;
> + vcpu->arch.exception.error_code = 0;
> + if (prev_nr == DF_VECTOR) {
> + /* triple fault -> shutdown */
> + set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
> + }
> + } else
> + printk(KERN_ERR "Exception 0x%x on 0x%x happens serially\n",
> + prev_nr, nr);
> +}
> +
> void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
> {
> - WARN_ON(vcpu->arch.exception.pending);
> - vcpu->arch.exception.pending = true;
> - vcpu->arch.exception.has_error_code = false;
> - vcpu->arch.exception.nr = nr;
> + kvm_multiple_exception(vcpu, nr, false, 0);
> }
> EXPORT_SYMBOL_GPL(kvm_queue_exception);
>
> @@ -176,18 +223,6 @@ void kvm_inject_page_fault(struct kvm_vcpu
> *vcpu, unsigned long addr, {
> ++vcpu->stat.pf_guest;
>
> - if (vcpu->arch.exception.pending) {
> - if (vcpu->arch.exception.nr == PF_VECTOR) {
> - printk(KERN_DEBUG "kvm: inject_page_fault:"
> - " double fault 0x%lx\n", addr);
> - vcpu->arch.exception.nr = DF_VECTOR;
> - vcpu->arch.exception.error_code = 0;
> - } else if (vcpu->arch.exception.nr == DF_VECTOR) {
> - /* triple fault -> shutdown */
> - set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
> - }
> - return;
> - }
> vcpu->arch.cr2 = addr;
> kvm_queue_exception_e(vcpu, PF_VECTOR, error_code);
> }
> @@ -200,11 +235,7 @@ EXPORT_SYMBOL_GPL(kvm_inject_nmi);
>
> void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32
> error_code) {
> - WARN_ON(vcpu->arch.exception.pending);
> - vcpu->arch.exception.pending = true;
> - vcpu->arch.exception.has_error_code = true;
> - vcpu->arch.exception.nr = nr;
> - vcpu->arch.exception.error_code = error_code;
> + kvm_multiple_exception(vcpu, nr, true, error_code);
> }
> EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
^ permalink raw reply [flat|nested] 33+ messages in thread
* RE: Implement generic double fault generation mechanism
2009-05-03 10:53 ` Gleb Natapov
@ 2009-05-08 8:27 ` Dong, Eddie
2009-05-08 9:53 ` Gleb Natapov
0 siblings, 1 reply; 33+ messages in thread
From: Dong, Eddie @ 2009-05-08 8:27 UTC (permalink / raw)
To: Gleb Natapov; +Cc: kvm@vger.kernel.org, Avi Kivity, Dong, Eddie
[-- Attachment #1: Type: text/plain, Size: 5012 bytes --]
Gleb Natapov wrote:
>> +
>> +static int exception_class(int vector)
>> +{
>> + if (vector == 14)
>> + return EXCPT_PF;
>> + else if (vector == 0 || (vector >= 10 && vector <= 13)) + return
>> EXCPT_CONTRIBUTORY; + else
>> + return EXCPT_BENIGN;
>> +}
>> +
> This makes double fault (8) benign exception. Surely not what you
> want.
double fault fall into neither of above class per SDM. But it should be
checked earlier than generating DB fault. See new updated.
>> + /* to check exception */
>> + prev_nr = vcpu->arch.exception.nr;
>> + class2 = exception_class(nr);
>> + class1 = exception_class(prev_nr);
>> + if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
>> + || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
>> + /* generate double fault per SDM Table 5-5 */
>> + printk(KERN_DEBUG "kvm: double fault 0x%x on 0x%x\n",
>> + prev_nr, nr); + vcpu->arch.exception.pending = true;
>> + vcpu->arch.exception.has_error_code = 1;
>> + vcpu->arch.exception.nr = DF_VECTOR;
>> + vcpu->arch.exception.error_code = 0;
>> + if (prev_nr == DF_VECTOR) {
>> + /* triple fault -> shutdown */
>> + set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); + }
>> + } else
>> + printk(KERN_ERR "Exception 0x%x on 0x%x happens serially\n",
>> + prev_nr, nr); +}
> When two exceptions happens serially is is better to replace pending
> exception with a new one. This way the first exception (that is lost)
> will be regenerated when instruction will be re-executed.
Do you want it to be covered for now? For exception, it is easy but for IRQ, it needs to be pushed back.
Thx, eddie
Move Double-Fault generation logic out of page fault
exception generating function to cover more generic case.
Signed-off-by: Eddie Dong <eddie.dong@intel.com>
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ab1fdac..d0e75a2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -162,12 +162,60 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
}
EXPORT_SYMBOL_GPL(kvm_set_apic_base);
+#define EXCPT_BENIGN 0
+#define EXCPT_CONTRIBUTORY 1
+#define EXCPT_PF 2
+
+static int exception_class(int vector)
+{
+ if (vector == 14)
+ return EXCPT_PF;
+ else if (vector == 0 || (vector >= 10 && vector <= 13))
+ return EXCPT_CONTRIBUTORY;
+ else
+ return EXCPT_BENIGN;
+}
+
+static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
+ unsigned nr, bool has_error, u32 error_code)
+{
+ u32 prev_nr;
+ int class1, class2;
+
+ if (!vcpu->arch.exception.pending) {
+ vcpu->arch.exception.pending = true;
+ vcpu->arch.exception.has_error_code = has_error;
+ vcpu->arch.exception.nr = nr;
+ vcpu->arch.exception.error_code = error_code;
+ return;
+ }
+
+ /* to check exception */
+ prev_nr = vcpu->arch.exception.nr;
+ if (prev_nr == DF_VECTOR) {
+ /* triple fault -> shutdown */
+ set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
+ return;
+ }
+ class1 = exception_class(prev_nr);
+ class2 = exception_class(nr);
+ if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
+ || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
+ /* generate double fault per SDM Table 5-5 */
+ printk(KERN_DEBUG "kvm: double fault 0x%x on 0x%x\n",
+ prev_nr, nr);
+ vcpu->arch.exception.pending = true;
+ vcpu->arch.exception.has_error_code = true;
+ vcpu->arch.exception.nr = DF_VECTOR;
+ vcpu->arch.exception.error_code = 0;
+ } else
+ printk(KERN_ERR "Exception 0x%x on 0x%x happens serially\n",
+ prev_nr, nr);
+}
+
void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
{
- WARN_ON(vcpu->arch.exception.pending);
- vcpu->arch.exception.pending = true;
- vcpu->arch.exception.has_error_code = false;
- vcpu->arch.exception.nr = nr;
+ kvm_multiple_exception(vcpu, nr, false, 0);
}
EXPORT_SYMBOL_GPL(kvm_queue_exception);
@@ -176,18 +224,6 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
{
++vcpu->stat.pf_guest;
- if (vcpu->arch.exception.pending) {
- if (vcpu->arch.exception.nr == PF_VECTOR) {
- printk(KERN_DEBUG "kvm: inject_page_fault:"
- " double fault 0x%lx\n", addr);
- vcpu->arch.exception.nr = DF_VECTOR;
- vcpu->arch.exception.error_code = 0;
- } else if (vcpu->arch.exception.nr == DF_VECTOR) {
- /* triple fault -> shutdown */
- set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
- }
- return;
- }
vcpu->arch.cr2 = addr;
kvm_queue_exception_e(vcpu, PF_VECTOR, error_code);
}
@@ -200,11 +236,7 @@ EXPORT_SYMBOL_GPL(kvm_inject_nmi);
void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
{
- WARN_ON(vcpu->arch.exception.pending);
- vcpu->arch.exception.pending = true;
- vcpu->arch.exception.has_error_code = true;
- vcpu->arch.exception.nr = nr;
- vcpu->arch.exception.error_code = error_code;
+ kvm_multiple_exception(vcpu, nr, true, error_code);
}
EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
[-- Attachment #2: irq2.patch --]
[-- Type: application/octet-stream, Size: 3255 bytes --]
Move Double-Fault generation logic out of page fault
exception generating function to cover more generic case.
Signed-off-by: Eddie Dong <eddie.dong@intel.com>
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ab1fdac..d0e75a2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -162,12 +162,60 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
}
EXPORT_SYMBOL_GPL(kvm_set_apic_base);
+#define EXCPT_BENIGN 0
+#define EXCPT_CONTRIBUTORY 1
+#define EXCPT_PF 2
+
+static int exception_class(int vector)
+{
+ if (vector == 14)
+ return EXCPT_PF;
+ else if (vector == 0 || (vector >= 10 && vector <= 13))
+ return EXCPT_CONTRIBUTORY;
+ else
+ return EXCPT_BENIGN;
+}
+
+static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
+ unsigned nr, bool has_error, u32 error_code)
+{
+ u32 prev_nr;
+ int class1, class2;
+
+ if (!vcpu->arch.exception.pending) {
+ vcpu->arch.exception.pending = true;
+ vcpu->arch.exception.has_error_code = has_error;
+ vcpu->arch.exception.nr = nr;
+ vcpu->arch.exception.error_code = error_code;
+ return;
+ }
+
+ /* to check exception */
+ prev_nr = vcpu->arch.exception.nr;
+ if (prev_nr == DF_VECTOR) {
+ /* triple fault -> shutdown */
+ set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
+ return;
+ }
+ class1 = exception_class(prev_nr);
+ class2 = exception_class(nr);
+ if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
+ || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
+ /* generate double fault per SDM Table 5-5 */
+ printk(KERN_DEBUG "kvm: double fault 0x%x on 0x%x\n",
+ prev_nr, nr);
+ vcpu->arch.exception.pending = true;
+ vcpu->arch.exception.has_error_code = true;
+ vcpu->arch.exception.nr = DF_VECTOR;
+ vcpu->arch.exception.error_code = 0;
+ } else
+ printk(KERN_ERR "Exception 0x%x on 0x%x happens serially\n",
+ prev_nr, nr);
+}
+
void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
{
- WARN_ON(vcpu->arch.exception.pending);
- vcpu->arch.exception.pending = true;
- vcpu->arch.exception.has_error_code = false;
- vcpu->arch.exception.nr = nr;
+ kvm_multiple_exception(vcpu, nr, false, 0);
}
EXPORT_SYMBOL_GPL(kvm_queue_exception);
@@ -176,18 +224,6 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
{
++vcpu->stat.pf_guest;
- if (vcpu->arch.exception.pending) {
- if (vcpu->arch.exception.nr == PF_VECTOR) {
- printk(KERN_DEBUG "kvm: inject_page_fault:"
- " double fault 0x%lx\n", addr);
- vcpu->arch.exception.nr = DF_VECTOR;
- vcpu->arch.exception.error_code = 0;
- } else if (vcpu->arch.exception.nr == DF_VECTOR) {
- /* triple fault -> shutdown */
- set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
- }
- return;
- }
vcpu->arch.cr2 = addr;
kvm_queue_exception_e(vcpu, PF_VECTOR, error_code);
}
@@ -200,11 +236,7 @@ EXPORT_SYMBOL_GPL(kvm_inject_nmi);
void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
{
- WARN_ON(vcpu->arch.exception.pending);
- vcpu->arch.exception.pending = true;
- vcpu->arch.exception.has_error_code = true;
- vcpu->arch.exception.nr = nr;
- vcpu->arch.exception.error_code = error_code;
+ kvm_multiple_exception(vcpu, nr, true, error_code);
}
EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
^ permalink raw reply related [flat|nested] 33+ messages in thread
* Re: Implement generic double fault generation mechanism
2009-05-08 8:19 ` Dong, Eddie
@ 2009-05-08 8:28 ` Avi Kivity
0 siblings, 0 replies; 33+ messages in thread
From: Avi Kivity @ 2009-05-08 8:28 UTC (permalink / raw)
To: Dong, Eddie; +Cc: kvm@vger.kernel.org
Dong, Eddie wrote:
No content (except for quoted message)?
--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: Implement generic double fault generation mechanism
2009-05-08 8:27 ` Dong, Eddie
@ 2009-05-08 9:53 ` Gleb Natapov
2009-05-08 10:39 ` Dong, Eddie
0 siblings, 1 reply; 33+ messages in thread
From: Gleb Natapov @ 2009-05-08 9:53 UTC (permalink / raw)
To: Dong, Eddie; +Cc: kvm@vger.kernel.org, Avi Kivity
On Fri, May 08, 2009 at 04:27:28PM +0800, Dong, Eddie wrote:
> Gleb Natapov wrote:
> >> +
> >> +static int exception_class(int vector)
> >> +{
> >> + if (vector == 14)
> >> + return EXCPT_PF;
> >> + else if (vector == 0 || (vector >= 10 && vector <= 13)) + return
> >> EXCPT_CONTRIBUTORY; + else
> >> + return EXCPT_BENIGN;
> >> +}
> >> +
> > This makes double fault (8) benign exception. Surely not what you
> > want.
>
> double fault fall into neither of above class per SDM. But it should be
> checked earlier than generating DB fault. See new updated.
> >> + /* to check exception */
> >> + prev_nr = vcpu->arch.exception.nr;
> >> + class2 = exception_class(nr);
> >> + class1 = exception_class(prev_nr);
> >> + if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
> >> + || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
> >> + /* generate double fault per SDM Table 5-5 */
> >> + printk(KERN_DEBUG "kvm: double fault 0x%x on 0x%x\n",
> >> + prev_nr, nr); + vcpu->arch.exception.pending = true;
> >> + vcpu->arch.exception.has_error_code = 1;
> >> + vcpu->arch.exception.nr = DF_VECTOR;
> >> + vcpu->arch.exception.error_code = 0;
> >> + if (prev_nr == DF_VECTOR) {
> >> + /* triple fault -> shutdown */
> >> + set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); + }
> >> + } else
> >> + printk(KERN_ERR "Exception 0x%x on 0x%x happens serially\n",
> >> + prev_nr, nr); +}
> > When two exceptions happens serially is is better to replace pending
> > exception with a new one. This way the first exception (that is lost)
> > will be regenerated when instruction will be re-executed.
>
> Do you want it to be covered for now? For exception, it is easy but for IRQ, it needs to be pushed back.
>
Yes I want it to be covered now otherwise any serial exception generates
flood of "Exception happens serially" messages. This function does not
handle IRQ so no problem there.
> Thx, eddie
>
>
>
> Move Double-Fault generation logic out of page fault
> exception generating function to cover more generic case.
>
> Signed-off-by: Eddie Dong <eddie.dong@intel.com>
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index ab1fdac..d0e75a2 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -162,12 +162,60 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
> }
> EXPORT_SYMBOL_GPL(kvm_set_apic_base);
>
> +#define EXCPT_BENIGN 0
> +#define EXCPT_CONTRIBUTORY 1
> +#define EXCPT_PF 2
> +
> +static int exception_class(int vector)
> +{
> + if (vector == 14)
> + return EXCPT_PF;
> + else if (vector == 0 || (vector >= 10 && vector <= 13))
> + return EXCPT_CONTRIBUTORY;
> + else
> + return EXCPT_BENIGN;
> +}
> +
> +static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
> + unsigned nr, bool has_error, u32 error_code)
> +{
> + u32 prev_nr;
> + int class1, class2;
> +
> + if (!vcpu->arch.exception.pending) {
> + vcpu->arch.exception.pending = true;
> + vcpu->arch.exception.has_error_code = has_error;
> + vcpu->arch.exception.nr = nr;
> + vcpu->arch.exception.error_code = error_code;
> + return;
> + }
> +
> + /* to check exception */
> + prev_nr = vcpu->arch.exception.nr;
> + if (prev_nr == DF_VECTOR) {
> + /* triple fault -> shutdown */
> + set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
> + return;
> + }
> + class1 = exception_class(prev_nr);
> + class2 = exception_class(nr);
> + if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
> + || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
> + /* generate double fault per SDM Table 5-5 */
> + printk(KERN_DEBUG "kvm: double fault 0x%x on 0x%x\n",
> + prev_nr, nr);
> + vcpu->arch.exception.pending = true;
> + vcpu->arch.exception.has_error_code = true;
> + vcpu->arch.exception.nr = DF_VECTOR;
> + vcpu->arch.exception.error_code = 0;
> + } else
> + printk(KERN_ERR "Exception 0x%x on 0x%x happens serially\n",
> + prev_nr, nr);
> +}
> +
> void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
> {
> - WARN_ON(vcpu->arch.exception.pending);
> - vcpu->arch.exception.pending = true;
> - vcpu->arch.exception.has_error_code = false;
> - vcpu->arch.exception.nr = nr;
> + kvm_multiple_exception(vcpu, nr, false, 0);
> }
> EXPORT_SYMBOL_GPL(kvm_queue_exception);
>
> @@ -176,18 +224,6 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
> {
> ++vcpu->stat.pf_guest;
>
> - if (vcpu->arch.exception.pending) {
> - if (vcpu->arch.exception.nr == PF_VECTOR) {
> - printk(KERN_DEBUG "kvm: inject_page_fault:"
> - " double fault 0x%lx\n", addr);
> - vcpu->arch.exception.nr = DF_VECTOR;
> - vcpu->arch.exception.error_code = 0;
> - } else if (vcpu->arch.exception.nr == DF_VECTOR) {
> - /* triple fault -> shutdown */
> - set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
> - }
> - return;
> - }
> vcpu->arch.cr2 = addr;
> kvm_queue_exception_e(vcpu, PF_VECTOR, error_code);
> }
> @@ -200,11 +236,7 @@ EXPORT_SYMBOL_GPL(kvm_inject_nmi);
>
> void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
> {
> - WARN_ON(vcpu->arch.exception.pending);
> - vcpu->arch.exception.pending = true;
> - vcpu->arch.exception.has_error_code = true;
> - vcpu->arch.exception.nr = nr;
> - vcpu->arch.exception.error_code = error_code;
> + kvm_multiple_exception(vcpu, nr, true, error_code);
> }
> EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
>
--
Gleb.
^ permalink raw reply [flat|nested] 33+ messages in thread
* RE: Implement generic double fault generation mechanism
2009-05-08 9:53 ` Gleb Natapov
@ 2009-05-08 10:39 ` Dong, Eddie
2009-05-08 10:46 ` Dong, Eddie
2009-05-08 12:16 ` Implement generic double fault generation mechanism Gleb Natapov
0 siblings, 2 replies; 33+ messages in thread
From: Dong, Eddie @ 2009-05-08 10:39 UTC (permalink / raw)
To: Gleb Natapov; +Cc: kvm@vger.kernel.org, Avi Kivity, Dong, Eddie
ction will be re-executed.
>>
>> Do you want it to be covered for now? For exception, it is easy but
>> for IRQ, it needs to be pushed back.
>>
> Yes I want it to be covered now otherwise any serial exception
> generates flood of "Exception happens serially" messages. This
> function does not handle IRQ so no problem there.
But we soon will let this function cove IRQ as well per SDM.
Why not do that a little bit later?
BTW, this issue exist in original code as well.
Eddie
^ permalink raw reply [flat|nested] 33+ messages in thread
* RE: Implement generic double fault generation mechanism
2009-05-08 10:39 ` Dong, Eddie
@ 2009-05-08 10:46 ` Dong, Eddie
2009-05-08 12:23 ` Gleb Natapov
2009-05-08 12:16 ` Implement generic double fault generation mechanism Gleb Natapov
1 sibling, 1 reply; 33+ messages in thread
From: Dong, Eddie @ 2009-05-08 10:46 UTC (permalink / raw)
To: Dong, Eddie, Gleb Natapov; +Cc: kvm@vger.kernel.org, Avi Kivity, Dong, Eddie
Dong, Eddie wrote:
> ction will be re-executed.
>>>
>>> Do you want it to be covered for now? For exception, it is easy but
>>> for IRQ, it needs to be pushed back.
>>>
>> Yes I want it to be covered now otherwise any serial exception
>> generates flood of "Exception happens serially" messages. This
>> function does not handle IRQ so no problem there.
>
> But we soon will let this function cove IRQ as well per SDM.
> Why not do that a little bit later?
>
> BTW, this issue exist in original code as well.
>
> Eddie
Actually this is already addressed in current patch too: Just keep the former exception. If you mean the prink should be removed, I am fine.
BTW, this case doesn't happen in reality.
Thx, eddie
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: Implement generic double fault generation mechanism
2009-05-08 10:39 ` Dong, Eddie
2009-05-08 10:46 ` Dong, Eddie
@ 2009-05-08 12:16 ` Gleb Natapov
1 sibling, 0 replies; 33+ messages in thread
From: Gleb Natapov @ 2009-05-08 12:16 UTC (permalink / raw)
To: Dong, Eddie; +Cc: kvm@vger.kernel.org, Avi Kivity
On Fri, May 08, 2009 at 06:39:06PM +0800, Dong, Eddie wrote:
> ction will be re-executed.
> >>
> >> Do you want it to be covered for now? For exception, it is easy but
> >> for IRQ, it needs to be pushed back.
> >>
> > Yes I want it to be covered now otherwise any serial exception
> > generates flood of "Exception happens serially" messages. This
> > function does not handle IRQ so no problem there.
>
> But we soon will let this function cove IRQ as well per SDM.
> Why not do that a little bit later?
>
> BTW, this issue exist in original code as well.
>
Yes, but I've already sent a patch to fix this in existing code. It
your patch will fix this to I'll drop my version.
--
Gleb.
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: Implement generic double fault generation mechanism
2009-05-08 10:46 ` Dong, Eddie
@ 2009-05-08 12:23 ` Gleb Natapov
2009-05-08 15:00 ` Dong, Eddie
0 siblings, 1 reply; 33+ messages in thread
From: Gleb Natapov @ 2009-05-08 12:23 UTC (permalink / raw)
To: Dong, Eddie; +Cc: kvm@vger.kernel.org, Avi Kivity
On Fri, May 08, 2009 at 06:46:14PM +0800, Dong, Eddie wrote:
> Dong, Eddie wrote:
> > ction will be re-executed.
> >>>
> >>> Do you want it to be covered for now? For exception, it is easy but
> >>> for IRQ, it needs to be pushed back.
> >>>
> >> Yes I want it to be covered now otherwise any serial exception
> >> generates flood of "Exception happens serially" messages. This
> >> function does not handle IRQ so no problem there.
> >
> > But we soon will let this function cove IRQ as well per SDM.
> > Why not do that a little bit later?
> >
> > BTW, this issue exist in original code as well.
> >
> > Eddie
>
> Actually this is already addressed in current patch too: Just keep the former exception. If you mean the prink should be removed, I am fine.
Keeping the former exception is not the right thing to do. It can't be
delivered because delivering it cause another exception and handler that
may fix the situation is not called since you drop last exception and
keep re-injecting the one that can't be handled.
> BTW, this case doesn't happen in reality.
>
Then why do you write all this code then? :) I can easily write test
case that will do that (actually I did) and if not handled properly it
just loops taking 100% cpu trying to reinject exception that cannot be
handled.
--
Gleb.
^ permalink raw reply [flat|nested] 33+ messages in thread
* RE: Implement generic double fault generation mechanism
2009-05-08 12:23 ` Gleb Natapov
@ 2009-05-08 15:00 ` Dong, Eddie
2009-05-08 18:44 ` Gleb Natapov
0 siblings, 1 reply; 33+ messages in thread
From: Dong, Eddie @ 2009-05-08 15:00 UTC (permalink / raw)
To: Gleb Natapov; +Cc: kvm@vger.kernel.org, Avi Kivity, Dong, Eddie
Gleb Natapov wrote:
> On Fri, May 08, 2009 at 06:46:14PM +0800, Dong, Eddie wrote:
>> Dong, Eddie wrote:
>>> ction will be re-executed.
>>>>>
>>>>> Do you want it to be covered for now? For exception, it is easy
>>>>> but for IRQ, it needs to be pushed back.
>>>>>
>>>> Yes I want it to be covered now otherwise any serial exception
>>>> generates flood of "Exception happens serially" messages. This
>>>> function does not handle IRQ so no problem there.
>>>
>>> But we soon will let this function cove IRQ as well per SDM.
>>> Why not do that a little bit later?
>>>
>>> BTW, this issue exist in original code as well.
>>>
>>> Eddie
>>
>> Actually this is already addressed in current patch too: Just keep
>> the former exception. If you mean the prink should be removed, I am
>> fine.
> Keeping the former exception is not the right thing to do. It can't be
> delivered because delivering it cause another exception and handler
> that may fix the situation is not called since you drop last
> exception and keep re-injecting the one that can't be handled.
>
>> BTW, this case doesn't happen in reality.
>>
> Then why do you write all this code then? :) I can easily write test
I am fixing the potential #DF bug existing in current code which only handle
PF on PF.
For those sequential exception, it is WARN_ON in current code.
> case that will do that (actually I did) and if not handled properly it
> just loops taking 100% cpu trying to reinject exception that cannot be
> handled.
Are u sure current code is dead loop in WARN_ON with your test code?
I don't see it will never happen and thus why printk it, but shouldn't exist
in current guest that KVM can support.
See original kvm_queue_exception in case you ignored the code.
void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
{
WARN_ON(vcpu->arch.exception.pending);
vcpu->arch.exception.pending = true;
vcpu->arch.exception.has_error_code = false;
vcpu->arch.exception.nr = nr;
}
Any comments from Avi?
Thx, eddie
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: Implement generic double fault generation mechanism
2009-05-08 15:00 ` Dong, Eddie
@ 2009-05-08 18:44 ` Gleb Natapov
2009-05-11 1:04 ` Dong, Eddie
0 siblings, 1 reply; 33+ messages in thread
From: Gleb Natapov @ 2009-05-08 18:44 UTC (permalink / raw)
To: Dong, Eddie; +Cc: kvm@vger.kernel.org, Avi Kivity
On Fri, May 08, 2009 at 11:00:51PM +0800, Dong, Eddie wrote:
> Gleb Natapov wrote:
> > On Fri, May 08, 2009 at 06:46:14PM +0800, Dong, Eddie wrote:
> >> Dong, Eddie wrote:
> >>> ction will be re-executed.
> >>>>>
> >>>>> Do you want it to be covered for now? For exception, it is easy
> >>>>> but for IRQ, it needs to be pushed back.
> >>>>>
> >>>> Yes I want it to be covered now otherwise any serial exception
> >>>> generates flood of "Exception happens serially" messages. This
> >>>> function does not handle IRQ so no problem there.
> >>>
> >>> But we soon will let this function cove IRQ as well per SDM.
> >>> Why not do that a little bit later?
> >>>
> >>> BTW, this issue exist in original code as well.
> >>>
> >>> Eddie
> >>
> >> Actually this is already addressed in current patch too: Just keep
> >> the former exception. If you mean the prink should be removed, I am
> >> fine.
> > Keeping the former exception is not the right thing to do. It can't be
> > delivered because delivering it cause another exception and handler
> > that may fix the situation is not called since you drop last
> > exception and keep re-injecting the one that can't be handled.
> >
> >> BTW, this case doesn't happen in reality.
> >>
> > Then why do you write all this code then? :) I can easily write test
>
> I am fixing the potential #DF bug existing in current code which only handle
> PF on PF.
> For those sequential exception, it is WARN_ON in current code.
>
Can your describe real life scenario that needs this fix? I am all for
fixing code and be as close as possible to SDM, but if you do it do it right.
> > case that will do that (actually I did) and if not handled properly it
> > just loops taking 100% cpu trying to reinject exception that cannot be
> > handled.
>
> Are u sure current code is dead loop in WARN_ON with your test code?
Yes.
> I don't see it will never happen and thus why printk it, but shouldn't exist
I have the code that triggers this path. Good enough for me.
> in current guest that KVM can support.
>
> See original kvm_queue_exception in case you ignored the code.
>
There is not point referring to current code. Current code does not
handle serial exceptions properly. So fix it in your patch otherwise I
propose to use my patch that fixes current code
(http://patchwork.kernel.org/patch/21829/).
> void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
> {
> WARN_ON(vcpu->arch.exception.pending);
> vcpu->arch.exception.pending = true;
> vcpu->arch.exception.has_error_code = false;
> vcpu->arch.exception.nr = nr;
> }
>
> Any comments from Avi?
>
> Thx, eddie
>
--
Gleb.
^ permalink raw reply [flat|nested] 33+ messages in thread
* RE: Implement generic double fault generation mechanism
2009-05-08 18:44 ` Gleb Natapov
@ 2009-05-11 1:04 ` Dong, Eddie
2009-05-11 6:02 ` Gleb Natapov
2009-05-11 6:17 ` Avi Kivity
0 siblings, 2 replies; 33+ messages in thread
From: Dong, Eddie @ 2009-05-11 1:04 UTC (permalink / raw)
To: Gleb Natapov; +Cc: kvm@vger.kernel.org, Avi Kivity, Dong, Eddie
> There is not point referring to current code. Current code does not
> handle serial exceptions properly. So fix it in your patch otherwise I
> propose to use my patch that fixes current code
> (http://patchwork.kernel.org/patch/21829/).
>
I would like Avi to decide. As comments to the difference of 2 patches, my undrestanding is that I am addressing the problem base on SDM 5-4 with the answer to serial injection as first in first service. Your patch doesn;t solve generic double fault case for example exception 11 on 11, or GP on GP which needs to be converted to #DF per SDM, rather you only handle the case the secondary exception is PF, and servicing PF.
I can check with internal architecture to see what does "handle exceptions serially" mean in really. For me serial means first in first out, and thus we should remain 1st exception.
Eddie.
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: Implement generic double fault generation mechanism
2009-05-11 1:04 ` Dong, Eddie
@ 2009-05-11 6:02 ` Gleb Natapov
2009-05-12 5:35 ` Dong, Eddie
2009-05-11 6:17 ` Avi Kivity
1 sibling, 1 reply; 33+ messages in thread
From: Gleb Natapov @ 2009-05-11 6:02 UTC (permalink / raw)
To: Dong, Eddie; +Cc: kvm@vger.kernel.org, Avi Kivity
On Mon, May 11, 2009 at 09:04:52AM +0800, Dong, Eddie wrote:
>
> > There is not point referring to current code. Current code does not
> > handle serial exceptions properly. So fix it in your patch otherwise I
> > propose to use my patch that fixes current code
> > (http://patchwork.kernel.org/patch/21829/).
> >
>
> I would like Avi to decide. As comments to the difference of 2 patches, my undrestanding is that I am addressing the problem base on SDM 5-4 with the answer to serial injection as first in first service. Your patch doesn;t solve generic double fault case for example exception 11 on 11, or GP on GP which needs to be converted to #DF per SDM, rather you only handle the case the secondary exception is PF, and servicing PF.
>
There is nothing to decide really. I prefer your patch with serial
exception handling fixed. If you'll not do it I'll do it.
> I can check with internal architecture to see what does "handle exceptions serially" mean in really. For me serial means first in first out, and thus we should remain 1st exception.
>
There is a table 5.2 that defines an order between some events. The table
is not complete, I don't see #DE there for instance. But consider
this case: #DE (or #NP) happens while exception stack is paged out so
#PF happens next. #PF is handled by TSS gate so it uses its own stack
and it fixes exception stack in its handler. If we drop #PF because #DE
is already waiting we will keep trying to inject #DE indefinitely. The
result is hanging QEMU process eating 100% cpu time. If we replace #DE
with #PF on the other hand then #PF handler will fix exception stack
instruction that caused #DE will be re-executed, #DE regenerated and
handled properly. So which scenario do you prefer?
WFIW bochs/qemu replace old exception with a new one.
--
Gleb.
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: Implement generic double fault generation mechanism
2009-05-11 1:04 ` Dong, Eddie
2009-05-11 6:02 ` Gleb Natapov
@ 2009-05-11 6:17 ` Avi Kivity
2009-05-12 7:38 ` event injection MACROs Dong, Eddie
1 sibling, 1 reply; 33+ messages in thread
From: Avi Kivity @ 2009-05-11 6:17 UTC (permalink / raw)
To: Dong, Eddie; +Cc: Gleb Natapov, kvm@vger.kernel.org
Dong, Eddie wrote:
>> There is not point referring to current code. Current code does not
>> handle serial exceptions properly. So fix it in your patch otherwise I
>> propose to use my patch that fixes current code
>> (http://patchwork.kernel.org/patch/21829/).
>>
>>
>
> I would like Avi to decide.
I would prefer you two to reach agreement. Less work for me.
> I can check with internal architecture to see what does "handle exceptions serially" mean in really. For me serial means first in first out, and thus we should remain 1st exception.
>
The second exception was encountered while injecting the first
exception, so how can you continue with the first without servicing the
second?
--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.
^ permalink raw reply [flat|nested] 33+ messages in thread
* RE: Implement generic double fault generation mechanism
2009-05-11 6:02 ` Gleb Natapov
@ 2009-05-12 5:35 ` Dong, Eddie
2009-05-12 7:01 ` Gleb Natapov
0 siblings, 1 reply; 33+ messages in thread
From: Dong, Eddie @ 2009-05-12 5:35 UTC (permalink / raw)
To: Gleb Natapov; +Cc: kvm@vger.kernel.org, Avi Kivity, Dong, Eddie
Gleb Natapov wrote:
> On Mon, May 11, 2009 at 09:04:52AM +0800, Dong, Eddie wrote:
>>
>>> There is not point referring to current code. Current code does not
>>> handle serial exceptions properly. So fix it in your patch
>>> otherwise I propose to use my patch that fixes current code
>>> (http://patchwork.kernel.org/patch/21829/).
>>>
>>
>> I would like Avi to decide. As comments to the difference of 2
>> patches, my undrestanding is that I am addressing the problem base
>> on SDM 5-4 with the answer to serial injection as first in first
>> service. Your patch doesn;t solve generic double fault case for
>> example exception 11 on 11, or GP on GP which needs to be converted
>> to #DF per SDM, rather you only handle the case the secondary
>> exception is PF, and servicing PF.
>>
> There is nothing to decide really. I prefer your patch with serial
> exception handling fixed. If you'll not do it I'll do it.
OK, an additional patch will be constructive but my position is neutral. The reason (mentioned) is:
1: Current KVM just WARN_ON for those case (and never be hit), so the this patch won't introduce
additional issues. Either printk or WARN_ON to notify us in case we met the problem in future is safer way for me.
2: In case of real "serial ecception" happens, from architectural point of view, I think we'd better consult Table 5-2 to prioritize them, which is neither reserving former exception nor overwritting. But as you mentioned, the list is not completed. My point is that this is another complicated scenario that we should spend time in future, but not related to current patch.
3: This function will soon needs to be extended to cover IRQ case too, which needs to push back the overwritten IRQ. We need a total solution for this, so I prefer to do that some time later.
4: I prefer to split issue if possible.
>
>> I can check with internal architecture to see what does "handle
>> exceptions serially" mean in really. For me serial means first in
>> first out, and thus we should remain 1st exception.
>>
> There is a table 5.2 that defines an order between some events. The
> table is not complete, I don't see #DE there for instance. But
> consider
> this case: #DE (or #NP) happens while exception stack is paged out so
> #PF happens next. #PF is handled by TSS gate so it uses its own stack
> and it fixes exception stack in its handler. If we drop #PF because
> #DE is already waiting we will keep trying to inject #DE
> indefinitely. The result is hanging QEMU process eating 100% cpu
> time. If we replace #DE with #PF on the other hand then #PF handler
> will fix exception stack instruction that caused #DE will be
> re-executed, #DE regenerated and handled properly. So which scenario
> do you prefer?
See above.
Thx, eddie
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: Implement generic double fault generation mechanism
2009-05-12 5:35 ` Dong, Eddie
@ 2009-05-12 7:01 ` Gleb Natapov
2009-05-12 15:06 ` Enable IRQ windows after exception injection if there are pending virq Dong, Eddie
2009-05-13 14:05 ` Implement generic double fault generation mechanism Dong, Eddie
0 siblings, 2 replies; 33+ messages in thread
From: Gleb Natapov @ 2009-05-12 7:01 UTC (permalink / raw)
To: Dong, Eddie; +Cc: kvm@vger.kernel.org, Avi Kivity
On Tue, May 12, 2009 at 01:35:31PM +0800, Dong, Eddie wrote:
> Gleb Natapov wrote:
> > On Mon, May 11, 2009 at 09:04:52AM +0800, Dong, Eddie wrote:
> >>
> >>> There is not point referring to current code. Current code does not
> >>> handle serial exceptions properly. So fix it in your patch
> >>> otherwise I propose to use my patch that fixes current code
> >>> (http://patchwork.kernel.org/patch/21829/).
> >>>
> >>
> >> I would like Avi to decide. As comments to the difference of 2
> >> patches, my undrestanding is that I am addressing the problem base
> >> on SDM 5-4 with the answer to serial injection as first in first
> >> service. Your patch doesn;t solve generic double fault case for
> >> example exception 11 on 11, or GP on GP which needs to be converted
> >> to #DF per SDM, rather you only handle the case the secondary
> >> exception is PF, and servicing PF.
> >>
> > There is nothing to decide really. I prefer your patch with serial
> > exception handling fixed. If you'll not do it I'll do it.
>
> OK, an additional patch will be constructive but my position is neutral. The reason (mentioned) is:
>
> 1: Current KVM just WARN_ON for those case (and never be hit), so the this patch won't introduce
> additional issues. Either printk or WARN_ON to notify us in case we met the problem in future is safer way for me.
>
But current KVM also replace pending exception with a newer one after
WARN_ON. I agree that real OSes (at least common ones) never hit this
case. But it is possible to hit it from a guest and I have a test case.
> 2: In case of real "serial ecception" happens, from architectural point of view, I think we'd better consult Table 5-2 to prioritize them, which is neither reserving former exception nor overwritting. But as you mentioned, the list is not completed. My point is that this is another complicated scenario that we should spend time in future, but not related to current patch.
>
If you can get more complete info about what real CPU does in case of
simultaneous exceptions it would be nice. I think CPU is smart enough
to understand when second exception happened while trying to handle the
first one and handle the second one first in this case. Otherwise I
don't see how it could work.
> 3: This function will soon needs to be extended to cover IRQ case too, which needs to push back the overwritten IRQ. We need a total solution for this, so I prefer to do that some time later.
>
I don't think that IRQ should be handled by this function. At leas it
should still be stored in its own queue.
> 4: I prefer to split issue if possible.
>
>
That is OK, You can send two patches. The first one will WARN_ON and
overwrite exception like the current code does. And the second one will
remove WARN_ON explaining that this case is actually possible to trigger
from a guest.
> >
> >> I can check with internal architecture to see what does "handle
> >> exceptions serially" mean in really. For me serial means first in
> >> first out, and thus we should remain 1st exception.
> >>
> > There is a table 5.2 that defines an order between some events. The
> > table is not complete, I don't see #DE there for instance. But
> > consider
> > this case: #DE (or #NP) happens while exception stack is paged out so
> > #PF happens next. #PF is handled by TSS gate so it uses its own stack
> > and it fixes exception stack in its handler. If we drop #PF because
> > #DE is already waiting we will keep trying to inject #DE
> > indefinitely. The result is hanging QEMU process eating 100% cpu
> > time. If we replace #DE with #PF on the other hand then #PF handler
> > will fix exception stack instruction that caused #DE will be
> > re-executed, #DE regenerated and handled properly. So which scenario
> > do you prefer?
>
> See above.
>
> Thx, eddie
--
Gleb.
^ permalink raw reply [flat|nested] 33+ messages in thread
* event injection MACROs
2009-05-11 6:17 ` Avi Kivity
@ 2009-05-12 7:38 ` Dong, Eddie
2009-05-12 8:49 ` Gleb Natapov
2009-05-13 9:49 ` Avi Kivity
0 siblings, 2 replies; 33+ messages in thread
From: Dong, Eddie @ 2009-05-12 7:38 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm@vger.kernel.org, Dong, Eddie
I noticed the MACRO for SVM vmcb->control.event_inj and VMX VM_EXIT_INTR_INFO are almost same, I have a need to query the event injection situation in common code so plan to expose this register read/write to x86.c. Should we define a new format for evtinj/VM_EXIT_INTR_INFO as common KVM format, or just move those original MACRO to kvm_host.h?
Thx, eddie
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: event injection MACROs
2009-05-12 7:38 ` event injection MACROs Dong, Eddie
@ 2009-05-12 8:49 ` Gleb Natapov
2009-05-13 9:49 ` Avi Kivity
1 sibling, 0 replies; 33+ messages in thread
From: Gleb Natapov @ 2009-05-12 8:49 UTC (permalink / raw)
To: Dong, Eddie; +Cc: Avi Kivity, kvm@vger.kernel.org
On Tue, May 12, 2009 at 03:38:59PM +0800, Dong, Eddie wrote:
> I noticed the MACRO for SVM vmcb->control.event_inj and VMX VM_EXIT_INTR_INFO are almost same, I have a need to query the event injection situation in common code so plan to expose this register read/write to x86.c. Should we define a new format for evtinj/VM_EXIT_INTR_INFO as common KVM format, or just move those original MACRO to kvm_host.h?
>
I haven't seen your code, so I don't know what you are trying to do, but why
querying interrupt/nmi/exception injection queues is not enough? What
info is missing there?
--
Gleb.
^ permalink raw reply [flat|nested] 33+ messages in thread
* Enable IRQ windows after exception injection if there are pending virq
2009-05-12 7:01 ` Gleb Natapov
@ 2009-05-12 15:06 ` Dong, Eddie
2009-05-12 15:27 ` Gleb Natapov
2009-05-13 14:05 ` Implement generic double fault generation mechanism Dong, Eddie
1 sibling, 1 reply; 33+ messages in thread
From: Dong, Eddie @ 2009-05-12 15:06 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm@vger.kernel.org, Dong, Eddie, Dong, Eddie
[-- Attachment #1: Type: text/plain, Size: 1609 bytes --]
I didn't take many test since our PTS system stop working now due to KVM userspace
build changes. But since the logic is pretty simple, so I want to post here to see comments.
Thx, eddie
If there is pending irq after an virtual exception is injected,
KVM needs to enable IRQ window to trap back earlier once
the exception is handled.
Signed-off-by: Eddie Dong <eddie.dong@intel.com>
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 308d8e9..f8ceaea 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3154,15 +3154,18 @@ static void inject_irq(struct kvm_vcpu *vcpu)
}
}
-static void inject_pending_irq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static void inject_pending_irq(struct kvm_vcpu *vcpu)
{
- bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
- kvm_run->request_interrupt_window;
-
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
kvm_x86_ops->drop_interrupt_shadow(vcpu);
inject_irq(vcpu);
+}
+
+static void set_pending_virq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+ bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
+ kvm_run->request_interrupt_window;
/* enable NMI/IRQ window open exits if needed */
if (vcpu->arch.nmi_pending)
@@ -3229,7 +3232,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (vcpu->arch.exception.pending)
__queue_exception(vcpu);
else
- inject_pending_irq(vcpu, kvm_run);
+ inject_pending_irq(vcpu);
+
+ set_pending_virq(vcpu, kvm_run);
if (kvm_lapic_enabled(vcpu)) {
if (!vcpu->arch.apic->vapic_addr)
[-- Attachment #2: irq_windows.patch --]
[-- Type: application/octet-stream, Size: 1368 bytes --]
If there is pending irq after an virtual exception is injected,
KVM needs to enable IRQ window to trap back earlier once
the exception is handled.
Signed-off-by: Eddie Dong <eddie.dong@intel.com>
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 308d8e9..f8ceaea 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3154,15 +3154,18 @@ static void inject_irq(struct kvm_vcpu *vcpu)
}
}
-static void inject_pending_irq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static void inject_pending_irq(struct kvm_vcpu *vcpu)
{
- bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
- kvm_run->request_interrupt_window;
-
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
kvm_x86_ops->drop_interrupt_shadow(vcpu);
inject_irq(vcpu);
+}
+
+static void set_pending_virq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+ bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
+ kvm_run->request_interrupt_window;
/* enable NMI/IRQ window open exits if needed */
if (vcpu->arch.nmi_pending)
@@ -3229,7 +3232,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (vcpu->arch.exception.pending)
__queue_exception(vcpu);
else
- inject_pending_irq(vcpu, kvm_run);
+ inject_pending_irq(vcpu);
+
+ set_pending_virq(vcpu, kvm_run);
if (kvm_lapic_enabled(vcpu)) {
if (!vcpu->arch.apic->vapic_addr)
^ permalink raw reply related [flat|nested] 33+ messages in thread
* Re: Enable IRQ windows after exception injection if there are pending virq
2009-05-12 15:06 ` Enable IRQ windows after exception injection if there are pending virq Dong, Eddie
@ 2009-05-12 15:27 ` Gleb Natapov
2009-05-13 7:45 ` Dong, Eddie
0 siblings, 1 reply; 33+ messages in thread
From: Gleb Natapov @ 2009-05-12 15:27 UTC (permalink / raw)
To: Dong, Eddie; +Cc: Avi Kivity, kvm@vger.kernel.org
On Tue, May 12, 2009 at 11:06:39PM +0800, Dong, Eddie wrote:
>
> I didn't take many test since our PTS system stop working now due to KVM userspace
> build changes. But since the logic is pretty simple, so I want to post here to see comments.
> Thx, eddie
>
>
>
>
> If there is pending irq after an virtual exception is injected,
> KVM needs to enable IRQ window to trap back earlier once
> the exception is handled.
>
I already posted patch to do that http://patchwork.kernel.org/patch/21830/
Is you patch different?
> Signed-off-by: Eddie Dong <eddie.dong@intel.com>
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 308d8e9..f8ceaea 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -3154,15 +3154,18 @@ static void inject_irq(struct kvm_vcpu *vcpu)
> }
> }
>
> -static void inject_pending_irq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
> +static void inject_pending_irq(struct kvm_vcpu *vcpu)
> {
> - bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
> - kvm_run->request_interrupt_window;
> -
> if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
> kvm_x86_ops->drop_interrupt_shadow(vcpu);
>
> inject_irq(vcpu);
> +}
> +
> +static void set_pending_virq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
> +{
> + bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
> + kvm_run->request_interrupt_window;
>
> /* enable NMI/IRQ window open exits if needed */
> if (vcpu->arch.nmi_pending)
> @@ -3229,7 +3232,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
> if (vcpu->arch.exception.pending)
> __queue_exception(vcpu);
> else
> - inject_pending_irq(vcpu, kvm_run);
> + inject_pending_irq(vcpu);
> +
> + set_pending_virq(vcpu, kvm_run);
>
> if (kvm_lapic_enabled(vcpu)) {
> if (!vcpu->arch.apic->vapic_addr)
--
Gleb.
^ permalink raw reply [flat|nested] 33+ messages in thread
* RE: Enable IRQ windows after exception injection if there are pending virq
2009-05-12 15:27 ` Gleb Natapov
@ 2009-05-13 7:45 ` Dong, Eddie
2009-05-13 10:29 ` Gleb Natapov
0 siblings, 1 reply; 33+ messages in thread
From: Dong, Eddie @ 2009-05-13 7:45 UTC (permalink / raw)
To: Gleb Natapov; +Cc: Avi Kivity, kvm@vger.kernel.org, Dong, Eddie, Dong, Eddie
Gleb Natapov wrote:
> On Tue, May 12, 2009 at 11:06:39PM +0800, Dong, Eddie wrote:
>>
>> I didn't take many test since our PTS system stop working now due to
>> KVM userspace
>> build changes. But since the logic is pretty simple, so I want to
>> post here to see comments. Thx, eddie
>>
>>
>>
>>
>> If there is pending irq after an virtual exception is injected,
>> KVM needs to enable IRQ window to trap back earlier once
>> the exception is handled.
>>
> I already posted patch to do that
> http://patchwork.kernel.org/patch/21830/ Is you patch different?
>
Is it base on the idea I mentioned to you in private mail (April 27), or a novel one?
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: event injection MACROs
2009-05-12 7:38 ` event injection MACROs Dong, Eddie
2009-05-12 8:49 ` Gleb Natapov
@ 2009-05-13 9:49 ` Avi Kivity
2009-05-13 14:20 ` Dong, Eddie
1 sibling, 1 reply; 33+ messages in thread
From: Avi Kivity @ 2009-05-13 9:49 UTC (permalink / raw)
To: Dong, Eddie; +Cc: kvm@vger.kernel.org
Dong, Eddie wrote:
> I noticed the MACRO for SVM vmcb->control.event_inj and VMX VM_EXIT_INTR_INFO are almost same, I have a need to query the event injection situation in common code so plan to expose this register read/write to x86.c. Should we define a new format for evtinj/VM_EXIT_INTR_INFO as common KVM format, or just move those original MACRO to kvm_host.h?
>
>
This is dangerous if additional bits or field values are defined by
either architecture. Better to use accessors.
--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: Enable IRQ windows after exception injection if there are pending virq
2009-05-13 7:45 ` Dong, Eddie
@ 2009-05-13 10:29 ` Gleb Natapov
0 siblings, 0 replies; 33+ messages in thread
From: Gleb Natapov @ 2009-05-13 10:29 UTC (permalink / raw)
To: Dong, Eddie; +Cc: Avi Kivity, kvm@vger.kernel.org
On Wed, May 13, 2009 at 03:45:37PM +0800, Dong, Eddie wrote:
> Gleb Natapov wrote:
> > On Tue, May 12, 2009 at 11:06:39PM +0800, Dong, Eddie wrote:
> >>
> >> I didn't take many test since our PTS system stop working now due to
> >> KVM userspace
> >> build changes. But since the logic is pretty simple, so I want to
> >> post here to see comments. Thx, eddie
> >>
> >>
> >>
> >>
> >> If there is pending irq after an virtual exception is injected,
> >> KVM needs to enable IRQ window to trap back earlier once
> >> the exception is handled.
> >>
> > I already posted patch to do that
> > http://patchwork.kernel.org/patch/21830/ Is you patch different?
> >
> Is it base on the idea I mentioned to you in private mail (April 27), or a novel one?
>
Yes. It fixes the bug you pointed out.
--
Gleb.
^ permalink raw reply [flat|nested] 33+ messages in thread
* RE: Implement generic double fault generation mechanism
2009-05-12 7:01 ` Gleb Natapov
2009-05-12 15:06 ` Enable IRQ windows after exception injection if there are pending virq Dong, Eddie
@ 2009-05-13 14:05 ` Dong, Eddie
1 sibling, 0 replies; 33+ messages in thread
From: Dong, Eddie @ 2009-05-13 14:05 UTC (permalink / raw)
To: Gleb Natapov; +Cc: kvm@vger.kernel.org, Avi Kivity, Dong, Eddie
[-- Attachment #1: Type: text/plain, Size: 1737 bytes --]
> That is OK, You can send two patches. The first one will WARN_ON and
> overwrite exception like the current code does. And the second one
> will remove WARN_ON explaining that this case is actually possible to
> trigger from a guest.
>
Sounds you don't like to provide this additional one, here it is for the purpose of
removing the block issue. My basic position is still same with what mentioned
in previous mail, but I am neutral to either way.
Thx, eddie
Signed-off-by: Eddie Dong <eddie.dong@intel.com>
Overwriting former event may help forward progress
in case of multiple exception/interrupt happens serially.
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d0e75a2..b3de5d2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -183,11 +183,7 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
int class1, class2;
if (!vcpu->arch.exception.pending) {
- vcpu->arch.exception.pending = true;
- vcpu->arch.exception.has_error_code = has_error;
- vcpu->arch.exception.nr = nr;
- vcpu->arch.exception.error_code = error_code;
- return;
+ goto out;
}
/* to check exception */
@@ -208,9 +204,15 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
vcpu->arch.exception.has_error_code = true;
vcpu->arch.exception.nr = DF_VECTOR;
vcpu->arch.exception.error_code = 0;
+ return;
} else
printk(KERN_ERR "Exception 0x%x on 0x%x happens serially\n",
prev_nr, nr);
+out:
+ vcpu->arch.exception.pending = true;
+ vcpu->arch.exception.has_error_code = has_error;
+ vcpu->arch.exception.nr = nr;
+ vcpu->arch.exception.error_code = error_code;
}
void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
[-- Attachment #2: serial_irq.patch --]
[-- Type: application/octet-stream, Size: 1304 bytes --]
commit 36b25ad14fadd5d2966f6935622b58b2bec89a2f
Author: root <root@eddie-wb.localdomain>
Date: Wed May 13 22:45:40 2009 +0800
Overwriting former event may help forward progress
in case of multiple exception/interrupt happens serially.
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d0e75a2..b3de5d2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -183,11 +183,7 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
int class1, class2;
if (!vcpu->arch.exception.pending) {
- vcpu->arch.exception.pending = true;
- vcpu->arch.exception.has_error_code = has_error;
- vcpu->arch.exception.nr = nr;
- vcpu->arch.exception.error_code = error_code;
- return;
+ goto out;
}
/* to check exception */
@@ -208,9 +204,15 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
vcpu->arch.exception.has_error_code = true;
vcpu->arch.exception.nr = DF_VECTOR;
vcpu->arch.exception.error_code = 0;
+ return;
} else
printk(KERN_ERR "Exception 0x%x on 0x%x happens serially\n",
prev_nr, nr);
+out:
+ vcpu->arch.exception.pending = true;
+ vcpu->arch.exception.has_error_code = has_error;
+ vcpu->arch.exception.nr = nr;
+ vcpu->arch.exception.error_code = error_code;
}
void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
^ permalink raw reply related [flat|nested] 33+ messages in thread
* RE: event injection MACROs
2009-05-13 9:49 ` Avi Kivity
@ 2009-05-13 14:20 ` Dong, Eddie
2009-05-14 9:27 ` Avi Kivity
0 siblings, 1 reply; 33+ messages in thread
From: Dong, Eddie @ 2009-05-13 14:20 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm@vger.kernel.org, Dong, Eddie
Avi Kivity wrote:
> Dong, Eddie wrote:
>> I noticed the MACRO for SVM vmcb->control.event_inj and VMX
>> VM_EXIT_INTR_INFO are almost same, I have a need to query the event
>> injection situation in common code so plan to expose this register
>> read/write to x86.c. Should we define a new format for
>> evtinj/VM_EXIT_INTR_INFO as common KVM format, or just move those
>> original MACRO to kvm_host.h?
>>
>>
>
> This is dangerous if additional bits or field values are defined by
> either architecture. Better to use accessors.
OK.
Also back to Gleb's question, the reason I want to do that is to simplify event
generation mechanism in current KVM.
Today KVM use additional layer of exception/nmi/interrupt such as
vcpu.arch.exception.pending, vcpu->arch.interrupt.pending & vcpu->arch.nmi_injected.
All those additional layer is due to compete of VM_ENTRY_INTR_INFO_FIELD
write to inject the event. Both SVM & VMX has only one resource to inject the virtual event
but KVM generates 3 catagory of events in parallel which further requires additional
logic to dictate among them. One example is that exception has higher priority
than NMI/IRQ injection in current code which is not true in reality.
Another issue is that an failed event from previous injection say IRQ or NMI may be
discarded if an virtual exception happens in the EXIT handling now. With the patch of
generic double fault handling, this case should be handled as normally.
Will post RFC soon.
Thx, eddie
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: event injection MACROs
2009-05-13 14:20 ` Dong, Eddie
@ 2009-05-14 9:27 ` Avi Kivity
2009-05-14 13:43 ` Dong, Eddie
0 siblings, 1 reply; 33+ messages in thread
From: Avi Kivity @ 2009-05-14 9:27 UTC (permalink / raw)
To: Dong, Eddie; +Cc: kvm@vger.kernel.org
Dong, Eddie wrote:
> OK.
> Also back to Gleb's question, the reason I want to do that is to simplify event
> generation mechanism in current KVM.
>
> Today KVM use additional layer of exception/nmi/interrupt such as
> vcpu.arch.exception.pending, vcpu->arch.interrupt.pending & vcpu->arch.nmi_injected.
> All those additional layer is due to compete of VM_ENTRY_INTR_INFO_FIELD
> write to inject the event. Both SVM & VMX has only one resource to inject the virtual event
> but KVM generates 3 catagory of events in parallel which further requires additional
> logic to dictate among them.
I thought of using a queue to hold all pending events (in a common
format), sort it by priority, and inject the head.
> One example is that exception has higher priority
> than NMI/IRQ injection in current code which is not true in reality.
>
I don't think it matters in practice, since the guest will see it as a
timing issue. NMIs and IRQs are asynchronous (even those generated by
the guest through the local APIC).
> Another issue is that an failed event from previous injection say IRQ or NMI may be
> discarded if an virtual exception happens in the EXIT handling now. With the patch of
> generic double fault handling, this case should be handled as normally.
>
Discarding an exception is usually okay as it will be regenerated. I
don't think we discard interrupts or NMIs.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 33+ messages in thread
* RE: event injection MACROs
2009-05-14 9:27 ` Avi Kivity
@ 2009-05-14 13:43 ` Dong, Eddie
2009-05-14 14:16 ` Gleb Natapov
0 siblings, 1 reply; 33+ messages in thread
From: Dong, Eddie @ 2009-05-14 13:43 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm@vger.kernel.org, Dong, Eddie
Avi Kivity wrote:
> Dong, Eddie wrote:
>> OK.
>> Also back to Gleb's question, the reason I want to do that is to
>> simplify event
>> generation mechanism in current KVM.
>>
>> Today KVM use additional layer of exception/nmi/interrupt such as
>> vcpu.arch.exception.pending, vcpu->arch.interrupt.pending &
>> vcpu->arch.nmi_injected.
>> All those additional layer is due to compete of
>> VM_ENTRY_INTR_INFO_FIELD
>> write to inject the event. Both SVM & VMX has only one resource to
>> inject the virtual event but KVM generates 3 catagory of events in
>> parallel which further requires additional
>> logic to dictate among them.
>
> I thought of using a queue to hold all pending events (in a common
> format), sort it by priority, and inject the head.
The SDM Table 5-4 requires to merge 2 events together, i.e. convert to #DF/
Triple fault or inject serially when 2 events happens no matter NMI, IRQ or exception.
As if considering above events merging activity, that is a single element queue.
We could have either: 1) A pure SW "queue" that will be flush to HW
register later (VM_ENTRY_INTR_INFO_FIELD), 2) Direct use HW register.
A potential benefit is that it can avoid duplicated code and potential bugs
in current code as following patch shows if I understand correctly:
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2599,7 +2599,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct
kvm_run *kvm_run)
cr2 = vmcs_readl(EXIT_QUALIFICATION);
KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2,
(u32)((u64)cr2 >> 32), handler);
- if (vcpu->arch.interrupt.pending || vcpu->arch.exception.pending
)
+ if (vcpu->arch.interrupt.pending || vcpu->arch.exception.pending
|| vcpu->arch.nmi_injected)
kvm_mmu_unprotect_page_virt(vcpu, cr2);
return kvm_mmu_page_fault(vcpu, cr2, error_code);
}
If using above merged SW "queue" or HW direct register, we can do like following:
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2599,7 +2599,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct
kvm_run *kvm_run)
cr2 = vmcs_readl(EXIT_QUALIFICATION);
KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2,
(u32)((u64)cr2 >> 32), handler);
- if (vcpu->arch.interrupt.pending || vcpu->arch.exception.pending
)
+ if (vmcs_read(VM_ENTRY_INTR_INFO_FIELD) & INTR_INFO_VALID_MASK)
kvm_mmu_unprotect_page_virt(vcpu, cr2);
return kvm_mmu_page_fault(vcpu, cr2, error_code);
}
Either way are OK and up to you. BTW Xen uses HW register directly to representing
an pending event.
>
>> One example is that exception has higher priority
>> than NMI/IRQ injection in current code which is not true in reality.
>>
>
> I don't think it matters in practice, since the guest will see it as a
> timing issue. NMIs and IRQs are asynchronous (even those generated by
> the guest through the local APIC).
Yes. But also cause IRQ injection be delayed which may have side effect.
For example if guest exception handler is very longer or if guest VCPU fall into
recursive #GP. Within current logic, a guest IRQ event from KDB (IPI) running
on VCPU0, as an example, can't force the dead loop VCPU1 into KDB since it
is recursively #GP.
>
>> Another issue is that an failed event from previous injection say
>> IRQ or NMI may be discarded if an virtual exception happens in the
>> EXIT handling now. With the patch of generic double fault handling,
>> this case should be handled as normally.
>>
>
> Discarding an exception is usually okay as it will be regenerated. I
> don't think we discard interrupts or NMIs.
In reality (Running OS in guest), it doesn't happen so far. But architecturally,
it could. For example KVM injects an IRQ, but VM Resume get #PF and
back to KVM with IDT_VECTORING valid. Then KVM will put back the failed
IRQ to interrupt queue. But if #PF handling generates another exception,
then the interrupt queue won't be able to be injected, since KVM inject
exception first. And the interrupt queue is discarded at next VM Exit.
Overal, I think this is mostly for simplification but may benefit future
a lot. Especially with Gleb's recent cleanup, it soulds to be much easier to
do than before.
I may make mistake here, will like to see more comments.
thx, eddie
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: event injection MACROs
2009-05-14 13:43 ` Dong, Eddie
@ 2009-05-14 14:16 ` Gleb Natapov
2009-05-14 14:34 ` Dong, Eddie
0 siblings, 1 reply; 33+ messages in thread
From: Gleb Natapov @ 2009-05-14 14:16 UTC (permalink / raw)
To: Dong, Eddie; +Cc: Avi Kivity, kvm@vger.kernel.org
On Thu, May 14, 2009 at 09:43:33PM +0800, Dong, Eddie wrote:
> Avi Kivity wrote:
> > Dong, Eddie wrote:
> >> OK.
> >> Also back to Gleb's question, the reason I want to do that is to
> >> simplify event
> >> generation mechanism in current KVM.
> >>
> >> Today KVM use additional layer of exception/nmi/interrupt such as
> >> vcpu.arch.exception.pending, vcpu->arch.interrupt.pending &
> >> vcpu->arch.nmi_injected.
> >> All those additional layer is due to compete of
> >> VM_ENTRY_INTR_INFO_FIELD
> >> write to inject the event. Both SVM & VMX has only one resource to
> >> inject the virtual event but KVM generates 3 catagory of events in
> >> parallel which further requires additional
> >> logic to dictate among them.
> >
> > I thought of using a queue to hold all pending events (in a common
> > format), sort it by priority, and inject the head.
>
> The SDM Table 5-4 requires to merge 2 events together, i.e. convert to #DF/
> Triple fault or inject serially when 2 events happens no matter NMI, IRQ or exception.
>
> As if considering above events merging activity, that is a single element queue.
I don't know how you got to this conclusion from you previous statement.
See explanation to table 5-2 for instate where it is stated that
interrupt should be held pending if there is exception with higher
priority. Should be held pending where? In the queue, like we do. Note
that low prio exceptions are just dropped since they will be regenerated.
> We could have either: 1) A pure SW "queue" that will be flush to HW
> register later (VM_ENTRY_INTR_INFO_FIELD), 2) Direct use HW register.
>
We have three event sources 1) exceptions 2) IRQ 3) NMI. We should have
queue of three elements sorted by priority. On each entry we should
inject an event with highest priority. And remove it from queue on exit.
>
> A potential benefit is that it can avoid duplicated code and potential bugs
> in current code as following patch shows if I understand correctly:
>
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -2599,7 +2599,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct
> kvm_run *kvm_run)
> cr2 = vmcs_readl(EXIT_QUALIFICATION);
> KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2,
> (u32)((u64)cr2 >> 32), handler);
> - if (vcpu->arch.interrupt.pending || vcpu->arch.exception.pending
> )
> + if (vcpu->arch.interrupt.pending || vcpu->arch.exception.pending
> || vcpu->arch.nmi_injected)
> kvm_mmu_unprotect_page_virt(vcpu, cr2);
> return kvm_mmu_page_fault(vcpu, cr2, error_code);
> }
This fix is already in Avi's tree (not yet pushed).
> Either way are OK and up to you. BTW Xen uses HW register directly to representing
> an pending event.
>
In this particular case I don't mind to use HW register either, but I
don't see any advantage.
> >
> >> One example is that exception has higher priority
> >> than NMI/IRQ injection in current code which is not true in reality.
> >>
> >
> > I don't think it matters in practice, since the guest will see it as a
> > timing issue. NMIs and IRQs are asynchronous (even those generated by
> > the guest through the local APIC).
>
> Yes. But also cause IRQ injection be delayed which may have side effect.
> For example if guest exception handler is very longer or if guest VCPU fall into
> recursive #GP. Within current logic, a guest IRQ event from KDB (IPI) running
> on VCPU0, as an example, can't force the dead loop VCPU1 into KDB since it
> is recursively #GP.
If one #GP causes another #GP this is a #DF. If CPU has a chance to executes
something in between KVM will have a chance to inject NMI.
>
> >
> >> Another issue is that an failed event from previous injection say
> >> IRQ or NMI may be discarded if an virtual exception happens in the
> >> EXIT handling now. With the patch of generic double fault handling,
> >> this case should be handled as normally.
> >>
> >
> > Discarding an exception is usually okay as it will be regenerated. I
> > don't think we discard interrupts or NMIs.
> In reality (Running OS in guest), it doesn't happen so far. But architecturally,
> it could. For example KVM injects an IRQ, but VM Resume get #PF and
> back to KVM with IDT_VECTORING valid. Then KVM will put back the failed
> IRQ to interrupt queue. But if #PF handling generates another exception,
> then the interrupt queue won't be able to be injected, since KVM inject
> exception first. And the interrupt queue is discarded at next VM Exit.
>
I acknowledge the presence of the bug although I was not able to write a test case
to cause it yet, but it is easy to fix this without changing code too much. Unified event
queue and clearing of only injected event on exit should do the trick.
--
Gleb.
^ permalink raw reply [flat|nested] 33+ messages in thread
* RE: event injection MACROs
2009-05-14 14:16 ` Gleb Natapov
@ 2009-05-14 14:34 ` Dong, Eddie
2009-05-14 15:44 ` Gleb Natapov
0 siblings, 1 reply; 33+ messages in thread
From: Dong, Eddie @ 2009-05-14 14:34 UTC (permalink / raw)
To: Gleb Natapov; +Cc: Avi Kivity, kvm@vger.kernel.org, Dong, Eddie
Gleb Natapov wrote:
> On Thu, May 14, 2009 at 09:43:33PM +0800, Dong, Eddie wrote:
>> Avi Kivity wrote:
>>> Dong, Eddie wrote:
>>>> OK.
>>>> Also back to Gleb's question, the reason I want to do that is to
>>>> simplify event generation mechanism in current KVM.
>>>>
>>>> Today KVM use additional layer of exception/nmi/interrupt such as
>>>> vcpu.arch.exception.pending, vcpu->arch.interrupt.pending &
>>>> vcpu->arch.nmi_injected. All those additional layer is due to
>>>> compete of VM_ENTRY_INTR_INFO_FIELD
>>>> write to inject the event. Both SVM & VMX has only one resource to
>>>> inject the virtual event but KVM generates 3 catagory of events in
>>>> parallel which further requires additional
>>>> logic to dictate among them.
>>>
>>> I thought of using a queue to hold all pending events (in a common
>>> format), sort it by priority, and inject the head.
>>
>> The SDM Table 5-4 requires to merge 2 events together, i.e. convert
>> to #DF/
>> Triple fault or inject serially when 2 events happens no matter NMI,
>> IRQ or exception.
>>
>> As if considering above events merging activity, that is a single
>> element queue.
> I don't know how you got to this conclusion from you previous
> statement.
> See explanation to table 5-2 for instate where it is stated that
> interrupt should be held pending if there is exception with higher
> priority. Should be held pending where? In the queue, like we do. Note
> that low prio exceptions are just dropped since they will be
> regenerated.
I have different understanding here.
My understanding is that "held" means NO INTA in HW, i.e. LAPIC still hold this IRQ.
>
>> We could have either: 1) A pure SW "queue" that will be flush to HW
>> register later (VM_ENTRY_INTR_INFO_FIELD), 2) Direct use HW register.
>>
> We have three event sources 1) exceptions 2) IRQ 3) NMI. We should
> have
> queue of three elements sorted by priority. On each entry we should
Table 5-4 alreadys says NMI/IRQ is BENIGN.
> inject an event with highest priority. And remove it from queue on
> exit.
The problem is that we have to decide to inject only one of above 3, and discard the rest.
Whether priority them or merge (to one event as Table 5-4) is another story.
>
>>
>> A potential benefit is that it can avoid duplicated code and
>> potential bugs
>> in current code as following patch shows if I understand correctly:
>>
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -2599,7 +2599,7 @@ static int handle_exception(struct kvm_vcpu
>> *vcpu, struct kvm_run *kvm_run) cr2 =
>> vmcs_readl(EXIT_QUALIFICATION);
>> KVMTRACE_3D(PAGE_FAULT, vcpu,
>> error_code, (u32)cr2, (u32)((u64)cr2 >> 32), handler); -
>> if (vcpu->arch.interrupt.pending || vcpu->arch.exception.pending ) +
>> if (vcpu->arch.interrupt.pending ||
>> vcpu->arch.exception.pending ||
>> vcpu->arch.nmi_injected) kvm_mmu_unprotect_page_virt(vcpu,
>> cr2); return kvm_mmu_page_fault(vcpu, cr2, error_code); }
> This fix is already in Avi's tree (not yet pushed).
>
>> Either way are OK and up to you. BTW Xen uses HW register directly
>> to representing
>> an pending event.
>>
> In this particular case I don't mind to use HW register either, but I
> don't see any advantage.
>
>>>
>>>> One example is that exception has higher priority
>>>> than NMI/IRQ injection in current code which is not true in
>>>> reality.
>>>>
>>>
>>> I don't think it matters in practice, since the guest will see it
>>> as a timing issue. NMIs and IRQs are asynchronous (even those
>>> generated by the guest through the local APIC).
>>
>> Yes. But also cause IRQ injection be delayed which may have side
>> effect.
>> For example if guest exception handler is very longer or if guest
>> VCPU fall into recursive #GP. Within current logic, a guest IRQ
>> event from KDB (IPI) running
>> on VCPU0, as an example, can't force the dead loop VCPU1 into KDB
>> since it
>> is recursively #GP.
> If one #GP causes another #GP this is a #DF. If CPU has a chance to
Means another #GP in next instruction i.e. Beginning of #GP handler in guest.
No #DF here.
> executes
> something in between KVM will have a chance to inject NMI.
Could have no chance in some cases though not very common.
>
>>
>>>
>>>> Another issue is that an failed event from previous injection say
>>>> IRQ or NMI may be discarded if an virtual exception happens in the
>>>> EXIT handling now. With the patch of generic double fault handling,
>>>> this case should be handled as normally.
>>>>
>>>
>>> Discarding an exception is usually okay as it will be regenerated.
>>> I don't think we discard interrupts or NMIs.
>> In reality (Running OS in guest), it doesn't happen so far. But
>> architecturally,
>> it could. For example KVM injects an IRQ, but VM Resume get #PF and
>> back to KVM with IDT_VECTORING valid. Then KVM will put back the
>> failed
>> IRQ to interrupt queue. But if #PF handling generates another
>> exception,
>> then the interrupt queue won't be able to be injected, since KVM
>> inject
>> exception first. And the interrupt queue is discarded at next VM
>> Exit.
>>
> I acknowledge the presence of the bug although I was not able to
> write a test case
> to cause it yet, but it is easy to fix this without changing code too
> much. Unified event queue and clearing of only injected event on exit
> should do the trick.
Yes, minor.
Eddie
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: event injection MACROs
2009-05-14 14:34 ` Dong, Eddie
@ 2009-05-14 15:44 ` Gleb Natapov
2009-05-15 7:57 ` Dong, Eddie
0 siblings, 1 reply; 33+ messages in thread
From: Gleb Natapov @ 2009-05-14 15:44 UTC (permalink / raw)
To: Dong, Eddie; +Cc: Avi Kivity, kvm@vger.kernel.org
On Thu, May 14, 2009 at 10:34:11PM +0800, Dong, Eddie wrote:
> Gleb Natapov wrote:
> > On Thu, May 14, 2009 at 09:43:33PM +0800, Dong, Eddie wrote:
> >> Avi Kivity wrote:
> >>> Dong, Eddie wrote:
> >>>> OK.
> >>>> Also back to Gleb's question, the reason I want to do that is to
> >>>> simplify event generation mechanism in current KVM.
> >>>>
> >>>> Today KVM use additional layer of exception/nmi/interrupt such as
> >>>> vcpu.arch.exception.pending, vcpu->arch.interrupt.pending &
> >>>> vcpu->arch.nmi_injected. All those additional layer is due to
> >>>> compete of VM_ENTRY_INTR_INFO_FIELD
> >>>> write to inject the event. Both SVM & VMX has only one resource to
> >>>> inject the virtual event but KVM generates 3 catagory of events in
> >>>> parallel which further requires additional
> >>>> logic to dictate among them.
> >>>
> >>> I thought of using a queue to hold all pending events (in a common
> >>> format), sort it by priority, and inject the head.
> >>
> >> The SDM Table 5-4 requires to merge 2 events together, i.e. convert
> >> to #DF/
> >> Triple fault or inject serially when 2 events happens no matter NMI,
> >> IRQ or exception.
> >>
> >> As if considering above events merging activity, that is a single
> >> element queue.
> > I don't know how you got to this conclusion from you previous
> > statement.
> > See explanation to table 5-2 for instate where it is stated that
> > interrupt should be held pending if there is exception with higher
> > priority. Should be held pending where? In the queue, like we do. Note
> > that low prio exceptions are just dropped since they will be
> > regenerated.
>
> I have different understanding here.
> My understanding is that "held" means NO INTA in HW, i.e. LAPIC still hold this IRQ.
>
And what if INTA already happened and CPU is ready to fetch IDT for
interrupt vector and at this very moment CPU faults?
> >
> >> We could have either: 1) A pure SW "queue" that will be flush to HW
> >> register later (VM_ENTRY_INTR_INFO_FIELD), 2) Direct use HW register.
> >>
> > We have three event sources 1) exceptions 2) IRQ 3) NMI. We should
> > have
> > queue of three elements sorted by priority. On each entry we should
>
> Table 5-4 alreadys says NMI/IRQ is BENIGN.
Table 5-2 applies here not table 5-4 I think.
>
> > inject an event with highest priority. And remove it from queue on
> > exit.
>
> The problem is that we have to decide to inject only one of above 3, and discard the rest.
> Whether priority them or merge (to one event as Table 5-4) is another story.
Only a small number of event are merged into #DF. Most handled serially
(SDM does not define what serially means unfortunately), so I don't
understand where "discard the rest" is come from. We can discard
exception since it will be regenerated anyway, but IRQ and NMI is
another story. SDM says that IRQ should be held pending (once again not
much explanation here), nothing about NMI.
> >
> >>
> >> A potential benefit is that it can avoid duplicated code and
> >> potential bugs
> >> in current code as following patch shows if I understand correctly:
> >>
> >> --- a/arch/x86/kvm/vmx.c
> >> +++ b/arch/x86/kvm/vmx.c
> >> @@ -2599,7 +2599,7 @@ static int handle_exception(struct kvm_vcpu
> >> *vcpu, struct kvm_run *kvm_run) cr2 =
> >> vmcs_readl(EXIT_QUALIFICATION);
> >> KVMTRACE_3D(PAGE_FAULT, vcpu,
> >> error_code, (u32)cr2, (u32)((u64)cr2 >> 32), handler); -
> >> if (vcpu->arch.interrupt.pending || vcpu->arch.exception.pending ) +
> >> if (vcpu->arch.interrupt.pending ||
> >> vcpu->arch.exception.pending ||
> >> vcpu->arch.nmi_injected) kvm_mmu_unprotect_page_virt(vcpu,
> >> cr2); return kvm_mmu_page_fault(vcpu, cr2, error_code); }
> > This fix is already in Avi's tree (not yet pushed).
> >
> >> Either way are OK and up to you. BTW Xen uses HW register directly
> >> to representing
> >> an pending event.
> >>
> > In this particular case I don't mind to use HW register either, but I
> > don't see any advantage.
> >
> >>>
> >>>> One example is that exception has higher priority
> >>>> than NMI/IRQ injection in current code which is not true in
> >>>> reality.
> >>>>
> >>>
> >>> I don't think it matters in practice, since the guest will see it
> >>> as a timing issue. NMIs and IRQs are asynchronous (even those
> >>> generated by the guest through the local APIC).
> >>
> >> Yes. But also cause IRQ injection be delayed which may have side
> >> effect.
> >> For example if guest exception handler is very longer or if guest
> >> VCPU fall into recursive #GP. Within current logic, a guest IRQ
> >> event from KDB (IPI) running
> >> on VCPU0, as an example, can't force the dead loop VCPU1 into KDB
> >> since it
> >> is recursively #GP.
> > If one #GP causes another #GP this is a #DF. If CPU has a chance to
>
> Means another #GP in next instruction i.e. Beginning of #GP handler in guest.
> No #DF here.
>
In this case we will enter guest with "NMI windows open" request and
should exit immediately before first instruction of #GP handler. At this
moment KVM will be able to inject NMI.
> > executes
> > something in between KVM will have a chance to inject NMI.
>
> Could have no chance in some cases though not very common.
>
> >
> >>
> >>>
> >>>> Another issue is that an failed event from previous injection say
> >>>> IRQ or NMI may be discarded if an virtual exception happens in the
> >>>> EXIT handling now. With the patch of generic double fault handling,
> >>>> this case should be handled as normally.
> >>>>
> >>>
> >>> Discarding an exception is usually okay as it will be regenerated.
> >>> I don't think we discard interrupts or NMIs.
> >> In reality (Running OS in guest), it doesn't happen so far. But
> >> architecturally,
> >> it could. For example KVM injects an IRQ, but VM Resume get #PF and
> >> back to KVM with IDT_VECTORING valid. Then KVM will put back the
> >> failed
> >> IRQ to interrupt queue. But if #PF handling generates another
> >> exception,
> >> then the interrupt queue won't be able to be injected, since KVM
> >> inject
> >> exception first. And the interrupt queue is discarded at next VM
> >> Exit.
> >>
> > I acknowledge the presence of the bug although I was not able to
> > write a test case
> > to cause it yet, but it is easy to fix this without changing code too
> > much. Unified event queue and clearing of only injected event on exit
> > should do the trick.
>
> Yes, minor.
>
> Eddie
--
Gleb.
^ permalink raw reply [flat|nested] 33+ messages in thread
* RE: event injection MACROs
2009-05-14 15:44 ` Gleb Natapov
@ 2009-05-15 7:57 ` Dong, Eddie
2009-05-17 9:44 ` Gleb Natapov
0 siblings, 1 reply; 33+ messages in thread
From: Dong, Eddie @ 2009-05-15 7:57 UTC (permalink / raw)
To: Gleb Natapov; +Cc: Avi Kivity, kvm@vger.kernel.org, Dong, Eddie
Gleb Natapov wrote:
> On Thu, May 14, 2009 at 10:34:11PM +0800, Dong, Eddie wrote:
>> Gleb Natapov wrote:
>>> On Thu, May 14, 2009 at 09:43:33PM +0800, Dong, Eddie wrote:
>>>> Avi Kivity wrote:
>>>>> Dong, Eddie wrote:
>>>>>> OK.
>>>>>> Also back to Gleb's question, the reason I want to do that is to
>>>>>> simplify event generation mechanism in current KVM.
>>>>>>
>>>>>> Today KVM use additional layer of exception/nmi/interrupt such as
>>>>>> vcpu.arch.exception.pending, vcpu->arch.interrupt.pending &
>>>>>> vcpu->arch.nmi_injected. All those additional layer is due to
>>>>>> compete of VM_ENTRY_INTR_INFO_FIELD
>>>>>> write to inject the event. Both SVM & VMX has only one resource
>>>>>> to inject the virtual event but KVM generates 3 catagory of
>>>>>> events in parallel which further requires additional
>>>>>> logic to dictate among them.
>>>>>
>>>>> I thought of using a queue to hold all pending events (in a common
>>>>> format), sort it by priority, and inject the head.
>>>>
>>>> The SDM Table 5-4 requires to merge 2 events together, i.e.
>>>> convert to #DF/ Triple fault or inject serially when 2 events
>>>> happens no matter NMI, IRQ or exception.
>>>>
>>>> As if considering above events merging activity, that is a single
>>>> element queue.
>>> I don't know how you got to this conclusion from you previous
>>> statement. See explanation to table 5-2 for instate where it is
>>> stated that interrupt should be held pending if there is exception
>>> with higher priority. Should be held pending where? In the queue,
>>> like we do. Note that low prio exceptions are just dropped since
>>> they will be regenerated.
>>
>> I have different understanding here.
>> My understanding is that "held" means NO INTA in HW, i.e. LAPIC
>> still hold this IRQ.
>>
> And what if INTA already happened and CPU is ready to fetch IDT for
> interrupt vector and at this very moment CPU faults?
If INTA happens, that means it is delivered. If its delivery triggers another
exception, that is what Table5-4 handles.
My understanding is that it is 2 stage process. Table 5-2 talk about
events happening before delivery, so that HW needs to prioritize them.
Once a decision is make, the highest one is delivered but then it could
trigger another exception when fetching IDT etc.
Current execption.pending/interrupt.pending/nmi_injected doesn't match
either of above, interrupt/nmi is only for failed event injection, and a strange
fixed priority check when it is really injected:
exception > failed NMI > failed IRQ > new NMI > new IRQ.
Table 5-2 looks missed in current KVM IMO except a wrong (but minor)
exception > NMI > IRQ sequence.
>
>>>
>>>> We could have either: 1) A pure SW "queue" that will be flush to
>>>> HW register later (VM_ENTRY_INTR_INFO_FIELD), 2) Direct use HW
>>>> register.
>>>>
>>> We have three event sources 1) exceptions 2) IRQ 3) NMI. We should
>>> have queue of three elements sorted by priority. On each entry we
>>> should
>>
>> Table 5-4 alreadys says NMI/IRQ is BENIGN.
> Table 5-2 applies here not table 5-4 I think.
>
>>
>>> inject an event with highest priority. And remove it from queue on
>>> exit.
>>
>> The problem is that we have to decide to inject only one of above 3,
>> and discard the rest. Whether priority them or merge (to one event
>> as Table 5-4) is another story.
> Only a small number of event are merged into #DF. Most handled
> serially (SDM does not define what serially means unfortunately), so
> I don't understand where "discard the rest" is come from. We can
vmx_complete_interrupts clear all of them at next EXIT.
Even from HW point of view, if there are pending NMI/IRQ/exception,
CPU pick highest one, NMI, ignore/discard IRQ (but LAPIC still holds
IRQ, thus it can be re-injected), completely discard exception.
I don't say discarding has any problem, but unnecessary to keep all of 3.
the only difference is when to discard the rest 2, at queue_exception/irq/nmi
time or later on (even at next EXIT time), which is same to me.
> discard exception since it will be regenerated anyway, but IRQ and
> NMI is another story. SDM says that IRQ should be held pending (once
> again not much explanation here), nothing about NMI.
>
>>>
>>>>
>>>> A potential benefit is that it can avoid duplicated code and
>>>> potential bugs in current code as following patch shows if I
>>>> understand correctly:
>>>>
>>>> --- a/arch/x86/kvm/vmx.c
>>>> +++ b/arch/x86/kvm/vmx.c
>>>> @@ -2599,7 +2599,7 @@ static int handle_exception(struct kvm_vcpu
>>>> *vcpu, struct kvm_run *kvm_run) cr2 =
>>>> vmcs_readl(EXIT_QUALIFICATION);
>>>> KVMTRACE_3D(PAGE_FAULT, vcpu,
>>>> error_code, (u32)cr2, (u32)((u64)cr2 >> 32), handler); -
>>>> if (vcpu->arch.interrupt.pending || vcpu->arch.exception.pending )
>>>> + if (vcpu->arch.interrupt.pending ||
>>>> vcpu->arch.exception.pending ||
>>>> vcpu->arch.nmi_injected) kvm_mmu_unprotect_page_virt(vcpu,
>>>> cr2); return kvm_mmu_page_fault(vcpu, cr2, error_code); }
>>> This fix is already in Avi's tree (not yet pushed).
>>>
>>>> Either way are OK and up to you. BTW Xen uses HW register directly
>>>> to representing an pending event.
>>>>
>>> In this particular case I don't mind to use HW register either, but
>>> I don't see any advantage.
>>>
>>>>>
>>>>>> One example is that exception has higher priority
>>>>>> than NMI/IRQ injection in current code which is not true in
>>>>>> reality.
>>>>>>
>>>>>
>>>>> I don't think it matters in practice, since the guest will see it
>>>>> as a timing issue. NMIs and IRQs are asynchronous (even those
>>>>> generated by the guest through the local APIC).
>>>>
>>>> Yes. But also cause IRQ injection be delayed which may have side
>>>> effect. For example if guest exception handler is very longer or
>>>> if guest VCPU fall into recursive #GP. Within current logic, a
>>>> guest IRQ event from KDB (IPI) running on VCPU0, as an example,
>>>> can't force the dead loop VCPU1 into KDB since it is recursively
>>>> #GP.
>>> If one #GP causes another #GP this is a #DF. If CPU has a chance to
>>
>> Means another #GP in next instruction i.e. Beginning of #GP handler
>> in guest.
>> No #DF here.
>>
> In this case we will enter guest with "NMI windows open" request and
> should exit immediately before first instruction of #GP handler. At
> this moment KVM will be able to inject NMI.
If the HW NMI windows is supported, it is fine, how about SW NMI case?
The flow will then look like:
Guest #GP instruction -> VM Exit -> Inject virtual #GP -> VMRESUME ->
try to execute 1st ins of guest #GP handler -> VM Exit again (#GP) ->
inject virtual #GP -> .....
>
>>> executes
>>> something in between KVM will have a chance to inject NMI.
>>
>> Could have no chance in some cases though not very common.
>>
>>>
>>>>
>>>>>
>>>>>> Another issue is that an failed event from previous injection say
>>>>>> IRQ or NMI may be discarded if an virtual exception happens in
>>>>>> the EXIT handling now. With the patch of generic double fault
>>>>>> handling, this case should be handled as normally.
>>>>>>
>>>>>
>>>>> Discarding an exception is usually okay as it will be regenerated.
>>>>> I don't think we discard interrupts or NMIs.
>>>> In reality (Running OS in guest), it doesn't happen so far. But
>>>> architecturally, it could. For example KVM injects an IRQ, but VM
>>>> Resume get #PF and back to KVM with IDT_VECTORING valid. Then KVM
>>>> will put back the failed IRQ to interrupt queue. But if #PF
>>>> handling generates another exception, then the interrupt queue
>>>> won't be able to be injected, since KVM inject exception first.
>>>> And the interrupt queue is discarded at next VM Exit.
>>>>
>>> I acknowledge the presence of the bug although I was not able to
>>> write a test case to cause it yet, but it is easy to fix this
>>> without changing code too much. Unified event queue and clearing of
>>> only injected event on exit should do the trick.
>>
>> Yes, minor.
>>
>> Eddie
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: event injection MACROs
2009-05-15 7:57 ` Dong, Eddie
@ 2009-05-17 9:44 ` Gleb Natapov
0 siblings, 0 replies; 33+ messages in thread
From: Gleb Natapov @ 2009-05-17 9:44 UTC (permalink / raw)
To: Dong, Eddie; +Cc: Avi Kivity, kvm@vger.kernel.org
On Fri, May 15, 2009 at 03:57:44PM +0800, Dong, Eddie wrote:
> > And what if INTA already happened and CPU is ready to fetch IDT for
> > interrupt vector and at this very moment CPU faults?
>
> If INTA happens, that means it is delivered. If its delivery triggers another
> exception, that is what Table5-4 handles.
>
Is this documented behaviour? Why not execute instruction in parallel
with INTA and IDT access?
> My understanding is that it is 2 stage process. Table 5-2 talk about
> events happening before delivery, so that HW needs to prioritize them.
> Once a decision is make, the highest one is delivered but then it could
> trigger another exception when fetching IDT etc.
>
> Current execption.pending/interrupt.pending/nmi_injected doesn't match
> either of above, interrupt/nmi is only for failed event injection, and a strange
> fixed priority check when it is really injected:
> exception > failed NMI > failed IRQ > new NMI > new IRQ.
The current code assumes that only one of failed NMI, failed IRQ or
exception can be true, So there is no prioritization at all between them.
Just checking which one is available. If there is no event that should
be re-injected then exception indeed is checked first, but, as Avi said,
it doesn't matter. It depends on timing and can't affect any guest code
and that is what really important. This is not the only place where KVM
timing is not the same as CPU timing BTW.
Pending exception field has two meanings though. It will be true either
for exception that failed injection or for newly generated exception,
but KVM is not CPU emulator and the second case happens very rarely
(only during emulation). Don't forget that most exceptions are handled
by CPU directly without KVM even knowing it. I wrote a test case that
generates #NP during NMI handling. I expected that after calling #NP
handler that fixes segment descriptor NMI handler will be called, but
this is not what happened on real HW. #NP was called, NMI was dropped,
but table 5-2 states that they should be handled serially. The good
thing it that the way KVM handles this situation now is the same as real
HW does, but if we will change code to do what SDM says KVM will behave
differently. I should do the same test with IRQ some day.
>
> Table 5-2 looks missed in current KVM IMO except a wrong (but minor)
> exception > NMI > IRQ sequence.
Doesn't matter. See above.
>
> >
> >>>
> >>>> We could have either: 1) A pure SW "queue" that will be flush to
> >>>> HW register later (VM_ENTRY_INTR_INFO_FIELD), 2) Direct use HW
> >>>> register.
> >>>>
> >>> We have three event sources 1) exceptions 2) IRQ 3) NMI. We should
> >>> have queue of three elements sorted by priority. On each entry we
> >>> should
> >>
> >> Table 5-4 alreadys says NMI/IRQ is BENIGN.
> > Table 5-2 applies here not table 5-4 I think.
> >
> >>
> >>> inject an event with highest priority. And remove it from queue on
> >>> exit.
> >>
> >> The problem is that we have to decide to inject only one of above 3,
> >> and discard the rest. Whether priority them or merge (to one event
> >> as Table 5-4) is another story.
> > Only a small number of event are merged into #DF. Most handled
> > serially (SDM does not define what serially means unfortunately), so
> > I don't understand where "discard the rest" is come from. We can
>
> vmx_complete_interrupts clear all of them at next EXIT.
>
Because it is assumed that only one of them is true at the same time.
> Even from HW point of view, if there are pending NMI/IRQ/exception,
> CPU pick highest one, NMI, ignore/discard IRQ (but LAPIC still holds
> IRQ, thus it can be re-injected), completely discard exception.
>
For NMI/IRQ that what happens. If nmi_pending is true IRQ is held
pending in LAPIC (we don't even check IRQ is pending, who cares). For
exception ordering see above.
> I don't say discarding has any problem, but unnecessary to keep all of 3.
> the only difference is when to discard the rest 2, at queue_exception/irq/nmi
> time or later on (even at next EXIT time), which is same to me.
>
> > discard exception since it will be regenerated anyway, but IRQ and
> > NMI is another story. SDM says that IRQ should be held pending (once
> > again not much explanation here), nothing about NMI.
> >
> >>>
> >>>>
> >>>> A potential benefit is that it can avoid duplicated code and
> >>>> potential bugs in current code as following patch shows if I
> >>>> understand correctly:
> >>>>
> >>>> --- a/arch/x86/kvm/vmx.c
> >>>> +++ b/arch/x86/kvm/vmx.c
> >>>> @@ -2599,7 +2599,7 @@ static int handle_exception(struct kvm_vcpu
> >>>> *vcpu, struct kvm_run *kvm_run) cr2 =
> >>>> vmcs_readl(EXIT_QUALIFICATION);
> >>>> KVMTRACE_3D(PAGE_FAULT, vcpu,
> >>>> error_code, (u32)cr2, (u32)((u64)cr2 >> 32), handler); -
> >>>> if (vcpu->arch.interrupt.pending || vcpu->arch.exception.pending )
> >>>> + if (vcpu->arch.interrupt.pending ||
> >>>> vcpu->arch.exception.pending ||
> >>>> vcpu->arch.nmi_injected) kvm_mmu_unprotect_page_virt(vcpu,
> >>>> cr2); return kvm_mmu_page_fault(vcpu, cr2, error_code); }
> >>> This fix is already in Avi's tree (not yet pushed).
> >>>
> >>>> Either way are OK and up to you. BTW Xen uses HW register directly
> >>>> to representing an pending event.
> >>>>
> >>> In this particular case I don't mind to use HW register either, but
> >>> I don't see any advantage.
> >>>
> >>>>>
> >>>>>> One example is that exception has higher priority
> >>>>>> than NMI/IRQ injection in current code which is not true in
> >>>>>> reality.
> >>>>>>
> >>>>>
> >>>>> I don't think it matters in practice, since the guest will see it
> >>>>> as a timing issue. NMIs and IRQs are asynchronous (even those
> >>>>> generated by the guest through the local APIC).
> >>>>
> >>>> Yes. But also cause IRQ injection be delayed which may have side
> >>>> effect. For example if guest exception handler is very longer or
> >>>> if guest VCPU fall into recursive #GP. Within current logic, a
> >>>> guest IRQ event from KDB (IPI) running on VCPU0, as an example,
> >>>> can't force the dead loop VCPU1 into KDB since it is recursively
> >>>> #GP.
> >>> If one #GP causes another #GP this is a #DF. If CPU has a chance to
> >>
> >> Means another #GP in next instruction i.e. Beginning of #GP handler
> >> in guest.
> >> No #DF here.
> >>
> > In this case we will enter guest with "NMI windows open" request and
> > should exit immediately before first instruction of #GP handler. At
> > this moment KVM will be able to inject NMI.
>
> If the HW NMI windows is supported, it is fine, how about SW NMI case?
There is no SW NMI case. There are buggy CPUs that does not provide
proper NMI injection support. There is a hack to implement kinda working
NMI on such CPUs, but there are enough scenarios that will not work correctly
to not rely on this hack if your guest uses NMI for something serious.
> The flow will then look like:
>
> Guest #GP instruction -> VM Exit -> Inject virtual #GP -> VMRESUME ->
> try to execute 1st ins of guest #GP handler -> VM Exit again (#GP) ->
> inject virtual #GP -> .....
No. This will look like: Guest #GP instruction -> guest #GP handler ->
try to execute 1st ins of guest #GP handler -> Guest #GP -> guest #GP
handler -> something sends NMI -> vcpu_kick() -> VM Exit due to kick()
-> inject NMI or the next VM entry -> everything works event on cpus
with "SW NMI". KVM is not emulator. Avi can you rename the project to
KINE please.
--
Gleb.
^ permalink raw reply [flat|nested] 33+ messages in thread
end of thread, other threads:[~2009-05-17 9:44 UTC | newest]
Thread overview: 33+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-04-30 7:24 Implement generic double fault generation mechanism Dong, Eddie
2009-05-03 10:53 ` Gleb Natapov
2009-05-08 8:27 ` Dong, Eddie
2009-05-08 9:53 ` Gleb Natapov
2009-05-08 10:39 ` Dong, Eddie
2009-05-08 10:46 ` Dong, Eddie
2009-05-08 12:23 ` Gleb Natapov
2009-05-08 15:00 ` Dong, Eddie
2009-05-08 18:44 ` Gleb Natapov
2009-05-11 1:04 ` Dong, Eddie
2009-05-11 6:02 ` Gleb Natapov
2009-05-12 5:35 ` Dong, Eddie
2009-05-12 7:01 ` Gleb Natapov
2009-05-12 15:06 ` Enable IRQ windows after exception injection if there are pending virq Dong, Eddie
2009-05-12 15:27 ` Gleb Natapov
2009-05-13 7:45 ` Dong, Eddie
2009-05-13 10:29 ` Gleb Natapov
2009-05-13 14:05 ` Implement generic double fault generation mechanism Dong, Eddie
2009-05-11 6:17 ` Avi Kivity
2009-05-12 7:38 ` event injection MACROs Dong, Eddie
2009-05-12 8:49 ` Gleb Natapov
2009-05-13 9:49 ` Avi Kivity
2009-05-13 14:20 ` Dong, Eddie
2009-05-14 9:27 ` Avi Kivity
2009-05-14 13:43 ` Dong, Eddie
2009-05-14 14:16 ` Gleb Natapov
2009-05-14 14:34 ` Dong, Eddie
2009-05-14 15:44 ` Gleb Natapov
2009-05-15 7:57 ` Dong, Eddie
2009-05-17 9:44 ` Gleb Natapov
2009-05-08 12:16 ` Implement generic double fault generation mechanism Gleb Natapov
2009-05-08 8:19 ` Dong, Eddie
2009-05-08 8:28 ` Avi Kivity
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox