* kernel device reset support
@ 2007-10-08 10:17 Dong, Eddie
[not found] ` <10EA09EFD8728347A513008B6B0DA77A0231BB36-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
0 siblings, 1 reply; 24+ messages in thread
From: Dong, Eddie @ 2007-10-08 10:17 UTC (permalink / raw)
To: kvm-devel
[-- Attachment #1: Type: text/plain, Size: 3212 bytes --]
Kernel side patch to introduce a new API for kernel device reset and
force
vcpu mp_state to UNINATIALIZED state if it is reset.
thx,eddie
Add VM reset support in kernel side to
reset the kernel devices and VCPUs.
Signed-off-by: Yaozu (Eddie) Dong <eddie.dong-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index f60255c..661743a 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -330,6 +330,7 @@ struct kvm_vcpu {
unsigned long cr8;
u64 pdptrs[4]; /* pae */
u64 shadow_efer;
+ int force_to_quit;
u64 apic_base;
struct kvm_lapic *apic; /* kernel irqchip context */
#define VCPU_MP_STATE_RUNNABLE 0
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 71ed1b3..0958f78 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -2199,6 +2199,8 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu
*vcpu, struct kvm_run *kvm_run)
vcpu_load(vcpu);
if (unlikely(vcpu->mp_state == VCPU_MP_STATE_UNINITIALIZED)) {
+ if (irqchip_in_kernel(vcpu->kvm) && vcpu->apic)
+ kvm_lapic_reset(vcpu);
kvm_vcpu_block(vcpu);
vcpu_put(vcpu);
return -EAGAIN;
@@ -3095,6 +3097,41 @@ out:
return r;
}
+void kvm_reset_devices(struct kvm *kvm)
+{
+ kvm_pic_reset(&pic_irqchip(kvm)->pics[1]);
+ kvm_pic_reset(&pic_irqchip(kvm)->pics[0]);
+ pic_irqchip(kvm)->output = 0;
+ kvm_ioapic_reset(kvm->vioapic);
+}
+
+/*
+ * Reset VM.
+ *
+ */
+int kvm_vm_reset(struct kvm *kvm)
+{
+ struct kvm_vcpu *vcpu;
+ int i;
+
+ kvm_reset_devices(kvm);
+ for (i = 0; i < KVM_MAX_VCPUS; i++) {
+ vcpu = kvm->vcpus[i];
+ if (!vcpu)
+ continue;
+ /* active VCPU */
+ if (vcpu->vcpu_id)
+ vcpu->mp_state = VCPU_MP_STATE_UNINITIALIZED;
+ else {
+ vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
+ kvm_lapic_reset(vcpu);
+ }
+ vcpu->force_to_quit = 1;
+ kvm_vcpu_kick(vcpu);
+ }
+ return 0;
+}
+
static long kvm_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -3163,6 +3200,12 @@ static long kvm_vm_ioctl(struct file *filp,
else
goto out;
break;
+ case KVM_RESET:
+ r = -EFAULT;
+ if (!irqchip_in_kernel(kvm))
+ goto out;
+ r = kvm_vm_reset(kvm);
+ break;
case KVM_IRQ_LINE: {
struct kvm_irq_level irq_event;
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 3d8e01e..0799aad 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -1860,6 +1860,10 @@ static int handle_external_interrupt(struct
kvm_vcpu *vcpu,
struct kvm_run *kvm_run)
{
++vcpu->stat.irq_exits;
+ if (vcpu->force_to_quit) {
+ vcpu->force_to_quit = 0;
+ return -EINTR;
+ }
return 1;
}
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index d62d3b2..2fd731f 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -371,6 +371,7 @@ struct kvm_signal_mask {
#define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct
kvm_irq_level)
#define KVM_GET_IRQCHIP _IOWR(KVMIO, 0x62, struct
kvm_irqchip)
#define KVM_SET_IRQCHIP _IOR(KVMIO, 0x63, struct
kvm_irqchip)
+#define KVM_RESET _IO(KVMIO, 0x64)
/*
* ioctls for vcpu fds
[-- Attachment #2: reset-k2.patch --]
[-- Type: application/octet-stream, Size: 3038 bytes --]
commit 759adceb0eea137fbd15ec542e6bfc778d393955
Author: root <root@vt32-pae.(none)>
Date: Mon Oct 8 18:12:09 2007 +0800
Add VM reset support in kernel side to
reset the kernel devices and VCPUs.
Signed-off-by: Yaozu (Eddie) Dong <eddie.dong@intel.com>
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index f60255c..661743a 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -330,6 +330,7 @@ struct kvm_vcpu {
unsigned long cr8;
u64 pdptrs[4]; /* pae */
u64 shadow_efer;
+ int force_to_quit;
u64 apic_base;
struct kvm_lapic *apic; /* kernel irqchip context */
#define VCPU_MP_STATE_RUNNABLE 0
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 71ed1b3..0958f78 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -2199,6 +2199,8 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
vcpu_load(vcpu);
if (unlikely(vcpu->mp_state == VCPU_MP_STATE_UNINITIALIZED)) {
+ if (irqchip_in_kernel(vcpu->kvm) && vcpu->apic)
+ kvm_lapic_reset(vcpu);
kvm_vcpu_block(vcpu);
vcpu_put(vcpu);
return -EAGAIN;
@@ -3095,6 +3097,41 @@ out:
return r;
}
+void kvm_reset_devices(struct kvm *kvm)
+{
+ kvm_pic_reset(&pic_irqchip(kvm)->pics[1]);
+ kvm_pic_reset(&pic_irqchip(kvm)->pics[0]);
+ pic_irqchip(kvm)->output = 0;
+ kvm_ioapic_reset(kvm->vioapic);
+}
+
+/*
+ * Reset VM.
+ *
+ */
+int kvm_vm_reset(struct kvm *kvm)
+{
+ struct kvm_vcpu *vcpu;
+ int i;
+
+ kvm_reset_devices(kvm);
+ for (i = 0; i < KVM_MAX_VCPUS; i++) {
+ vcpu = kvm->vcpus[i];
+ if (!vcpu)
+ continue;
+ /* active VCPU */
+ if (vcpu->vcpu_id)
+ vcpu->mp_state = VCPU_MP_STATE_UNINITIALIZED;
+ else {
+ vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
+ kvm_lapic_reset(vcpu);
+ }
+ vcpu->force_to_quit = 1;
+ kvm_vcpu_kick(vcpu);
+ }
+ return 0;
+}
+
static long kvm_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -3163,6 +3200,12 @@ static long kvm_vm_ioctl(struct file *filp,
else
goto out;
break;
+ case KVM_RESET:
+ r = -EFAULT;
+ if (!irqchip_in_kernel(kvm))
+ goto out;
+ r = kvm_vm_reset(kvm);
+ break;
case KVM_IRQ_LINE: {
struct kvm_irq_level irq_event;
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 3d8e01e..0799aad 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -1860,6 +1860,10 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run)
{
++vcpu->stat.irq_exits;
+ if (vcpu->force_to_quit) {
+ vcpu->force_to_quit = 0;
+ return -EINTR;
+ }
return 1;
}
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index d62d3b2..2fd731f 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -371,6 +371,7 @@ struct kvm_signal_mask {
#define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level)
#define KVM_GET_IRQCHIP _IOWR(KVMIO, 0x62, struct kvm_irqchip)
#define KVM_SET_IRQCHIP _IOR(KVMIO, 0x63, struct kvm_irqchip)
+#define KVM_RESET _IO(KVMIO, 0x64)
/*
* ioctls for vcpu fds
[-- Attachment #3: Type: text/plain, Size: 314 bytes --]
-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems? Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/
[-- Attachment #4: Type: text/plain, Size: 186 bytes --]
_______________________________________________
kvm-devel mailing list
kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
https://lists.sourceforge.net/lists/listinfo/kvm-devel
^ permalink raw reply related [flat|nested] 24+ messages in thread[parent not found: <10EA09EFD8728347A513008B6B0DA77A0231BB36-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>]
* Re: kernel device reset support [not found] ` <10EA09EFD8728347A513008B6B0DA77A0231BB36-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org> @ 2007-10-08 10:24 ` Avi Kivity [not found] ` <470A0556.80903-atKUWr5tajBWk0Htik3J/w@public.gmane.org> 2007-10-08 10:27 ` Avi Kivity 1 sibling, 1 reply; 24+ messages in thread From: Avi Kivity @ 2007-10-08 10:24 UTC (permalink / raw) To: Dong, Eddie; +Cc: kvm-devel Dong, Eddie wrote: > Kernel side patch to introduce a new API for kernel device reset and > force > vcpu mp_state to UNINATIALIZED state if it is reset. > thx,eddie > > > > + > +/* > + * Reset VM. > + * > + */ > +int kvm_vm_reset(struct kvm *kvm) > +{ > + struct kvm_vcpu *vcpu; > + int i; > + > + kvm_reset_devices(kvm); > + for (i = 0; i < KVM_MAX_VCPUS; i++) { > + vcpu = kvm->vcpus[i]; > + if (!vcpu) > + continue; > + /* active VCPU */ > + if (vcpu->vcpu_id) > + vcpu->mp_state = VCPU_MP_STATE_UNINITIALIZED; > + else { > + vcpu->mp_state = VCPU_MP_STATE_RUNNABLE; > + kvm_lapic_reset(vcpu); > Why is the handling here different? > + } > + vcpu->force_to_quit = 1; > + kvm_vcpu_kick(vcpu); > + } > + return 0; > +} > + > static long kvm_vm_ioctl(struct file *filp, > unsigned int ioctl, unsigned long arg) > { > @@ -3163,6 +3200,12 @@ static long kvm_vm_ioctl(struct file *filp, > else > goto out; > break; > + case KVM_RESET: > + r = -EFAULT; > -EINVAL. Or better accept it always and only reset the vcpu if !irqchip_in_kernel(). > + if (!irqchip_in_kernel(kvm)) > + goto out; > + r = kvm_vm_reset(kvm); > + break; > case KVM_IRQ_LINE: { > struct kvm_irq_level irq_event; > > diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c > index 3d8e01e..0799aad 100644 > --- a/drivers/kvm/vmx.c > +++ b/drivers/kvm/vmx.c > @@ -1860,6 +1860,10 @@ static int handle_external_interrupt(struct > kvm_vcpu *vcpu, > struct kvm_run *kvm_run) > { > ++vcpu->stat.irq_exits; > + if (vcpu->force_to_quit) { > + vcpu->force_to_quit = 0; > + return -EINTR; > + } > return 1; > } > Why is this needed? Userspace can always force an exit by sending a signal. -- Do not meddle in the internals of kernels, for they are subtle and quick to panic. ------------------------------------------------------------------------- This SF.net email is sponsored by: Splunk Inc. Still grepping through log files to find problems? Stop. Now Search log events and configuration files using AJAX and a browser. Download your FREE copy of Splunk now >> http://get.splunk.com/ ^ permalink raw reply [flat|nested] 24+ messages in thread
[parent not found: <470A0556.80903-atKUWr5tajBWk0Htik3J/w@public.gmane.org>]
* Re: kernel device reset support [not found] ` <470A0556.80903-atKUWr5tajBWk0Htik3J/w@public.gmane.org> @ 2007-10-09 1:58 ` Dong, Eddie [not found] ` <10EA09EFD8728347A513008B6B0DA77A0231BD83-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org> 0 siblings, 1 reply; 24+ messages in thread From: Dong, Eddie @ 2007-10-09 1:58 UTC (permalink / raw) To: Avi Kivity; +Cc: kvm-devel Avi Kivity wrote: > Dong, Eddie wrote: >> Kernel side patch to introduce a new API for kernel device reset and >> force vcpu mp_state to UNINATIALIZED state if it is reset. >> thx,eddie >> >> >> >> + >> +/* >> + * Reset VM. >> + * >> + */ >> +int kvm_vm_reset(struct kvm *kvm) >> +{ >> + struct kvm_vcpu *vcpu; >> + int i; >> + >> + kvm_reset_devices(kvm); >> + for (i = 0; i < KVM_MAX_VCPUS; i++) { >> + vcpu = kvm->vcpus[i]; >> + if (!vcpu) >> + continue; >> + /* active VCPU */ >> + if (vcpu->vcpu_id) >> + vcpu->mp_state = VCPU_MP_STATE_UNINITIALIZED; >> + else { >> + vcpu->mp_state = VCPU_MP_STATE_RUNNABLE; >> + kvm_lapic_reset(vcpu); >> > > Why is the handling here different? In native, RESET signal force every processor enter "RESET" status, and then immediately after RESET signal is removed, all CPUs will compete for BSP role, the winner continue execution, and failor will be blocked till INIT/SIPI/SIPI. > >> + } >> + vcpu->force_to_quit = 1; >> + kvm_vcpu_kick(vcpu); >> + } >> + return 0; >> +} >> + >> static long kvm_vm_ioctl(struct file *filp, >> unsigned int ioctl, unsigned long arg) >> { >> @@ -3163,6 +3200,12 @@ static long kvm_vm_ioctl(struct file *filp, >> else goto out; >> break; >> + case KVM_RESET: >> + r = -EFAULT; >> > > -EINVAL. Or better accept it always and only reset the vcpu if > !irqchip_in_kernel(). Fixed. > >> + if (!irqchip_in_kernel(kvm)) >> + goto out; >> + r = kvm_vm_reset(kvm); >> + break; >> case KVM_IRQ_LINE: { >> struct kvm_irq_level irq_event; >> >> diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c >> index 3d8e01e..0799aad 100644 >> --- a/drivers/kvm/vmx.c >> +++ b/drivers/kvm/vmx.c >> @@ -1860,6 +1860,10 @@ static int handle_external_interrupt(struct >> kvm_vcpu *vcpu, struct kvm_run *kvm_run) >> { >> ++vcpu->stat.irq_exits; >> + if (vcpu->force_to_quit) { >> + vcpu->force_to_quit = 0; >> + return -EINTR; >> + } >> return 1; >> } >> > > Why is this needed? For a graceful reboot, this one is not needed since every APs are already brought to HALT status before BSP issue RESET signal. But in case of non-graceful reboot, it is possible the VCPUs are still executing guest instruction, so we kick the VCPU and then use this logic to force the exception handler to be a heavy VM Exit and execute following code at beginning of kvm_vcpu_ioctl_run. (Let VCPU_MP_STATE_UNINITIALIZED take effective) if (unlikely(vcpu->mp_state == VCPU_MP_STATE_UNINITIALIZED)) { if (irqchip_in_kernel(vcpu->kvm) && vcpu->apic) kvm_lapic_reset(vcpu); kvm_vcpu_block(vcpu); vcpu_put(vcpu); return -EAGAIN; } Whether we need to handle those un-graceful reboot case is up to you :-) If not, then those code can be removed. > > Userspace can always force an exit by sending a signal. > >> +/* >> + * Reset VM. >> + * >> + */ >> +int kvm_vm_reset(struct kvm *kvm) >> +{ >> + struct kvm_vcpu *vcpu; >> + int i; >> + >> + kvm_reset_devices(kvm); >> > > Need to take kvm->lock around this. Mmm, I will move this to be after VCPU reset. If a VCPU is still accessing (write) devices register, we always have problem. So move after all the processors enetring frozen state will be simple and safer. Any opnion? Will post after your new comments. thx,eddie ------------------------------------------------------------------------- This SF.net email is sponsored by: Splunk Inc. Still grepping through log files to find problems? Stop. Now Search log events and configuration files using AJAX and a browser. Download your FREE copy of Splunk now >> http://get.splunk.com/ ^ permalink raw reply [flat|nested] 24+ messages in thread
[parent not found: <10EA09EFD8728347A513008B6B0DA77A0231BD83-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>]
* Re: kernel device reset support [not found] ` <10EA09EFD8728347A513008B6B0DA77A0231BD83-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org> @ 2007-10-09 9:34 ` Avi Kivity [not found] ` <470B4B2E.1000500-atKUWr5tajBWk0Htik3J/w@public.gmane.org> 0 siblings, 1 reply; 24+ messages in thread From: Avi Kivity @ 2007-10-09 9:34 UTC (permalink / raw) To: Dong, Eddie; +Cc: kvm-devel Dong, Eddie wrote: >>> >>> + >>> +/* >>> + * Reset VM. >>> + * >>> + */ >>> +int kvm_vm_reset(struct kvm *kvm) >>> +{ >>> + struct kvm_vcpu *vcpu; >>> + int i; >>> + >>> + kvm_reset_devices(kvm); >>> + for (i = 0; i < KVM_MAX_VCPUS; i++) { >>> + vcpu = kvm->vcpus[i]; >>> + if (!vcpu) >>> + continue; >>> + /* active VCPU */ >>> + if (vcpu->vcpu_id) >>> + vcpu->mp_state = VCPU_MP_STATE_UNINITIALIZED; >>> + else { >>> + vcpu->mp_state = VCPU_MP_STATE_RUNNABLE; >>> + kvm_lapic_reset(vcpu); >>> >>> >> Why is the handling here different? >> > > In native, RESET signal force every processor enter "RESET" status, and > then immediately after RESET signal is removed, all CPUs will compete > for BSP role, the winner continue execution, and failor will be blocked > till INIT/SIPI/SIPI. > I meant, you could set both to VCPU_MP_STATE_UNINITIALIZED and let the vcpu code do the reset differently depending on vcpu_id. This way you don't run into locking issues (kvm_lapic_reset() needs the vcpu lock?) > >>> { >>> ++vcpu->stat.irq_exits; >>> + if (vcpu->force_to_quit) { >>> + vcpu->force_to_quit = 0; >>> + return -EINTR; >>> + } >>> return 1; >>> } >>> >>> >> Why is this needed? >> > > For a graceful reboot, this one is not needed since every APs are > already brought to HALT status before BSP issue RESET signal. But in > case of non-graceful reboot, it is possible the VCPUs are still > executing guest instruction, so we kick the VCPU and then use this logic > to force the exception handler to be a heavy VM Exit and execute > following code at beginning of kvm_vcpu_ioctl_run. (Let > VCPU_MP_STATE_UNINITIALIZED take effective) > > if (unlikely(vcpu->mp_state == VCPU_MP_STATE_UNINITIALIZED)) { > if (irqchip_in_kernel(vcpu->kvm) && vcpu->apic) > kvm_lapic_reset(vcpu); > kvm_vcpu_block(vcpu); > vcpu_put(vcpu); > return -EAGAIN; > } > > Whether we need to handle those un-graceful reboot case is up to you :-) > If not, then those code can be removed. > We do need to support ungraceful resets. But this could easily be done via vcpu->requests. To reset a vcpu: set_bit(KVM_REQ_RESET, &vcpu->requests) kvm_vcpu_kick(vcpu); And in __vcpu_run(): if (vcpu_requests) { if (test_and_reset_bit(KVM_REQ_RESET, &vcpu->requests)) .... } > >> Userspace can always force an exit by sending a signal. >> >> >>> +/* >>> + * Reset VM. >>> + * >>> + */ >>> +int kvm_vm_reset(struct kvm *kvm) >>> +{ >>> + struct kvm_vcpu *vcpu; >>> + int i; >>> + >>> + kvm_reset_devices(kvm); >>> >>> >> Need to take kvm->lock around this. >> > > Mmm, I will move this to be after VCPU reset. > If a VCPU is still accessing (write) devices register, we always have > problem. > So move after all the processors enetring frozen state will be simple > and safer. > > Any opnion? Will post after your new comments. > > Sounds good. But the BSP starts executing immediately, no? So maybe two stages for vcpu reset: first to reset and halt it, then start it. pic and ioapic reset can be performed in between. -- error compiling committee.c: too many arguments to function ------------------------------------------------------------------------- This SF.net email is sponsored by: Splunk Inc. Still grepping through log files to find problems? Stop. Now Search log events and configuration files using AJAX and a browser. Download your FREE copy of Splunk now >> http://get.splunk.com/ ^ permalink raw reply [flat|nested] 24+ messages in thread
[parent not found: <470B4B2E.1000500-atKUWr5tajBWk0Htik3J/w@public.gmane.org>]
* Re: kernel device reset support [not found] ` <470B4B2E.1000500-atKUWr5tajBWk0Htik3J/w@public.gmane.org> @ 2007-10-09 9:53 ` Avi Kivity 2007-10-09 10:11 ` Dong, Eddie 1 sibling, 0 replies; 24+ messages in thread From: Avi Kivity @ 2007-10-09 9:53 UTC (permalink / raw) To: Dong, Eddie; +Cc: kvm-devel Avi Kivity wrote: >>> >>> >> For a graceful reboot, this one is not needed since every APs are >> already brought to HALT status before BSP issue RESET signal. But in >> case of non-graceful reboot, it is possible the VCPUs are still >> executing guest instruction, so we kick the VCPU and then use this logic >> to force the exception handler to be a heavy VM Exit and execute >> following code at beginning of kvm_vcpu_ioctl_run. (Let >> VCPU_MP_STATE_UNINITIALIZED take effective) >> >> if (unlikely(vcpu->mp_state == VCPU_MP_STATE_UNINITIALIZED)) { >> if (irqchip_in_kernel(vcpu->kvm) && vcpu->apic) >> kvm_lapic_reset(vcpu); >> kvm_vcpu_block(vcpu); >> vcpu_put(vcpu); >> return -EAGAIN; >> } >> >> Whether we need to handle those un-graceful reboot case is up to you :-) >> If not, then those code can be removed. >> >> > > We do need to support ungraceful resets. But this could easily be done > via vcpu->requests. > > To reset a vcpu: > > set_bit(KVM_REQ_RESET, &vcpu->requests) > kvm_vcpu_kick(vcpu); > > And in __vcpu_run(): > > if (vcpu_requests) { > if (test_and_reset_bit(KVM_REQ_RESET, &vcpu->requests)) > .... > } > > To be clear, you can move the reset call to that location ("...") and avoid the need to take a heavyweight exit completely. -- error compiling committee.c: too many arguments to function ------------------------------------------------------------------------- This SF.net email is sponsored by: Splunk Inc. Still grepping through log files to find problems? Stop. Now Search log events and configuration files using AJAX and a browser. Download your FREE copy of Splunk now >> http://get.splunk.com/ ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: kernel device reset support [not found] ` <470B4B2E.1000500-atKUWr5tajBWk0Htik3J/w@public.gmane.org> 2007-10-09 9:53 ` Avi Kivity @ 2007-10-09 10:11 ` Dong, Eddie [not found] ` <10EA09EFD8728347A513008B6B0DA77A0231C1AA-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org> 1 sibling, 1 reply; 24+ messages in thread From: Dong, Eddie @ 2007-10-09 10:11 UTC (permalink / raw) To: Avi Kivity; +Cc: kvm-devel Avi Kivity wrote: >> In native, RESET signal force every processor enter "RESET" status, >> and then immediately after RESET signal is removed, all CPUs will >> compete for BSP role, the winner continue execution, and failor will >> be blocked till INIT/SIPI/SIPI. >> > > I meant, you could set both to VCPU_MP_STATE_UNINITIALIZED and let the > vcpu code do the reset differently depending on vcpu_id. This way you > don't run into locking issues (kvm_lapic_reset() needs the vcpu lock?) If BSP mp_state becomes VCPU_MP_STATE_UNINITIALIZED, current code can't wakeup it. We need additional code that I am not aware of now. Current VCPU must be BSP, otherwise the code executing in Qemu will have problem too. >> For a graceful reboot, this one is not needed since every APs are >> already brought to HALT status before BSP issue RESET signal. But in >> case of non-graceful reboot, it is possible the VCPUs are still >> executing guest instruction, so we kick the VCPU and then use this >> logic to force the exception handler to be a heavy VM Exit and >> execute following code at beginning of kvm_vcpu_ioctl_run. (Let >> VCPU_MP_STATE_UNINITIALIZED take effective) >> >> if (unlikely(vcpu->mp_state == > VCPU_MP_STATE_UNINITIALIZED)) { >> if (irqchip_in_kernel(vcpu->kvm) && vcpu->apic) >> kvm_lapic_reset(vcpu); >> kvm_vcpu_block(vcpu); >> vcpu_put(vcpu); >> return -EAGAIN; >> } >> >> Whether we need to handle those un-graceful reboot case is up to you >> :-) If not, then those code can be removed. >> > > We do need to support ungraceful resets. But this could > easily be done > via vcpu->requests. Mmm, almost Yes, then it is same with force_to_quit. But reset doesn't mean lapic_reset only, it needs to enter wait for INIT/SIPI cycles. So using mp_state is much closer. > > To reset a vcpu: > > set_bit(KVM_REQ_RESET, &vcpu->requests) > kvm_vcpu_kick(vcpu); > > And in __vcpu_run(): > > if (vcpu_requests) { > if (test_and_reset_bit(KVM_REQ_RESET, &vcpu->requests)) > .... } > >> >> Mmm, I will move this to be after VCPU reset. >> If a VCPU is still accessing (write) devices register, we always >> have problem. So move after all the processors enetring frozen state >> will be simple and safer. >> >> Any opnion? Will post after your new comments. >> >> > > Sounds good. But the BSP starts executing immediately, no? Yes, BSP will continue. current VCPU=BSP. > > So maybe two stages for vcpu reset: first to reset and halt it, then > start it. pic and ioapic reset can be performed in between. > thx,eddie ------------------------------------------------------------------------- This SF.net email is sponsored by: Splunk Inc. Still grepping through log files to find problems? Stop. Now Search log events and configuration files using AJAX and a browser. Download your FREE copy of Splunk now >> http://get.splunk.com/ ^ permalink raw reply [flat|nested] 24+ messages in thread
[parent not found: <10EA09EFD8728347A513008B6B0DA77A0231C1AA-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>]
* Re: kernel device reset support [not found] ` <10EA09EFD8728347A513008B6B0DA77A0231C1AA-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org> @ 2007-10-09 10:17 ` Avi Kivity [not found] ` <470B5528.2010605-atKUWr5tajBWk0Htik3J/w@public.gmane.org> 0 siblings, 1 reply; 24+ messages in thread From: Avi Kivity @ 2007-10-09 10:17 UTC (permalink / raw) To: Dong, Eddie; +Cc: kvm-devel Dong, Eddie wrote: > Avi Kivity wrote: > >>> In native, RESET signal force every processor enter "RESET" status, >>> and then immediately after RESET signal is removed, all CPUs will >>> compete for BSP role, the winner continue execution, and failor will >>> be blocked till INIT/SIPI/SIPI. >>> >>> >> I meant, you could set both to VCPU_MP_STATE_UNINITIALIZED and let the >> vcpu code do the reset differently depending on vcpu_id. This way you >> don't run into locking issues (kvm_lapic_reset() needs the vcpu lock?) >> > > If BSP mp_state becomes VCPU_MP_STATE_UNINITIALIZED, current code > can't wakeup it. We need additional code that I am not aware of now. > > Current VCPU must be BSP, otherwise the code executing in Qemu will have > problem too. > But, for an ungraceful reset, nothing prevents an AP from issuing a reset? > >>> For a graceful reboot, this one is not needed since every APs are >>> already brought to HALT status before BSP issue RESET signal. But in >>> case of non-graceful reboot, it is possible the VCPUs are still >>> executing guest instruction, so we kick the VCPU and then use this >>> logic to force the exception handler to be a heavy VM Exit and >>> execute following code at beginning of kvm_vcpu_ioctl_run. (Let >>> VCPU_MP_STATE_UNINITIALIZED take effective) >>> >>> if (unlikely(vcpu->mp_state == >>> >> VCPU_MP_STATE_UNINITIALIZED)) { >> >>> if (irqchip_in_kernel(vcpu->kvm) && vcpu->apic) >>> kvm_lapic_reset(vcpu); >>> kvm_vcpu_block(vcpu); >>> vcpu_put(vcpu); >>> return -EAGAIN; >>> } >>> >>> Whether we need to handle those un-graceful reboot case is up to you >>> :-) If not, then those code can be removed. >>> >>> >> We do need to support ungraceful resets. But this could >> easily be done >> via vcpu->requests. >> > > Mmm, almost Yes, then it is same with force_to_quit. > But reset doesn't mean lapic_reset only, it needs to > enter wait for INIT/SIPI cycles. So using mp_state is > much closer. > > What I mean is to use mp_state within the vcpu code (while holding the vcpu mutex) and to use vcpu->requests as a means to tell the vcpu it needs to change state. >> To reset a vcpu: >> >> set_bit(KVM_REQ_RESET, &vcpu->requests) >> kvm_vcpu_kick(vcpu); >> >> And in __vcpu_run(): >> >> if (vcpu_requests) { >> if (test_and_reset_bit(KVM_REQ_RESET, &vcpu->requests)) >> .... } >> >> >>> Mmm, I will move this to be after VCPU reset. >>> If a VCPU is still accessing (write) devices register, we always >>> have problem. So move after all the processors enetring frozen state >>> will be simple and safer. >>> >>> Any opnion? Will post after your new comments. >>> >>> >>> >> Sounds good. But the BSP starts executing immediately, no? >> > > Yes, BSP will continue. current VCPU=BSP. > It's a vm ioctl, there is no current vcpu! -- error compiling committee.c: too many arguments to function ------------------------------------------------------------------------- This SF.net email is sponsored by: Splunk Inc. Still grepping through log files to find problems? Stop. Now Search log events and configuration files using AJAX and a browser. Download your FREE copy of Splunk now >> http://get.splunk.com/ ^ permalink raw reply [flat|nested] 24+ messages in thread
[parent not found: <470B5528.2010605-atKUWr5tajBWk0Htik3J/w@public.gmane.org>]
* Re: kernel device reset support [not found] ` <470B5528.2010605-atKUWr5tajBWk0Htik3J/w@public.gmane.org> @ 2007-10-09 10:36 ` Dong, Eddie [not found] ` <10EA09EFD8728347A513008B6B0DA77A0231C1B2-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org> 0 siblings, 1 reply; 24+ messages in thread From: Dong, Eddie @ 2007-10-09 10:36 UTC (permalink / raw) To: Avi Kivity; +Cc: kvm-devel Avi Kivity wrote: >> >> If BSP mp_state becomes VCPU_MP_STATE_UNINITIALIZED, current code >> can't wakeup it. We need additional code that I am not aware of now. >> >> Current VCPU must be BSP, otherwise the code executing in Qemu will >> have problem too. >> > > But, for an ungraceful reset, nothing prevents an AP from > issuing a reset? Mmm, Yes, but I think current architecture can't handle this. The thread where AP issues "RESET" will continue run, which means it becomes BSP now and wake up other APs later on. Or We can block that AP first and then inform BSP to do RESET job. Here we need to block the AP in kernel so that we can wake up. It can be a future task which is not that high priority IMO. I will focus on SMP boot 1st. Your opnion? > > >> >> Mmm, almost Yes, then it is same with force_to_quit. >> But reset doesn't mean lapic_reset only, it needs to >> enter wait for INIT/SIPI cycles. So using mp_state is >> much closer. >> >> > > What I mean is to use mp_state within the vcpu code (while holding the > vcpu mutex) and to use vcpu->requests as a means to tell the vcpu it > needs to change state. > Then we need to add code to enter waitqueque here. I think force_to_quit is much simple and efficient since we don't need to test (atomic test) at each VM Exit even light weight VM Exit. But certainly it can. If you want to save the per VCPU force_to_quit, we can share it with vcpu_request, but test in external IRQ is better IMO since we take a kick now. >>> Sounds good. But the BSP starts executing immediately, no? >>> >> >> Yes, BSP will continue. current VCPU=BSP. >> > > It's a vm ioctl, there is no current vcpu! > Yes, but it is in the thread BSP use :-) Thx,eddie ------------------------------------------------------------------------- This SF.net email is sponsored by: Splunk Inc. Still grepping through log files to find problems? Stop. Now Search log events and configuration files using AJAX and a browser. Download your FREE copy of Splunk now >> http://get.splunk.com/ ^ permalink raw reply [flat|nested] 24+ messages in thread
[parent not found: <10EA09EFD8728347A513008B6B0DA77A0231C1B2-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>]
* Re: kernel device reset support [not found] ` <10EA09EFD8728347A513008B6B0DA77A0231C1B2-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org> @ 2007-10-09 10:55 ` Avi Kivity [not found] ` <470B5E3B.4060006-atKUWr5tajBWk0Htik3J/w@public.gmane.org> 0 siblings, 1 reply; 24+ messages in thread From: Avi Kivity @ 2007-10-09 10:55 UTC (permalink / raw) To: Dong, Eddie; +Cc: kvm-devel Dong, Eddie wrote: > Avi Kivity wrote: > >>> If BSP mp_state becomes VCPU_MP_STATE_UNINITIALIZED, current code >>> can't wakeup it. We need additional code that I am not aware of now. >>> >>> Current VCPU must be BSP, otherwise the code executing in Qemu will >>> have problem too. >>> >>> >> But, for an ungraceful reset, nothing prevents an AP from >> issuing a reset? >> > > Mmm, Yes, but I think current architecture can't handle this. > The thread where AP issues "RESET" will continue run, which > means it becomes BSP now and wake up other APs later on. > Or We can block that AP first and then inform BSP to do > RESET job. Here we need to block the AP in kernel > so that we can wake up. > It should call vcpu_halt() immediately after reset. > It can be a future task which is not that high priority IMO. > I will focus on SMP boot 1st. Your opnion? > Agree. But let's make it close to the complete solution. >>> >>> >> What I mean is to use mp_state within the vcpu code (while holding the >> vcpu mutex) and to use vcpu->requests as a means to tell the vcpu it >> needs to change state. >> >> > > Then we need to add code to enter waitqueque here. I think force_to_quit > is much simple and efficient since we don't need to test (atomic test) > at each VM Exit even light weight VM Exit. > The test for vcpu->requests already exists (and is needed for tlb flushes) so there is no additional performance hit. > But certainly it can. If you want to save the per VCPU force_to_quit, we > can share it with vcpu_request, but test in external IRQ is better IMO > since we take a kick now. > The vcpu may have exited due to some other reason, and the interrupt taken in host context? -- error compiling committee.c: too many arguments to function ------------------------------------------------------------------------- This SF.net email is sponsored by: Splunk Inc. Still grepping through log files to find problems? Stop. Now Search log events and configuration files using AJAX and a browser. Download your FREE copy of Splunk now >> http://get.splunk.com/ ^ permalink raw reply [flat|nested] 24+ messages in thread
[parent not found: <470B5E3B.4060006-atKUWr5tajBWk0Htik3J/w@public.gmane.org>]
* Re: kernel device reset support [not found] ` <470B5E3B.4060006-atKUWr5tajBWk0Htik3J/w@public.gmane.org> @ 2007-10-10 6:17 ` Dong, Eddie [not found] ` <10EA09EFD8728347A513008B6B0DA77A02364242-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org> 0 siblings, 1 reply; 24+ messages in thread From: Dong, Eddie @ 2007-10-10 6:17 UTC (permalink / raw) To: Avi Kivity; +Cc: kvm-devel [-- Attachment #1: Type: text/plain, Size: 5015 bytes --] Avi Kivity wrote: > Dong, Eddie wrote: >> Avi Kivity wrote: >> >>> But, for an ungraceful reset, nothing prevents an AP from >>> issuing a reset? >>> >> >> Mmm, Yes, but I think current architecture can't handle this. >> The thread where AP issues "RESET" will continue run, which >> means it becomes BSP now and wake up other APs later on. >> Or We can block that AP first and then inform BSP to do >> RESET job. Here we need to block the AP in kernel >> so that we can wake up. >> > > It should call vcpu_halt() immediately after reset. ?? Can user level be able to enter kernel HALT state. > >> It can be a future task which is not that high priority IMO. >> I will focus on SMP boot 1st. Your opnion? >> > > Agree. But let's make it close to the complete solution. > Yes, halt all APs and let BSP do reset ops in user level. Will post patch to Qemu to support SMP reboot some time later. > > The test for vcpu->requests already exists (and is needed for tlb > flushes) so there is no additional performance hit. OK, that makes sense. changed. please verify. User level is split into libkvm and qemu. thx,eddie commit a0aed7040befe198491254d3459aeb5897f5f2c1 Author: root <root@vt32-pae.(none)> Date: Wed Oct 10 13:45:58 2007 +0800 Add VM reset support in kernel side to reset the kernel devices and force APs to enter wait for INIT/SIPI state. Signed-off-by: Yaozu (Eddie) Dong <eddie.dong-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org> diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 4ab487c..81c9af1 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -68,6 +68,7 @@ * vcpu->requests bit members */ #define KVM_TLB_FLUSH 0 +#define KVM_FROZEN 1 /* * Address types: diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 0b2894a..33f16bd 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -2189,9 +2189,17 @@ again: vcpu->guest_mode = 1; - if (vcpu->requests) + if (vcpu->requests) { if (test_and_clear_bit(KVM_TLB_FLUSH, &vcpu->requests)) kvm_x86_ops->tlb_flush(vcpu); + if (test_and_clear_bit(KVM_FROZEN, &vcpu->requests)) { + local_irq_enable(); + preempt_enable(); + r = -EINTR; + kvm_run->exit_reason = KVM_EXIT_FROZEN; + goto out; + } + } kvm_x86_ops->run(vcpu, kvm_run); @@ -2245,6 +2253,8 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vcpu_load(vcpu); if (unlikely(vcpu->mp_state == VCPU_MP_STATE_UNINITIALIZED)) { + if (irqchip_in_kernel(vcpu->kvm) && vcpu->apic) + kvm_lapic_reset(vcpu); kvm_vcpu_block(vcpu); vcpu_put(vcpu); return -EAGAIN; @@ -3143,6 +3153,54 @@ out: return r; } +/* + * Reset kernel devices. + */ +void kvm_reset_devices(struct kvm *kvm) +{ + kvm_pic_reset(&pic_irqchip(kvm)->pics[1]); + kvm_pic_reset(&pic_irqchip(kvm)->pics[0]); + pic_irqchip(kvm)->output = 0; + kvm_ioapic_reset(kvm->vioapic); +} + +/* + * Kernel side VM Reset. + * NOTE here: User level code must guarantee only the BSP + * thread can do this call. + * + */ +int kvm_vm_reset(struct kvm *kvm) +{ + struct kvm_vcpu *vcpu; + int i; + + for (i = 0; i < KVM_MAX_VCPUS; i++) { + vcpu = kvm->vcpus[i]; + if (!vcpu) + continue; + /* active VCPU */ + if (vcpu->vcpu_id) { + vcpu->mp_state = VCPU_MP_STATE_UNINITIALIZED; + set_bit(KVM_FROZEN, &vcpu->requests); + kvm_vcpu_kick(vcpu); + /* + * Wait till the AP entered waiting for + * INIT/SIPI state + */ + while (test_bit(KVM_FROZEN, &vcpu->requests)) + schedule(); + } + else { + vcpu->mp_state = VCPU_MP_STATE_RUNNABLE; + kvm_lapic_reset(vcpu); + } + } + /* Now only BSP is running... */ + kvm_reset_devices(kvm); + return 0; +} + static long kvm_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -3228,6 +3286,12 @@ static long kvm_vm_ioctl(struct file *filp, } else goto out; break; + case KVM_RESET: + r = -EINVAL; + if (!irqchip_in_kernel(kvm)) + goto out; + r = kvm_vm_reset(kvm); + break; case KVM_IRQ_LINE: { struct kvm_irq_level irq_event; diff --git a/include/linux/kvm.h b/include/linux/kvm.h index cd4ac12..ea9c004 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -127,7 +127,8 @@ enum kvm_exit_reason { KVM_EXIT_SHUTDOWN = 8, KVM_EXIT_FAIL_ENTRY = 9, KVM_EXIT_INTR = 10, - KVM_EXIT_SET_TPR = 11 + KVM_EXIT_SET_TPR = 11, + KVM_EXIT_FROZEN = 12 }; /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ @@ -383,6 +384,7 @@ struct kvm_signal_mask { #define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level) #define KVM_GET_IRQCHIP _IOWR(KVMIO, 0x62, struct kvm_irqchip) #define KVM_SET_IRQCHIP _IOR(KVMIO, 0x63, struct kvm_irqchip) +#define KVM_RESET _IO(KVMIO, 0x64) /* * ioctls for vcpu fds [-- Attachment #2: rbt-u2.patch --] [-- Type: application/octet-stream, Size: 748 bytes --] commit 50ddc4b612976847715a81da61e3dce5b486d49e Author: root <root@vt32-pae.(none)> Date: Wed Oct 10 13:54:58 2007 +0800 Add callback for kernel reset operation. Signed-off-by: Yaozu (Eddie) Dong <eddie.dong@intel.com> diff --git a/qemu/hw/pc.c b/qemu/hw/pc.c index 4d0bb93..d81e7e1 100644 --- a/qemu/hw/pc.c +++ b/qemu/hw/pc.c @@ -721,6 +721,10 @@ static void pc_init1(ram_addr_t ram_size, int vga_ram_size, int boot_device, vmport_init(env); } +#ifdef USE_KVM + if (kvm_allowed && kvm_irqchip_in_kernel(kvm_context)) + qemu_register_reset(kvm_reset_kernel, kvm_context); +#endif /* allocate RAM */ ram_addr = qemu_ram_alloc(ram_size); cpu_register_physical_memory(0, ram_size, ram_addr); [-- Attachment #3: rbt-u1.patch --] [-- Type: application/octet-stream, Size: 1334 bytes --] commit 40dd3641611d585a914450bdf4410c626eeaf563 Author: root <root@vt32-pae.(none)> Date: Wed Oct 10 13:56:59 2007 +0800 Add kernel reset support API and a handler for KVM_EXIT_FROZEN. Signed-off-by: Yaozu (Eddie) Dong <eddie.dong@intel.com> diff --git a/user/kvmctl.c b/user/kvmctl.c index 5f63c64..e5370da 100644 --- a/user/kvmctl.c +++ b/user/kvmctl.c @@ -1335,6 +1335,8 @@ again: break; case KVM_EXIT_SET_TPR: break; + case KVM_EXIT_FROZEN: + break; default: fprintf(stderr, "unhandled vm exit: 0x%x\n", run->exit_reason); kvm_show_regs(kvm, vcpu); @@ -1407,3 +1409,10 @@ int kvm_irqchip_in_kernel(kvm_context_t kvm) { return kvm->irqchip_in_kernel; } + +void kvm_reset_kernel(kvm_context_t kvm) +{ + if (kvm_irqchip_in_kernel(kvm)) + return ioctl(kvm->vm_fd, KVM_RESET, 0); +} + diff --git a/user/kvmctl.h b/user/kvmctl.h index 09cf6d7..9f56027 100644 --- a/user/kvmctl.h +++ b/user/kvmctl.h @@ -453,6 +453,13 @@ int kvm_dirty_pages_log_enable_all(kvm_context_t kvm); int kvm_dirty_pages_log_reset(kvm_context_t kvm); /*! + * \brief Reset kernel devices. + * + * \param kvm Pointer to the current kvm_context + */ +void kvm_reset_kernel(kvm_context_t kvm); + +/*! * \brief Query whether in kernel irqchip is used * * \param kvm Pointer to the current kvm_context [-- Attachment #4: rbt-k3.patch --] [-- Type: application/octet-stream, Size: 3662 bytes --] commit a0aed7040befe198491254d3459aeb5897f5f2c1 Author: root <root@vt32-pae.(none)> Date: Wed Oct 10 13:45:58 2007 +0800 Add VM reset support in kernel side to reset the kernel devices and force APs to enter wait for INIT/SIPI state. Signed-off-by: Yaozu (Eddie) Dong <eddie.dong@intel.com> diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 4ab487c..81c9af1 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -68,6 +68,7 @@ * vcpu->requests bit members */ #define KVM_TLB_FLUSH 0 +#define KVM_FROZEN 1 /* * Address types: diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 0b2894a..33f16bd 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -2189,9 +2189,17 @@ again: vcpu->guest_mode = 1; - if (vcpu->requests) + if (vcpu->requests) { if (test_and_clear_bit(KVM_TLB_FLUSH, &vcpu->requests)) kvm_x86_ops->tlb_flush(vcpu); + if (test_and_clear_bit(KVM_FROZEN, &vcpu->requests)) { + local_irq_enable(); + preempt_enable(); + r = -EINTR; + kvm_run->exit_reason = KVM_EXIT_FROZEN; + goto out; + } + } kvm_x86_ops->run(vcpu, kvm_run); @@ -2245,6 +2253,8 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vcpu_load(vcpu); if (unlikely(vcpu->mp_state == VCPU_MP_STATE_UNINITIALIZED)) { + if (irqchip_in_kernel(vcpu->kvm) && vcpu->apic) + kvm_lapic_reset(vcpu); kvm_vcpu_block(vcpu); vcpu_put(vcpu); return -EAGAIN; @@ -3143,6 +3153,54 @@ out: return r; } +/* + * Reset kernel devices. + */ +void kvm_reset_devices(struct kvm *kvm) +{ + kvm_pic_reset(&pic_irqchip(kvm)->pics[1]); + kvm_pic_reset(&pic_irqchip(kvm)->pics[0]); + pic_irqchip(kvm)->output = 0; + kvm_ioapic_reset(kvm->vioapic); +} + +/* + * Kernel side VM Reset. + * NOTE here: User level code must guarantee only the BSP + * thread can do this call. + * + */ +int kvm_vm_reset(struct kvm *kvm) +{ + struct kvm_vcpu *vcpu; + int i; + + for (i = 0; i < KVM_MAX_VCPUS; i++) { + vcpu = kvm->vcpus[i]; + if (!vcpu) + continue; + /* active VCPU */ + if (vcpu->vcpu_id) { + vcpu->mp_state = VCPU_MP_STATE_UNINITIALIZED; + set_bit(KVM_FROZEN, &vcpu->requests); + kvm_vcpu_kick(vcpu); + /* + * Wait till the AP entered waiting for + * INIT/SIPI state + */ + while (test_bit(KVM_FROZEN, &vcpu->requests)) + schedule(); + } + else { + vcpu->mp_state = VCPU_MP_STATE_RUNNABLE; + kvm_lapic_reset(vcpu); + } + } + /* Now only BSP is running... */ + kvm_reset_devices(kvm); + return 0; +} + static long kvm_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -3228,6 +3286,12 @@ static long kvm_vm_ioctl(struct file *filp, } else goto out; break; + case KVM_RESET: + r = -EINVAL; + if (!irqchip_in_kernel(kvm)) + goto out; + r = kvm_vm_reset(kvm); + break; case KVM_IRQ_LINE: { struct kvm_irq_level irq_event; diff --git a/include/linux/kvm.h b/include/linux/kvm.h index cd4ac12..ea9c004 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -127,7 +127,8 @@ enum kvm_exit_reason { KVM_EXIT_SHUTDOWN = 8, KVM_EXIT_FAIL_ENTRY = 9, KVM_EXIT_INTR = 10, - KVM_EXIT_SET_TPR = 11 + KVM_EXIT_SET_TPR = 11, + KVM_EXIT_FROZEN = 12 }; /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ @@ -383,6 +384,7 @@ struct kvm_signal_mask { #define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level) #define KVM_GET_IRQCHIP _IOWR(KVMIO, 0x62, struct kvm_irqchip) #define KVM_SET_IRQCHIP _IOR(KVMIO, 0x63, struct kvm_irqchip) +#define KVM_RESET _IO(KVMIO, 0x64) /* * ioctls for vcpu fds [-- Attachment #5: Type: text/plain, Size: 314 bytes --] ------------------------------------------------------------------------- This SF.net email is sponsored by: Splunk Inc. Still grepping through log files to find problems? Stop. Now Search log events and configuration files using AJAX and a browser. Download your FREE copy of Splunk now >> http://get.splunk.com/ [-- Attachment #6: Type: text/plain, Size: 186 bytes --] _______________________________________________ kvm-devel mailing list kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org https://lists.sourceforge.net/lists/listinfo/kvm-devel ^ permalink raw reply related [flat|nested] 24+ messages in thread
[parent not found: <10EA09EFD8728347A513008B6B0DA77A02364242-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>]
* Re: kernel device reset support [not found] ` <10EA09EFD8728347A513008B6B0DA77A02364242-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org> @ 2007-10-10 10:23 ` Avi Kivity [not found] ` <470CA814.9050907-atKUWr5tajBWk0Htik3J/w@public.gmane.org> 0 siblings, 1 reply; 24+ messages in thread From: Avi Kivity @ 2007-10-10 10:23 UTC (permalink / raw) To: Dong, Eddie; +Cc: kvm-devel Dong, Eddie wrote: > Avi Kivity wrote: > >> Dong, Eddie wrote: >> >>> Avi Kivity wrote: >>> >>> >>>> But, for an ungraceful reset, nothing prevents an AP from >>>> issuing a reset? >>>> >>>> >>> Mmm, Yes, but I think current architecture can't handle this. >>> The thread where AP issues "RESET" will continue run, which >>> means it becomes BSP now and wake up other APs later on. >>> Or We can block that AP first and then inform BSP to do >>> RESET job. Here we need to block the AP in kernel >>> so that we can wake up. >>> >>> >> It should call vcpu_halt() immediately after reset. >> > > ?? Can user level be able to enter kernel HALT state. > > It's just like the guest kernel executing hlt. Why is there a difference? >>> It can be a future task which is not that high priority IMO. >>> I will focus on SMP boot 1st. Your opnion? >>> >>> >> Agree. But let's make it close to the complete solution. >> >> > > Yes, halt all APs and let BSP do reset ops in user level. > Will post patch to Qemu to support SMP reboot some time later. > > Wait, that's a big change. Need to think about this... >> The test for vcpu->requests already exists (and is needed for tlb >> flushes) so there is no additional performance hit. >> > > OK, that makes sense. changed. please verify. > User level is split into libkvm and qemu. > > /* > * Address types: > diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c > index 0b2894a..33f16bd 100644 > --- a/drivers/kvm/kvm_main.c > +++ b/drivers/kvm/kvm_main.c > @@ -2189,9 +2189,17 @@ again: > > vcpu->guest_mode = 1; > > - if (vcpu->requests) > + if (vcpu->requests) { > if (test_and_clear_bit(KVM_TLB_FLUSH, &vcpu->requests)) > kvm_x86_ops->tlb_flush(vcpu); > + if (test_and_clear_bit(KVM_FROZEN, &vcpu->requests)) { > + local_irq_enable(); > + preempt_enable(); > + r = -EINTR; > + kvm_run->exit_reason = KVM_EXIT_FROZEN; > + goto out; > + } > + } > Why not just call vcpu_reset() here, then call vcpu_halt() if we're an AP? > > +/* > + * Kernel side VM Reset. > + * NOTE here: User level code must guarantee only the BSP > + * thread can do this call. > + * > + */ > +int kvm_vm_reset(struct kvm *kvm) > +{ > + struct kvm_vcpu *vcpu; > + int i; > + > + for (i = 0; i < KVM_MAX_VCPUS; i++) { > + vcpu = kvm->vcpus[i]; > + if (!vcpu) > + continue; > + /* active VCPU */ > + if (vcpu->vcpu_id) { > + vcpu->mp_state = VCPU_MP_STATE_UNINITIALIZED; > + set_bit(KVM_FROZEN, &vcpu->requests); > + kvm_vcpu_kick(vcpu); > + /* > + * Wait till the AP entered waiting for > + * INIT/SIPI state > + */ > + while (test_bit(KVM_FROZEN, &vcpu->requests)) > + schedule(); > I don't think we need to wait here. > + } > + else { > + vcpu->mp_state = VCPU_MP_STATE_RUNNABLE; > + kvm_lapic_reset(vcpu); > + } > + } > + /* Now only BSP is running... */ > + kvm_reset_devices(kvm); > But now you're reseting the devices while vcpu 0 may be running. If in the first stage you halt all vcpus, and then restart vcpu 0 after the reset, you avoid the race. -- error compiling committee.c: too many arguments to function ------------------------------------------------------------------------- This SF.net email is sponsored by: Splunk Inc. Still grepping through log files to find problems? Stop. Now Search log events and configuration files using AJAX and a browser. Download your FREE copy of Splunk now >> http://get.splunk.com/ ^ permalink raw reply [flat|nested] 24+ messages in thread
[parent not found: <470CA814.9050907-atKUWr5tajBWk0Htik3J/w@public.gmane.org>]
* Re: kernel device reset support [not found] ` <470CA814.9050907-atKUWr5tajBWk0Htik3J/w@public.gmane.org> @ 2007-10-11 1:32 ` Dong, Eddie [not found] ` <10EA09EFD8728347A513008B6B0DA77A02364638-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org> 0 siblings, 1 reply; 24+ messages in thread From: Dong, Eddie @ 2007-10-11 1:32 UTC (permalink / raw) To: Avi Kivity; +Cc: kvm-devel Avi Kivity wrote: > Dong, Eddie wrote: >> Avi Kivity wrote: >> > > It's just like the guest kernel executing hlt. Why is there a > difference? Current VP wake up logic thru INIT/SIPI doesn't support this when irqchip in kernel. > >> >> Yes, halt all APs and let BSP do reset ops in user level. >> Will post patch to Qemu to support SMP reboot some time later. >> >> > > Wait, that's a big change. Need to think about this... Like talked in previous, we either let BSP do this, or let an AP become BSP. Current Qemu doesn't support this. >> >> - if (vcpu->requests) >> + if (vcpu->requests) { >> if (test_and_clear_bit(KVM_TLB_FLUSH, &vcpu->requests)) >> kvm_x86_ops->tlb_flush(vcpu); >> + if (test_and_clear_bit(KVM_FROZEN, &vcpu->requests)) { >> + local_irq_enable(); + preempt_enable(); >> + r = -EINTR; >> + kvm_run->exit_reason = KVM_EXIT_FROZEN; >> + goto out; >> + } >> + } >> > > > Why not just call vcpu_reset() here, then call vcpu_halt() if > we're an AP? Then you need to duplicate following code, which may bring extra issue such as GUEST_CR3 update (kvm_mmu_reload) which is done after this code but before vcpu->requests test. BTW, switch back to user level to retry is cleaner IMO. if (unlikely(vcpu->mp_state == VCPU_MP_STATE_UNINITIALIZED)) { if (irqchip_in_kernel(vcpu->kvm) && vcpu->apic) kvm_lapic_reset(vcpu); kvm_vcpu_block(vcpu); vcpu_put(vcpu); return -EAGAIN; } > >> + while (test_bit(KVM_FROZEN, &vcpu->requests)) >> + schedule(); >> > > I don't think we need to wait here. The VCPU may be executing in kernel still, which may modify kernel device state. E.g. A VCPU may be doing PIO emulating. If BSP reset the kernel devices earlier than the VCPU modify the device state, we are in trouble. > >> + } >> + else { >> + vcpu->mp_state = VCPU_MP_STATE_RUNNABLE; >> + kvm_lapic_reset(vcpu); >> + } >> + } >> + /* Now only BSP is running... */ >> + kvm_reset_devices(kvm); >> > > But now you're reseting the devices while vcpu 0 may be > running. If in No, VCPU0 (BSP) is current VCPU (though you don't have the current vcpu parameter explicitly) like mentioned in previous mail and as pre-requirement of user level change. Please refer my abswer above of this mail. > the first stage you halt all vcpus, and then restart vcpu 0 after the > reset, you avoid the race. > No race here. The code is executing in VCPU0 context. thx,eddie ------------------------------------------------------------------------- This SF.net email is sponsored by: Splunk Inc. Still grepping through log files to find problems? Stop. Now Search log events and configuration files using AJAX and a browser. Download your FREE copy of Splunk now >> http://get.splunk.com/ ^ permalink raw reply [flat|nested] 24+ messages in thread
[parent not found: <10EA09EFD8728347A513008B6B0DA77A02364638-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>]
* Re: kernel device reset support [not found] ` <10EA09EFD8728347A513008B6B0DA77A02364638-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org> @ 2007-10-11 7:24 ` Dong, Eddie 2007-10-11 12:11 ` Avi Kivity 1 sibling, 0 replies; 24+ messages in thread From: Dong, Eddie @ 2007-10-11 7:24 UTC (permalink / raw) To: Dong, Eddie, Avi Kivity; +Cc: kvm-devel This one can force qemu to execute reset ops in BSP only. As addition to previous user level patch. But if BSP is halted and executing some code without backing to user level, then we have trouble. But I won't worry about this right now, since it is also impact device model performance if KVM doesn't back to user for a long time. BTW, XEN support reset by re-start Qemu-DM and guest, which is quit simple and without additional issues. But KVM decide to do within Qemu. There are many ways a guest can do reboot such as tripple fault, port CF9 write etc. keyboard write etc. Current Qemu only handle keyboard port (0x61) reset method. We can add tripple fault reboot support in future. thx,eddie diff --git a/qemu/vl.c b/qemu/vl.c index 634fb34..b65b165 100644 --- a/qemu/vl.c +++ b/qemu/vl.c @@ -7265,7 +7265,7 @@ int main_loop(void) ret = EXCP_INTERRUPT; break; } - if (reset_requested) { + if (reset_requested && env->cpu_index == 0) { reset_requested = 0; qemu_system_reset(); #ifdef USE_KVM ------------------------------------------------------------------------- This SF.net email is sponsored by: Splunk Inc. Still grepping through log files to find problems? Stop. Now Search log events and configuration files using AJAX and a browser. Download your FREE copy of Splunk now >> http://get.splunk.com/ ^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: kernel device reset support [not found] ` <10EA09EFD8728347A513008B6B0DA77A02364638-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org> 2007-10-11 7:24 ` Dong, Eddie @ 2007-10-11 12:11 ` Avi Kivity [not found] ` <470E130D.6080808-atKUWr5tajBWk0Htik3J/w@public.gmane.org> 1 sibling, 1 reply; 24+ messages in thread From: Avi Kivity @ 2007-10-11 12:11 UTC (permalink / raw) To: Dong, Eddie; +Cc: kvm-devel Dong, Eddie wrote: > Avi Kivity wrote: > >> Dong, Eddie wrote: >> >>> Avi Kivity wrote: >>> >>> > > >> It's just like the guest kernel executing hlt. Why is there a >> difference? >> > > Current VP wake up logic thru INIT/SIPI doesn't support this when > irqchip in kernel. > > Doesn't this code imply that waiting for SIPI is supported? > static void kvm_vcpu_block(struct kvm_vcpu *vcpu) > { > DECLARE_WAITQUEUE(wait, current); > > add_wait_queue(&vcpu->wq, &wait); > > /* > * We will block until either an interrupt or a signal wakes us up > */ > while (!kvm_cpu_has_interrupt(vcpu) > && !signal_pending(current) > && vcpu->mp_state != VCPU_MP_STATE_RUNNABLE > && vcpu->mp_state != VCPU_MP_STATE_SIPI_RECEIVED) { (note waiting for SIPI here) > set_current_state(TASK_INTERRUPTIBLE); > vcpu_put(vcpu); > schedule(); > vcpu_load(vcpu); > } > > __set_current_state(TASK_RUNNING); > remove_wait_queue(&vcpu->wq, &wait); > } >>> Yes, halt all APs and let BSP do reset ops in user level. >>> Will post patch to Qemu to support SMP reboot some time later. >>> >>> >>> >> Wait, that's a big change. Need to think about this... >> > > Like talked in previous, we either let BSP do this, or let an AP > become BSP. Current Qemu doesn't support this. > > >>> - if (vcpu->requests) >>> + if (vcpu->requests) { >>> if (test_and_clear_bit(KVM_TLB_FLUSH, &vcpu->requests)) >>> kvm_x86_ops->tlb_flush(vcpu); >>> + if (test_and_clear_bit(KVM_FROZEN, &vcpu->requests)) { >>> + local_irq_enable(); + >>> > preempt_enable(); > >>> + r = -EINTR; >>> + kvm_run->exit_reason = KVM_EXIT_FROZEN; >>> + goto out; >>> + } >>> + } >>> >>> >> Why not just call vcpu_reset() here, then call vcpu_halt() if >> we're an AP? >> > > Then you need to duplicate following code, which may bring extra issue > such as GUEST_CR3 update (kvm_mmu_reload) which is done after > this code but before vcpu->requests test. BTW, switch back to user level > to retry is cleaner IMO. > > if (unlikely(vcpu->mp_state == VCPU_MP_STATE_UNINITIALIZED)) { > if (irqchip_in_kernel(vcpu->kvm) && vcpu->apic) > kvm_lapic_reset(vcpu); > kvm_vcpu_block(vcpu); > vcpu_put(vcpu); > return -EAGAIN; > } > > You can put a goto to the top of the loop to redo the mmu reload. In any case you need to do that because you don't want to execute the reset code with interrupts and preemption disabled. >>> + while (test_bit(KVM_FROZEN, &vcpu->requests)) >>> + schedule(); >>> >>> >> I don't think we need to wait here. >> > > The VCPU may be executing in kernel still, which may modify kernel > device state. E.g. A VCPU may be doing PIO emulating. > > In that case we will wait when taking kvm->lock. > If BSP reset the kernel devices earlier than the VCPU modify the device > state, > we are in trouble. > Right, so the logic is: - send IPI to all vcpus to stop them running - take the lock and reset all devices, srop the lock - unblock vcpu 0 > >>> + } >>> + else { >>> + vcpu->mp_state = VCPU_MP_STATE_RUNNABLE; >>> + kvm_lapic_reset(vcpu); >>> + } >>> + } >>> + /* Now only BSP is running... */ >>> + kvm_reset_devices(kvm); >>> >>> >> But now you're reseting the devices while vcpu 0 may be >> running. If in >> > > No, VCPU0 (BSP) is current VCPU (though you don't have the current > vcpu parameter explicitly) like mentioned in previous mail and > as pre-requirement of user level change. Please refer my abswer above > of this mail. > > We can't rely on user space not to cause host kernel corruption. >> the first stage you halt all vcpus, and then restart vcpu 0 after the >> reset, you avoid the race. >> >> > > No race here. The code is executing in VCPU0 context. > > It isn't guaranteed. It's okay to rely on it for guest correctness (though undesirable), not host correctness. -- error compiling committee.c: too many arguments to function ------------------------------------------------------------------------- This SF.net email is sponsored by: Splunk Inc. Still grepping through log files to find problems? Stop. Now Search log events and configuration files using AJAX and a browser. Download your FREE copy of Splunk now >> http://get.splunk.com/ ^ permalink raw reply [flat|nested] 24+ messages in thread
[parent not found: <470E130D.6080808-atKUWr5tajBWk0Htik3J/w@public.gmane.org>]
* Re: kernel device reset support [not found] ` <470E130D.6080808-atKUWr5tajBWk0Htik3J/w@public.gmane.org> @ 2007-10-12 1:07 ` Dong, Eddie [not found] ` <10EA09EFD8728347A513008B6B0DA77A02364C43-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org> 0 siblings, 1 reply; 24+ messages in thread From: Dong, Eddie @ 2007-10-12 1:07 UTC (permalink / raw) To: Avi Kivity; +Cc: kvm-devel >> >> Current VP wake up logic thru INIT/SIPI doesn't support this when >> irqchip in kernel. >> >> > > Doesn't this code imply that waiting for SIPI is supported? It is supported to wake up VCPU in kernel, but can't wake up the VCPU in user level since irqchip_in_kernel is TRUE here. vcpu->mp_state doesn't export to user level. >> > > You can put a goto to the top of the loop to redo the mmu reload. In > any case you need to do that because you don't want to execute > the reset > code with interrupts and preemption disabled. A goto cross function? It is too aggresive and bad code style IMO. The vcpu->request check is in __vcpu_run, while entering block state is in its parent function kvm_vcpu_ioctl_run. But if you want, we can return a special value, say REQUEST_INTERNAL_LOOP, to kvm_vcpu_ioctl_run and let kvm_vcpu_ioctl_run use sepcial logic to do goto within function if it see the special return value REQUEST_INTERNAL_LOOP. But is it cleaner? Also we will add more kernel to user EXIT reason, such as RESET request from kernel sensored guest tripple fault etc. >> >> The VCPU may be executing in kernel still, which may modify kernel >> device state. E.g. A VCPU may be doing PIO emulating. >> >> > > In that case we will wait when taking kvm->lock. Lock doesn't help. Lock can only avoid no 2+ modifcation in same time. But what we care if all other VCPUs can't do modification after BSP do device reset. It is different semantics. Maybe you are still arguing it is the AP who do RESET ops. Let us go to next discussion first. > >> If BSP reset the kernel devices earlier than the VCPU modify the >> device state, we are in trouble. >> >> No, VCPU0 (BSP) is current VCPU (though you don't have the current >> vcpu parameter explicitly) like mentioned in previous mail and >> as pre-requirement of user level change. Please refer my abswer >> above of this mail. >> >> > > We can't rely on user space not to cause host kernel corruption. ??? Even an AP trigger RESET, it just sets a reset_request flag in user level. It is another VCPU who will execute RESET operation. It seems the argument is who should do the RESET operation, say RST_CPU. BSP only or AP too. For me, since after RESET only BSP can execute, and the thread executing qemu_system_reset will continously execute (after RESET kernel) per current Qemu code, so what we can do is: 1: RST_CPU=BSP. Then BSP does qemu_system_reset, or 2: RST_CPU = AP, say RAP, does qemu_system_reset, user level then need to block RAP after qemu_system_reset and wake up BSP to take over. A point here we can't blcok RAP in case 2 at kernel RESET time, since kernel RESET may be not the last step of qemu_system_reset. It may go to kernel again. If we go with #1, just 1 line change as in my previous mail. If we go with #2, we have to add a new ABI for the AP to enter kernel wait for INIT/SIPI/SIPI state, otherwise normal INIT/SIPI/SIPI couldn't wake it up. I see much complicate in #2 while #1 has same functionality but simple. thanks, eddie ------------------------------------------------------------------------- This SF.net email is sponsored by: Splunk Inc. Still grepping through log files to find problems? Stop. Now Search log events and configuration files using AJAX and a browser. Download your FREE copy of Splunk now >> http://get.splunk.com/ ^ permalink raw reply [flat|nested] 24+ messages in thread
[parent not found: <10EA09EFD8728347A513008B6B0DA77A02364C43-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>]
* Re: kernel device reset support [not found] ` <10EA09EFD8728347A513008B6B0DA77A02364C43-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org> @ 2007-10-12 6:18 ` Avi Kivity [not found] ` <470F11B9.4050501-atKUWr5tajBWk0Htik3J/w@public.gmane.org> 0 siblings, 1 reply; 24+ messages in thread From: Avi Kivity @ 2007-10-12 6:18 UTC (permalink / raw) To: Dong, Eddie; +Cc: kvm-devel Dong, Eddie wrote: >>> Current VP wake up logic thru INIT/SIPI doesn't support this when >>> irqchip in kernel. >>> >>> >>> >> Doesn't this code imply that waiting for SIPI is supported? >> > > It is supported to wake up VCPU in kernel, but can't wake up the VCPU > in user level since irqchip_in_kernel is TRUE here. vcpu->mp_state > doesn't export to user level. > > We never sleep in user level if irqchip_in_kernel(). So the thread will eventually go back to kernel mode. >> You can put a goto to the top of the loop to redo the mmu reload. In >> any case you need to do that because you don't want to execute >> the reset >> code with interrupts and preemption disabled. >> > > A goto cross function? It is too aggresive and bad code style IMO. > The vcpu->request check is in __vcpu_run, while entering block > state is in its parent function kvm_vcpu_ioctl_run. > > goto the label 'again' in __vcpu_run(), which has the call to kvm_mmu_reload(). > But if you want, we can return a special value, > say REQUEST_INTERNAL_LOOP, to > kvm_vcpu_ioctl_run and let kvm_vcpu_ioctl_run use sepcial logic > to do goto within function if it see the special return value > REQUEST_INTERNAL_LOOP. But is it cleaner? > > Also we will add more kernel to user EXIT reason, such as RESET request > from kernel sensored guest tripple fault etc. > > There is already a triple fault exit code. >>> The VCPU may be executing in kernel still, which may modify kernel >>> device state. E.g. A VCPU may be doing PIO emulating. >>> >>> >>> >> In that case we will wait when taking kvm->lock. >> > > Lock doesn't help. Lock can only avoid no 2+ modifcation > in same time. But what we care if all other VCPUs can't > do modification after BSP do device reset. > It is different semantics. > > Maybe you are still arguing it is the AP who do RESET ops. > Let us go to next discussion first. > > We first halt all vcpus, then take the lock. So: - other processors won't start after the device reset because they are halted - we won't do the reset concurrently with other processors because of the lock >>> If BSP reset the kernel devices earlier than the VCPU modify the >>> device state, we are in trouble. >>> >>> No, VCPU0 (BSP) is current VCPU (though you don't have the current >>> vcpu parameter explicitly) like mentioned in previous mail and >>> as pre-requirement of user level change. Please refer my abswer >>> above of this mail. >>> >>> >>> >> We can't rely on user space not to cause host kernel corruption. >> > > ??? > > Even an AP trigger RESET, it just sets a reset_request flag in user > level. > It is another VCPU who will execute RESET operation. > > It seems the argument is who should do the RESET operation, > say RST_CPU. BSP only or AP too. For me, since after RESET > only BSP can execute, and the thread executing > qemu_system_reset will continously execute > (after RESET kernel) per current Qemu code, so what we can do is: > > 1: RST_CPU=BSP. Then BSP does qemu_system_reset, or > 2: RST_CPU = AP, say RAP, does qemu_system_reset, user level > then > need to block RAP after qemu_system_reset and wake up BSP to take over. > A point here we can't blcok RAP in case 2 at kernel RESET time, > since > kernel RESET may be not the last step of qemu_system_reset. It may go > to kernel again. > > If we go with #1, just 1 line change as in my previous mail. > If we go with #2, we have to add a new ABI for the AP to enter > kernel > wait for INIT/SIPI/SIPI state, otherwise normal INIT/SIPI/SIPI couldn't > wake it up. > > I see much complicate in #2 while #1 has same functionality but > simple. > > My view is: - vcpu threads never sleep in userspace. they will always eventually end up in the kernel so we can stop or restart them there - reset is a platform API so it can't be dependent on which vcpu thread executes it (if any; it may be executed from an unrelated thread, remember we plan to separate the qemu signal handling code into a separate thread) - we already have a way to send messages to other vcpus So it seems to me everything is in place to make it fairly simple. I'll try writing a patch that does what I mean and post it. Either I'll convince you that in-kernel is simpler, or I'll convince myself that it is harder. -- Do not meddle in the internals of kernels, for they are subtle and quick to panic. ------------------------------------------------------------------------- This SF.net email is sponsored by: Splunk Inc. Still grepping through log files to find problems? Stop. Now Search log events and configuration files using AJAX and a browser. Download your FREE copy of Splunk now >> http://get.splunk.com/ ^ permalink raw reply [flat|nested] 24+ messages in thread
[parent not found: <470F11B9.4050501-atKUWr5tajBWk0Htik3J/w@public.gmane.org>]
* Re: kernel device reset support [not found] ` <470F11B9.4050501-atKUWr5tajBWk0Htik3J/w@public.gmane.org> @ 2007-10-12 7:17 ` Dong, Eddie [not found] ` <10EA09EFD8728347A513008B6B0DA77A02364F85-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org> 0 siblings, 1 reply; 24+ messages in thread From: Dong, Eddie @ 2007-10-12 7:17 UTC (permalink / raw) To: Avi Kivity; +Cc: kvm-devel > I'll try writing a patch that does what I mean and post it. > Either I'll > convince you that in-kernel is simpler, or I'll convince myself that > it is harder. > OK, let us see which one is simple. BTW, you have swapped to N+1 SMP model in this discussion which is not there yet. And this is the difference. For me N+1 model means the device emulation will go to the N+1 thread while all CPU execution will still be in its own thread. It looks like you are proposing different. Eddie ------------------------------------------------------------------------- This SF.net email is sponsored by: Splunk Inc. Still grepping through log files to find problems? Stop. Now Search log events and configuration files using AJAX and a browser. Download your FREE copy of Splunk now >> http://get.splunk.com/ ^ permalink raw reply [flat|nested] 24+ messages in thread
[parent not found: <10EA09EFD8728347A513008B6B0DA77A02364F85-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>]
* Re: kernel device reset support [not found] ` <10EA09EFD8728347A513008B6B0DA77A02364F85-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org> @ 2007-10-13 7:16 ` Avi Kivity [not found] ` <471070D8.7030402-atKUWr5tajBWk0Htik3J/w@public.gmane.org> 0 siblings, 1 reply; 24+ messages in thread From: Avi Kivity @ 2007-10-13 7:16 UTC (permalink / raw) To: Dong, Eddie; +Cc: kvm-devel Dong, Eddie wrote: >> I'll try writing a patch that does what I mean and post it. >> Either I'll >> convince you that in-kernel is simpler, or I'll convince myself that >> it is harder. >> >> > OK, let us see which one is simple. > > BTW, you have swapped to N+1 SMP model in this discussion which > is not there yet. And this is the difference. > > What I'm saying is that it should work with both models. > For me N+1 model means the device emulation will go to the N+1 thread > while all CPU execution will still be in its own thread. It looks like > you > are proposing different. > N+1, for me is that synchronous device emulation (like pio and mmio) happens in in the vcpu thread and asynchronous device emulation (handling signals in qemu, performing dma, and injecting interrupts) happens in the device thread. This minimizes context switching and heavyweight exits. -- Do not meddle in the internals of kernels, for they are subtle and quick to panic. ------------------------------------------------------------------------- This SF.net email is sponsored by: Splunk Inc. Still grepping through log files to find problems? Stop. Now Search log events and configuration files using AJAX and a browser. Download your FREE copy of Splunk now >> http://get.splunk.com/ ^ permalink raw reply [flat|nested] 24+ messages in thread
[parent not found: <471070D8.7030402-atKUWr5tajBWk0Htik3J/w@public.gmane.org>]
* Re: kernel device reset support [not found] ` <471070D8.7030402-atKUWr5tajBWk0Htik3J/w@public.gmane.org> @ 2007-10-15 4:40 ` Dong, Eddie [not found] ` <10EA09EFD8728347A513008B6B0DA77A023A6DFE-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org> 0 siblings, 1 reply; 24+ messages in thread From: Dong, Eddie @ 2007-10-15 4:40 UTC (permalink / raw) To: Avi Kivity; +Cc: kvm-devel >N+1, for me is that synchronous device emulation (like pio and mmio) >happens in in the vcpu thread and asynchronous device emulation >(handling signals in qemu, performing dma, and injecting interrupts) >happens in the device thread. This minimizes context switching and >heavyweight exits. > If this is true, then the N+1 thread won't be able to execute qemu_system_reset which is in VCPU contents, nor can asyn I/O call back APIs. Thx,eddie ------------------------------------------------------------------------- This SF.net email is sponsored by: Splunk Inc. Still grepping through log files to find problems? Stop. Now Search log events and configuration files using AJAX and a browser. Download your FREE copy of Splunk now >> http://get.splunk.com/ ^ permalink raw reply [flat|nested] 24+ messages in thread
[parent not found: <10EA09EFD8728347A513008B6B0DA77A023A6DFE-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>]
* Re: kernel device reset support [not found] ` <10EA09EFD8728347A513008B6B0DA77A023A6DFE-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org> @ 2007-10-15 9:10 ` Avi Kivity [not found] ` <47132E9D.7030500-atKUWr5tajBWk0Htik3J/w@public.gmane.org> 0 siblings, 1 reply; 24+ messages in thread From: Avi Kivity @ 2007-10-15 9:10 UTC (permalink / raw) To: Dong, Eddie; +Cc: kvm-devel Dong, Eddie wrote: >> N+1, for me is that synchronous device emulation (like pio and mmio) >> happens in in the vcpu thread and asynchronous device emulation >> (handling signals in qemu, performing dma, and injecting interrupts) >> happens in the device thread. This minimizes context switching and >> heavyweight exits. >> >> > If this is true, then the N+1 thread won't be able to execute > qemu_system_reset > which is in VCPU contents, I don't understand. It can certainly access any qemu object (after taking qemu_mutex), and it can call any kvm vm ioctl. > nor can asyn I/O call back APIs. > Do you mean signal handlers? Sure, but we wait for socket I/O in select() and even dequeue signals synchronously. -- error compiling committee.c: too many arguments to function ------------------------------------------------------------------------- This SF.net email is sponsored by: Splunk Inc. Still grepping through log files to find problems? Stop. Now Search log events and configuration files using AJAX and a browser. Download your FREE copy of Splunk now >> http://get.splunk.com/ ^ permalink raw reply [flat|nested] 24+ messages in thread
[parent not found: <47132E9D.7030500-atKUWr5tajBWk0Htik3J/w@public.gmane.org>]
* Re: kernel device reset support [not found] ` <47132E9D.7030500-atKUWr5tajBWk0Htik3J/w@public.gmane.org> @ 2007-10-16 8:44 ` Dong, Eddie [not found] ` <10EA09EFD8728347A513008B6B0DA77A023A763C-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org> [not found] ` <471491A9. 8040207@qumranet.com> 0 siblings, 2 replies; 24+ messages in thread From: Dong, Eddie @ 2007-10-16 8:44 UTC (permalink / raw) To: Avi Kivity; +Cc: kvm-devel Avi Kivity wrote: > Dong, Eddie wrote: >>> N+1, for me is that synchronous device emulation (like pio and mmio) >>> happens in in the vcpu thread and asynchronous device emulation >>> (handling signals in qemu, performing dma, and injecting interrupts) >>> happens in the device thread. This minimizes context switching and >>> heavyweight exits. >>> >>> >> If this is true, then the N+1 thread won't be able to execute >> qemu_system_reset which is in VCPU contents, > > I don't understand. It can certainly access any qemu object (after > taking qemu_mutex), and it can call any kvm vm ioctl. It can in theory, but do we have the real usage for qemu_system_reset which is the only caller of KERNEL RESET. > > >> nor can asyn I/O call back APIs. >> > > Do you mean signal handlers? Sure, but we wait for socket I/O in > select() and even dequeue signals synchronously. > > Qemu_system_reset is called only when a VCPU thread is doing. Anyway, wanna to see your whole proposal. thx,eddie ------------------------------------------------------------------------- This SF.net email is sponsored by: Splunk Inc. Still grepping through log files to find problems? Stop. Now Search log events and configuration files using AJAX and a browser. Download your FREE copy of Splunk now >> http://get.splunk.com/ ^ permalink raw reply [flat|nested] 24+ messages in thread
[parent not found: <10EA09EFD8728347A513008B6B0DA77A023A763C-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>]
* Re: kernel device reset support [not found] ` <10EA09EFD8728347A513008B6B0DA77A023A763C-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org> @ 2007-10-16 10:25 ` Avi Kivity 0 siblings, 0 replies; 24+ messages in thread From: Avi Kivity @ 2007-10-16 10:25 UTC (permalink / raw) To: Dong, Eddie; +Cc: kvm-devel Dong, Eddie wrote: > Avi Kivity wrote: > >> Dong, Eddie wrote: >> >>>> N+1, for me is that synchronous device emulation (like pio and mmio) >>>> happens in in the vcpu thread and asynchronous device emulation >>>> (handling signals in qemu, performing dma, and injecting interrupts) >>>> happens in the device thread. This minimizes context switching and >>>> heavyweight exits. >>>> >>>> >>>> >>> If this is true, then the N+1 thread won't be able to execute >>> qemu_system_reset which is in VCPU contents, >>> >> I don't understand. It can certainly access any qemu object (after >> taking qemu_mutex), and it can call any kvm vm ioctl. >> > > It can in theory, but do we have the real usage for > qemu_system_reset which is the only caller of > KERNEL RESET. > > Well, if the guest resets itself, then reset is called from the vcpu thread. If we reset from the qemu monitor, it can be called from a non-vcpu thread. >>> >> Do you mean signal handlers? Sure, but we wait for socket I/O in >> select() and even dequeue signals synchronously. >> >> >> > Qemu_system_reset is called only when a VCPU thread is doing. > > Anyway, wanna to see your whole proposal. > Yes, I'll prepare and post patched for review. -- error compiling committee.c: too many arguments to function ------------------------------------------------------------------------- This SF.net email is sponsored by: Splunk Inc. Still grepping through log files to find problems? Stop. Now Search log events and configuration files using AJAX and a browser. Download your FREE copy of Splunk now >> http://get.splunk.com/ ^ permalink raw reply [flat|nested] 24+ messages in thread
[parent not found: <471491A9. 8040207@qumranet.com>]
[parent not found: <471491A9.8040207-atKUWr5tajBWk0Htik3J/w@public.gmane.org>]
* Re: kernel device reset support [not found] ` <471491A9.8040207-atKUWr5tajBWk0Htik3J/w@public.gmane.org> @ 2007-10-17 1:40 ` Dong, Eddie 0 siblings, 0 replies; 24+ messages in thread From: Dong, Eddie @ 2007-10-17 1:40 UTC (permalink / raw) To: Avi Kivity; +Cc: kvm-devel Avi Kivity wrote: >> >> It can in theory, but do we have the real usage for >> qemu_system_reset which is the only caller of >> KERNEL RESET. >> >> > > Well, if the guest resets itself, then reset is called from the vcpu > thread. If we reset from the qemu monitor, it can be called from a > non-vcpu thread. > OK, now we know where the difference understanding comes from. But I have different opnion here: Qemu monitor reset --> do_system_reset -> qemu_system_reset_request -> reset_requested = 1; Above ops happens in non-vcpu thread, but this thread doesn't do real reset ops. It is the VCPU thread which will do qemu_system_reset if reset_requested == 1 later on. Maybe I still miss your point :-( Eddie ------------------------------------------------------------------------- This SF.net email is sponsored by: Splunk Inc. Still grepping through log files to find problems? Stop. Now Search log events and configuration files using AJAX and a browser. Download your FREE copy of Splunk now >> http://get.splunk.com/ ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: kernel device reset support [not found] ` <10EA09EFD8728347A513008B6B0DA77A0231BB36-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org> 2007-10-08 10:24 ` Avi Kivity @ 2007-10-08 10:27 ` Avi Kivity 1 sibling, 0 replies; 24+ messages in thread From: Avi Kivity @ 2007-10-08 10:27 UTC (permalink / raw) To: Dong, Eddie; +Cc: kvm-devel Dong, Eddie wrote: > Kernel side patch to introduce a new API for kernel device reset and > force > vcpu mp_state to UNINATIALIZED state if it is reset. > thx,eddie > > + > +/* > + * Reset VM. > + * > + */ > +int kvm_vm_reset(struct kvm *kvm) > +{ > + struct kvm_vcpu *vcpu; > + int i; > + > + kvm_reset_devices(kvm); > Need to take kvm->lock around this. -- Do not meddle in the internals of kernels, for they are subtle and quick to panic. ------------------------------------------------------------------------- This SF.net email is sponsored by: Splunk Inc. Still grepping through log files to find problems? Stop. Now Search log events and configuration files using AJAX and a browser. Download your FREE copy of Splunk now >> http://get.splunk.com/ ^ permalink raw reply [flat|nested] 24+ messages in thread
end of thread, other threads:[~2007-10-17 1:40 UTC | newest]
Thread overview: 24+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-10-08 10:17 kernel device reset support Dong, Eddie
[not found] ` <10EA09EFD8728347A513008B6B0DA77A0231BB36-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-10-08 10:24 ` Avi Kivity
[not found] ` <470A0556.80903-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-10-09 1:58 ` Dong, Eddie
[not found] ` <10EA09EFD8728347A513008B6B0DA77A0231BD83-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-10-09 9:34 ` Avi Kivity
[not found] ` <470B4B2E.1000500-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-10-09 9:53 ` Avi Kivity
2007-10-09 10:11 ` Dong, Eddie
[not found] ` <10EA09EFD8728347A513008B6B0DA77A0231C1AA-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-10-09 10:17 ` Avi Kivity
[not found] ` <470B5528.2010605-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-10-09 10:36 ` Dong, Eddie
[not found] ` <10EA09EFD8728347A513008B6B0DA77A0231C1B2-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-10-09 10:55 ` Avi Kivity
[not found] ` <470B5E3B.4060006-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-10-10 6:17 ` Dong, Eddie
[not found] ` <10EA09EFD8728347A513008B6B0DA77A02364242-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-10-10 10:23 ` Avi Kivity
[not found] ` <470CA814.9050907-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-10-11 1:32 ` Dong, Eddie
[not found] ` <10EA09EFD8728347A513008B6B0DA77A02364638-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-10-11 7:24 ` Dong, Eddie
2007-10-11 12:11 ` Avi Kivity
[not found] ` <470E130D.6080808-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-10-12 1:07 ` Dong, Eddie
[not found] ` <10EA09EFD8728347A513008B6B0DA77A02364C43-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-10-12 6:18 ` Avi Kivity
[not found] ` <470F11B9.4050501-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-10-12 7:17 ` Dong, Eddie
[not found] ` <10EA09EFD8728347A513008B6B0DA77A02364F85-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-10-13 7:16 ` Avi Kivity
[not found] ` <471070D8.7030402-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-10-15 4:40 ` Dong, Eddie
[not found] ` <10EA09EFD8728347A513008B6B0DA77A023A6DFE-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-10-15 9:10 ` Avi Kivity
[not found] ` <47132E9D.7030500-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-10-16 8:44 ` Dong, Eddie
[not found] ` <10EA09EFD8728347A513008B6B0DA77A023A763C-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-10-16 10:25 ` Avi Kivity
[not found] ` <471491A9. 8040207@qumranet.com>
[not found] ` <471491A9.8040207-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-10-17 1:40 ` Dong, Eddie
2007-10-08 10:27 ` Avi Kivity
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox