* [Qemu-devel] [PATCH 1/2] kvm: make vcpu life cycle separated from kvm instance
2011-11-25 2:35 [Qemu-devel] [PATCH 0] A series patches for kvm&qemu to enable vcpu destruction in kvm Liu Ping Fan
@ 2011-11-25 2:35 ` Liu Ping Fan
2011-11-27 10:36 ` Avi Kivity
2011-11-25 17:54 ` [Qemu-devel] [PATCH 0] A series patches for kvm&qemu to enable vcpu destruction in kvm Jan Kiszka
` (7 subsequent siblings)
8 siblings, 1 reply; 19+ messages in thread
From: Liu Ping Fan @ 2011-11-25 2:35 UTC (permalink / raw)
To: kvm, qemu-devel
Cc: aliguori, Liu Ping Fan, linux-kernel, ryanh, jan.kiszka, avi
From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
Currently, a vcpu can be destroyed only when the kvm instance is destroyed.
Change this so that each vcpu holds a reference to the kvm instance; a vcpu
then MUST and CAN be destroyed before the kvm instance is. Qemu will take
advantage of this to exit the vcpu thread if it is no longer in use by the guest.
Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
---
arch/x86/kvm/x86.c | 28 ++++++++--------------------
include/linux/kvm_host.h | 2 ++
virt/kvm/kvm_main.c | 31 +++++++++++++++++++++++++++++--
3 files changed, 39 insertions(+), 22 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c38efd7..ea2315a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6560,27 +6560,16 @@ static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
vcpu_put(vcpu);
}
-static void kvm_free_vcpus(struct kvm *kvm)
+void kvm_arch_vcpu_zap(struct kref *ref)
{
- unsigned int i;
- struct kvm_vcpu *vcpu;
-
- /*
- * Unpin any mmu pages first.
- */
- kvm_for_each_vcpu(i, vcpu, kvm) {
- kvm_clear_async_pf_completion_queue(vcpu);
- kvm_unload_vcpu_mmu(vcpu);
- }
- kvm_for_each_vcpu(i, vcpu, kvm)
- kvm_arch_vcpu_free(vcpu);
-
- mutex_lock(&kvm->lock);
- for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
- kvm->vcpus[i] = NULL;
+ struct kvm_vcpu *vcpu = container_of(ref, struct kvm_vcpu, refcount);
+ struct kvm *kvm = vcpu->kvm;
- atomic_set(&kvm->online_vcpus, 0);
- mutex_unlock(&kvm->lock);
+ printk(KERN_INFO "%s, zap vcpu:0x%x\n", __func__, vcpu->vcpu_id);
+ kvm_clear_async_pf_completion_queue(vcpu);
+ kvm_unload_vcpu_mmu(vcpu);
+ kvm_arch_vcpu_free(vcpu);
+ kvm_put_kvm(kvm);
}
void kvm_arch_sync_events(struct kvm *kvm)
@@ -6594,7 +6583,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_iommu_unmap_guest(kvm);
kfree(kvm->arch.vpic);
kfree(kvm->arch.vioapic);
- kvm_free_vcpus(kvm);
if (kvm->arch.apic_access_page)
put_page(kvm->arch.apic_access_page);
if (kvm->arch.ept_identity_pagetable)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index d526231..fe35078 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -113,6 +113,7 @@ enum {
struct kvm_vcpu {
struct kvm *kvm;
+ struct kref refcount;
#ifdef CONFIG_PREEMPT_NOTIFIERS
struct preempt_notifier preempt_notifier;
#endif
@@ -460,6 +461,7 @@ void kvm_arch_exit(void);
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu);
+void kvm_arch_vcpu_zap(struct kref *ref);
void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d9cfb78..f166bc8 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -580,6 +580,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
kvm_arch_free_vm(kvm);
hardware_disable_all();
mmdrop(mm);
+ printk(KERN_INFO "%s finished\n", __func__);
}
void kvm_get_kvm(struct kvm *kvm)
@@ -1503,6 +1504,16 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
mark_page_dirty_in_slot(kvm, memslot, gfn);
}
+void kvm_vcpu_get(struct kvm_vcpu *vcpu)
+{
+ kref_get(&vcpu->refcount);
+}
+
+void kvm_vcpu_put(struct kvm_vcpu *vcpu)
+{
+ kref_put(&vcpu->refcount, kvm_arch_vcpu_zap);
+}
+
/*
* The vCPU has executed a HLT instruction with in-kernel mode enabled.
*/
@@ -1623,8 +1634,13 @@ static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma)
static int kvm_vcpu_release(struct inode *inode, struct file *filp)
{
struct kvm_vcpu *vcpu = filp->private_data;
+ struct kvm *kvm = vcpu->kvm;
- kvm_put_kvm(vcpu->kvm);
+ filp->private_data = NULL;
+ mutex_lock(&kvm->lock);
+ atomic_sub(1, &kvm->online_vcpus);
+ mutex_unlock(&kvm->lock);
+ kvm_vcpu_put(vcpu);
return 0;
}
@@ -1646,6 +1662,17 @@ static int create_vcpu_fd(struct kvm_vcpu *vcpu)
return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR);
}
+static struct kvm_vcpu *kvm_vcpu_create(struct kvm *kvm, u32 id)
+{
+ struct kvm_vcpu *vcpu;
+ vcpu = kvm_arch_vcpu_create(kvm, id);
+ if (IS_ERR(vcpu))
+ return vcpu;
+
+ kref_init(&vcpu->refcount);
+ return vcpu;
+}
+
/*
* Creates some virtual cpus. Good luck creating more than one.
*/
@@ -1654,7 +1681,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
int r;
struct kvm_vcpu *vcpu, *v;
- vcpu = kvm_arch_vcpu_create(kvm, id);
+ vcpu = kvm_vcpu_create(kvm, id);
if (IS_ERR(vcpu))
return PTR_ERR(vcpu);
--
1.7.4.4
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [Qemu-devel] [PATCH 1/2] kvm: make vcpu life cycle separated from kvm instance
2011-11-25 2:35 ` [Qemu-devel] [PATCH 1/2] kvm: make vcpu life cycle separated from kvm instance Liu Ping Fan
@ 2011-11-27 10:36 ` Avi Kivity
0 siblings, 0 replies; 19+ messages in thread
From: Avi Kivity @ 2011-11-27 10:36 UTC (permalink / raw)
To: Liu Ping Fan
Cc: aliguori, Liu Ping Fan, kvm, qemu-devel, linux-kernel, ryanh,
jan.kiszka
On 11/25/2011 04:35 AM, Liu Ping Fan wrote:
> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>
> Currently, vcpu can be destructed only when kvm instance destroyed.
> Change this to vcpu as a refer to kvm, and then vcpu MUST and CAN be
> destroyed before kvm's destroy. Qemu will take advantage of this to
> exit the vcpu thread if the thread is no longer in use by guest.
>
> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> ---
> arch/x86/kvm/x86.c | 28 ++++++++--------------------
> include/linux/kvm_host.h | 2 ++
> virt/kvm/kvm_main.c | 31 +++++++++++++++++++++++++++++--
> 3 files changed, 39 insertions(+), 22 deletions(-)
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index c38efd7..ea2315a 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -6560,27 +6560,16 @@ static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
> vcpu_put(vcpu);
> }
>
> -static void kvm_free_vcpus(struct kvm *kvm)
> +void kvm_arch_vcpu_zap(struct kref *ref)
> {
> - unsigned int i;
> - struct kvm_vcpu *vcpu;
> -
> - /*
> - * Unpin any mmu pages first.
> - */
> - kvm_for_each_vcpu(i, vcpu, kvm) {
> - kvm_clear_async_pf_completion_queue(vcpu);
> - kvm_unload_vcpu_mmu(vcpu);
> - }
> - kvm_for_each_vcpu(i, vcpu, kvm)
> - kvm_arch_vcpu_free(vcpu);
> -
> - mutex_lock(&kvm->lock);
> - for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
> - kvm->vcpus[i] = NULL;
> + struct kvm_vcpu *vcpu = container_of(ref, struct kvm_vcpu, refcount);
> + struct kvm *kvm = vcpu->kvm;
>
> - atomic_set(&kvm->online_vcpus, 0);
> - mutex_unlock(&kvm->lock);
> + printk(KERN_INFO "%s, zap vcpu:0x%x\n", __func__, vcpu->vcpu_id);
> + kvm_clear_async_pf_completion_queue(vcpu);
> + kvm_unload_vcpu_mmu(vcpu);
> + kvm_arch_vcpu_free(vcpu);
> + kvm_put_kvm(kvm);
> }
>
> void kvm_arch_sync_events(struct kvm *kvm)
> @@ -6594,7 +6583,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
> kvm_iommu_unmap_guest(kvm);
> kfree(kvm->arch.vpic);
> kfree(kvm->arch.vioapic);
> - kvm_free_vcpus(kvm);
> if (kvm->arch.apic_access_page)
> put_page(kvm->arch.apic_access_page);
> if (kvm->arch.ept_identity_pagetable)
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index d526231..fe35078 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -113,6 +113,7 @@ enum {
>
> struct kvm_vcpu {
> struct kvm *kvm;
> + struct kref refcount;
> #ifdef CONFIG_PREEMPT_NOTIFIERS
> struct preempt_notifier preempt_notifier;
> #endif
> @@ -460,6 +461,7 @@ void kvm_arch_exit(void);
> int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu);
> void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu);
>
> +void kvm_arch_vcpu_zap(struct kref *ref);
> void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu);
> void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
> void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index d9cfb78..f166bc8 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -580,6 +580,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
> kvm_arch_free_vm(kvm);
> hardware_disable_all();
> mmdrop(mm);
> + printk(KERN_INFO "%s finished\n", __func__);
> }
>
> void kvm_get_kvm(struct kvm *kvm)
> @@ -1503,6 +1504,16 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
> mark_page_dirty_in_slot(kvm, memslot, gfn);
> }
>
> +void kvm_vcpu_get(struct kvm_vcpu *vcpu)
> +{
> + kref_get(&vcpu->refcount);
> +}
> +
> +void kvm_vcpu_put(struct kvm_vcpu *vcpu)
> +{
> + kref_put(&vcpu->refcount, kvm_arch_vcpu_zap);
> +}
> +
> /*
> * The vCPU has executed a HLT instruction with in-kernel mode enabled.
> */
> @@ -1623,8 +1634,13 @@ static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma)
> static int kvm_vcpu_release(struct inode *inode, struct file *filp)
> {
> struct kvm_vcpu *vcpu = filp->private_data;
> + struct kvm *kvm = vcpu->kvm;
>
> - kvm_put_kvm(vcpu->kvm);
> + filp->private_data = NULL;
> + mutex_lock(&kvm->lock);
> + atomic_sub(1, &kvm->online_vcpus);
> + mutex_unlock(&kvm->lock);
> + kvm_vcpu_put(vcpu);
> return 0;
> }
>
> @@ -1646,6 +1662,17 @@ static int create_vcpu_fd(struct kvm_vcpu *vcpu)
> return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR);
> }
>
> +static struct kvm_vcpu *kvm_vcpu_create(struct kvm *kvm, u32 id)
> +{
> + struct kvm_vcpu *vcpu;
> + vcpu = kvm_arch_vcpu_create(kvm, id);
> + if (IS_ERR(vcpu))
> + return vcpu;
> +
> + kref_init(&vcpu->refcount);
> + return vcpu;
> +}
> +
> /*
> * Creates some virtual cpus. Good luck creating more than one.
> */
> @@ -1654,7 +1681,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
> int r;
> struct kvm_vcpu *vcpu, *v;
>
> - vcpu = kvm_arch_vcpu_create(kvm, id);
> + vcpu = kvm_vcpu_create(kvm, id);
> if (IS_ERR(vcpu))
> return PTR_ERR(vcpu);
>
I don't think this is sufficient to actually remove a vcpu from the vcpu
table. It may be referred to from other vcpus in the local APIC code.
Practically the only thing that can accomplish this without a
substantial effort is rcu.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [Qemu-devel] [PATCH 0] A series patches for kvm&qemu to enable vcpu destruction in kvm
2011-11-25 2:35 [Qemu-devel] [PATCH 0] A series patches for kvm&qemu to enable vcpu destruction in kvm Liu Ping Fan
2011-11-25 2:35 ` [Qemu-devel] [PATCH 1/2] kvm: make vcpu life cycle separated from kvm instance Liu Ping Fan
@ 2011-11-25 17:54 ` Jan Kiszka
2011-11-27 3:07 ` Liu ping fan
2011-11-27 2:42 ` [Qemu-devel] [PATCH 2/2] kvm: exit to userspace with reason KVM_EXIT_VCPU_DEAD Liu Ping Fan
` (6 subsequent siblings)
8 siblings, 1 reply; 19+ messages in thread
From: Jan Kiszka @ 2011-11-25 17:54 UTC (permalink / raw)
To: Liu Ping Fan; +Cc: aliguori, kvm, qemu-devel, linux-kernel, ryanh, avi
[-- Attachment #1: Type: text/plain, Size: 2029 bytes --]
On 2011-11-25 00:35, Liu Ping Fan wrote:
> A series of patches from kvm, qemu to guest. These patches will finally enable vcpu destruction in kvm instance and let vcpu thread exit in qemu.
>
> Currently, the vcpu online feature enables the dynamical creation of vcpu and vcpu thread, while the offline feature can not destruct the vcpu and let vcpu thread exit, it just halt in kvm. Because currently, the vcpu will only be destructed when kvm instance is destroyed. We can
> change vcpu as an refer of kvm instance, and then vcpu's destruction MUST and CAN come before kvm's destruction.
>
> These patches use guest driver to notify the CPU_DEAD event to qemu, and later qemu asks kvm to release the dead vcpu and finally exit the
> thread.
> The usage is:
> qemu$cpu_set n online
> qemu$cpu_set n zap ------------ This will destroy the vcpu-n in kvm and let vcpu thread exit
> OR
> qemu$cpu_set n offline --------- This will just block vcpu-n in kvm
>
> Any comment and suggestion are welcome.
The cpu_set command will probably not make it to QEMU upstream
(device_add/delete is the way to go - IMHO). So I would refrain from
adding anything to qemu-kvm at this point anyway. Also, what would be
the advantage of 'zap' from user perspective?
>
>
> Patches include:
> |-- guest
> | `-- 0001-virtio-add-a-pci-driver-to-notify-host-the-CPU_DEAD-.patch
> |-- kvm
> | |-- 0001-kvm-make-vcpu-life-cycle-separated-from-kvm-instance.patch
> | `-- 0002-kvm-exit-to-userspace-with-reason-KVM_EXIT_VCPU_DEAD.patch
> `-- qemu
> |-- 0001-Add-cpu_phyid_to_cpu-to-map-cpu-phyid-to-CPUState.patch
> |-- 0002-Add-cpu_free-to-support-arch-related-CPUState-releas.patch
> |-- 0003-Introduce-a-pci-device-cpustate-to-get-CPU_DEAD-even.patch
> |-- 0004-Release-vcpu-and-finally-exit-vcpu-thread-safely.patch
> `-- 0005-tmp-patches-for-linux-header-files.patch
>
I only found kvm patch 0001 so far. Something probably went wrong with
your postings.
Jan
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 262 bytes --]
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [Qemu-devel] [PATCH 0] A series patches for kvm&qemu to enable vcpu destruction in kvm
2011-11-25 17:54 ` [Qemu-devel] [PATCH 0] A series patches for kvm&qemu to enable vcpu destruction in kvm Jan Kiszka
@ 2011-11-27 3:07 ` Liu ping fan
0 siblings, 0 replies; 19+ messages in thread
From: Liu ping fan @ 2011-11-27 3:07 UTC (permalink / raw)
To: Jan Kiszka; +Cc: aliguori, kvm, qemu-devel, linux-kernel, ryanh, avi
On Sat, Nov 26, 2011 at 1:54 AM, Jan Kiszka <jan.kiszka@web.de> wrote:
> On 2011-11-25 00:35, Liu Ping Fan wrote:
>> A series of patches from kvm, qemu to guest. These patches will finally enable vcpu destruction in kvm instance and let vcpu thread exit in qemu.
>>
>> Currently, the vcpu online feature enables the dynamical creation of vcpu and vcpu thread, while the offline feature can not destruct the vcpu and let vcpu thread exit, it just halt in kvm. Because currently, the vcpu will only be destructed when kvm instance is destroyed. We can
>> change vcpu as an refer of kvm instance, and then vcpu's destruction MUST and CAN come before kvm's destruction.
>>
>> These patches use guest driver to notify the CPU_DEAD event to qemu, and later qemu asks kvm to release the dead vcpu and finally exit the
>> thread.
>> The usage is:
>> qemu$cpu_set n online
>> qemu$cpu_set n zap ------------ This will destroy the vcpu-n in kvm and let vcpu thread exit
>> OR
>> qemu$cpu_set n offline --------- This will just block vcpu-n in kvm
>>
>> Any comment and suggestion are welcome.
>
> The cpu_set command will probably not make it to QEMU upstream
> (device_add/delete is the way to go - IMHO). So I would refrain from
> adding anything to qemu-kvm at this point anyway.
>
Ok, I will see more details in device_add/delete.
> Also, what would be the advantage of 'zap' from user perspective?
>
Suppose we increase one user's cpu utilization by creating more
threads for them (of course, task_group is another choice), and later we
decide to reclaim that utilization from this user, so we remove some of
the vcpus from this user's guest OS. In the current code, the related vcpu
structures are not released in the kernel, and are wasted.
From another viewpoint, if we can dynamically create the vcpu & vcpu
thread, we had better have the ability to dynamically destroy them.
>>
>>
>> Patches include:
>> |-- guest
>> | `-- 0001-virtio-add-a-pci-driver-to-notify-host-the-CPU_DEAD-.patch
>> |-- kvm
>> | |-- 0001-kvm-make-vcpu-life-cycle-separated-from-kvm-instance.patch
>> | `-- 0002-kvm-exit-to-userspace-with-reason-KVM_EXIT_VCPU_DEAD.patch
>> `-- qemu
>> |-- 0001-Add-cpu_phyid_to_cpu-to-map-cpu-phyid-to-CPUState.patch
>> |-- 0002-Add-cpu_free-to-support-arch-related-CPUState-releas.patch
>> |-- 0003-Introduce-a-pci-device-cpustate-to-get-CPU_DEAD-even.patch
>> |-- 0004-Release-vcpu-and-finally-exit-vcpu-thread-safely.patch
>> `-- 0005-tmp-patches-for-linux-header-files.patch
>>
>
> I only found kvm patch 0001 so far. Something probably went wrong with
> your postings.
>
Sorry, I have resent them; please re-fetch them.
Thanks and regards,
ping fan
> Jan
>
>
^ permalink raw reply [flat|nested] 19+ messages in thread
* [Qemu-devel] [PATCH 2/2] kvm: exit to userspace with reason KVM_EXIT_VCPU_DEAD
2011-11-25 2:35 [Qemu-devel] [PATCH 0] A series patches for kvm&qemu to enable vcpu destruction in kvm Liu Ping Fan
2011-11-25 2:35 ` [Qemu-devel] [PATCH 1/2] kvm: make vcpu life cycle separated from kvm instance Liu Ping Fan
2011-11-25 17:54 ` [Qemu-devel] [PATCH 0] A series patches for kvm&qemu to enable vcpu destruction in kvm Jan Kiszka
@ 2011-11-27 2:42 ` Liu Ping Fan
2011-11-27 10:36 ` Avi Kivity
2011-11-27 2:45 ` [Qemu-devel] [PATCH 1/5] QEMU Add cpu_phyid_to_cpu() to map cpu phyid to CPUState Liu Ping Fan
` (5 subsequent siblings)
8 siblings, 1 reply; 19+ messages in thread
From: Liu Ping Fan @ 2011-11-27 2:42 UTC (permalink / raw)
To: kvm, qemu-devel; +Cc: aliguori, ryanh, jan.kiszka, linux-kernel, avi
From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
The vcpu can be safely released when
--1. the guest tells us that the vcpu is not needed any longer.
--2. the vcpu hits its last instruction, _halt_.
If both conditions are satisfied, kvm exits to userspace with
exit reason KVM_EXIT_VCPU_DEAD, so the user thread can exit safely.
Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
---
arch/x86/kvm/x86.c | 16 ++++++++++++++++
include/linux/kvm.h | 11 +++++++++++
include/linux/kvm_host.h | 1 +
3 files changed, 28 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ea2315a..7948eaf 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5825,11 +5825,27 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
!vcpu->arch.apf.halted)
r = vcpu_enter_guest(vcpu);
else {
+retry:
+ if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) {
+ /*1st check whether guest notify CPU_DEAD*/
+ if (vcpu->state == KVM_VCPU_STATE_DYING) {
+ vcpu->state = KVM_VCPU_STATE_DEAD;
+ vcpu->run->exit_reason = KVM_EXIT_VCPU_DEAD;
+ break;
+ }
+ }
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
kvm_vcpu_block(vcpu);
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
if (kvm_check_request(KVM_REQ_UNHALT, vcpu))
{
+ switch (vcpu->state) {
+ case KVM_VCPU_STATE_DYING:
+ r = 1;
+ goto retry;
+ default:
+ break;
+ }
switch(vcpu->arch.mp_state) {
case KVM_MP_STATE_HALTED:
vcpu->arch.mp_state =
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index c3892fc..d5ff3f7 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -162,6 +162,7 @@ struct kvm_pit_config {
#define KVM_EXIT_INTERNAL_ERROR 17
#define KVM_EXIT_OSI 18
#define KVM_EXIT_PAPR_HCALL 19
+#define KVM_EXIT_VCPU_DEAD 20
/* For KVM_EXIT_INTERNAL_ERROR */
#define KVM_INTERNAL_ERROR_EMULATION 1
@@ -334,6 +335,12 @@ struct kvm_signal_mask {
__u8 sigset[0];
};
+/*for KVM_VCPU_SET_STATE */
+struct kvm_vcpu_state {
+ int vcpu_id;
+ int state;
+};
+
/* for KVM_TPR_ACCESS_REPORTING */
struct kvm_tpr_access_ctl {
__u32 enabled;
@@ -354,6 +361,9 @@ struct kvm_vapic_addr {
#define KVM_MP_STATE_HALTED 3
#define KVM_MP_STATE_SIPI_RECEIVED 4
+#define KVM_VCPU_STATE_DYING 1
+#define KVM_VCPU_STATE_DEAD 2
+
struct kvm_mp_state {
__u32 mp_state;
};
@@ -762,6 +772,7 @@ struct kvm_clock_data {
#define KVM_CREATE_SPAPR_TCE _IOW(KVMIO, 0xa8, struct kvm_create_spapr_tce)
/* Available with KVM_CAP_RMA */
#define KVM_ALLOCATE_RMA _IOR(KVMIO, 0xa9, struct kvm_allocate_rma)
+#define KVM_SETSTATE_VCPU _IOW(KVMIO, 0xaa, struct kvm_vcpu_state)
#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index fe35078..6fdf927 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -114,6 +114,7 @@ enum {
struct kvm_vcpu {
struct kvm *kvm;
struct kref refcount;
+ int state;
#ifdef CONFIG_PREEMPT_NOTIFIERS
struct preempt_notifier preempt_notifier;
#endif
--
1.7.4.4
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [Qemu-devel] [PATCH 2/2] kvm: exit to userspace with reason KVM_EXIT_VCPU_DEAD
2011-11-27 2:42 ` [Qemu-devel] [PATCH 2/2] kvm: exit to userspace with reason KVM_EXIT_VCPU_DEAD Liu Ping Fan
@ 2011-11-27 10:36 ` Avi Kivity
2011-11-27 10:50 ` Gleb Natapov
0 siblings, 1 reply; 19+ messages in thread
From: Avi Kivity @ 2011-11-27 10:36 UTC (permalink / raw)
To: Liu Ping Fan; +Cc: aliguori, kvm, qemu-devel, linux-kernel, ryanh, jan.kiszka
On 11/27/2011 04:42 AM, Liu Ping Fan wrote:
> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>
> The vcpu can be safely released when
> --1.guest tells us that the vcpu is not needed any longer.
> --2.vcpu hits the last instruction _halt_
>
> If both of the conditions are satisfied, kvm exits to userspace
> with the reason vcpu dead. So the user thread can exit safely.
>
>
Seems to be completely unnecessary. If you want to exit from the vcpu
thread, send it a signal.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [Qemu-devel] [PATCH 2/2] kvm: exit to userspace with reason KVM_EXIT_VCPU_DEAD
2011-11-27 10:36 ` Avi Kivity
@ 2011-11-27 10:50 ` Gleb Natapov
2011-11-28 7:16 ` Liu ping fan
0 siblings, 1 reply; 19+ messages in thread
From: Gleb Natapov @ 2011-11-27 10:50 UTC (permalink / raw)
To: Avi Kivity
Cc: aliguori, kvm, linux-kernel, Liu Ping Fan, qemu-devel, ryanh,
jan.kiszka
On Sun, Nov 27, 2011 at 12:36:55PM +0200, Avi Kivity wrote:
> On 11/27/2011 04:42 AM, Liu Ping Fan wrote:
> > From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> >
> > The vcpu can be safely released when
> > --1.guest tells us that the vcpu is not needed any longer.
> > --2.vcpu hits the last instruction _halt_
> >
> > If both of the conditions are satisfied, kvm exits to userspace
> > with the reason vcpu dead. So the user thread can exit safely.
> >
> >
>
> Seems to be completely unnecessary. If you want to exit from the vcpu
> thread, send it a signal.
>
Also if guest "tells us that the vcpu is not needed any longer" (via
ACPI I presume) and vcpu actually doing something critical instead of
sitting in 1:hlt; jmp 1b loop then it is guest's problem if it stops
working after vcpu destruction.
--
Gleb.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [Qemu-devel] [PATCH 2/2] kvm: exit to userspace with reason KVM_EXIT_VCPU_DEAD
2011-11-27 10:50 ` Gleb Natapov
@ 2011-11-28 7:16 ` Liu ping fan
2011-11-28 8:46 ` Gleb Natapov
0 siblings, 1 reply; 19+ messages in thread
From: Liu ping fan @ 2011-11-28 7:16 UTC (permalink / raw)
To: Avi Kivity, Gleb Natapov
Cc: aliguori, kvm, qemu-devel, linux-kernel, ryanh, jan.kiszka
On Sun, Nov 27, 2011 at 6:50 PM, Gleb Natapov <gleb@redhat.com> wrote:
> On Sun, Nov 27, 2011 at 12:36:55PM +0200, Avi Kivity wrote:
>> On 11/27/2011 04:42 AM, Liu Ping Fan wrote:
>> > From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>> >
>> > The vcpu can be safely released when
>> > --1.guest tells us that the vcpu is not needed any longer.
>> > --2.vcpu hits the last instruction _halt_
>> >
>> > If both of the conditions are satisfied, kvm exits to userspace
>> > with the reason vcpu dead. So the user thread can exit safely.
>> >
>> >
>>
>> Seems to be completely unnecessary. If you want to exit from the vcpu
>> thread, send it a signal.
>>
Hi Avi and Gleb,
First, I wanted to make sure my assumption is right, so I can grab
your meaning more clearly -:). Could you elaborate it for me, thanks.
I had thought that when a vcpu is being removed from a guest, kvm must
satisfy the following conditions to remove the vcpu safely:
--1. The tasks on the vcpu in the GUEST have already been migrated to other
vcpus and ONLY the idle_task is left ---- CPU_DEAD is the checkpoint.
--2. We must wait for the idle task to hit native_halt() in the GUEST; from
that time on, this vcpu is not needed even by the idle_task. In KVM, the vcpu
thread will finally sit in "kvm_vcpu_block(vcpu);"
We CAN NOT assume an ordering between the two conditions because they come
from different threads. Am I right?
And here come my questions:
--1. I think the signal will make vcpu_run exit to userspace, but does the
current code in kvm or in qemu allow the vcpu thread to finally call
"kernel/exit.c : void do_exit(long code)"?
--2. If we get the CPU_DEAD event and then send a signal to the vcpu thread,
can we be sure that the thread is already sitting in "kvm_vcpu_block(vcpu);"?
Thanks and regards,
ping fan
> Also if guest "tells us that the vcpu is not needed any longer" (via
> ACPI I presume) and vcpu actually doing something critical instead of
> sitting in 1:hlt; jmp 1b loop then it is guest's problem if it stops
> working after vcpu destruction.
>
> --
> Gleb.
>
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [Qemu-devel] [PATCH 2/2] kvm: exit to userspace with reason KVM_EXIT_VCPU_DEAD
2011-11-28 7:16 ` Liu ping fan
@ 2011-11-28 8:46 ` Gleb Natapov
0 siblings, 0 replies; 19+ messages in thread
From: Gleb Natapov @ 2011-11-28 8:46 UTC (permalink / raw)
To: Liu ping fan
Cc: aliguori, kvm, linux-kernel, qemu-devel, ryanh, jan.kiszka,
Avi Kivity
On Mon, Nov 28, 2011 at 03:16:01PM +0800, Liu ping fan wrote:
> On Sun, Nov 27, 2011 at 6:50 PM, Gleb Natapov <gleb@redhat.com> wrote:
> > On Sun, Nov 27, 2011 at 12:36:55PM +0200, Avi Kivity wrote:
> >> On 11/27/2011 04:42 AM, Liu Ping Fan wrote:
> >> > From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> >> >
> >> > The vcpu can be safely released when
> >> > --1.guest tells us that the vcpu is not needed any longer.
> >> > --2.vcpu hits the last instruction _halt_
> >> >
> >> > If both of the conditions are satisfied, kvm exits to userspace
> >> > with the reason vcpu dead. So the user thread can exit safely.
> >> >
> >> >
> >>
> >> Seems to be completely unnecessary. If you want to exit from the vcpu
> >> thread, send it a signal.
> >>
> Hi Avi and Gleb,
>
> First, I wanted to make sure my assumption is right, so I can grab
> your meaning more clearly -:). Could you elaborate it for me, thanks.
>
> I had thought that when a vcpu was being removed from guest, kvm must
> satisfy the following conditions to safely remove the vcpu:
> --1. The tasks on vcpu in GUEST have already been migrated to other
> vcpus and ONLY idle_task left ---- The CPU_DEAD is the checkpoint.
> --2. We must wait the idle task to hit native_halt() in GUEST, till
> that time, this vcpu is no needed even by idle_task. In KVM, the vcpu
> thread will finally sit on "kvm_vcpu_block(vcpu);"
> We CAN NOT suppose the sequence of the two condition because they come
> from different threads. Am I right?
>
No, KVM can remove a vcpu whenever it is told to do so (maybe not in the
middle of emulated io, though). It is the guest's responsibility to eject a cpu
only when it is safe to do so from the guest's point of view.
> And here comes my question,
> --1. I think the signal will make vcpu_run exit to user, but is it
> allow vcpu thread to finally call "kernel/exit.c : void do_exit(long
> code)" in current code in kvm or in qemu?
Yes. Why not?
> --2. If we got CPU_DEAD event, and then send a signal to vcpu thread,
> could we ensure that we have already sit on "kvm_vcpu_block(vcpu);"
CPU_DEAD event is internal to a guest (one of them). KVM does not care
about it. And to remove vcpu it does not have to sit in kvm_vcpu_block().
And actually since signal kicks vcpu thread out from kernel into userspace
you can be sure it is not sitting in kvm_vcpu_block().
>
> Thanks and regards,
> ping fan
>
> > Also if guest "tells us that the vcpu is not needed any longer" (via
> > ACPI I presume) and vcpu actually doing something critical instead of
> > sitting in 1:hlt; jmp 1b loop then it is guest's problem if it stops
> > working after vcpu destruction.
> >
>
>
> > --
> > Gleb.
> >
--
Gleb.
^ permalink raw reply [flat|nested] 19+ messages in thread
* [Qemu-devel] [PATCH 1/5] QEMU Add cpu_phyid_to_cpu() to map cpu phyid to CPUState
2011-11-25 2:35 [Qemu-devel] [PATCH 0] A series patches for kvm&qemu to enable vcpu destruction in kvm Liu Ping Fan
` (2 preceding siblings ...)
2011-11-27 2:42 ` [Qemu-devel] [PATCH 2/2] kvm: exit to userspace with reason KVM_EXIT_VCPU_DEAD Liu Ping Fan
@ 2011-11-27 2:45 ` Liu Ping Fan
2011-11-27 2:45 ` [Qemu-devel] [PATCH 2/5] QEMU Add cpu_free() to support arch related CPUState release Liu Ping Fan
` (4 subsequent siblings)
8 siblings, 0 replies; 19+ messages in thread
From: Liu Ping Fan @ 2011-11-27 2:45 UTC (permalink / raw)
To: kvm, qemu-devel; +Cc: aliguori, ryanh, jan.kiszka, linux-kernel, avi
From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
The guest's logical cpu ids differ from qemu's, but both use the
same physical id (phyid). When the guest reports a cpu's phyid, we need
to obtain the corresponding CPUState.
Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
---
target-i386/cpu.h | 2 ++
target-i386/helper.c | 12 ++++++++++++
2 files changed, 14 insertions(+), 0 deletions(-)
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index abdeb40..251e63b 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -767,6 +767,7 @@ typedef struct CPUX86State {
} CPUX86State;
CPUX86State *cpu_x86_init(const char *cpu_model);
+CPUX86State *x86_phyid_to_cpu(int phy_id);
int cpu_x86_exec(CPUX86State *s);
void cpu_x86_close(CPUX86State *s);
void x86_cpu_list (FILE *f, fprintf_function cpu_fprintf, const char *optarg);
@@ -1063,4 +1064,5 @@ void svm_check_intercept(CPUState *env1, uint32_t type);
uint32_t cpu_cc_compute_all(CPUState *env1, int op);
+#define cpu_phyid_to_cpu x86_phyid_to_cpu
#endif /* CPU_I386_H */
diff --git a/target-i386/helper.c b/target-i386/helper.c
index 5df40d4..e35a75e 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -1263,6 +1263,18 @@ CPUX86State *cpu_x86_init(const char *cpu_model)
return env;
}
+CPUX86State *x86_phyid_to_cpu(int phy_id)
+{
+ CPUX86State *env = first_cpu;
+ while (env) {
+ if (env->cpuid_apic_id == phy_id) {
+ break;
+ }
+ env = env->next_cpu;
+ }
+ return env;
+}
+
#if !defined(CONFIG_USER_ONLY)
void do_cpu_init(CPUState *env)
{
--
1.7.4.4
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [Qemu-devel] [PATCH 2/5] QEMU Add cpu_free() to support arch related CPUState release
2011-11-25 2:35 [Qemu-devel] [PATCH 0] A series patches for kvm&qemu to enable vcpu destruction in kvm Liu Ping Fan
` (3 preceding siblings ...)
2011-11-27 2:45 ` [Qemu-devel] [PATCH 1/5] QEMU Add cpu_phyid_to_cpu() to map cpu phyid to CPUState Liu Ping Fan
@ 2011-11-27 2:45 ` Liu Ping Fan
2011-11-27 2:45 ` [Qemu-devel] [PATCH 3/5] QEMU Introduce a pci device "cpustate" to get CPU_DEAD event in guest Liu Ping Fan
` (3 subsequent siblings)
8 siblings, 0 replies; 19+ messages in thread
From: Liu Ping Fan @ 2011-11-27 2:45 UTC (permalink / raw)
To: kvm, qemu-devel; +Cc: aliguori, ryanh, jan.kiszka, linux-kernel, avi
From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
When the vcpu thread exits, the CPUState must be freed first,
and how it is freed is arch-specific.
Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
---
hw/apic.c | 4 ++++
target-i386/cpu.h | 3 +++
target-i386/helper.c | 8 ++++++++
3 files changed, 15 insertions(+), 0 deletions(-)
diff --git a/hw/apic.c b/hw/apic.c
index 34fa1dd..6472045 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -511,6 +511,10 @@ static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask,
}
}
}
+void apic_free(DeviceState *d)
+{
+ qdev_free(d);
+}
void apic_init_reset(DeviceState *d)
{
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 251e63b..da07781 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -767,6 +767,7 @@ typedef struct CPUX86State {
} CPUX86State;
CPUX86State *cpu_x86_init(const char *cpu_model);
+void cpu_x86_free(CPUState *env);
CPUX86State *x86_phyid_to_cpu(int phy_id);
int cpu_x86_exec(CPUX86State *s);
void cpu_x86_close(CPUX86State *s);
@@ -950,6 +951,7 @@ CPUState *pc_new_cpu(const char *cpu_model);
#define cpu_list_id x86_cpu_list
#define cpudef_setup x86_cpudef_setup
+#define cpu_free cpu_x86_free
#define CPU_SAVE_VERSION 12
/* MMU modes definitions */
@@ -1064,5 +1066,6 @@ void svm_check_intercept(CPUState *env1, uint32_t type);
uint32_t cpu_cc_compute_all(CPUState *env1, int op);
+void apic_free(DeviceState *d);
#define cpu_phyid_to_cpu x86_phyid_to_cpu
#endif /* CPU_I386_H */
diff --git a/target-i386/helper.c b/target-i386/helper.c
index e35a75e..c9fadc3 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -1263,6 +1263,14 @@ CPUX86State *cpu_x86_init(const char *cpu_model)
return env;
}
+void cpu_x86_free(CPUState *env)
+{
+ if (env->apic_state != NULL) {
+ apic_free(env->apic_state);
+ }
+ g_free(env);
+}
+
CPUX86State *x86_phyid_to_cpu(int phy_id)
{
CPUX86State *env = first_cpu;
--
1.7.4.4
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [Qemu-devel] [PATCH 3/5] QEMU Introduce a pci device "cpustate" to get CPU_DEAD event in guest
2011-11-25 2:35 [Qemu-devel] [PATCH 0] A series patches for kvm&qemu to enable vcpu destruction in kvm Liu Ping Fan
` (4 preceding siblings ...)
2011-11-27 2:45 ` [Qemu-devel] [PATCH 2/5] QEMU Add cpu_free() to support arch related CPUState release Liu Ping Fan
@ 2011-11-27 2:45 ` Liu Ping Fan
2011-11-27 10:56 ` Gleb Natapov
2011-11-27 2:45 ` [Qemu-devel] [PATCH 4/5] QEMU Release vcpu and finally exit vcpu thread safely Liu Ping Fan
` (2 subsequent siblings)
8 siblings, 1 reply; 19+ messages in thread
From: Liu Ping Fan @ 2011-11-27 2:45 UTC (permalink / raw)
To: kvm, qemu-devel; +Cc: aliguori, ryanh, jan.kiszka, linux-kernel, avi
From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
The guest driver for this device can catch the vcpu dead event and notify
qemu through the device.
Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
---
Makefile.target | 1 +
hw/pc_piix.c | 1 +
hw/pci.c | 22 +++++++++++
hw/pci.h | 1 +
hw/pci_cpustate.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 130 insertions(+), 0 deletions(-)
create mode 100644 hw/pci_cpustate.c
diff --git a/Makefile.target b/Makefile.target
index 5607c6d..c822f9f 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -242,6 +242,7 @@ obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o
obj-i386-y += testdev.o
obj-i386-y += acpi.o acpi_piix4.o
obj-i386-y += icc_bus.o
+obj-i386-y += pci_cpustate.o
obj-i386-y += pcspk.o i8254.o
obj-i386-$(CONFIG_KVM_PIT) += i8254-kvm.o
diff --git a/hw/pc_piix.c b/hw/pc_piix.c
index 7c6f42d..090d7ba 100644
--- a/hw/pc_piix.c
+++ b/hw/pc_piix.c
@@ -199,6 +199,7 @@ static void pc_init1(MemoryRegion *system_memory,
pci_nic_init_nofail(nd, "rtl8139", NULL);
}
+ pc_cpustate_init(NULL);
ide_drive_get(hd, MAX_IDE_BUS);
if (pci_enabled) {
PCIDevice *dev;
diff --git a/hw/pci.c b/hw/pci.c
index 5c87a62..74a8975 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -1663,6 +1663,28 @@ PCIDevice *pci_nic_init(NICInfo *nd, const char *default_model,
return pci_dev;
}
+PCIDevice *pc_cpustate_init(const char *default_devaddr)
+{
+ const char *devaddr = default_devaddr;
+ PCIBus *bus;
+ int devfn;
+ PCIDevice *pci_dev;
+ DeviceState *dev;
+ bus = pci_get_bus_devfn(&devfn, devaddr);
+ if (!bus) {
+ error_report("Invalid PCI device address %s for device %s",
+ devaddr, "pcimmstub");
+ return NULL;
+ }
+
+ pci_dev = pci_create(bus, devfn, "cpustate");
+ dev = &pci_dev->qdev;
+ if (qdev_init(dev) < 0) {
+ return NULL;
+ }
+ return pci_dev;
+}
+
PCIDevice *pci_nic_init_nofail(NICInfo *nd, const char *default_model,
const char *default_devaddr)
{
diff --git a/hw/pci.h b/hw/pci.h
index 071a044..bbaa013 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -279,6 +279,7 @@ PCIDevice *pci_nic_init(NICInfo *nd, const char *default_model,
const char *default_devaddr);
PCIDevice *pci_nic_init_nofail(NICInfo *nd, const char *default_model,
const char *default_devaddr);
+PCIDevice *pc_cpustate_init(const char *default_devaddr);
int pci_bus_num(PCIBus *s);
void pci_for_each_device(PCIBus *bus, int bus_num, void (*fn)(PCIBus *bus, PCIDevice *d));
PCIBus *pci_find_root_bus(int domain);
diff --git a/hw/pci_cpustate.c b/hw/pci_cpustate.c
new file mode 100644
index 0000000..fd31a1f
--- /dev/null
+++ b/hw/pci_cpustate.c
@@ -0,0 +1,105 @@
+/* pci_cpustate.c
+ * emulate a pci device to get guest os CPU_DEAD event
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+#include <zlib.h>
+#include "hw.h"
+#include "pci.h"
+#include "qemu-timer.h"
+#include "net.h"
+#include "loader.h"
+#include "sysemu.h"
+#include "iov.h"
+
+#define PCI_DEVICE_ID_CPUSTATE 0x1010
+#define CPUSTATE_REGS_SIZE 0x1000
+
+typedef struct VcpuState VcpuState;
+
+struct VcpuState {
+ PCIDevice dev;
+ MemoryRegion mmio;
+ int mmio_io_addr;
+ int mmio_index;
+ uint32_t cpuid;
+ uint32_t cpu_state;
+};
+
+static const VMStateDescription vmstate_cpustate = {
+ .name = "cpustate",
+ .version_id = 1,
+ .minimum_version_id = 0,
+ .fields = (VMStateField[]) {
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+static void
+cpustate_mmio_write(void *opaque, target_phys_addr_t addr, uint64_t val,
+ unsigned size)
+{
+}
+
+static uint64_t
+cpustate_mmio_read(void *opaque, target_phys_addr_t addr, unsigned size)
+{
+ return 0;
+}
+
+static const MemoryRegionOps cpustate_ops = {
+ .read = cpustate_mmio_read,
+ .write = cpustate_mmio_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static int pci_cpustate_init(PCIDevice *dev)
+{
+ uint8_t *pci_cfg = dev->config;
+ VcpuState *s = DO_UPCAST(VcpuState, dev, dev);
+ memory_region_init_io(&s->mmio, &cpustate_ops, s, "cpustate",
+ CPUSTATE_REGS_SIZE);
+ pci_cfg[PCI_INTERRUPT_PIN] = 1;
+ /* I/O handler for memory-mapped I/O */
+ pci_register_bar(&s->dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->mmio);
+ return 0;
+}
+
+static int pci_cpustate_exit(PCIDevice *dev)
+{
+ return 0;
+}
+
+static PCIDeviceInfo cpustate_info = {
+ .qdev.name = "cpustate",
+ .qdev.size = sizeof(VcpuState),
+ .qdev.vmsd = &vmstate_cpustate,
+ .init = pci_cpustate_init,
+ .exit = pci_cpustate_exit,
+ .vendor_id = PCI_VENDOR_ID_IBM,
+ .device_id = PCI_DEVICE_ID_CPUSTATE,
+ .revision = 0x10,
+ .class_id = PCI_CLASS_SYSTEM_OTHER,
+ .qdev.props = (Property[]) {
+ DEFINE_PROP_END_OF_LIST(),
+ }
+};
+
+static void cpustate_register_devices(void)
+{
+ pci_qdev_register(&cpustate_info);
+}
+device_init(cpustate_register_devices)
--
1.7.4.4
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [Qemu-devel] [PATCH 3/5] QEMU Introduce a pci device "cpustate" to get CPU_DEAD event in guest
2011-11-27 2:45 ` [Qemu-devel] [PATCH 3/5] QEMU Introduce a pci device "cpustate" to get CPU_DEAD event in guest Liu Ping Fan
@ 2011-11-27 10:56 ` Gleb Natapov
0 siblings, 0 replies; 19+ messages in thread
From: Gleb Natapov @ 2011-11-27 10:56 UTC (permalink / raw)
To: Liu Ping Fan
Cc: aliguori, kvm, qemu-devel, linux-kernel, ryanh, jan.kiszka, avi
On Sun, Nov 27, 2011 at 10:45:35AM +0800, Liu Ping Fan wrote:
> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>
> This device's driver in guest can get vcpu dead event and notify
> qemu through the device.
>
This should be done through an ACPI device. Look at how PCI hotplug works
in hw/acpi_piix4.c.
> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> ---
> Makefile.target | 1 +
> hw/pc_piix.c | 1 +
> hw/pci.c | 22 +++++++++++
> hw/pci.h | 1 +
> hw/pci_cpustate.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> 5 files changed, 130 insertions(+), 0 deletions(-)
> create mode 100644 hw/pci_cpustate.c
>
> diff --git a/Makefile.target b/Makefile.target
> index 5607c6d..c822f9f 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -242,6 +242,7 @@ obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o
> obj-i386-y += testdev.o
> obj-i386-y += acpi.o acpi_piix4.o
> obj-i386-y += icc_bus.o
> +obj-i386-y += pci_cpustate.o
>
> obj-i386-y += pcspk.o i8254.o
> obj-i386-$(CONFIG_KVM_PIT) += i8254-kvm.o
> diff --git a/hw/pc_piix.c b/hw/pc_piix.c
> index 7c6f42d..090d7ba 100644
> --- a/hw/pc_piix.c
> +++ b/hw/pc_piix.c
> @@ -199,6 +199,7 @@ static void pc_init1(MemoryRegion *system_memory,
> pci_nic_init_nofail(nd, "rtl8139", NULL);
> }
>
> + pc_cpustate_init(NULL);
> ide_drive_get(hd, MAX_IDE_BUS);
> if (pci_enabled) {
> PCIDevice *dev;
> diff --git a/hw/pci.c b/hw/pci.c
> index 5c87a62..74a8975 100644
> --- a/hw/pci.c
> +++ b/hw/pci.c
> @@ -1663,6 +1663,28 @@ PCIDevice *pci_nic_init(NICInfo *nd, const char *default_model,
> return pci_dev;
> }
>
> +PCIDevice *pc_cpustate_init(const char *default_devaddr)
> +{
> + const char *devaddr = default_devaddr;
> + PCIBus *bus;
> + int devfn;
> + PCIDevice *pci_dev;
> + DeviceState *dev;
> + bus = pci_get_bus_devfn(&devfn, devaddr);
> + if (!bus) {
> + error_report("Invalid PCI device address %s for device %s",
> + devaddr, "pcimmstub");
> + return NULL;
> + }
> +
> + pci_dev = pci_create(bus, devfn, "cpustate");
> + dev = &pci_dev->qdev;
> + if (qdev_init(dev) < 0) {
> + return NULL;
> + }
> + return pci_dev;
> +}
> +
> PCIDevice *pci_nic_init_nofail(NICInfo *nd, const char *default_model,
> const char *default_devaddr)
> {
> diff --git a/hw/pci.h b/hw/pci.h
> index 071a044..bbaa013 100644
> --- a/hw/pci.h
> +++ b/hw/pci.h
> @@ -279,6 +279,7 @@ PCIDevice *pci_nic_init(NICInfo *nd, const char *default_model,
> const char *default_devaddr);
> PCIDevice *pci_nic_init_nofail(NICInfo *nd, const char *default_model,
> const char *default_devaddr);
> +PCIDevice *pc_cpustate_init(const char *default_devaddr);
> int pci_bus_num(PCIBus *s);
> void pci_for_each_device(PCIBus *bus, int bus_num, void (*fn)(PCIBus *bus, PCIDevice *d));
> PCIBus *pci_find_root_bus(int domain);
> diff --git a/hw/pci_cpustate.c b/hw/pci_cpustate.c
> new file mode 100644
> index 0000000..fd31a1f
> --- /dev/null
> +++ b/hw/pci_cpustate.c
> @@ -0,0 +1,105 @@
> +/* pci_cpustate.c
> + * emulate a pci device to get guest os CPU_DEAD event
> + *
> + * Copyright IBM, Corp. 2011
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, see <http://www.gnu.org/licenses/>
> + */
> +#include <zlib.h>
> +#include "hw.h"
> +#include "pci.h"
> +#include "qemu-timer.h"
> +#include "net.h"
> +#include "loader.h"
> +#include "sysemu.h"
> +#include "iov.h"
> +
> +#define PCI_DEVICE_ID_CPUSTATE 0x1010
> +#define CPUSTATE_REGS_SIZE 0x1000
> +
> +typedef struct VcpuState VcpuState;
> +
> +struct VcpuState {
> + PCIDevice dev;
> + MemoryRegion mmio;
> + int mmio_io_addr;
> + int mmio_index;
> + uint32_t cpuid;
> + uint32_t cpu_state;
> +};
> +
> +static const VMStateDescription vmstate_cpustate = {
> + .name = "cpustate",
> + .version_id = 1,
> + .minimum_version_id = 0,
> + .fields = (VMStateField[]) {
> + VMSTATE_END_OF_LIST()
> + },
> +};
> +
> +static void
> +cpustate_mmio_write(void *opaque, target_phys_addr_t addr, uint64_t val,
> + unsigned size)
> +{
> +}
> +
> +static uint64_t
> +cpustate_mmio_read(void *opaque, target_phys_addr_t addr, unsigned size)
> +{
> + return 0;
> +}
> +
> +static const MemoryRegionOps cpustate_ops = {
> + .read = cpustate_mmio_read,
> + .write = cpustate_mmio_write,
> + .endianness = DEVICE_LITTLE_ENDIAN,
> +};
> +
> +static int pci_cpustate_init(PCIDevice *dev)
> +{
> + uint8_t *pci_cfg = dev->config;
> + VcpuState *s = DO_UPCAST(VcpuState, dev, dev);
> + memory_region_init_io(&s->mmio, &cpustate_ops, s, "cpustate",
> + CPUSTATE_REGS_SIZE);
> + pci_cfg[PCI_INTERRUPT_PIN] = 1;
> + /* I/O handler for memory-mapped I/O */
> + pci_register_bar(&s->dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->mmio);
> + return 0;
> +}
> +
> +static int pci_cpustate_exit(PCIDevice *dev)
> +{
> + return 0;
> +}
> +
> +static PCIDeviceInfo cpustate_info = {
> + .qdev.name = "cpustate",
> + .qdev.size = sizeof(VcpuState),
> + .qdev.vmsd = &vmstate_cpustate,
> + .init = pci_cpustate_init,
> + .exit = pci_cpustate_exit,
> + .vendor_id = PCI_VENDOR_ID_IBM,
> + .device_id = PCI_DEVICE_ID_CPUSTATE,
> + .revision = 0x10,
> + .class_id = PCI_CLASS_SYSTEM_OTHER,
> + .qdev.props = (Property[]) {
> + DEFINE_PROP_END_OF_LIST(),
> + }
> +};
> +
> +static void cpustate_register_devices(void)
> +{
> + pci_qdev_register(&cpustate_info);
> +}
> +device_init(cpustate_register_devices)
> --
> 1.7.4.4
>
--
Gleb.
^ permalink raw reply [flat|nested] 19+ messages in thread
* [Qemu-devel] [PATCH 4/5] QEMU Release vcpu and finally exit vcpu thread safely
2011-11-25 2:35 [Qemu-devel] [PATCH 0] A series patches for kvm&qemu to enable vcpu destruction in kvm Liu Ping Fan
` (5 preceding siblings ...)
2011-11-27 2:45 ` [Qemu-devel] [PATCH 3/5] QEMU Introduce a pci device "cpustate" to get CPU_DEAD event in guest Liu Ping Fan
@ 2011-11-27 2:45 ` Liu Ping Fan
2011-11-29 5:37 ` ShaoHe Feng
2011-11-27 2:45 ` [Qemu-devel] [PATCH 5/5] QEMU tmp patches for linux-header files Liu Ping Fan
2011-11-27 2:47 ` [Qemu-devel] [PATCH] virtio: add a pci driver to notify host the CPU_DEAD event Liu Ping Fan
8 siblings, 1 reply; 19+ messages in thread
From: Liu Ping Fan @ 2011-11-27 2:45 UTC (permalink / raw)
To: kvm, qemu-devel; +Cc: aliguori, ryanh, jan.kiszka, linux-kernel, avi
From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
When the guest driver tells us that the vcpu is no longer needed,
qemu can release the vcpu and finally exit the vcpu thread.
Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
---
cpu-defs.h | 5 +++++
cpus.c | 21 +++++++++++++++++++++
hmp-commands.hx | 2 +-
hw/acpi_piix4.c | 19 ++++++++++++++++---
hw/pci_cpustate.c | 22 ++++++++++++++++++++++
kvm-all.c | 11 ++++++++++-
monitor.c | 12 +++++++-----
7 files changed, 82 insertions(+), 10 deletions(-)
diff --git a/cpu-defs.h b/cpu-defs.h
index db48a7a..cb69a07 100644
--- a/cpu-defs.h
+++ b/cpu-defs.h
@@ -153,6 +153,10 @@ typedef struct CPUWatchpoint {
QTAILQ_ENTRY(CPUWatchpoint) entry;
} CPUWatchpoint;
+#define CPU_STATE_RUNNING 0
+#define CPU_STATE_ZAPREQ 1
+#define CPU_STATE_ZAPPED 2
+
#define CPU_TEMP_BUF_NLONGS 128
#define CPU_COMMON \
struct TranslationBlock *current_tb; /* currently executing TB */ \
@@ -210,6 +214,7 @@ typedef struct CPUWatchpoint {
uint32_t created; \
uint32_t stop; /* Stop request */ \
uint32_t stopped; /* Artificially stopped */ \
+ uint32_t state; /*state indicator*/ \
struct QemuThread *thread; \
struct QemuCond *halt_cond; \
int thread_kicked; \
diff --git a/cpus.c b/cpus.c
index c996ac5..e479476 100644
--- a/cpus.c
+++ b/cpus.c
@@ -33,6 +33,7 @@
#include "qemu-thread.h"
#include "cpus.h"
+#include "cpu.h"
#ifndef _WIN32
#include "compatfd.h"
@@ -778,6 +779,7 @@ static void qemu_kvm_wait_io_event(CPUState *env)
static void *qemu_kvm_cpu_thread_fn(void *arg)
{
CPUState *env = arg;
+ CPUState *prev = NULL;
int r;
qemu_mutex_lock(&qemu_global_mutex);
@@ -808,10 +810,29 @@ static void *qemu_kvm_cpu_thread_fn(void *arg)
cpu_handle_guest_debug(env);
}
}
+ /*1,try to zap; 2, can safe to destroy*/
+ if (env->state == CPU_STATE_ZAPPED) {
+ goto zapout;
+ }
qemu_kvm_wait_io_event(env);
}
return NULL;
+zapout:
+ prev = first_cpu;
+ if (prev == env) {
+ first_cpu = env->next_cpu;
+ } else {
+ while (prev != NULL) {
+ if (prev->next_cpu == env) {
+ break;
+ }
+ prev = prev->next_cpu;
+ }
+ prev->next_cpu = env->next_cpu;
+ }
+ cpu_free(env);
+ return NULL;
}
static void *qemu_tcg_cpu_thread_fn(void *arg)
diff --git a/hmp-commands.hx b/hmp-commands.hx
index ed5c9b9..b642a34 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1218,7 +1218,7 @@ ETEXI
{
.name = "cpu_set",
.args_type = "cpu:i,state:s",
- .params = "cpu [online|offline]",
+ .params = "cpu [online|offline|zap]",
.help = "change cpu state",
.mhandler.cmd = do_cpu_set_nr,
},
diff --git a/hw/acpi_piix4.c b/hw/acpi_piix4.c
index f585226..1f3ed06 100644
--- a/hw/acpi_piix4.c
+++ b/hw/acpi_piix4.c
@@ -605,10 +605,23 @@ void qemu_system_cpu_hot_add(int cpu, int state)
env->cpuid_apic_id = cpu;
}
- if (state)
- enable_processor(s, cpu);
- else
+ switch (state) {
+ /*zap vcpu*/
+ case 0:
+ env = qemu_get_cpu(cpu);
+ /*1 means try to zap*/
+ env->state = CPU_STATE_ZAPREQ;
+ disable_processor(s, cpu);
+ break;
+ /*offline vcpu*/
+ case 1:
disable_processor(s, cpu);
+ break;
+ /*onine vcpu*/
+ case 2:
+ enable_processor(s, cpu);
+ break;
+ }
pm_update_sci(s);
}
diff --git a/hw/pci_cpustate.c b/hw/pci_cpustate.c
index fd31a1f..18402cf 100644
--- a/hw/pci_cpustate.c
+++ b/hw/pci_cpustate.c
@@ -24,6 +24,8 @@
#include "loader.h"
#include "sysemu.h"
#include "iov.h"
+#include <linux/kvm.h>
+#include "kvm.h"
#define PCI_DEVICE_ID_CPUSTATE 0x1010
#define CPUSTATE_REGS_SIZE 0x1000
@@ -52,6 +54,26 @@ static void
cpustate_mmio_write(void *opaque, target_phys_addr_t addr, uint64_t val,
unsigned size)
{
+ CPUState *env;
+ int ret;
+ struct kvm_vcpu_state state;
+ switch (addr) {
+ /*apic id*/
+ case 0:
+ env = cpu_phyid_to_cpu(val);
+ if (env != NULL) {
+ if (env->state == CPU_STATE_ZAPREQ) {
+ state.vcpu_id = env->cpu_index;
+ state.state = 1;
+ ret = kvm_vm_ioctl(env->kvm_state, KVM_SETSTATE_VCPU, &state);
+ }
+ }
+ break;
+ case 4:
+ break;
+ default:
+ break;
+ }
}
static uint64_t
diff --git a/kvm-all.c b/kvm-all.c
index 8dd354e..b295262 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -64,6 +64,7 @@ struct KVMState
int vmfd;
int coalesced_mmio;
struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
+ long mmap_size;
int broken_set_mem_region;
int migration_log;
int vcpu_events;
@@ -228,7 +229,7 @@ int kvm_init_vcpu(CPUState *env)
DPRINTF("KVM_GET_VCPU_MMAP_SIZE failed\n");
goto err;
}
-
+ env->kvm_state->mmap_size = mmap_size;
env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
env->kvm_fd, 0);
if (env->kvm_run == MAP_FAILED) {
@@ -1026,6 +1027,13 @@ int kvm_cpu_exec(CPUState *env)
case KVM_EXIT_INTERNAL_ERROR:
ret = kvm_handle_internal_error(env, run);
break;
+ case KVM_EXIT_VCPU_DEAD:
+ ret = munmap(env->kvm_run, env->kvm_state->mmap_size);
+ ret = close(env->kvm_fd);
+ env->state = CPU_STATE_ZAPPED;
+ qemu_mutex_unlock_iothread();
+ goto out;
+ break;
default:
DPRINTF("kvm_arch_handle_exit\n");
ret = kvm_arch_handle_exit(env, run);
@@ -1033,6 +1041,7 @@ int kvm_cpu_exec(CPUState *env)
}
} while (ret == 0);
+out:
if (ret < 0) {
cpu_dump_state(env, stderr, fprintf, CPU_DUMP_CODE);
vm_stop(VMSTOP_PANIC);
diff --git a/monitor.c b/monitor.c
index cb485bf..51c8c52 100644
--- a/monitor.c
+++ b/monitor.c
@@ -971,11 +971,13 @@ static void do_cpu_set_nr(Monitor *mon, const QDict *qdict)
status = qdict_get_str(qdict, "state");
value = qdict_get_int(qdict, "cpu");
- if (!strcmp(status, "online"))
- state = 1;
- else if (!strcmp(status, "offline"))
- state = 0;
- else {
+ if (!strcmp(status, "online")) {
+ state = 2;
+ } else if (!strcmp(status, "offline")) {
+ state = 1;
+ } else if (!strcmp(status, "zap")) {
+ state = 0;
+ } else {
monitor_printf(mon, "invalid status: %s\n", status);
return;
}
--
1.7.4.4
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [Qemu-devel] [PATCH 4/5] QEMU Release vcpu and finally exit vcpu thread safely
2011-11-27 2:45 ` [Qemu-devel] [PATCH 4/5] QEMU Release vcpu and finally exit vcpu thread safely Liu Ping Fan
@ 2011-11-29 5:37 ` ShaoHe Feng
0 siblings, 0 replies; 19+ messages in thread
From: ShaoHe Feng @ 2011-11-29 5:37 UTC (permalink / raw)
To: Liu Ping Fan; +Cc: qemu-devel
Ping Fan,
IMO, shouldn't QEMU also release the vcpu and finally exit the vcpu thread
safely in tcg mode?
---
cpus.c | 21 ++++++++++++++++++++-
1 files changed, 20 insertions(+), 1 deletions(-)
diff --git a/cpus.c b/cpus.c
index 82530c4..cc52327 100644
--- a/cpus.c
+++ b/cpus.c
@@ -753,7 +753,7 @@ static void tcg_exec_all(void);
static void *qemu_tcg_cpu_thread_fn(void *arg)
{
CPUState *env = arg;
-
+ CPUState *prev = NULL;
qemu_tcg_init_cpu_signals();
qemu_thread_get_self(env->thread);
@@ -775,10 +775,29 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
if (use_icount && qemu_clock_deadline(vm_clock) <= 0) {
qemu_notify_event();
}
+ /*1,try to zap; 2, can safe to destroy*/
+ if (env->state == CPU_STATE_ZAPPED) {
+ goto zapout;
+ }
qemu_tcg_wait_io_event();
}
return NULL;
+zapout:
+ prev = first_cpu;
+ if (prev == env) {
+ first_cpu = env->next_cpu;
+ } else {
+ while (prev != NULL) {
+ if (prev->next_cpu == env) {
+ break;
+ }
+ prev = prev->next_cpu;
+ }
+ prev->next_cpu = env->next_cpu;
+ }
+ cpu_free(env);
+ return NULL;
}
static void qemu_cpu_kick_thread(CPUState *env)
--
1.7.5.4
, Liu Ping Fan wrote:
> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>
> When guest driver tell us that the vcpu is no longer needed,
> qemu can release the vcpu and finally exit vcpu thread
>
> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> ---
> cpu-defs.h | 5 +++++
> cpus.c | 21 +++++++++++++++++++++
> hmp-commands.hx | 2 +-
> hw/acpi_piix4.c | 19 ++++++++++++++++---
> hw/pci_cpustate.c | 22 ++++++++++++++++++++++
> kvm-all.c | 11 ++++++++++-
> monitor.c | 12 +++++++-----
> 7 files changed, 82 insertions(+), 10 deletions(-)
>
> diff --git a/cpu-defs.h b/cpu-defs.h
> index db48a7a..cb69a07 100644
> --- a/cpu-defs.h
> +++ b/cpu-defs.h
> @@ -153,6 +153,10 @@ typedef struct CPUWatchpoint {
> QTAILQ_ENTRY(CPUWatchpoint) entry;
> } CPUWatchpoint;
>
> +#define CPU_STATE_RUNNING 0
> +#define CPU_STATE_ZAPREQ 1
> +#define CPU_STATE_ZAPPED 2
> +
> #define CPU_TEMP_BUF_NLONGS 128
> #define CPU_COMMON \
> struct TranslationBlock *current_tb; /* currently executing TB */ \
> @@ -210,6 +214,7 @@ typedef struct CPUWatchpoint {
> uint32_t created; \
> uint32_t stop; /* Stop request */ \
> uint32_t stopped; /* Artificially stopped */ \
> + uint32_t state; /*state indicator*/ \
> struct QemuThread *thread; \
> struct QemuCond *halt_cond; \
> int thread_kicked; \
> diff --git a/cpus.c b/cpus.c
> index c996ac5..e479476 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -33,6 +33,7 @@
>
> #include "qemu-thread.h"
> #include "cpus.h"
> +#include "cpu.h"
>
> #ifndef _WIN32
> #include "compatfd.h"
> @@ -778,6 +779,7 @@ static void qemu_kvm_wait_io_event(CPUState *env)
> static void *qemu_kvm_cpu_thread_fn(void *arg)
> {
> CPUState *env = arg;
> + CPUState *prev = NULL;
> int r;
>
> qemu_mutex_lock(&qemu_global_mutex);
> @@ -808,10 +810,29 @@ static void *qemu_kvm_cpu_thread_fn(void *arg)
> cpu_handle_guest_debug(env);
> }
> }
> + /*1,try to zap; 2, can safe to destroy*/
> + if (env->state == CPU_STATE_ZAPPED) {
> + goto zapout;
> + }
> qemu_kvm_wait_io_event(env);
> }
>
> return NULL;
> +zapout:
> + prev = first_cpu;
> + if (prev == env) {
> + first_cpu = env->next_cpu;
> + } else {
> + while (prev != NULL) {
> + if (prev->next_cpu == env) {
> + break;
> + }
> + prev = prev->next_cpu;
> + }
> + prev->next_cpu = env->next_cpu;
> + }
> + cpu_free(env);
> + return NULL;
> }
>
> static void *qemu_tcg_cpu_thread_fn(void *arg)
> diff --git a/hmp-commands.hx b/hmp-commands.hx
> index ed5c9b9..b642a34 100644
> --- a/hmp-commands.hx
> +++ b/hmp-commands.hx
> @@ -1218,7 +1218,7 @@ ETEXI
> {
> .name = "cpu_set",
> .args_type = "cpu:i,state:s",
> - .params = "cpu [online|offline]",
> + .params = "cpu [online|offline|zap]",
> .help = "change cpu state",
> .mhandler.cmd = do_cpu_set_nr,
> },
> diff --git a/hw/acpi_piix4.c b/hw/acpi_piix4.c
> index f585226..1f3ed06 100644
> --- a/hw/acpi_piix4.c
> +++ b/hw/acpi_piix4.c
> @@ -605,10 +605,23 @@ void qemu_system_cpu_hot_add(int cpu, int state)
> env->cpuid_apic_id = cpu;
> }
>
> - if (state)
> - enable_processor(s, cpu);
> - else
> + switch (state) {
> + /*zap vcpu*/
> + case 0:
> + env = qemu_get_cpu(cpu);
> + /*1 means try to zap*/
> + env->state = CPU_STATE_ZAPREQ;
> + disable_processor(s, cpu);
> + break;
> + /*offline vcpu*/
> + case 1:
> disable_processor(s, cpu);
> + break;
> + /*onine vcpu*/
> + case 2:
> + enable_processor(s, cpu);
> + break;
> + }
>
> pm_update_sci(s);
> }
> diff --git a/hw/pci_cpustate.c b/hw/pci_cpustate.c
> index fd31a1f..18402cf 100644
> --- a/hw/pci_cpustate.c
> +++ b/hw/pci_cpustate.c
> @@ -24,6 +24,8 @@
> #include "loader.h"
> #include "sysemu.h"
> #include "iov.h"
> +#include <linux/kvm.h>
> +#include "kvm.h"
>
> #define PCI_DEVICE_ID_CPUSTATE 0x1010
> #define CPUSTATE_REGS_SIZE 0x1000
> @@ -52,6 +54,26 @@ static void
> cpustate_mmio_write(void *opaque, target_phys_addr_t addr, uint64_t val,
> unsigned size)
> {
> + CPUState *env;
> + int ret;
> + struct kvm_vcpu_state state;
> + switch (addr) {
> + /*apic id*/
> + case 0:
> + env = cpu_phyid_to_cpu(val);
> + if (env != NULL) {
> + if (env->state == CPU_STATE_ZAPREQ) {
> + state.vcpu_id = env->cpu_index;
> + state.state = 1;
> + ret = kvm_vm_ioctl(env->kvm_state, KVM_SETSTATE_VCPU, &state);
> + }
> + }
> + break;
> + case 4:
> + break;
> + default:
> + break;
> + }
> }
>
> static uint64_t
> diff --git a/kvm-all.c b/kvm-all.c
> index 8dd354e..b295262 100644
> --- a/kvm-all.c
> +++ b/kvm-all.c
> @@ -64,6 +64,7 @@ struct KVMState
> int vmfd;
> int coalesced_mmio;
> struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
> + long mmap_size;
> int broken_set_mem_region;
> int migration_log;
> int vcpu_events;
> @@ -228,7 +229,7 @@ int kvm_init_vcpu(CPUState *env)
> DPRINTF("KVM_GET_VCPU_MMAP_SIZE failed\n");
> goto err;
> }
> -
> + env->kvm_state->mmap_size = mmap_size;
> env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
> env->kvm_fd, 0);
> if (env->kvm_run == MAP_FAILED) {
> @@ -1026,6 +1027,13 @@ int kvm_cpu_exec(CPUState *env)
> case KVM_EXIT_INTERNAL_ERROR:
> ret = kvm_handle_internal_error(env, run);
> break;
> + case KVM_EXIT_VCPU_DEAD:
> + ret = munmap(env->kvm_run, env->kvm_state->mmap_size);
> + ret = close(env->kvm_fd);
> + env->state = CPU_STATE_ZAPPED;
> + qemu_mutex_unlock_iothread();
> + goto out;
> + break;
> default:
> DPRINTF("kvm_arch_handle_exit\n");
> ret = kvm_arch_handle_exit(env, run);
> @@ -1033,6 +1041,7 @@ int kvm_cpu_exec(CPUState *env)
> }
> } while (ret == 0);
>
> +out:
> if (ret < 0) {
> cpu_dump_state(env, stderr, fprintf, CPU_DUMP_CODE);
> vm_stop(VMSTOP_PANIC);
> diff --git a/monitor.c b/monitor.c
> index cb485bf..51c8c52 100644
> --- a/monitor.c
> +++ b/monitor.c
> @@ -971,11 +971,13 @@ static void do_cpu_set_nr(Monitor *mon, const QDict *qdict)
> status = qdict_get_str(qdict, "state");
> value = qdict_get_int(qdict, "cpu");
>
> - if (!strcmp(status, "online"))
> - state = 1;
> - else if (!strcmp(status, "offline"))
> - state = 0;
> - else {
> + if (!strcmp(status, "online")) {
> + state = 2;
> + } else if (!strcmp(status, "offline")) {
> + state = 1;
> + } else if (!strcmp(status, "zap")) {
> + state = 0;
> + } else {
> monitor_printf(mon, "invalid status: %s\n", status);
> return;
> }
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [Qemu-devel] [PATCH 5/5] QEMU tmp patches for linux-header files
2011-11-25 2:35 [Qemu-devel] [PATCH 0] A series patches for kvm&qemu to enable vcpu destruction in kvm Liu Ping Fan
` (6 preceding siblings ...)
2011-11-27 2:45 ` [Qemu-devel] [PATCH 4/5] QEMU Release vcpu and finally exit vcpu thread safely Liu Ping Fan
@ 2011-11-27 2:45 ` Liu Ping Fan
2011-11-27 2:47 ` [Qemu-devel] [PATCH] virtio: add a pci driver to notify host the CPU_DEAD event Liu Ping Fan
8 siblings, 0 replies; 19+ messages in thread
From: Liu Ping Fan @ 2011-11-27 2:45 UTC (permalink / raw)
To: kvm, qemu-devel; +Cc: aliguori, ryanh, jan.kiszka, linux-kernel, avi
From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
Temporary patch to let qemu compile. Normally the headers should be
copied from the kernel.
Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
---
kvm/include/linux/kvm.h | 9 ++++++++-
linux-headers/linux/kvm.h | 9 +++++++++
2 files changed, 17 insertions(+), 1 deletions(-)
diff --git a/kvm/include/linux/kvm.h b/kvm/include/linux/kvm.h
index e46729e..a7fe019 100644
--- a/kvm/include/linux/kvm.h
+++ b/kvm/include/linux/kvm.h
@@ -162,6 +162,7 @@ struct kvm_pit_config {
#define KVM_EXIT_INTERNAL_ERROR 17
#define KVM_EXIT_OSI 18
+#define KVM_EXIT_VCPU_DEAD 20
/* For KVM_EXIT_INTERNAL_ERROR */
#define KVM_INTERNAL_ERROR_EMULATION 1
#define KVM_INTERNAL_ERROR_SIMUL_EX 2
@@ -328,6 +329,12 @@ struct kvm_signal_mask {
__u8 sigset[0];
};
+/*for KVM_VCPU_SET_STATE */
+struct kvm_vcpu_state {
+ int vcpu_id;
+ int state;
+};
+
/* for KVM_TPR_ACCESS_REPORTING */
struct kvm_tpr_access_ctl {
__u32 enabled;
@@ -726,7 +733,7 @@ struct kvm_clock_data {
/* Available with KVM_CAP_XCRS */
#define KVM_GET_XCRS _IOR(KVMIO, 0xa6, struct kvm_xcrs)
#define KVM_SET_XCRS _IOW(KVMIO, 0xa7, struct kvm_xcrs)
-
+#define KVM_SETSTATE_VCPU _IOW(KVMIO, 0xaa, struct kvm_vcpu_state)
#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
struct kvm_assigned_pci_dev {
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index fc63b73..4422456 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -161,6 +161,8 @@ struct kvm_pit_config {
#define KVM_EXIT_NMI 16
#define KVM_EXIT_INTERNAL_ERROR 17
#define KVM_EXIT_OSI 18
+#define KVM_EXIT_VCPU_DEAD 20
+
/* For KVM_EXIT_INTERNAL_ERROR */
#define KVM_INTERNAL_ERROR_EMULATION 1
@@ -328,6 +330,12 @@ struct kvm_signal_mask {
__u8 sigset[0];
};
+/*for KVM_VCPU_SET_STATE */
+struct kvm_vcpu_state {
+ int vcpu_id;
+ int state;
+};
+
/* for KVM_TPR_ACCESS_REPORTING */
struct kvm_tpr_access_ctl {
__u32 enabled;
@@ -747,6 +755,7 @@ struct kvm_clock_data {
#define KVM_GET_XCRS _IOR(KVMIO, 0xa6, struct kvm_xcrs)
#define KVM_SET_XCRS _IOW(KVMIO, 0xa7, struct kvm_xcrs)
+#define KVM_SETSTATE_VCPU _IOW(KVMIO, 0xaa, struct kvm_vcpu_state)
#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
struct kvm_assigned_pci_dev {
--
1.7.4.4
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [Qemu-devel] [PATCH] virtio: add a pci driver to notify host the CPU_DEAD event
2011-11-25 2:35 [Qemu-devel] [PATCH 0] A series patches for kvm&qemu to enable vcpu destruction in kvm Liu Ping Fan
` (7 preceding siblings ...)
2011-11-27 2:45 ` [Qemu-devel] [PATCH 5/5] QEMU tmp patches for linux-header files Liu Ping Fan
@ 2011-11-27 2:47 ` Liu Ping Fan
2011-11-27 11:10 ` Gleb Natapov
8 siblings, 1 reply; 19+ messages in thread
From: Liu Ping Fan @ 2011-11-27 2:47 UTC (permalink / raw)
To: kvm, qemu-devel; +Cc: aliguori, ryanh, jan.kiszka, linux-kernel, avi
From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
A driver for the qemu device "cpustate". This driver catches the guest
CPU_DEAD event and notifies the host.
Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
---
drivers/virtio/Kconfig | 6 ++
drivers/virtio/Makefile | 1 +
drivers/virtio/cpustate_stub.c | 154 ++++++++++++++++++++++++++++++++++++++++
3 files changed, 161 insertions(+), 0 deletions(-)
create mode 100644 drivers/virtio/cpustate_stub.c
diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
index 816ed08..96ad253 100644
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -46,4 +46,10 @@ config VIRTIO_BALLOON
If unsure, say N.
+config VIRTIO_CPUSTATE
+	tristate "Driver to notify host the cpu dead event (EXPERIMENTAL)"
+	depends on EXPERIMENTAL && PCI && X86
+	---help---
+	  This driver notifies the host when a guest CPU goes dead
+	  (CPU_DEAD hotplug event) through the qemu "cpustate" PCI device.
+	  It needs PCI for the device itself and x86 because the CPU is
+	  reported by its APIC id.
+
endmenu
diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile
index 5a4c63c..06a5ecf 100644
--- a/drivers/virtio/Makefile
+++ b/drivers/virtio/Makefile
@@ -3,3 +3,4 @@ obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o
obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o
obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o
+obj-$(CONFIG_VIRTIO_CPUSTATE) += cpustate_stub.o
diff --git a/drivers/virtio/cpustate_stub.c b/drivers/virtio/cpustate_stub.c
new file mode 100644
index 0000000..614da9d
--- /dev/null
+++ b/drivers/virtio/cpustate_stub.c
@@ -0,0 +1,154 @@
+/*
+ * PCI driver for the qemu cpustate device. It notifies the host of CPU_DEAD
+ * events in the guest.
+ *
+ * Copyright IBM Corp. 2011
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/cpu.h>
+#include <linux/pci.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+
+/* PCI device ID of the cpustate device; matched in pci_cpustate_id_table. */
+#define PCI_DEVICE_ID_CPUSTATE 0x1010
+
+/*
+ * Register layout of the cpustate device, as mapped from PCI resource 0.
+ * Fixed-width types are used because this describes device registers, not
+ * ordinary kernel data.
+ */
+struct cpustate_stub_regs {
+	u32 cpu_phyid;	/* receives the APIC id of the CPU being reported */
+	u32 event;	/* receives the hotplug event code (CPU_DEAD) */
+};
+
+/*
+ * Driver state: the CPU event waiting to be reported and the mapped device
+ * registers. cpu_dead_callback() fills cpu_phyid/event and schedules work;
+ * cpustate_work() then copies both values into regs.
+ */
+struct cpustate_stub {
+ struct work_struct work;
+
+ unsigned int cpu_phyid;
+ unsigned int event;
+
+ struct cpustate_stub_regs __iomem *regs;
+};
+
+/* The single driver instance; allocated in cpustate_probe(). */
+static struct cpustate_stub *agent;
+
+/*
+ * Work handler: report the event recorded by cpu_dead_callback() to the
+ * host by writing it into the device registers.
+ *
+ * regs is an __iomem mapping, so it is written through writel() instead of
+ * being dereferenced directly; writel() also provides the ordering that a
+ * bare compiler barrier() cannot. The CPU id is written before the event.
+ * NOTE(review): presumably the event write is what the device model acts
+ * on -- confirm against the qemu side.
+ */
+static void cpustate_work(struct work_struct *work)
+{
+	struct cpustate_stub *stub = container_of(work,
+						  struct cpustate_stub, work);
+
+	printk(KERN_INFO "%s,cpu_phyid=0x%x, event=0x%x\n",
+		__func__, stub->cpu_phyid, stub->event);
+	writel(stub->cpu_phyid, &stub->regs->cpu_phyid);
+	writel(stub->event, &stub->regs->event);
+}
+
+/*
+ * CPU hotplug notifier callback.
+ *
+ * Only CPU_DEAD is handled: the APIC id of the CPU passed in @data and the
+ * event code are stored in the global agent, and the work item is scheduled
+ * so that cpustate_work() performs the register writes. Every other action
+ * is ignored. Always returns NOTIFY_DONE.
+ */
+static int cpu_dead_callback(struct notifier_block *b, unsigned long action,
+			     void *data)
+{
+	unsigned long dead_cpu = (unsigned long)data;
+
+	if (action != CPU_DEAD)
+		return NOTIFY_DONE;
+
+	agent->cpu_phyid = per_cpu(x86_cpu_to_apicid, dead_cpu);
+	agent->event = CPU_DEAD;
+	schedule_work(&agent->work);
+
+	return NOTIFY_DONE;
+}
+
+/*
+ * Notifier block that routes CPU hotplug events to cpu_dead_callback().
+ * It is registered in cpustate_probe() and unregistered in
+ * cpustate_remove().
+ */
+static struct notifier_block __cpuinitdata cpu_dead_notifier = {
+ .notifier_call = cpu_dead_callback,
+ .priority = 10,
+};
+
+/*
+ * Probe the cpustate PCI device: allocate the driver state, enable the
+ * device, claim and map PCI resource 0, then register the CPU hotplug
+ * notifier.
+ *
+ * Returns 0 on success or a negative errno. Every failure path releases
+ * whatever was acquired before it and leaves agent set to NULL, so a
+ * failed probe never reports success and never leaks the allocation.
+ */
+static int __devinit cpustate_probe(struct pci_dev *pci_dev,
+				    const struct pci_device_id *id)
+{
+	int ret;
+
+	agent = kzalloc(sizeof(*agent), GFP_KERNEL);
+	if (agent == NULL) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	/* enable the device */
+	ret = pci_enable_device(pci_dev);
+	if (ret) {
+		printk(KERN_WARNING "%s, pci_enable_device fail,ret=0x%x\n",
+			__func__, ret);
+		goto out_free;
+	}
+
+	ret = pci_request_regions(pci_dev, "cpustate");
+	if (ret) {
+		printk(KERN_WARNING "%s, pci_request_regions fail,ret=0x%x\n",
+			__func__, ret);
+		goto out_enable_device;
+	}
+
+	/* pci_resource_len() is end - start + 1, i.e. the whole region. */
+	agent->regs = ioremap(pci_resource_start(pci_dev, 0),
+			      pci_resource_len(pci_dev, 0));
+	if (agent->regs == NULL) {
+		printk(KERN_WARNING "%s, ioremap fail\n", __func__);
+		/* ret is still 0 from pci_request_regions(); set an error. */
+		ret = -ENOMEM;
+		goto out_req_regions;
+	}
+
+	/* The work item must be ready before the notifier can schedule it. */
+	INIT_WORK(&agent->work, cpustate_work);
+	ret = register_cpu_notifier(&cpu_dead_notifier);
+	if (ret) {
+		printk(KERN_WARNING "%s, register_cpu_notifier fail,ret=0x%x\n",
+			__func__, ret);
+		goto out_unmap;
+	}
+	printk(KERN_INFO "%s, success\n", __func__);
+	return 0;
+
+out_unmap:
+	iounmap(agent->regs);
+out_req_regions:
+	pci_release_regions(pci_dev);
+out_enable_device:
+	pci_disable_device(pci_dev);
+out_free:
+	kfree(agent);
+	agent = NULL;
+fail:
+	printk(KERN_WARNING "%s fail\n", __func__);
+	return ret;
+}
+
+/*
+ * Tear down everything cpustate_probe() set up, in reverse order.
+ *
+ * The notifier is unregistered first so no new work can be scheduled, then
+ * any pending or running work is cancelled before the registers it writes
+ * to are unmapped and the driver state is freed.
+ */
+static void __devexit cpustate_remove(struct pci_dev *pci_dev)
+{
+	/* No driver state means there is nothing to undo. */
+	if (agent == NULL)
+		return;
+
+	unregister_cpu_notifier(&cpu_dead_notifier);
+	cancel_work_sync(&agent->work);
+	iounmap(agent->regs);
+	pci_release_regions(pci_dev);
+	pci_disable_device(pci_dev);
+	kfree(agent);
+	agent = NULL;
+}
+
+/*
+ * Devices this driver binds to: PCI_VENDOR_ID_IBM with
+ * PCI_DEVICE_ID_CPUSTATE. The initializer is positional; the two
+ * PCI_ANY_ID entries are presumably the subsystem vendor/device wildcards,
+ * followed by class, class mask and driver data.
+ *
+ * NOTE(review): the previous comment here read "Qumranet donated their
+ * vendor ID for devices 0x1000 thru 0x10FF", but the entry uses
+ * PCI_VENDOR_ID_IBM rather than the Qumranet vendor ID -- confirm which
+ * vendor ID the qemu device model exposes. The class mask is 0, so the
+ * PCI_CLASS_SYSTEM_OTHER value presumably does not take part in matching.
+ */
+static DEFINE_PCI_DEVICE_TABLE(pci_cpustate_id_table) = {
+ { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_CPUSTATE,
+ PCI_ANY_ID, PCI_ANY_ID,
+ PCI_CLASS_SYSTEM_OTHER, 0,
+ 0 },
+ { 0 },
+};
+MODULE_DEVICE_TABLE(pci, pci_cpustate_id_table);
+
+/*
+ * PCI driver description: binds the devices listed in
+ * pci_cpustate_id_table to cpustate_probe() and cpustate_remove().
+ */
+static struct pci_driver pci_cpustate_driver = {
+ .name = "cpustate",
+ .id_table = pci_cpustate_id_table,
+ .probe = cpustate_probe,
+ .remove = __devexit_p(cpustate_remove),
+};
+
+/*
+ * Module entry point: register the cpustate PCI driver and return the
+ * result of pci_register_driver() unchanged.
+ */
+static int __init pci_cpustate_init(void)
+{
+ return pci_register_driver(&pci_cpustate_driver);
+}
+module_init(pci_cpustate_init);
+
+/* Module exit point: unregister the cpustate PCI driver. */
+static void __exit pci_cpustate_exit(void)
+{
+ pci_unregister_driver(&pci_cpustate_driver);
+}
+module_exit(pci_cpustate_exit);
+MODULE_DESCRIPTION("cpustate");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1");
--
1.7.4.4
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [Qemu-devel] [PATCH] virtio: add a pci driver to notify host the CPU_DEAD event
2011-11-27 2:47 ` [Qemu-devel] [PATCH] virtio: add a pci driver to notify host the CPU_DEAD event Liu Ping Fan
@ 2011-11-27 11:10 ` Gleb Natapov
0 siblings, 0 replies; 19+ messages in thread
From: Gleb Natapov @ 2011-11-27 11:10 UTC (permalink / raw)
To: Liu Ping Fan
Cc: aliguori, kvm, qemu-devel, linux-kernel, ryanh, jan.kiszka, avi
On Sun, Nov 27, 2011 at 10:47:43AM +0800, Liu Ping Fan wrote:
> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>
> A driver for qemu device "cpustate". This driver catch the guest
> CPU_DEAD event, and notify host.
>
And if you do eject properly via ACPI this driver is replaced by 3 lines
of ACPI code and works with older guests too.
> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> ---
> drivers/virtio/Kconfig | 6 ++
> drivers/virtio/Makefile | 1 +
> drivers/virtio/cpustate_stub.c | 154 ++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 161 insertions(+), 0 deletions(-)
> create mode 100644 drivers/virtio/cpustate_stub.c
>
> diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
> index 816ed08..96ad253 100644
> --- a/drivers/virtio/Kconfig
> +++ b/drivers/virtio/Kconfig
> @@ -46,4 +46,10 @@ config VIRTIO_BALLOON
>
> If unsure, say N.
>
> + config VIRTIO_CPUSTATE
> + tristate "Driver to notify host the cpu dead event (EXPERIMENTAL)"
> + depends on EXPERIMENTAL
> + ---help---
> + This drivers provides support to notify host the cpu dead event.
> +
> endmenu
> diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile
> index 5a4c63c..06a5ecf 100644
> --- a/drivers/virtio/Makefile
> +++ b/drivers/virtio/Makefile
> @@ -3,3 +3,4 @@ obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o
> obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o
> obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
> obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o
> +obj-$(VIRTIO_CPUSTATE) += cpustate_stub.o
> diff --git a/drivers/virtio/cpustate_stub.c b/drivers/virtio/cpustate_stub.c
> new file mode 100644
> index 0000000..614da9d
> --- /dev/null
> +++ b/drivers/virtio/cpustate_stub.c
> @@ -0,0 +1,154 @@
> +/*
> + * PCI driver for qemu cpustate device. It notifies host the CPU_DEAD event
> + * in guest.
> + *
> + * Copyright IBM Corp. 2011
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +#include <linux/module.h>
> +#include <linux/list.h>
> +#include <linux/cpu.h>
> +#include <linux/pci.h>
> +#include <linux/slab.h>
> +#include <linux/interrupt.h>
> +#include <linux/spinlock.h>
> +
> +#define PCI_DEVICE_ID_CPUSTATE 0x1010
> +
> +struct cpustate_stub_regs {
> + unsigned int cpu_phyid;
> + unsigned int event;
> +};
> +
> +struct cpustate_stub {
> + struct work_struct work;
> +
> + unsigned int cpu_phyid;
> + unsigned int event;
> +
> + struct cpustate_stub_regs __iomem *regs;
> +};
> +
> +static struct cpustate_stub *agent;
> +
> +static void cpustate_work(struct work_struct *work)
> +{
> + struct cpustate_stub *stub = container_of(work,
> + struct cpustate_stub, work);
> + printk(KERN_INFO "%s,cpu_phyid=0x%x, event=0x%x\n",
> + __func__, stub->cpu_phyid, stub->event);
> + stub->regs->cpu_phyid = stub->cpu_phyid;
> + stub->regs->event = stub->event;
> + barrier();
> +}
> +
> +static int cpu_dead_callback(struct notifier_block *b, unsigned long action,
> + void *data)
> +{
> + unsigned long cpu = (unsigned long)data;
> + int cpu_phyid;
> + switch (action) {
> + case CPU_DEAD:{
> + cpu_phyid = per_cpu(x86_cpu_to_apicid, cpu);
> + agent->cpu_phyid = cpu_phyid;
> + agent->event = CPU_DEAD;
> + schedule_work(&agent->work);
> + break;
> + }
> + default:
> + break;
> + }
> + return NOTIFY_DONE;
> +}
> +
> +static struct notifier_block __cpuinitdata cpu_dead_notifier = {
> + .notifier_call = cpu_dead_callback,
> + .priority = 10,
> +};
> +
> +static int __devinit cpustate_probe(struct pci_dev *pci_dev,
> + const struct pci_device_id *id)
> +{
> + int ret = 0;
> + agent = kzalloc(sizeof(struct cpustate_stub), GFP_KERNEL);
> + if (agent == NULL) {
> + ret = -1;
> + goto fail;
> + }
> + /* enable the device */
> + ret = pci_enable_device(pci_dev);
> + if (ret) {
> + printk(KERN_WARNING "%s, pci_enable_device fail,ret=0x%x\n",
> + __func__, ret);
> + goto fail;
> + }
> +
> + ret = pci_request_regions(pci_dev, "cpustate");
> + if (ret) {
> + printk(KERN_WARNING "%s, pci_request_regions fail,ret=0x%x\n",
> + __func__, ret);
> + goto out_enable_device;
> + }
> +
> + agent->regs = ioremap(pci_dev->resource[0].start,
> + pci_dev->resource[0].end - pci_dev->resource[0].start);
> + if (agent->regs == NULL) {
> + printk(KERN_WARNING "%s, ioremap fail\n", __func__);
> + goto out_req_regions;
> + }
> +
> + INIT_WORK(&agent->work, cpustate_work);
> + register_cpu_notifier(&cpu_dead_notifier);
> + printk(KERN_INFO "%s, success\n", __func__);
> + return 0;
> +
> +out_req_regions:
> + pci_release_regions(pci_dev);
> +out_enable_device:
> + pci_disable_device(pci_dev);
> + kfree(agent);
> + agent = NULL;
> +fail:
> + printk(KERN_WARNING "%s fail\n", __func__);
> + return ret;
> +}
> +
> +static void __devexit cpustate_remove(struct pci_dev *pci_dev)
> +{
> + unregister_cpu_notifier(&cpu_dead_notifier);
> +}
> +
> +/* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */
> +static DEFINE_PCI_DEVICE_TABLE(pci_cpustate_id_table) = {
> + { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_CPUSTATE,
> + PCI_ANY_ID, PCI_ANY_ID,
> + PCI_CLASS_SYSTEM_OTHER, 0,
> + 0 },
> + { 0 },
> +};
> +MODULE_DEVICE_TABLE(pci, pci_cpustate_id_table);
> +
> +static struct pci_driver pci_cpustate_driver = {
> + .name = "cpustate",
> + .id_table = pci_cpustate_id_table,
> + .probe = cpustate_probe,
> + .remove = __devexit_p(cpustate_remove),
> +};
> +
> +static int __init pci_cpustate_init(void)
> +{
> + return pci_register_driver(&pci_cpustate_driver);
> +}
> +module_init(pci_cpustate_init);
> +
> +static void __exit pci_cpustate_exit(void)
> +{
> + pci_unregister_driver(&pci_cpustate_driver);
> +}
> +module_exit(pci_cpustate_exit);
> +MODULE_DESCRIPTION("cpustate");
> +MODULE_LICENSE("GPL");
> +MODULE_VERSION("1");
> --
> 1.7.4.4
>
--
Gleb.
^ permalink raw reply [flat|nested] 19+ messages in thread