All of lore.kernel.org
 help / color / mirror / Atom feed
From: Avi Kivity <avi@redhat.com>
To: Liu Ping Fan <kernelfans@gmail.com>
Cc: kvm@vger.kernel.org, linux-kernel@vger.kernel.org,
	aliguori@us.ibm.com, gleb@redhat.com, mtosatti@redhat.com,
	xiaoguangrong.eric@gmail.com, jan.kiszka@web.de,
	yoshikawa.takuya@oss.ntt.co.jp, Rik van Riel <riel@redhat.com>
Subject: Re: [PATCH v7] kvm: make vcpu life cycle separated from kvm instance
Date: Thu, 12 Jan 2012 14:37:35 +0200	[thread overview]
Message-ID: <4F0ED40F.7050700@redhat.com> (raw)
In-Reply-To: <1325904901-10317-1-git-send-email-kernelfans@gmail.com>

On 01/07/2012 04:55 AM, Liu Ping Fan wrote:
> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>
> Currently, vcpu will be destructed only after kvm instance is
> destroyed. This result to vcpu keep idle in kernel, but can not
> be freed when it is unplugged in guest.
>
> Change this to vcpu's destruction before kvm instance, so vcpu MUST

Must?

> and CAN be destroyed before kvm instance. By this way, we can remove
> vcpu when guest does not need it any longer.
>
> TODO: push changes to other archs besides x86.
>
> -Rename kvm_vcpu_zap to kvm_vcpu_destruct and so on.

kvm_vcpu_destroy.

>  
>  struct kvm_vcpu {
>  	struct kvm *kvm;
> +	struct list_head list;

vcpu_list_link, so it's clear this is not a head but a link, and so we
know which list it belongs to.

> -	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
> +	struct list_head vcpus;

This has the potential for a slight performance regression by bouncing
an extra cache line, but it's acceptable IMO.  We can always introduce
an apic ID -> vcpu hash table which improves things all around.

> |
> @@ -1593,11 +1598,9 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
>  {
>  	struct kvm *kvm = me->kvm;
>  	struct kvm_vcpu *vcpu;
> -	int last_boosted_vcpu = me->kvm->last_boosted_vcpu;
> -	int yielded = 0;
> -	int pass;
> -	int i;
> -
> +	struct task_struct *task = NULL;
> +	struct pid *pid;
> +	int pass, firststart, lastone, yielded, idx;

Avoid unrelated changes please.

> @@ -1605,15 +1608,26 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
>  	 * VCPU is holding the lock that we need and will release it.
>  	 * We approximate round-robin by starting at the last boosted VCPU.
>  	 */
> -	for (pass = 0; pass < 2 && !yielded; pass++) {
> -		kvm_for_each_vcpu(i, vcpu, kvm) {
> -			struct task_struct *task = NULL;
> -			struct pid *pid;
> -			if (!pass && i < last_boosted_vcpu) {
> -				i = last_boosted_vcpu;
> +	for (pass = 0, firststart = 0; pass < 2 && !yielded; pass++) {
> +
> +		idx = srcu_read_lock(&kvm->srcu);

Can move the lock to the top level.

> +		kvm_for_each_vcpu(vcpu, kvm) {
> +			if (kvm->last_boosted_vcpu_id < 0 && !pass) {
> +				pass = 1;
> +				break;
> +			}
> +			if (!pass && !firststart &&
> +			    vcpu->vcpu_id != kvm->last_boosted_vcpu_id) {
> +				continue;
> +			} else if (!pass && !firststart) {
> +				firststart = 1;
>  				continue;
> -			} else if (pass && i > last_boosted_vcpu)
> +			} else if (pass && !lastone) {
> +				if (vcpu->vcpu_id == kvm->last_boosted_vcpu_id)
> +					lastone = 1;
> +			} else if (pass && lastone)
>  				break;
> +

Seems like a large change.  Is this because the vcpu list is unordered? 
Maybe it's better to order it.

Rik?

>  			if (vcpu == me)
>  				continue;
>  			if (waitqueue_active(&vcpu->wq))
> @@ -1629,15 +1643,20 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
>  				put_task_struct(task);
>  				continue;
>  			}
> +
>  			if (yield_to(task, 1)) {
>  				put_task_struct(task);
> -				kvm->last_boosted_vcpu = i;
> +				mutex_lock(&kvm->lock);
> +				kvm->last_boosted_vcpu_id = vcpu->vcpu_id;
> +				mutex_unlock(&kvm->lock);

Why take the mutex?

> @@ -1673,11 +1692,30 @@ static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma)
>  	return 0;
>  }
>  
> +static void kvm_vcpu_destruct(struct kvm_vcpu *vcpu)
> +{
> +	kvm_arch_vcpu_destruct(vcpu);
> +}
> +
>  static int kvm_vcpu_release(struct inode *inode, struct file *filp)
>  {
>  	struct kvm_vcpu *vcpu = filp->private_data;
> +	struct kvm *kvm = vcpu->kvm;
> +	filp->private_data = NULL;
> +
> +	mutex_lock(&kvm->lock);
> +	list_del_rcu(&vcpu->list);
> +	atomic_dec(&kvm->online_vcpus);
> +	mutex_unlock(&kvm->lock);
> +	synchronize_srcu_expedited(&kvm->srcu);

Why _expedited?

Even better would be call_srcu() but it doesn't exist.

I think we can actually use regular RCU.  The only user that blocks is
kvm_vcpu_on_spin(), yes?  So we can convert the vcpu to a task using
get_pid_task(), then, outside the RCU lock, call yield_to().


>  
> -	kvm_put_kvm(vcpu->kvm);
> +	mutex_lock(&kvm->lock);
> +	if (kvm->last_boosted_vcpu_id == vcpu->vcpu_id)
> +		kvm->last_boosted_vcpu_id = -1;
> +	mutex_unlock(&kvm->lock);
> +
> +	/*vcpu is out of list,drop it safely*/
> +	kvm_vcpu_destruct(vcpu);

Can call kvm_arch_vcpu_destroy() directly.

> +static struct kvm_vcpu *kvm_vcpu_create(struct kvm *kvm, u32 id)
> +{
> +	struct kvm_vcpu *vcpu;
> +	vcpu = kvm_arch_vcpu_create(kvm, id);
> +	if (IS_ERR(vcpu))
> +		return vcpu;
> +	INIT_LIST_HEAD(&vcpu->list);

Really needed?

> +	return vcpu;
> +}

Just fold this into the caller.

> +
>  /*
>   * Creates some virtual cpus.  Good luck creating more than one.
>   */
>  static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
>  {
> -	int r;
> +	int r, idx;
>  	struct kvm_vcpu *vcpu, *v;
>  
> -	vcpu = kvm_arch_vcpu_create(kvm, id);
> +	vcpu = kvm_vcpu_create(kvm, id);
>  	if (IS_ERR(vcpu))
>  		return PTR_ERR(vcpu);
>  
> @@ -1723,13 +1771,15 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
>  		goto unlock_vcpu_destroy;
>  	}
>  
> -	kvm_for_each_vcpu(r, v, kvm)
> +	idx = srcu_read_lock(&kvm->srcu);
> +	kvm_for_each_vcpu(v, kvm) {
>  		if (v->vcpu_id == id) {
>  			r = -EEXIST;
> +			srcu_read_unlock(&kvm->srcu, idx);

Put that in the error path please (add a new label if needed).

>  			goto unlock_vcpu_destroy;

>  
> -	kvm->vcpus[atomic_read(&kvm->online_vcpus)] = vcpu;
> -	smp_wmb();
> +	/*Protected by kvm->lock*/

Spaces.

> +	list_add_rcu(&vcpu->list, &kvm->vcpus);
>  	atomic_inc(&kvm->online_vcpus);
 


-- 
error compiling committee.c: too many arguments to function


  reply	other threads:[~2012-01-12 12:37 UTC|newest]

Thread overview: 113+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-11-25  2:35 [PATCH 0] A series patches for kvm&qemu to enable vcpu destruction in kvm Liu Ping Fan
2011-11-25  2:35 ` [Qemu-devel] " Liu Ping Fan
2011-11-25  2:35 ` Liu Ping Fan
2011-11-25  2:35 ` [PATCH 1/2] kvm: make vcpu life cycle separated from kvm instance Liu Ping Fan
2011-11-25  2:35   ` [Qemu-devel] " Liu Ping Fan
2011-11-25  2:35   ` Liu Ping Fan
2011-11-27 10:36   ` Avi Kivity
2011-11-27 10:36     ` [Qemu-devel] " Avi Kivity
2011-11-27 10:36     ` Avi Kivity
2011-12-02  6:26     ` [PATCH] " Liu Ping Fan
2011-12-02 18:26       ` Jan Kiszka
2011-12-04 11:53         ` Liu ping fan
2011-12-04 12:10           ` Gleb Natapov
2011-12-05  5:39             ` Liu ping fan
2011-12-05  8:41               ` Gleb Natapov
2011-12-06  6:54                 ` Liu ping fan
2011-12-06  8:14                   ` Gleb Natapov
2011-12-04 10:23       ` Avi Kivity
2011-12-05  5:29         ` Liu ping fan
2011-12-05  5:29           ` Liu ping fan
2011-12-05  9:30           ` Avi Kivity
2011-12-05  9:42             ` Gleb Natapov
2011-12-05  9:58               ` Avi Kivity
2011-12-05 10:18                 ` Gleb Natapov
2011-12-05 10:22                   ` Avi Kivity
2011-12-05 10:40                     ` Gleb Natapov
2011-12-09  5:23       ` [PATCH V2] " Liu Ping Fan
2011-12-09 14:23         ` Gleb Natapov
2011-12-12  2:41           ` [PATCH v3] " Liu Ping Fan
2011-12-12 12:54             ` Gleb Natapov
2011-12-13  9:29               ` Liu ping fan
2011-12-13  9:47                 ` Gleb Natapov
2011-12-13 11:36             ` Marcelo Tosatti
2011-12-13 11:54               ` Gleb Natapov
2011-12-15  3:21               ` Liu ping fan
2011-12-15  4:28                 ` [PATCH v4] " Liu Ping Fan
2011-12-15  5:33                   ` Xiao Guangrong
2011-12-15  6:53                     ` Liu ping fan
2011-12-15  8:25                       ` Xiao Guangrong
2011-12-15  8:57                         ` Xiao Guangrong
2011-12-15  6:48                   ` Takuya Yoshikawa
2011-12-16  9:38                     ` Marcelo Tosatti
2011-12-17  3:57                     ` Liu ping fan
2011-12-19  1:16                       ` Takuya Yoshikawa
2011-12-15  9:10                   ` Gleb Natapov
2011-12-16  7:50                     ` Liu ping fan
2011-12-16  7:50                       ` Liu ping fan
2011-12-15  8:33                 ` [PATCH v3] " Gleb Natapov
2011-12-15  9:06                   ` Liu ping fan
2011-12-15  9:08                     ` Gleb Natapov
2011-12-17  3:19             ` [PATCH v5] " Liu Ping Fan
2011-12-26 11:09               ` Gleb Natapov
2011-12-26 11:17                 ` Avi Kivity
2011-12-26 11:21                   ` Gleb Natapov
2011-12-27  7:53                 ` Liu ping fan
2011-12-27  8:38               ` [PATCH v6] " Liu Ping Fan
2011-12-27 11:22                 ` Takuya Yoshikawa
2011-12-28  6:54                   ` Liu ping fan
2011-12-28  9:53                     ` Avi Kivity
2011-12-29 14:03                       ` Liu ping fan
2011-12-29 14:31                         ` Avi Kivity
2012-01-05  9:35                           ` Liu ping fan
2011-12-28 10:29                     ` Takuya Yoshikawa
2011-12-28  9:53                 ` Avi Kivity
2011-12-28  9:54                   ` Avi Kivity
2011-12-28 10:19                     ` Takuya Yoshikawa
2011-12-28 10:28                       ` Avi Kivity
2012-01-07  2:55               ` [PATCH v7] " Liu Ping Fan
2012-01-12 12:37                 ` Avi Kivity [this message]
2012-01-15 13:17                   ` Liu ping fan
2012-01-15 13:37                     ` Avi Kivity
2011-11-25 17:54 ` [PATCH 0] A series patches for kvm&qemu to enable vcpu destruction in kvm Jan Kiszka
2011-11-25 17:54   ` [Qemu-devel] " Jan Kiszka
2011-11-25 17:54   ` Jan Kiszka
2011-11-27  3:07   ` Liu ping fan
2011-11-27  3:07     ` [Qemu-devel] " Liu ping fan
2011-11-27  3:07     ` Liu ping fan
2011-11-27  2:42 ` [PATCH 2/2] kvm: exit to userspace with reason KVM_EXIT_VCPU_DEAD Liu Ping Fan
2011-11-27  2:42   ` [Qemu-devel] " Liu Ping Fan
2011-11-27  2:42   ` Liu Ping Fan
2011-11-27 10:36   ` Avi Kivity
2011-11-27 10:36     ` [Qemu-devel] " Avi Kivity
2011-11-27 10:36     ` Avi Kivity
2011-11-27 10:50     ` Gleb Natapov
2011-11-27 10:50       ` [Qemu-devel] " Gleb Natapov
2011-11-27 10:50       ` Gleb Natapov
2011-11-28  7:16       ` Liu ping fan
2011-11-28  8:46         ` Gleb Natapov
2011-11-28  8:46           ` Gleb Natapov
2011-11-27  2:45 ` [PATCH 1/5] QEMU Add cpu_phyid_to_cpu() to map cpu phyid to CPUState Liu Ping Fan
2011-11-27  2:45   ` [Qemu-devel] " Liu Ping Fan
2011-11-27  2:45   ` Liu Ping Fan
2011-11-27  2:45 ` [PATCH 2/5] QEMU Add cpu_free() to support arch related CPUState release Liu Ping Fan
2011-11-27  2:45   ` [Qemu-devel] " Liu Ping Fan
2011-11-27  2:45   ` Liu Ping Fan
2011-11-27  2:45 ` [PATCH 3/5] QEMU Introduce a pci device "cpustate" to get CPU_DEAD event in guest Liu Ping Fan
2011-11-27  2:45   ` [Qemu-devel] " Liu Ping Fan
2011-11-27  2:45   ` Liu Ping Fan
2011-11-27 10:56   ` Gleb Natapov
2011-11-27 10:56     ` [Qemu-devel] " Gleb Natapov
2011-11-27 10:56     ` Gleb Natapov
2011-11-27  2:45 ` [PATCH 4/5] QEMU Release vcpu and finally exit vcpu thread safely Liu Ping Fan
2011-11-27  2:45   ` [Qemu-devel] " Liu Ping Fan
2011-11-29  5:37   ` ShaoHe Feng
2011-11-27  2:45 ` [PATCH 5/5] QEMU tmp patches for linux-header files Liu Ping Fan
2011-11-27  2:45   ` [Qemu-devel] " Liu Ping Fan
2011-11-27  2:45   ` Liu Ping Fan
2011-11-27  2:47 ` [PATCH] virtio: add a pci driver to notify host the CPU_DEAD event Liu Ping Fan
2011-11-27  2:47   ` [Qemu-devel] " Liu Ping Fan
2011-11-27  2:47   ` Liu Ping Fan
2011-11-27 11:10   ` Gleb Natapov
2011-11-27 11:10     ` [Qemu-devel] " Gleb Natapov
2011-11-27 11:10     ` Gleb Natapov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4F0ED40F.7050700@redhat.com \
    --to=avi@redhat.com \
    --cc=aliguori@us.ibm.com \
    --cc=gleb@redhat.com \
    --cc=jan.kiszka@web.de \
    --cc=kernelfans@gmail.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mtosatti@redhat.com \
    --cc=riel@redhat.com \
    --cc=xiaoguangrong.eric@gmail.com \
    --cc=yoshikawa.takuya@oss.ntt.co.jp \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.