public inbox for linux-s390@vger.kernel.org
 help / color / mirror / Atom feed
From: Janosch Frank <frankja@linux.ibm.com>
To: Claudio Imbrenda <imbrenda@linux.ibm.com>, kvm@vger.kernel.org
Cc: cohuck@redhat.com, borntraeger@de.ibm.com, thuth@redhat.com,
	pasic@linux.ibm.com, david@redhat.com,
	linux-s390@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCH v1 08/11] KVM: s390: pv: lazy destroy for reboot
Date: Thu, 27 May 2021 11:43:32 +0200	[thread overview]
Message-ID: <1eb61235-75a7-4a57-38ca-d7a1219f1b8d@linux.ibm.com> (raw)
In-Reply-To: <20210517200758.22593-9-imbrenda@linux.ibm.com>

On 5/17/21 10:07 PM, Claudio Imbrenda wrote:
> Until now, destroying a protected guest was an entirely synchronous
> operation that could potentially take a very long time, depending on
> the size of the guest, due to the time needed to clean up the address
> space from protected pages.
> 
> This patch implements a lazy destroy mechanism, that allows a protected
> guest to reboot significantly faster than previously.
> 
> This is achieved by clearing the pages of the old guest in background.
> In case of reboot, the new guest will be able to run in the same
> address space almost immediately.
> 
> The old protected guest is then only destroyed when all of its memory has
> been destroyed or otherwise made non protected.

LGTM some comments below

> 
> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> ---
>  arch/s390/kvm/kvm-s390.c |   6 +-
>  arch/s390/kvm/kvm-s390.h |   2 +-
>  arch/s390/kvm/pv.c       | 118 ++++++++++++++++++++++++++++++++++++++-
>  3 files changed, 120 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
> index 2f09e9d7dc95..db25aa1fb6a6 100644
> --- a/arch/s390/kvm/kvm-s390.c
> +++ b/arch/s390/kvm/kvm-s390.c
> @@ -2248,7 +2248,7 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
>  
>  		r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
>  		if (r)
> -			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
> +			kvm_s390_pv_deinit_vm_deferred(kvm, &dummy, &dummy);
>  
>  		/* we need to block service interrupts from now on */
>  		set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
> @@ -2267,7 +2267,7 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
>  		 */
>  		if (r)
>  			break;
> -		r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
> +		r = kvm_s390_pv_deinit_vm_deferred(kvm, &cmd->rc, &cmd->rrc);
>  
>  		/* no need to block service interrupts any more */
>  		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
> @@ -2796,7 +2796,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
>  	 * complaining we do not use kvm_s390_pv_is_protected.
>  	 */
>  	if (kvm_s390_pv_get_handle(kvm))
> -		kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
> +		kvm_s390_pv_deinit_vm_deferred(kvm, &rc, &rrc);
>  	debug_unregister(kvm->arch.dbf);
>  	free_page((unsigned long)kvm->arch.sie_page2);
>  	if (!kvm_is_ucontrol(kvm))
> diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
> index 79dcd647b378..b3c0796a3cc1 100644
> --- a/arch/s390/kvm/kvm-s390.h
> +++ b/arch/s390/kvm/kvm-s390.h
> @@ -211,7 +211,7 @@ static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm)
>  /* implemented in pv.c */
>  int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
>  int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
> -int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc);
> +int kvm_s390_pv_deinit_vm_deferred(struct kvm *kvm, u16 *rc, u16 *rrc);
>  int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc);
>  int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
>  			      u16 *rrc);
> diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
> index 59039b8a7be7..9a3547966e18 100644
> --- a/arch/s390/kvm/pv.c
> +++ b/arch/s390/kvm/pv.c
> @@ -14,8 +14,17 @@
>  #include <asm/mman.h>
>  #include <linux/pagewalk.h>
>  #include <linux/sched/mm.h>
> +#include <linux/kthread.h>
>  #include "kvm-s390.h"
>  
> +struct deferred_priv {
> +	struct mm_struct *mm;
> +	unsigned long old_table;
> +	u64 handle;
> +	void *virt;
> +	unsigned long real;
> +};
> +
>  int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
>  {
>  	int cc = 0;
> @@ -202,7 +211,7 @@ static int kvm_s390_pv_replace_asce(struct kvm *kvm)
>  }
>  
>  /* this should not fail, but if it does, we must not free the donated memory */
> -int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
> +static int kvm_s390_pv_deinit_vm_now(struct kvm *kvm, u16 *rc, u16 *rrc)
>  {
>  	int cc;
>  
> @@ -230,6 +239,111 @@ int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
>  	return cc ? -EIO : 0;
>  }
>  
> +static int kvm_s390_pv_destroy_vm_thread(void *priv)
> +{
> +	struct deferred_priv *p = priv;
> +	u16 rc, rrc;
> +	int r = 1;
> +
> +	/* Exit early if we end up being the only users of the mm */
> +	s390_uv_destroy_range(p->mm, 1, 0, TASK_SIZE_MAX);
> +	mmput(p->mm);

Where do we exit early?

> +
> +	r = uv_cmd_nodata(p->handle, UVC_CMD_DESTROY_SEC_CONF, &rc, &rrc);
> +	WARN_ONCE(r, "protvirt destroy vm failed rc %x rrc %x", rc, rrc);
> +	if (r)
> +		return r;
> +	atomic_dec(&p->mm->context.is_protected);
> +
> +	/*
> +	 * Intentional leak in case the destroy secure VM call fails. The
> +	 * call should never fail if the hardware is not broken.
> +	 */
> +	free_pages(p->real, get_order(uv_info.guest_base_stor_len));
> +	free_pages(p->old_table, CRST_ALLOC_ORDER);
> +	vfree(p->virt);
> +	kfree(p);
> +	return 0;
> +}
> +
> +static int deferred_destroy(struct kvm *kvm, struct deferred_priv *priv, u16 *rc, u16 *rrc)
> +{
> +	struct task_struct *t;
> +
> +	priv->virt = kvm->arch.pv.stor_var;
> +	priv->real = kvm->arch.pv.stor_base;

Now I know what the struct members actually mean...
Is there a reson you didn't like config_stor_* as a name or something
similar?

> +	priv->handle = kvm_s390_pv_get_handle(kvm);
> +	priv->old_table = (unsigned long)kvm->arch.gmap->table;
> +	WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
> +
> +	if (kvm_s390_pv_replace_asce(kvm))
> +		goto fail;
> +
> +	t = kthread_create(kvm_s390_pv_destroy_vm_thread, priv,
> +			   "kvm_s390_pv_destroy_vm_thread");
> +	if (IS_ERR_OR_NULL(t))
> +		goto fail;
> +
> +	memset(&kvm->arch.pv, 0, sizeof(kvm->arch.pv));
> +	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM DEFERRED %d", t->pid);
> +	wake_up_process(t);
> +	/*
> +	 * no actual UVC is performed at this point, just return a successful
> +	 * rc value to make userspace happy, and an arbitrary rrc
> +	 */
> +	*rc = 1;
> +	*rrc = 42;
> +
> +	return 0;
> +
> +fail:
> +	kfree(priv);
> +	return kvm_s390_pv_deinit_vm_now(kvm, rc, rrc);
> +}
> +
> +/*  Clear the first 2GB of guest memory, to avoid prefix issues after reboot */
> +static void kvm_s390_clear_2g(struct kvm *kvm)
> +{
> +	struct kvm_memory_slot *slot;
> +	struct kvm_memslots *slots;
> +	unsigned long lim;
> +	int idx;
> +
> +	idx = srcu_read_lock(&kvm->srcu);
> +
> +	slots = kvm_memslots(kvm);
> +	kvm_for_each_memslot(slot, slots) {
> +		if (slot->base_gfn >= (SZ_2G / PAGE_SIZE))
> +			continue;
> +		if (slot->base_gfn + slot->npages > (SZ_2G / PAGE_SIZE))
> +			lim = slot->userspace_addr + SZ_2G - slot->base_gfn * PAGE_SIZE;
> +		else
> +			lim = slot->userspace_addr + slot->npages * PAGE_SIZE;
> +		s390_uv_destroy_range(kvm->mm, 1, slot->userspace_addr, lim);
> +	}
> +
> +	srcu_read_unlock(&kvm->srcu, idx);
> +}
> +
> +int kvm_s390_pv_deinit_vm_deferred(struct kvm *kvm, u16 *rc, u16 *rrc)
> +{
> +	struct deferred_priv *priv;
> +
> +	priv = kmalloc(sizeof(*priv), GFP_KERNEL | __GFP_ZERO);
> +	if (!priv)
> +		return kvm_s390_pv_deinit_vm_now(kvm, rc, rrc);
> +
> +	if (mmget_not_zero(kvm->mm)) {
> +		kvm_s390_clear_2g(kvm);
> +	} else {
> +		/* No deferred work to do */
> +		kfree(priv);
> +		return kvm_s390_pv_deinit_vm_now(kvm, rc, rrc);
> +	}
> +	priv->mm = kvm->mm;
> +	return deferred_destroy(kvm, priv, rc, rrc);
> +}
> +
>  int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
>  {
>  	struct uv_cb_cgc uvcb = {
> @@ -263,7 +377,7 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
>  	atomic_inc(&kvm->mm->context.is_protected);
>  	if (cc) {
>  		if (uvcb.header.rc & UVC_RC_NEED_DESTROY) {
> -			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
> +			kvm_s390_pv_deinit_vm_now(kvm, &dummy, &dummy);
>  		} else {
>  			atomic_dec(&kvm->mm->context.is_protected);
>  			kvm_s390_pv_dealloc_vm(kvm);
> 


  reply	other threads:[~2021-05-27  9:43 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-05-17 20:07 [PATCH v1 00/11] KVM: s390: pv: implement lazy destroy Claudio Imbrenda
2021-05-17 20:07 ` [PATCH v1 01/11] KVM: s390: pv: leak the ASCE page when destroy fails Claudio Imbrenda
2021-05-18 10:26   ` Janosch Frank
2021-05-18 10:40     ` Claudio Imbrenda
2021-05-18 12:00       ` Janosch Frank
2021-05-17 20:07 ` [PATCH v1 02/11] KVM: s390: pv: properly handle page flags for protected guests Claudio Imbrenda
2021-05-17 20:07 ` [PATCH v1 03/11] KVM: s390: pv: handle secure storage violations " Claudio Imbrenda
2021-05-17 20:07 ` [PATCH v1 04/11] KVM: s390: pv: handle secure storage exceptions for normal guests Claudio Imbrenda
2021-05-17 20:07 ` [PATCH v1 05/11] KVM: s390: pv: refactor s390_reset_acc Claudio Imbrenda
2021-05-26 12:11   ` Janosch Frank
2021-05-17 20:07 ` [PATCH v1 06/11] KVM: s390: pv: usage counter instead of flag Claudio Imbrenda
2021-05-27  9:29   ` Janosch Frank
2021-05-17 20:07 ` [PATCH v1 07/11] KVM: s390: pv: add export before import Claudio Imbrenda
2021-05-26 11:56   ` Janosch Frank
2021-05-17 20:07 ` [PATCH v1 08/11] KVM: s390: pv: lazy destroy for reboot Claudio Imbrenda
2021-05-27  9:43   ` Janosch Frank [this message]
2021-05-17 20:07 ` [PATCH v1 09/11] KVM: s390: pv: extend lazy destroy to handle shutdown Claudio Imbrenda
2021-05-17 20:07 ` [PATCH v1 10/11] KVM: s390: pv: module parameter to fence lazy destroy Claudio Imbrenda
2021-05-27 10:35   ` Janosch Frank
2021-05-17 20:07 ` [PATCH v1 11/11] KVM: s390: pv: add support for UV feature bits Claudio Imbrenda
2021-05-18 15:05 ` [PATCH v1 00/11] KVM: s390: pv: implement lazy destroy Cornelia Huck
2021-05-18 15:36   ` Claudio Imbrenda
2021-05-18 15:45     ` Christian Borntraeger
2021-05-18 15:52       ` Cornelia Huck
2021-05-18 16:13       ` Claudio Imbrenda
2021-05-18 16:20         ` Christian Borntraeger
2021-05-18 16:34           ` Claudio Imbrenda
2021-05-18 16:35             ` Christian Borntraeger
2021-05-18 16:04     ` Cornelia Huck
2021-05-18 16:19       ` Claudio Imbrenda
2021-05-18 16:22         ` David Hildenbrand
2021-05-18 16:31           ` Claudio Imbrenda
2021-05-18 16:55             ` Christian Borntraeger
2021-05-18 17:00               ` Claudio Imbrenda

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1eb61235-75a7-4a57-38ca-d7a1219f1b8d@linux.ibm.com \
    --to=frankja@linux.ibm.com \
    --cc=borntraeger@de.ibm.com \
    --cc=cohuck@redhat.com \
    --cc=david@redhat.com \
    --cc=imbrenda@linux.ibm.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-s390@vger.kernel.org \
    --cc=pasic@linux.ibm.com \
    --cc=thuth@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox