From: Claudio Imbrenda <imbrenda@linux.ibm.com>
To: kvm@vger.kernel.org
Cc: borntraeger@de.ibm.com, nrb@linux.ibm.com, nsg@linux.ibm.com,
frankja@linux.ibm.com, mhartmay@linux.ibm.com,
kvm390-list@tuxmaker.boeblingen.de.ibm.com,
linux-s390@vger.kernel.org
Subject: [PATCH v1 1/1] KVM: s390: pv: fix asynchronous teardown for small VMs
Date: Thu, 20 Apr 2023 18:01:49 +0200 [thread overview]
Message-ID: <20230420160149.51728-1-imbrenda@linux.ibm.com> (raw)
On machines without the Destroy Secure Configuration Fast UVC, the
topmost level of page tables is set aside and freed asynchronously
as the last step of the asynchronous teardown.
Each gmap has a host_to_guest radix tree mapping host (userspace)
addresses (with 1M granularity) to gmap segment table entries (pmds).
If a guest is smaller than 2GB, the topmost level of page tables is the
segment table (i.e. there are only 2 levels). Replacing it means that
the pointers in the host_to_guest mapping would become stale and cause
use-after-free accesses and memory corruption.
Fix the issue by synchronously destroying all guests with
only 2 levels of page tables in kvm_s390_pv_set_aside(). This will
speed up the process and avoid the issue altogether.
Update s390_replace_asce() so it refuses to replace segment type ASCEs.
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Fixes: fb491d5500a7 ("KVM: s390: pv: asynchronous destroy for reboot")
---
arch/s390/kvm/pv.c | 35 ++++++++++++++++++++---------------
arch/s390/mm/gmap.c | 7 +++++++
2 files changed, 27 insertions(+), 15 deletions(-)
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index e032ebbf51b9..ceb8cb628d62 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -39,6 +39,7 @@ struct pv_vm_to_be_destroyed {
u64 handle;
void *stor_var;
unsigned long stor_base;
+ bool small;
};
static void kvm_s390_clear_pv_state(struct kvm *kvm)
@@ -318,7 +319,11 @@ int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc)
if (!priv)
return -ENOMEM;
- if (is_destroy_fast_available()) {
+ if ((kvm->arch.gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT) {
+ /* No need to do things asynchronously for VMs under 2GB */
+ res = kvm_s390_pv_deinit_vm(kvm, rc, rrc);
+ priv->small = true;
+ } else if (is_destroy_fast_available()) {
res = kvm_s390_pv_deinit_vm_fast(kvm, rc, rrc);
} else {
priv->stor_var = kvm->arch.pv.stor_var;
@@ -335,7 +340,8 @@ int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc)
return res;
}
- kvm_s390_destroy_lower_2g(kvm);
+ if (!priv->small)
+ kvm_s390_destroy_lower_2g(kvm);
kvm_s390_clear_pv_state(kvm);
kvm->arch.pv.set_aside = priv;
@@ -418,7 +424,10 @@ int kvm_s390_pv_deinit_cleanup_all(struct kvm *kvm, u16 *rc, u16 *rrc)
/* If a previous protected VM was set aside, put it in the need_cleanup list */
if (kvm->arch.pv.set_aside) {
- list_add(kvm->arch.pv.set_aside, &kvm->arch.pv.need_cleanup);
+ if (((struct pv_vm_to_be_destroyed *)kvm->arch.pv.set_aside)->small)
+ kfree(kvm->arch.pv.set_aside);
+ else
+ list_add(kvm->arch.pv.set_aside, &kvm->arch.pv.need_cleanup);
kvm->arch.pv.set_aside = NULL;
}
@@ -485,26 +494,22 @@ int kvm_s390_pv_deinit_aside_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
if (!p)
return -EINVAL;
- /* When a fatal signal is received, stop immediately */
- if (s390_uv_destroy_range_interruptible(kvm->mm, 0, TASK_SIZE_MAX))
+ if (p->small)
goto done;
- if (kvm_s390_pv_dispose_one_leftover(kvm, p, rc, rrc))
- ret = -EIO;
- kfree(p);
- p = NULL;
-done:
- /*
- * p is not NULL if we aborted because of a fatal signal, in which
- * case queue the leftover for later cleanup.
- */
- if (p) {
+ /* When a fatal signal is received, stop immediately */
+ if (s390_uv_destroy_range_interruptible(kvm->mm, 0, TASK_SIZE_MAX)) {
mutex_lock(&kvm->lock);
list_add(&p->list, &kvm->arch.pv.need_cleanup);
mutex_unlock(&kvm->lock);
/* Did not finish, but pretend things went well */
*rc = UVC_RC_EXECUTED;
*rrc = 42;
+ return 0;
}
+ if (kvm_s390_pv_dispose_one_leftover(kvm, p, rc, rrc))
+ ret = -EIO;
+done:
+ kfree(p);
return ret;
}
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 5a716bdcba05..2267cf9819b2 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -2833,6 +2833,9 @@ EXPORT_SYMBOL_GPL(s390_unlist_old_asce);
* s390_replace_asce - Try to replace the current ASCE of a gmap with a copy
* @gmap: the gmap whose ASCE needs to be replaced
*
+ * If the ASCE is a SEGMENT type then this function will return -EINVAL,
+ * otherwise the pointers in the host_to_guest radix tree will keep pointing
+ * to the wrong pages, causing use-after-free and memory corruption.
* If the allocation of the new top level page table fails, the ASCE is not
* replaced.
* In any case, the old ASCE is always removed from the gmap CRST list.
@@ -2847,6 +2850,10 @@ int s390_replace_asce(struct gmap *gmap)
s390_unlist_old_asce(gmap);
+ /* Replacing segment type ASCEs would cause serious issues */
+ if ((gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT)
+ return -EINVAL;
+
page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
if (!page)
return -ENOMEM;
--
2.39.2
next reply other threads:[~2023-04-20 16:06 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-04-20 16:01 Claudio Imbrenda [this message]
2023-04-20 16:15 ` [PATCH v1 1/1] KVM: s390: pv: fix asynchronous teardown for small VMs Marc Hartmayer
2023-04-21 7:35 ` Claudio Imbrenda
2023-04-21 8:04 ` Janosch Frank
2023-04-21 8:17 ` Claudio Imbrenda
2023-04-21 8:07 ` Christian Borntraeger
2023-04-21 8:17 ` Claudio Imbrenda
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230420160149.51728-1-imbrenda@linux.ibm.com \
--to=imbrenda@linux.ibm.com \
--cc=borntraeger@de.ibm.com \
--cc=frankja@linux.ibm.com \
--cc=kvm390-list@tuxmaker.boeblingen.de.ibm.com \
--cc=kvm@vger.kernel.org \
--cc=linux-s390@vger.kernel.org \
--cc=mhartmay@linux.ibm.com \
--cc=nrb@linux.ibm.com \
--cc=nsg@linux.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox