From: Claudio Imbrenda <imbrenda@linux.ibm.com>
To: kvm@vger.kernel.org
Cc: linux-s390@vger.kernel.org, frankja@linux.ibm.com,
borntraeger@de.ibm.com, schlameuss@linux.ibm.com,
david@redhat.com, willy@infradead.org, hca@linux.ibm.com,
svens@linux.ibm.com, agordeev@linux.ibm.com, gor@linux.ibm.com,
nrb@linux.ibm.com, nsg@linux.ibm.com, seanjc@google.com,
seiden@linux.ibm.com
Subject: [PATCH v2 11/15] KVM: s390: stop using lists to keep track of used dat tables
Date: Thu, 16 Jan 2025 12:33:51 +0100 [thread overview]
Message-ID: <20250116113355.32184-12-imbrenda@linux.ibm.com> (raw)
In-Reply-To: <20250116113355.32184-1-imbrenda@linux.ibm.com>
Until now, every dat table allocated to map a guest was put in a
linked list. The page->lru field of struct page was used to keep track
of which pages were being used, and when the gmap was torn down, the
list was walked and all pages were freed.
This patch gets rid of the usage of page->lru. Region and segment tables
are now freed by recursively walking the dat table tree; for shadow
gmaps, the walk also frees the page tables.
Since s390_unlist_old_asce() becomes useless now, remove it.
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
---
arch/s390/include/asm/gmap.h | 3 --
arch/s390/mm/gmap.c | 102 ++++++++---------------------------
2 files changed, 23 insertions(+), 82 deletions(-)
diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h
index 2432c47d36db..99ded56c914b 100644
--- a/arch/s390/include/asm/gmap.h
+++ b/arch/s390/include/asm/gmap.h
@@ -45,7 +45,6 @@
*/
struct gmap {
struct list_head list;
- struct list_head crst_list;
struct mm_struct *mm;
struct radix_tree_root guest_to_host;
struct radix_tree_root host_to_guest;
@@ -61,7 +60,6 @@ struct gmap {
/* Additional data for shadow guest address spaces */
struct radix_tree_root host_to_rmap;
struct list_head children;
- struct list_head pt_list;
spinlock_t shadow_lock;
struct gmap *parent;
unsigned long orig_asce;
@@ -141,7 +139,6 @@ int gmap_protect_one(struct gmap *gmap, unsigned long gaddr, int prot, unsigned
void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4],
unsigned long gaddr, unsigned long vmaddr);
int s390_disable_cow_sharing(void);
-void s390_unlist_old_asce(struct gmap *gmap);
int s390_replace_asce(struct gmap *gmap);
void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns);
int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index c0f79c14277e..a5c3ae18bc6f 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -73,9 +73,7 @@ struct gmap *gmap_alloc(unsigned long limit)
gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL_ACCOUNT);
if (!gmap)
goto out;
- INIT_LIST_HEAD(&gmap->crst_list);
INIT_LIST_HEAD(&gmap->children);
- INIT_LIST_HEAD(&gmap->pt_list);
INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL_ACCOUNT);
INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC | __GFP_ACCOUNT);
INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC | __GFP_ACCOUNT);
@@ -85,7 +83,6 @@ struct gmap *gmap_alloc(unsigned long limit)
page = gmap_alloc_crst();
if (!page)
goto out_free;
- list_add(&page->lru, &gmap->crst_list);
table = page_to_virt(page);
crst_table_init(table, etype);
gmap->table = table;
@@ -188,6 +185,27 @@ static void gmap_rmap_radix_tree_free(struct radix_tree_root *root)
} while (nr > 0);
}
+static void gmap_free_crst(unsigned long *table, bool free_ptes)
+{
+ bool is_segment = (table[0] & _SEGMENT_ENTRY_TYPE_MASK) == 0;
+ int i;
+
+ if (is_segment) {
+ if (!free_ptes)
+ goto out;
+ for (i = 0; i < _CRST_ENTRIES; i++)
+ if (!(table[i] & _SEGMENT_ENTRY_INVALID))
+ page_table_free_pgste(page_ptdesc(phys_to_page(table[i])));
+ } else {
+ for (i = 0; i < _CRST_ENTRIES; i++)
+ if (!(table[i] & _REGION_ENTRY_INVALID))
+ gmap_free_crst(__va(table[i] & PAGE_MASK), free_ptes);
+ }
+
+out:
+ free_pages((unsigned long)table, CRST_ALLOC_ORDER);
+}
+
/**
* gmap_free - free a guest address space
* @gmap: pointer to the guest address space structure
@@ -196,24 +214,17 @@ static void gmap_rmap_radix_tree_free(struct radix_tree_root *root)
*/
void gmap_free(struct gmap *gmap)
{
- struct page *page, *next;
-
/* Flush tlb of all gmaps (if not already done for shadows) */
if (!(gmap_is_shadow(gmap) && gmap->removed))
gmap_flush_tlb(gmap);
/* Free all segment & region tables. */
- list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
- __free_pages(page, CRST_ALLOC_ORDER);
+ gmap_free_crst(gmap->table, gmap_is_shadow(gmap));
+
gmap_radix_tree_free(&gmap->guest_to_host);
gmap_radix_tree_free(&gmap->host_to_guest);
/* Free additional data for a shadow gmap */
if (gmap_is_shadow(gmap)) {
- struct ptdesc *ptdesc, *n;
-
- /* Free all page tables. */
- list_for_each_entry_safe(ptdesc, n, &gmap->pt_list, pt_list)
- page_table_free_pgste(ptdesc);
gmap_rmap_radix_tree_free(&gmap->host_to_rmap);
/* Release reference to the parent */
gmap_put(gmap->parent);
@@ -302,7 +313,6 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
crst_table_init(new, init);
spin_lock(&gmap->guest_table_lock);
if (*table & _REGION_ENTRY_INVALID) {
- list_add(&page->lru, &gmap->crst_list);
*table = __pa(new) | _REGION_ENTRY_LENGTH |
(*table & _REGION_ENTRY_TYPE_MASK);
page = NULL;
@@ -1230,7 +1240,6 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
__gmap_unshadow_pgt(sg, raddr, __va(pgt));
/* Free page table */
ptdesc = page_ptdesc(phys_to_page(pgt));
- list_del(&ptdesc->pt_list);
page_table_free_pgste(ptdesc);
}
@@ -1258,7 +1267,6 @@ static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr,
__gmap_unshadow_pgt(sg, raddr, __va(pgt));
/* Free page table */
ptdesc = page_ptdesc(phys_to_page(pgt));
- list_del(&ptdesc->pt_list);
page_table_free_pgste(ptdesc);
}
}
@@ -1288,7 +1296,6 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
__gmap_unshadow_sgt(sg, raddr, __va(sgt));
/* Free segment table */
page = phys_to_page(sgt);
- list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
@@ -1316,7 +1323,6 @@ static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr,
__gmap_unshadow_sgt(sg, raddr, __va(sgt));
/* Free segment table */
page = phys_to_page(sgt);
- list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
}
@@ -1346,7 +1352,6 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
__gmap_unshadow_r3t(sg, raddr, __va(r3t));
/* Free region 3 table */
page = phys_to_page(r3t);
- list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
@@ -1374,7 +1379,6 @@ static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
__gmap_unshadow_r3t(sg, raddr, __va(r3t));
/* Free region 3 table */
page = phys_to_page(r3t);
- list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
}
@@ -1404,7 +1408,6 @@ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)
__gmap_unshadow_r2t(sg, raddr, __va(r2t));
/* Free region 2 table */
page = phys_to_page(r2t);
- list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
@@ -1436,7 +1439,6 @@ static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr,
r1t[i] = _REGION1_ENTRY_EMPTY;
/* Free region 2 table */
page = phys_to_page(r2t);
- list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
}
@@ -1531,7 +1533,6 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID;
if (sg->edat_level >= 1)
*table |= (r2t & _REGION_ENTRY_PROTECT);
- list_add(&page->lru, &sg->crst_list);
if (fake) {
/* nothing to protect for fake tables */
*table &= ~_REGION_ENTRY_INVALID;
@@ -1615,7 +1616,6 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID;
if (sg->edat_level >= 1)
*table |= (r3t & _REGION_ENTRY_PROTECT);
- list_add(&page->lru, &sg->crst_list);
if (fake) {
/* nothing to protect for fake tables */
*table &= ~_REGION_ENTRY_INVALID;
@@ -1699,7 +1699,6 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID;
if (sg->edat_level >= 1)
*table |= sgt & _REGION_ENTRY_PROTECT;
- list_add(&page->lru, &sg->crst_list);
if (fake) {
/* nothing to protect for fake tables */
*table &= ~_REGION_ENTRY_INVALID;
@@ -1820,7 +1819,6 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
/* mark as invalid as long as the parent table is not protected */
*table = (unsigned long) s_pgt | _SEGMENT_ENTRY |
(pgt & _SEGMENT_ENTRY_PROTECT) | _SEGMENT_ENTRY_INVALID;
- list_add(&ptdesc->pt_list, &sg->pt_list);
if (fake) {
/* nothing to protect for fake tables */
*table &= ~_SEGMENT_ENTRY_INVALID;
@@ -2610,49 +2608,6 @@ int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
}
EXPORT_SYMBOL_GPL(__s390_uv_destroy_range);
-/**
- * s390_unlist_old_asce - Remove the topmost level of page tables from the
- * list of page tables of the gmap.
- * @gmap: the gmap whose table is to be removed
- *
- * On s390x, KVM keeps a list of all pages containing the page tables of the
- * gmap (the CRST list). This list is used at tear down time to free all
- * pages that are now not needed anymore.
- *
- * This function removes the topmost page of the tree (the one pointed to by
- * the ASCE) from the CRST list.
- *
- * This means that it will not be freed when the VM is torn down, and needs
- * to be handled separately by the caller, unless a leak is actually
- * intended. Notice that this function will only remove the page from the
- * list, the page will still be used as a top level page table (and ASCE).
- */
-void s390_unlist_old_asce(struct gmap *gmap)
-{
- struct page *old;
-
- old = virt_to_page(gmap->table);
- spin_lock(&gmap->guest_table_lock);
- list_del(&old->lru);
- /*
- * Sometimes the topmost page might need to be "removed" multiple
- * times, for example if the VM is rebooted into secure mode several
- * times concurrently, or if s390_replace_asce fails after calling
- * s390_remove_old_asce and is attempted again later. In that case
- * the old asce has been removed from the list, and therefore it
- * will not be freed when the VM terminates, but the ASCE is still
- * in use and still pointed to.
- * A subsequent call to replace_asce will follow the pointer and try
- * to remove the same page from the list again.
- * Therefore it's necessary that the page of the ASCE has valid
- * pointers, so list_del can work (and do nothing) without
- * dereferencing stale or invalid pointers.
- */
- INIT_LIST_HEAD(&old->lru);
- spin_unlock(&gmap->guest_table_lock);
-}
-EXPORT_SYMBOL_GPL(s390_unlist_old_asce);
-
/**
* s390_replace_asce - Try to replace the current ASCE of a gmap with a copy
* @gmap: the gmap whose ASCE needs to be replaced
@@ -2672,8 +2627,6 @@ int s390_replace_asce(struct gmap *gmap)
struct page *page;
void *table;
- s390_unlist_old_asce(gmap);
-
/* Replacing segment type ASCEs would cause serious issues */
if ((gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT)
return -EINVAL;
@@ -2684,15 +2637,6 @@ int s390_replace_asce(struct gmap *gmap)
table = page_to_virt(page);
memcpy(table, gmap->table, 1UL << (CRST_ALLOC_ORDER + PAGE_SHIFT));
- /*
- * The caller has to deal with the old ASCE, but here we make sure
- * the new one is properly added to the CRST list, so that
- * it will be freed when the VM is torn down.
- */
- spin_lock(&gmap->guest_table_lock);
- list_add(&page->lru, &gmap->crst_list);
- spin_unlock(&gmap->guest_table_lock);
-
/* Set new table origin while preserving existing ASCE control bits */
asce = (gmap->asce & ~_ASCE_ORIGIN) | __pa(table);
WRITE_ONCE(gmap->asce, asce);
--
2.47.1
next prev parent reply other threads:[~2025-01-16 11:34 UTC|newest]
Thread overview: 35+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-01-16 11:33 [PATCH v2 00/15] KVM: s390: Stop using page->index and other things Claudio Imbrenda
2025-01-16 11:33 ` [PATCH v2 01/15] KVM: Do not restrict the size of KVM-internal memory regions Claudio Imbrenda
2025-01-16 12:17 ` Christoph Schlameuss
2025-01-16 11:33 ` [PATCH v2 02/15] KVM: s390: wrapper for KVM_BUG Claudio Imbrenda
2025-01-17 16:36 ` Steffen Eiden
2025-01-16 11:33 ` [PATCH v2 03/15] KVM: s390: move pv gmap functions into kvm Claudio Imbrenda
2025-01-17 13:38 ` Janosch Frank
2025-01-17 13:49 ` Claudio Imbrenda
2025-01-17 16:04 ` Janosch Frank
2025-01-16 11:33 ` [PATCH v2 04/15] KVM: s390: fake memslot for ucontrol VMs Claudio Imbrenda
2025-01-16 12:42 ` Janosch Frank
2025-01-16 12:46 ` Claudio Imbrenda
2025-01-16 11:33 ` [PATCH v2 05/15] KVM: s390: selftests: fix ucontrol memory region test Claudio Imbrenda
2025-01-16 11:33 ` [PATCH v2 06/15] KVM: s390: use __kvm_faultin_pfn() Claudio Imbrenda
2025-01-17 16:20 ` Janosch Frank
2025-01-16 11:33 ` [PATCH v2 07/15] KVM: s390: get rid of gmap_fault() Claudio Imbrenda
2025-01-17 16:22 ` Janosch Frank
2025-01-16 11:33 ` [PATCH v2 08/15] KVM: s390: get rid of gmap_translate() Claudio Imbrenda
2025-01-17 16:29 ` Janosch Frank
2025-01-16 11:33 ` [PATCH v2 09/15] KVM: s390: move some gmap shadowing functions away from mm/gmap.c Claudio Imbrenda
2025-01-17 16:41 ` Janosch Frank
2025-01-16 11:33 ` [PATCH v2 10/15] KVM: s390: stop using page->index for non-shadow gmaps Claudio Imbrenda
2025-01-17 16:52 ` Janosch Frank
2025-01-16 11:33 ` Claudio Imbrenda [this message]
2025-01-20 12:21 ` [PATCH v2 11/15] KVM: s390: stop using lists to keep track of used dat tables Janosch Frank
2025-01-16 11:33 ` [PATCH v2 12/15] KVM: s390: move gmap_shadow_pgt_lookup() into kvm Claudio Imbrenda
2025-01-17 12:58 ` Steffen Eiden
2025-01-20 13:47 ` Janosch Frank
2025-01-20 13:54 ` Claudio Imbrenda
2025-01-16 11:33 ` [PATCH v2 13/15] KVM: s390: remove useless page->index usage Claudio Imbrenda
2025-01-20 12:25 ` Janosch Frank
2025-01-16 11:33 ` [PATCH v2 14/15] KVM: s390: move PGSTE softbits Claudio Imbrenda
2025-01-17 16:11 ` Steffen Eiden
2025-01-16 11:33 ` [PATCH v2 15/15] KVM: s390: remove the last user of page->index Claudio Imbrenda
2025-01-17 16:34 ` Steffen Eiden
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250116113355.32184-12-imbrenda@linux.ibm.com \
--to=imbrenda@linux.ibm.com \
--cc=agordeev@linux.ibm.com \
--cc=borntraeger@de.ibm.com \
--cc=david@redhat.com \
--cc=frankja@linux.ibm.com \
--cc=gor@linux.ibm.com \
--cc=hca@linux.ibm.com \
--cc=kvm@vger.kernel.org \
--cc=linux-s390@vger.kernel.org \
--cc=nrb@linux.ibm.com \
--cc=nsg@linux.ibm.com \
--cc=schlameuss@linux.ibm.com \
--cc=seanjc@google.com \
--cc=seiden@linux.ibm.com \
--cc=svens@linux.ibm.com \
--cc=willy@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox