* [PATCH v3 1/9] KVM: s390: vsie: Fix dat_split_ste()
2026-03-24 17:42 [PATCH v3 0/9] KVM: s390: More memory management fixes Claudio Imbrenda
@ 2026-03-24 17:42 ` Claudio Imbrenda
2026-03-24 17:42 ` [PATCH v3 2/9] KVM: s390: Remove non-atomic dat_crstep_xchg() Claudio Imbrenda
` (7 subsequent siblings)
8 siblings, 0 replies; 11+ messages in thread
From: Claudio Imbrenda @ 2026-03-24 17:42 UTC (permalink / raw)
To: kvm
Cc: linux-kernel, linux-s390, borntraeger, frankja, nrb, seiden, gra,
schlameuss, hca, david
If the guest misbehaves and puts the page tables for its nested guest
inside the memory of the nested guest itself, and the guest and nested
guest are being mapped with large pages, the shadow mapping will
lose synchronization with the actual mapping, since this will cause the
large page with the vsie notification bit to be split, but the
vsie notification bit will not be propagated to the resulting small
pages.
Fix this by propagating the vsie_notif bit from large pages to normal
pages when splitting a large page.
Fixes: 2db149a0a6c5 ("KVM: s390: KVM page table management functions: walks")
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: Christoph Schlameuss <schlameuss@linux.ibm.com>
Reviewed-by: Steffen Eiden <seiden@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
---
arch/s390/kvm/dat.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/s390/kvm/dat.c b/arch/s390/kvm/dat.c
index 670404d4fa44..48b5f2bcf172 100644
--- a/arch/s390/kvm/dat.c
+++ b/arch/s390/kvm/dat.c
@@ -292,6 +292,7 @@ static int dat_split_ste(struct kvm_s390_mmu_cache *mc, union pmd *pmdp, gfn_t g
pt->ptes[i].val = init.val | i * PAGE_SIZE;
/* No need to take locks as the page table is not installed yet. */
pgste_init.prefix_notif = old.s.fc1.prefix_notif;
+ pgste_init.vsie_notif = old.s.fc1.vsie_notif;
pgste_init.pcl = uses_skeys && init.h.i;
dat_init_pgstes(pt, pgste_init.val);
} else {
--
2.53.0
^ permalink raw reply related [flat|nested] 11+ messages in thread* [PATCH v3 2/9] KVM: s390: Remove non-atomic dat_crstep_xchg()
2026-03-24 17:42 [PATCH v3 0/9] KVM: s390: More memory management fixes Claudio Imbrenda
2026-03-24 17:42 ` [PATCH v3 1/9] KVM: s390: vsie: Fix dat_split_ste() Claudio Imbrenda
@ 2026-03-24 17:42 ` Claudio Imbrenda
2026-03-24 17:42 ` [PATCH v3 3/9] KVM: s390: vsie: Fix check for pre-existing shadow mapping Claudio Imbrenda
` (6 subsequent siblings)
8 siblings, 0 replies; 11+ messages in thread
From: Claudio Imbrenda @ 2026-03-24 17:42 UTC (permalink / raw)
To: kvm
Cc: linux-kernel, linux-s390, borntraeger, frankja, nrb, seiden, gra,
schlameuss, hca, david
In practice dat_crstep_xchg() is racy and hard to use correctly. Simply
remove it and replace its uses with dat_crstep_xchg_atomic().
This solves some actual races that lead to system hangs / crashes.
Opportunistically fix an alignment issue in _gmap_crstep_xchg_atomic().
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Fixes: 589071eaaa8f ("KVM: s390: KVM page table management functions: clear and replace")
Fixes: 94fd9b16cc67 ("KVM: s390: KVM page table management functions: lifecycle management")
Reviewed-by: Steffen Eiden <seiden@linux.ibm.com>
---
arch/s390/kvm/dat.c | 53 ++++++++------------------
arch/s390/kvm/dat.h | 9 +++--
arch/s390/kvm/gaccess.c | 26 +++++++------
arch/s390/kvm/gmap.c | 82 ++++++++++++++++++++++++-----------------
arch/s390/kvm/gmap.h | 26 +++++++------
5 files changed, 98 insertions(+), 98 deletions(-)
diff --git a/arch/s390/kvm/dat.c b/arch/s390/kvm/dat.c
index 48b5f2bcf172..8ba80b0b4698 100644
--- a/arch/s390/kvm/dat.c
+++ b/arch/s390/kvm/dat.c
@@ -134,32 +134,6 @@ int dat_set_asce_limit(struct kvm_s390_mmu_cache *mc, union asce *asce, int newt
return 0;
}
-/**
- * dat_crstep_xchg() - Exchange a gmap CRSTE with another.
- * @crstep: Pointer to the CRST entry
- * @new: Replacement entry.
- * @gfn: The affected guest address.
- * @asce: The ASCE of the address space.
- *
- * Context: This function is assumed to be called with kvm->mmu_lock held.
- */
-void dat_crstep_xchg(union crste *crstep, union crste new, gfn_t gfn, union asce asce)
-{
- if (crstep->h.i) {
- WRITE_ONCE(*crstep, new);
- return;
- } else if (cpu_has_edat2()) {
- crdte_crste(crstep, *crstep, new, gfn, asce);
- return;
- }
-
- if (machine_has_tlb_guest())
- idte_crste(crstep, gfn, IDTE_GUEST_ASCE, asce, IDTE_GLOBAL);
- else
- idte_crste(crstep, gfn, 0, NULL_ASCE, IDTE_GLOBAL);
- WRITE_ONCE(*crstep, new);
-}
-
/**
* dat_crstep_xchg_atomic() - Atomically exchange a gmap CRSTE with another.
* @crstep: Pointer to the CRST entry.
@@ -175,8 +149,8 @@ void dat_crstep_xchg(union crste *crstep, union crste new, gfn_t gfn, union asce
*
* Return: %true if the exchange was successful.
*/
-bool dat_crstep_xchg_atomic(union crste *crstep, union crste old, union crste new, gfn_t gfn,
- union asce asce)
+bool __must_check dat_crstep_xchg_atomic(union crste *crstep, union crste old, union crste new,
+ gfn_t gfn, union asce asce)
{
if (old.h.i)
return arch_try_cmpxchg((long *)crstep, &old.val, new.val);
@@ -894,7 +868,8 @@ static long _dat_slot_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct d
/* This table entry needs to be updated. */
if (walk->start <= gfn && walk->end >= next) {
- dat_crstep_xchg_atomic(crstep, crste, new_crste, gfn, walk->asce);
+ if (!dat_crstep_xchg_atomic(crstep, crste, new_crste, gfn, walk->asce))
+ return -EINVAL;
/* A lower level table was present, needs to be freed. */
if (!crste.h.fc && !crste.h.i) {
if (is_pmd(crste))
@@ -1072,17 +1047,19 @@ int dat_link(struct kvm_s390_mmu_cache *mc, union asce asce, int level,
static long dat_set_pn_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
- union crste crste = READ_ONCE(*crstep);
+ union crste newcrste, oldcrste;
int *n = walk->priv;
- if (!crste.h.fc || crste.h.i || crste.h.p)
- return 0;
-
- *n = 2;
- if (crste.s.fc1.prefix_notif)
- return 0;
- crste.s.fc1.prefix_notif = 1;
- dat_crstep_xchg(crstep, crste, gfn, walk->asce);
+ do {
+ oldcrste = READ_ONCE(*crstep);
+ if (!oldcrste.h.fc || oldcrste.h.i || oldcrste.h.p)
+ return 0;
+ *n = 2;
+ if (oldcrste.s.fc1.prefix_notif)
+ return 0;
+ newcrste = oldcrste;
+ newcrste.s.fc1.prefix_notif = 1;
+ } while (!dat_crstep_xchg_atomic(crstep, oldcrste, newcrste, gfn, walk->asce));
return 0;
}
diff --git a/arch/s390/kvm/dat.h b/arch/s390/kvm/dat.h
index 123e11dcd70d..22dafc775335 100644
--- a/arch/s390/kvm/dat.h
+++ b/arch/s390/kvm/dat.h
@@ -938,11 +938,14 @@ static inline bool dat_pudp_xchg_atomic(union pud *pudp, union pud old, union pu
return dat_crstep_xchg_atomic(_CRSTEP(pudp), _CRSTE(old), _CRSTE(new), gfn, asce);
}
-static inline void dat_crstep_clear(union crste *crstep, gfn_t gfn, union asce asce)
+static inline union crste dat_crstep_clear_atomic(union crste *crstep, gfn_t gfn, union asce asce)
{
- union crste newcrste = _CRSTE_EMPTY(crstep->h.tt);
+ union crste oldcrste, empty = _CRSTE_EMPTY(crstep->h.tt);
- dat_crstep_xchg(crstep, newcrste, gfn, asce);
+ do {
+ oldcrste = READ_ONCE(*crstep);
+ } while (!dat_crstep_xchg_atomic(crstep, oldcrste, empty, gfn, asce));
+ return oldcrste;
}
static inline int get_level(union crste *crstep, union pte *ptep)
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index a9da9390867d..4ee862424ca0 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -1456,7 +1456,7 @@ static int _do_shadow_pte(struct gmap *sg, gpa_t raddr, union pte *ptep_h, union
static int _do_shadow_crste(struct gmap *sg, gpa_t raddr, union crste *host, union crste *table,
struct guest_fault *f, bool p)
{
- union crste newcrste;
+ union crste newcrste, oldcrste;
gfn_t gfn;
int rc;
@@ -1469,16 +1469,20 @@ static int _do_shadow_crste(struct gmap *sg, gpa_t raddr, union crste *host, uni
if (rc)
return rc;
- newcrste = _crste_fc1(f->pfn, host->h.tt, f->writable, !p);
- newcrste.s.fc1.d |= host->s.fc1.d;
- newcrste.s.fc1.sd |= host->s.fc1.sd;
- newcrste.h.p &= host->h.p;
- newcrste.s.fc1.vsie_notif = 1;
- newcrste.s.fc1.prefix_notif = host->s.fc1.prefix_notif;
- _gmap_crstep_xchg(sg->parent, host, newcrste, f->gfn, false);
-
- newcrste = _crste_fc1(f->pfn, host->h.tt, 0, !p);
- dat_crstep_xchg(table, newcrste, gpa_to_gfn(raddr), sg->asce);
+ do {
+ oldcrste = READ_ONCE(*host);
+ newcrste = _crste_fc1(f->pfn, oldcrste.h.tt, f->writable, !p);
+ newcrste.s.fc1.d |= oldcrste.s.fc1.d;
+ newcrste.s.fc1.sd |= oldcrste.s.fc1.sd;
+ newcrste.h.p &= oldcrste.h.p;
+ newcrste.s.fc1.vsie_notif = 1;
+ newcrste.s.fc1.prefix_notif = oldcrste.s.fc1.prefix_notif;
+ } while (!_gmap_crstep_xchg_atomic(sg->parent, host, oldcrste, newcrste, f->gfn, false));
+
+ newcrste = _crste_fc1(f->pfn, oldcrste.h.tt, 0, !p);
+ gfn = gpa_to_gfn(raddr);
+ while (!dat_crstep_xchg_atomic(table, READ_ONCE(*table), newcrste, gfn, sg->asce))
+ ;
return 0;
}
diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c
index ef0c6ebfdde2..956be4c01797 100644
--- a/arch/s390/kvm/gmap.c
+++ b/arch/s390/kvm/gmap.c
@@ -313,13 +313,16 @@ static long gmap_clear_young_crste(union crste *crstep, gfn_t gfn, gfn_t end, st
struct clear_young_pte_priv *priv = walk->priv;
union crste crste, new;
- crste = READ_ONCE(*crstep);
+ do {
+ crste = READ_ONCE(*crstep);
+
+ if (!crste.h.fc)
+ return 0;
+ if (!crste.s.fc1.y && crste.h.i)
+ return 0;
+ if (crste_prefix(crste) && !gmap_mkold_prefix(priv->gmap, gfn, end))
+ break;
- if (!crste.h.fc)
- return 0;
- if (!crste.s.fc1.y && crste.h.i)
- return 0;
- if (!crste_prefix(crste) || gmap_mkold_prefix(priv->gmap, gfn, end)) {
new = crste;
new.h.i = 1;
new.s.fc1.y = 0;
@@ -328,8 +331,8 @@ static long gmap_clear_young_crste(union crste *crstep, gfn_t gfn, gfn_t end, st
folio_set_dirty(phys_to_folio(crste_origin_large(crste)));
new.s.fc1.d = 0;
new.h.p = 1;
- dat_crstep_xchg(crstep, new, gfn, walk->asce);
- }
+ } while (!dat_crstep_xchg_atomic(crstep, crste, new, gfn, walk->asce));
+
priv->young = 1;
return 0;
}
@@ -391,14 +394,18 @@ static long _gmap_unmap_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct
{
struct gmap_unmap_priv *priv = walk->priv;
struct folio *folio = NULL;
+ union crste old = *crstep;
- if (crstep->h.fc) {
- if (crstep->s.fc1.pr && test_bit(GMAP_FLAG_EXPORT_ON_UNMAP, &priv->gmap->flags))
- folio = phys_to_folio(crste_origin_large(*crstep));
- gmap_crstep_xchg(priv->gmap, crstep, _CRSTE_EMPTY(crstep->h.tt), gfn);
- if (folio)
- uv_convert_from_secure_folio(folio);
- }
+ if (!old.h.fc)
+ return 0;
+
+ if (old.s.fc1.pr && test_bit(GMAP_FLAG_EXPORT_ON_UNMAP, &priv->gmap->flags))
+ folio = phys_to_folio(crste_origin_large(old));
+ /* No races should happen because kvm->mmu_lock is held in write mode */
+ KVM_BUG_ON(!gmap_crstep_xchg_atomic(priv->gmap, crstep, old, _CRSTE_EMPTY(old.h.tt), gfn),
+ priv->gmap->kvm);
+ if (folio)
+ uv_convert_from_secure_folio(folio);
return 0;
}
@@ -474,23 +481,24 @@ static long _crste_test_and_clear_softdirty(union crste *table, gfn_t gfn, gfn_t
if (fatal_signal_pending(current))
return 1;
- crste = READ_ONCE(*table);
- if (!crste.h.fc)
- return 0;
- if (crste.h.p && !crste.s.fc1.sd)
- return 0;
+ do {
+ crste = READ_ONCE(*table);
+ if (!crste.h.fc)
+ return 0;
+ if (crste.h.p && !crste.s.fc1.sd)
+ return 0;
- /*
- * If this large page contains one or more prefixes of vCPUs that are
- * currently running, do not reset the protection, leave it marked as
- * dirty.
- */
- if (!crste.s.fc1.prefix_notif || gmap_mkold_prefix(gmap, gfn, end)) {
+ /*
+ * If this large page contains one or more prefixes of vCPUs that are
+ * currently running, do not reset the protection, leave it marked as
+ * dirty.
+ */
+ if (crste.s.fc1.prefix_notif && !gmap_mkold_prefix(gmap, gfn, end))
+ break;
new = crste;
new.h.p = 1;
new.s.fc1.sd = 0;
- gmap_crstep_xchg(gmap, table, new, gfn);
- }
+ } while (!gmap_crstep_xchg_atomic(gmap, table, crste, new, gfn));
for ( ; gfn < end; gfn++)
mark_page_dirty(gmap->kvm, gfn);
@@ -646,8 +654,8 @@ int gmap_link(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, struct guest_fau
static int gmap_ucas_map_one(struct kvm_s390_mmu_cache *mc, struct gmap *gmap,
gfn_t p_gfn, gfn_t c_gfn, bool force_alloc)
{
+ union crste newcrste, oldcrste;
struct page_table *pt;
- union crste newcrste;
union crste *crstep;
union pte *ptep;
int rc;
@@ -673,7 +681,11 @@ static int gmap_ucas_map_one(struct kvm_s390_mmu_cache *mc, struct gmap *gmap,
&crstep, &ptep);
if (rc)
return rc;
- dat_crstep_xchg(crstep, newcrste, c_gfn, gmap->asce);
+ do {
+ oldcrste = READ_ONCE(*crstep);
+ if (oldcrste.val == newcrste.val)
+ break;
+ } while (!dat_crstep_xchg_atomic(crstep, oldcrste, newcrste, c_gfn, gmap->asce));
return 0;
}
@@ -777,8 +789,10 @@ static void gmap_ucas_unmap_one(struct gmap *gmap, gfn_t c_gfn)
int rc;
rc = dat_entry_walk(NULL, c_gfn, gmap->asce, 0, TABLE_TYPE_SEGMENT, &crstep, &ptep);
- if (!rc)
- dat_crstep_xchg(crstep, _PMD_EMPTY, c_gfn, gmap->asce);
+ if (rc)
+ return;
+ while (!dat_crstep_xchg_atomic(crstep, READ_ONCE(*crstep), _PMD_EMPTY, c_gfn, gmap->asce))
+ ;
}
void gmap_ucas_unmap(struct gmap *gmap, gfn_t c_gfn, unsigned long count)
@@ -1017,8 +1031,8 @@ static void gmap_unshadow_level(struct gmap *sg, gfn_t r_gfn, int level)
dat_ptep_xchg(ptep, _PTE_EMPTY, r_gfn, sg->asce, uses_skeys(sg));
return;
}
- crste = READ_ONCE(*crstep);
- dat_crstep_clear(crstep, r_gfn, sg->asce);
+
+ crste = dat_crstep_clear_atomic(crstep, r_gfn, sg->asce);
if (crste_leaf(crste) || crste.h.i)
return;
if (is_pmd(crste))
diff --git a/arch/s390/kvm/gmap.h b/arch/s390/kvm/gmap.h
index ccb5cd751e31..19379d162777 100644
--- a/arch/s390/kvm/gmap.h
+++ b/arch/s390/kvm/gmap.h
@@ -194,35 +194,37 @@ static inline union pgste gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, uni
return _gmap_ptep_xchg(gmap, ptep, newpte, pgste, gfn, true);
}
-static inline void _gmap_crstep_xchg(struct gmap *gmap, union crste *crstep, union crste ne,
- gfn_t gfn, bool needs_lock)
+static inline bool __must_check _gmap_crstep_xchg_atomic(struct gmap *gmap, union crste *crstep,
+ union crste oldcrste, union crste newcrste,
+ gfn_t gfn, bool needs_lock)
{
- unsigned long align = 8 + (is_pmd(*crstep) ? 0 : 11);
+ unsigned long align = is_pmd(*crstep) ? _PAGE_ENTRIES : _PAGE_ENTRIES * _CRST_ENTRIES;
lockdep_assert_held(&gmap->kvm->mmu_lock);
if (!needs_lock)
lockdep_assert_held(&gmap->children_lock);
gfn = ALIGN_DOWN(gfn, align);
- if (crste_prefix(*crstep) && (ne.h.p || ne.h.i || !crste_prefix(ne))) {
- ne.s.fc1.prefix_notif = 0;
+ if (crste_prefix(oldcrste) && (newcrste.h.p || newcrste.h.i || !crste_prefix(newcrste))) {
+ newcrste.s.fc1.prefix_notif = 0;
gmap_unmap_prefix(gmap, gfn, gfn + align);
}
- if (crste_leaf(*crstep) && crstep->s.fc1.vsie_notif &&
- (ne.h.p || ne.h.i || !ne.s.fc1.vsie_notif)) {
- ne.s.fc1.vsie_notif = 0;
+ if (crste_leaf(oldcrste) && oldcrste.s.fc1.vsie_notif &&
+ (newcrste.h.p || newcrste.h.i || !newcrste.s.fc1.vsie_notif)) {
+ newcrste.s.fc1.vsie_notif = 0;
if (needs_lock)
gmap_handle_vsie_unshadow_event(gmap, gfn);
else
_gmap_handle_vsie_unshadow_event(gmap, gfn);
}
- dat_crstep_xchg(crstep, ne, gfn, gmap->asce);
+ return dat_crstep_xchg_atomic(crstep, oldcrste, newcrste, gfn, gmap->asce);
}
-static inline void gmap_crstep_xchg(struct gmap *gmap, union crste *crstep, union crste ne,
- gfn_t gfn)
+static inline bool __must_check gmap_crstep_xchg_atomic(struct gmap *gmap, union crste *crstep,
+ union crste oldcrste, union crste newcrste,
+ gfn_t gfn)
{
- return _gmap_crstep_xchg(gmap, crstep, ne, gfn, true);
+ return _gmap_crstep_xchg_atomic(gmap, crstep, oldcrste, newcrste, gfn, true);
}
/**
--
2.53.0
^ permalink raw reply related [flat|nested] 11+ messages in thread* [PATCH v3 3/9] KVM: s390: vsie: Fix check for pre-existing shadow mapping
2026-03-24 17:42 [PATCH v3 0/9] KVM: s390: More memory management fixes Claudio Imbrenda
2026-03-24 17:42 ` [PATCH v3 1/9] KVM: s390: vsie: Fix dat_split_ste() Claudio Imbrenda
2026-03-24 17:42 ` [PATCH v3 2/9] KVM: s390: Remove non-atomic dat_crstep_xchg() Claudio Imbrenda
@ 2026-03-24 17:42 ` Claudio Imbrenda
2026-03-24 17:42 ` [PATCH v3 4/9] KVM: s390: vsie: Fix nested guest memory shadowing Claudio Imbrenda
` (5 subsequent siblings)
8 siblings, 0 replies; 11+ messages in thread
From: Claudio Imbrenda @ 2026-03-24 17:42 UTC (permalink / raw)
To: kvm
Cc: linux-kernel, linux-s390, borntraeger, frankja, nrb, seiden, gra,
schlameuss, hca, david
When shadowing a nested guest, a check is performed and no shadowing is
attempted if the nested guest is already shadowed.
The existing check was incomplete; fix it by also checking whether the
leaf DAT table entry in the existing shadow gmap has the same protection
as the one specified in the guest DAT entry.
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Fixes: e38c884df921 ("KVM: s390: Switch to new gmap")
Reviewed-by: Steffen Eiden <seiden@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
---
arch/s390/kvm/gaccess.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 4ee862424ca0..8fd690255e1b 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -1506,8 +1506,9 @@ static int _gaccess_do_shadow(struct kvm_s390_mmu_cache *mc, struct gmap *sg,
if (rc)
return rc;
- /* A race occourred. The shadow mapping is already valid, nothing to do */
- if ((ptep && !ptep->h.i) || (!ptep && crste_leaf(*table)))
+ /* A race occurred. The shadow mapping is already valid, nothing to do */
+ if ((ptep && !ptep->h.i && ptep->h.p == w->p) ||
+ (!ptep && crste_leaf(*table) && !table->h.i && table->h.p == w->p))
return 0;
gl = get_level(table, ptep);
--
2.53.0
^ permalink raw reply related [flat|nested] 11+ messages in thread* [PATCH v3 4/9] KVM: s390: vsie: Fix nested guest memory shadowing
2026-03-24 17:42 [PATCH v3 0/9] KVM: s390: More memory management fixes Claudio Imbrenda
` (2 preceding siblings ...)
2026-03-24 17:42 ` [PATCH v3 3/9] KVM: s390: vsie: Fix check for pre-existing shadow mapping Claudio Imbrenda
@ 2026-03-24 17:42 ` Claudio Imbrenda
2026-03-24 17:42 ` [PATCH v3 5/9] KVM: s390: Fix gmap_link() Claudio Imbrenda
` (4 subsequent siblings)
8 siblings, 0 replies; 11+ messages in thread
From: Claudio Imbrenda @ 2026-03-24 17:42 UTC (permalink / raw)
To: kvm
Cc: linux-kernel, linux-s390, borntraeger, frankja, nrb, seiden, gra,
schlameuss, hca, david
Fix _do_shadow_pte() and _do_shadow_crste() to properly mark the guest
page as dirty if the shadow mapping is writeable but the guest mapping
was write-protected and becomes writeable.
Fix _do_shadow_pte() to use the correct pointer (guest pte instead of
nested guest) to set up the new pte.
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Fixes: e38c884df921 ("KVM: s390: Switch to new gmap")
---
arch/s390/kvm/gaccess.c | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 8fd690255e1b..70703bf866db 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -1437,9 +1437,11 @@ static int _do_shadow_pte(struct gmap *sg, gpa_t raddr, union pte *ptep_h, union
if (!pgste_get_trylock(ptep_h, &pgste))
return -EAGAIN;
newpte = _pte(f->pfn, f->writable, !p, 0);
- newpte.s.d |= ptep->s.d;
- newpte.s.sd |= ptep->s.sd;
- newpte.h.p &= ptep->h.p;
+ newpte.s.d |= ptep_h->s.d;
+ newpte.s.sd |= ptep_h->s.sd;
+ newpte.h.p &= ptep_h->h.p;
+ if (!ptep_h->s.d && newpte.s.d)
+ SetPageDirty(pfn_to_page(newpte.h.pfra));
pgste = _gmap_ptep_xchg(sg->parent, ptep_h, newpte, pgste, f->gfn, false);
pgste.vsie_notif = 1;
pgste_set_unlock(ptep_h, pgste);
@@ -1477,6 +1479,8 @@ static int _do_shadow_crste(struct gmap *sg, gpa_t raddr, union crste *host, uni
newcrste.h.p &= oldcrste.h.p;
newcrste.s.fc1.vsie_notif = 1;
newcrste.s.fc1.prefix_notif = oldcrste.s.fc1.prefix_notif;
+ if (!oldcrste.s.fc1.d && newcrste.s.fc1.d)
+ SetPageDirty(phys_to_page(crste_origin_large(newcrste)));
} while (!_gmap_crstep_xchg_atomic(sg->parent, host, oldcrste, newcrste, f->gfn, false));
newcrste = _crste_fc1(f->pfn, oldcrste.h.tt, 0, !p);
--
2.53.0
^ permalink raw reply related [flat|nested] 11+ messages in thread* [PATCH v3 5/9] KVM: s390: Fix gmap_link()
2026-03-24 17:42 [PATCH v3 0/9] KVM: s390: More memory management fixes Claudio Imbrenda
` (3 preceding siblings ...)
2026-03-24 17:42 ` [PATCH v3 4/9] KVM: s390: vsie: Fix nested guest memory shadowing Claudio Imbrenda
@ 2026-03-24 17:42 ` Claudio Imbrenda
2026-03-24 17:42 ` [PATCH v3 6/9] KVM: s390: vsie: Fix refcount overflow for shadow gmaps Claudio Imbrenda
` (3 subsequent siblings)
8 siblings, 0 replies; 11+ messages in thread
From: Claudio Imbrenda @ 2026-03-24 17:42 UTC (permalink / raw)
To: kvm
Cc: linux-kernel, linux-s390, borntraeger, frankja, nrb, seiden, gra,
schlameuss, hca, david
The slow path of the fault handler ultimately called gmap_link(), which
assumed the fault was a major fault, and blindly called dat_link().
In case of minor faults, things were not always handled properly; in
particular the prefix and vsie marker bits were ignored.
Move dat_link() into gmap.c, renaming it accordingly. Once moved, the
new _gmap_link() function will be able to correctly honour the prefix
and vsie markers.
This will cause spurious unshadows in some uncommon cases.
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Fixes: 94fd9b16cc67 ("KVM: s390: KVM page table management functions: lifecycle management")
Fixes: a2c17f9270cc ("KVM: s390: New gmap code")
Reviewed-by: Steffen Eiden <seiden@linux.ibm.com>
---
arch/s390/kvm/dat.c | 48 -------------------------------------
arch/s390/kvm/dat.h | 2 --
arch/s390/kvm/gmap.c | 56 ++++++++++++++++++++++++++++++++++++++++----
3 files changed, 52 insertions(+), 54 deletions(-)
diff --git a/arch/s390/kvm/dat.c b/arch/s390/kvm/dat.c
index 8ba80b0b4698..a4f482bd3077 100644
--- a/arch/s390/kvm/dat.c
+++ b/arch/s390/kvm/dat.c
@@ -997,54 +997,6 @@ bool dat_test_age_gfn(union asce asce, gfn_t start, gfn_t end)
return _dat_walk_gfn_range(start, end, asce, &test_age_ops, 0, NULL) > 0;
}
-int dat_link(struct kvm_s390_mmu_cache *mc, union asce asce, int level,
- bool uses_skeys, struct guest_fault *f)
-{
- union crste oldval, newval;
- union pte newpte, oldpte;
- union pgste pgste;
- int rc = 0;
-
- rc = dat_entry_walk(mc, f->gfn, asce, DAT_WALK_ALLOC_CONTINUE, level, &f->crstep, &f->ptep);
- if (rc == -EINVAL || rc == -ENOMEM)
- return rc;
- if (rc)
- return -EAGAIN;
-
- if (WARN_ON_ONCE(unlikely(get_level(f->crstep, f->ptep) > level)))
- return -EINVAL;
-
- if (f->ptep) {
- pgste = pgste_get_lock(f->ptep);
- oldpte = *f->ptep;
- newpte = _pte(f->pfn, f->writable, f->write_attempt | oldpte.s.d, !f->page);
- newpte.s.sd = oldpte.s.sd;
- oldpte.s.sd = 0;
- if (oldpte.val == _PTE_EMPTY.val || oldpte.h.pfra == f->pfn) {
- pgste = __dat_ptep_xchg(f->ptep, pgste, newpte, f->gfn, asce, uses_skeys);
- if (f->callback)
- f->callback(f);
- } else {
- rc = -EAGAIN;
- }
- pgste_set_unlock(f->ptep, pgste);
- } else {
- oldval = READ_ONCE(*f->crstep);
- newval = _crste_fc1(f->pfn, oldval.h.tt, f->writable,
- f->write_attempt | oldval.s.fc1.d);
- newval.s.fc1.sd = oldval.s.fc1.sd;
- if (oldval.val != _CRSTE_EMPTY(oldval.h.tt).val &&
- crste_origin_large(oldval) != crste_origin_large(newval))
- return -EAGAIN;
- if (!dat_crstep_xchg_atomic(f->crstep, oldval, newval, f->gfn, asce))
- return -EAGAIN;
- if (f->callback)
- f->callback(f);
- }
-
- return rc;
-}
-
static long dat_set_pn_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
union crste newcrste, oldcrste;
diff --git a/arch/s390/kvm/dat.h b/arch/s390/kvm/dat.h
index 22dafc775335..efedcf96110c 100644
--- a/arch/s390/kvm/dat.h
+++ b/arch/s390/kvm/dat.h
@@ -540,8 +540,6 @@ int dat_set_slot(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t start, gf
u16 type, u16 param);
int dat_set_prefix_notif_bit(union asce asce, gfn_t gfn);
bool dat_test_age_gfn(union asce asce, gfn_t start, gfn_t end);
-int dat_link(struct kvm_s390_mmu_cache *mc, union asce asce, int level,
- bool uses_skeys, struct guest_fault *f);
int dat_perform_essa(union asce asce, gfn_t gfn, int orc, union essa_state *state, bool *dirty);
long dat_reset_cmma(union asce asce, gfn_t start_gfn);
diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c
index 956be4c01797..03e15b5e0b9a 100644
--- a/arch/s390/kvm/gmap.c
+++ b/arch/s390/kvm/gmap.c
@@ -631,10 +631,60 @@ static inline bool gmap_1m_allowed(struct gmap *gmap, gfn_t gfn)
return test_bit(GMAP_FLAG_ALLOW_HPAGE_1M, &gmap->flags);
}
+static int _gmap_link(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, int level,
+ struct guest_fault *f)
+{
+ union crste oldval, newval;
+ union pte newpte, oldpte;
+ union pgste pgste;
+ int rc = 0;
+
+ rc = dat_entry_walk(mc, f->gfn, gmap->asce, DAT_WALK_ALLOC_CONTINUE, level,
+ &f->crstep, &f->ptep);
+ if (rc == -ENOMEM)
+ return rc;
+ if (KVM_BUG_ON(rc == -EINVAL, gmap->kvm))
+ return rc;
+ if (rc)
+ return -EAGAIN;
+ if (KVM_BUG_ON(get_level(f->crstep, f->ptep) > level, gmap->kvm))
+ return -EINVAL;
+
+ if (f->ptep) {
+ pgste = pgste_get_lock(f->ptep);
+ oldpte = *f->ptep;
+ newpte = _pte(f->pfn, f->writable, f->write_attempt | oldpte.s.d, !f->page);
+ newpte.s.sd = oldpte.s.sd;
+ oldpte.s.sd = 0;
+ if (oldpte.val == _PTE_EMPTY.val || oldpte.h.pfra == f->pfn) {
+ pgste = gmap_ptep_xchg(gmap, f->ptep, newpte, pgste, f->gfn);
+ if (f->callback)
+ f->callback(f);
+ } else {
+ rc = -EAGAIN;
+ }
+ pgste_set_unlock(f->ptep, pgste);
+ } else {
+ do {
+ oldval = READ_ONCE(*f->crstep);
+ newval = _crste_fc1(f->pfn, oldval.h.tt, f->writable,
+ f->write_attempt | oldval.s.fc1.d);
+ newval.s.fc1.sd = oldval.s.fc1.sd;
+ if (oldval.val != _CRSTE_EMPTY(oldval.h.tt).val &&
+ crste_origin_large(oldval) != crste_origin_large(newval))
+ return -EAGAIN;
+ } while (!gmap_crstep_xchg_atomic(gmap, f->crstep, oldval, newval, f->gfn));
+ if (f->callback)
+ f->callback(f);
+ }
+
+ return rc;
+}
+
int gmap_link(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, struct guest_fault *f)
{
unsigned int order;
- int rc, level;
+ int level;
lockdep_assert_held(&gmap->kvm->mmu_lock);
@@ -646,9 +696,7 @@ int gmap_link(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, struct guest_fau
else if (order >= get_order(_SEGMENT_SIZE) && gmap_1m_allowed(gmap, f->gfn))
level = TABLE_TYPE_SEGMENT;
}
- rc = dat_link(mc, gmap->asce, level, uses_skeys(gmap), f);
- KVM_BUG_ON(rc == -EINVAL, gmap->kvm);
- return rc;
+ return _gmap_link(mc, gmap, level, f);
}
static int gmap_ucas_map_one(struct kvm_s390_mmu_cache *mc, struct gmap *gmap,
--
2.53.0
^ permalink raw reply related [flat|nested] 11+ messages in thread* [PATCH v3 6/9] KVM: s390: vsie: Fix refcount overflow for shadow gmaps
2026-03-24 17:42 [PATCH v3 0/9] KVM: s390: More memory management fixes Claudio Imbrenda
` (4 preceding siblings ...)
2026-03-24 17:42 ` [PATCH v3 5/9] KVM: s390: Fix gmap_link() Claudio Imbrenda
@ 2026-03-24 17:42 ` Claudio Imbrenda
2026-03-24 17:42 ` [PATCH v3 7/9] KVM: s390: vsie: Fix unshadowing while shadowing Claudio Imbrenda
` (2 subsequent siblings)
8 siblings, 0 replies; 11+ messages in thread
From: Claudio Imbrenda @ 2026-03-24 17:42 UTC (permalink / raw)
To: kvm
Cc: linux-kernel, linux-s390, borntraeger, frankja, nrb, seiden, gra,
schlameuss, hca, david
In most cases gmap_put() was not called when it should have been.
Add the missing gmap_put() in vsie_run().
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Fixes: e38c884df921 ("KVM: s390: Switch to new gmap")
Reviewed-by: Steffen Eiden <seiden@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
---
arch/s390/kvm/vsie.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 0330829b4046..72895dddc39a 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -1328,7 +1328,7 @@ static void unregister_shadow_scb(struct kvm_vcpu *vcpu)
static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
- struct gmap *sg;
+ struct gmap *sg = NULL;
int rc = 0;
while (1) {
@@ -1368,6 +1368,8 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
sg = gmap_put(sg);
cond_resched();
}
+ if (sg)
+ sg = gmap_put(sg);
if (rc == -EFAULT) {
/*
--
2.53.0
^ permalink raw reply related [flat|nested] 11+ messages in thread* [PATCH v3 7/9] KVM: s390: vsie: Fix unshadowing while shadowing
2026-03-24 17:42 [PATCH v3 0/9] KVM: s390: More memory management fixes Claudio Imbrenda
` (5 preceding siblings ...)
2026-03-24 17:42 ` [PATCH v3 6/9] KVM: s390: vsie: Fix refcount overflow for shadow gmaps Claudio Imbrenda
@ 2026-03-24 17:42 ` Claudio Imbrenda
2026-03-24 17:43 ` [PATCH v3 8/9] KVM: s390: vsie: Fix guest page tables protection Claudio Imbrenda
2026-03-24 17:43 ` [PATCH v3 9/9] KVM: s390: Fix KVM_S390_VCPU_FAULT ioctl Claudio Imbrenda
8 siblings, 0 replies; 11+ messages in thread
From: Claudio Imbrenda @ 2026-03-24 17:42 UTC (permalink / raw)
To: kvm
Cc: linux-kernel, linux-s390, borntraeger, frankja, nrb, seiden, gra,
schlameuss, hca, david
If shadowing causes the shadow gmap to get unshadowed, exit early to
prevent an attempt to dereference the parent pointer, which at this
point is NULL.
Opportunistically add some more checks to prevent NULL parents.
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Fixes: a2c17f9270cc ("KVM: s390: New gmap code")
Fixes: e5f98a6899bd ("KVM: s390: Add some helper functions needed for vSIE")
Fixes: e38c884df921 ("KVM: s390: Switch to new gmap")
---
arch/s390/kvm/gaccess.c | 5 +++++
arch/s390/kvm/gmap.c | 11 ++++++++++-
2 files changed, 15 insertions(+), 1 deletion(-)
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 70703bf866db..0ac2d775d4c0 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -1472,6 +1472,9 @@ static int _do_shadow_crste(struct gmap *sg, gpa_t raddr, union crste *host, uni
return rc;
do {
+ /* _gmap_crstep_xchg_atomic() could have unshadowed this shadow gmap */
+ if (!sg->parent)
+ return -EAGAIN;
oldcrste = READ_ONCE(*host);
newcrste = _crste_fc1(f->pfn, oldcrste.h.tt, f->writable, !p);
newcrste.s.fc1.d |= oldcrste.s.fc1.d;
@@ -1526,6 +1529,8 @@ static int _gaccess_do_shadow(struct kvm_s390_mmu_cache *mc, struct gmap *sg,
entries[i - 1].pfn, i, entries[i - 1].writable);
if (rc)
return rc;
+ if (!sg->parent)
+ return -EAGAIN;
}
rc = dat_entry_walk(NULL, entries[LEVEL_MEM].gfn, sg->parent->asce, DAT_WALK_LEAF,
diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c
index 03e15b5e0b9a..fb9cc40e919c 100644
--- a/arch/s390/kvm/gmap.c
+++ b/arch/s390/kvm/gmap.c
@@ -1163,6 +1163,7 @@ struct gmap_protect_asce_top_level {
static inline int __gmap_protect_asce_top_level(struct kvm_s390_mmu_cache *mc, struct gmap *sg,
struct gmap_protect_asce_top_level *context)
{
+ struct gmap *parent;
int rc, i;
guard(write_lock)(&sg->kvm->mmu_lock);
@@ -1170,7 +1171,12 @@ static inline int __gmap_protect_asce_top_level(struct kvm_s390_mmu_cache *mc, s
if (kvm_s390_array_needs_retry_safe(sg->kvm, context->seq, context->f))
return -EAGAIN;
- scoped_guard(spinlock, &sg->parent->children_lock) {
+ parent = READ_ONCE(sg->parent);
+ if (!parent)
+ return -EAGAIN;
+ scoped_guard(spinlock, &parent->children_lock) {
+ if (READ_ONCE(sg->parent) != parent)
+ return -EAGAIN;
for (i = 0; i < CRST_TABLE_PAGES; i++) {
if (!context->f[i].valid)
continue;
@@ -1253,6 +1259,9 @@ struct gmap *gmap_create_shadow(struct kvm_s390_mmu_cache *mc, struct gmap *pare
struct gmap *sg, *new;
int rc;
+ if (WARN_ON(!parent))
+ return ERR_PTR(-EINVAL);
+
scoped_guard(spinlock, &parent->children_lock) {
sg = gmap_find_shadow(parent, asce, edat_level);
if (sg) {
--
2.53.0
^ permalink raw reply related [flat|nested] 11+ messages in thread* [PATCH v3 8/9] KVM: s390: vsie: Fix guest page tables protection
2026-03-24 17:42 [PATCH v3 0/9] KVM: s390: More memory management fixes Claudio Imbrenda
` (6 preceding siblings ...)
2026-03-24 17:42 ` [PATCH v3 7/9] KVM: s390: vsie: Fix unshadowing while shadowing Claudio Imbrenda
@ 2026-03-24 17:43 ` Claudio Imbrenda
2026-03-25 12:30 ` kernel test robot
2026-03-24 17:43 ` [PATCH v3 9/9] KVM: s390: Fix KVM_S390_VCPU_FAULT ioctl Claudio Imbrenda
8 siblings, 1 reply; 11+ messages in thread
From: Claudio Imbrenda @ 2026-03-24 17:43 UTC (permalink / raw)
To: kvm
Cc: linux-kernel, linux-s390, borntraeger, frankja, nrb, seiden, gra,
schlameuss, hca, david
When shadowing, the guest page tables are write-protected, in order to
trap changes and properly unshadow the shadow mapping for the nested
guest. Already shadowed levels are skipped, so that only the needed
levels are write protected.
Currently the levels that get write protected are exactly one level too
deep: the last level (nested guest memory) gets protected in the wrong
way, and will be protected again correctly a few lines afterwards; most
importantly, the highest non-shadowed level does *not* get write
protected.
Moreover, if the nested guest is running in a real address space, there
are no DAT tables to shadow.
Write protect the correct levels, so that all the levels that need to
be protected are protected, and avoid double protecting the last level;
skip attempting to shadow the DAT tables when the nested guest is
running in a real address space.
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Fixes: e38c884df921 ("KVM: s390: Switch to new gmap")
Tested-by: Christian Borntraeger <borntraeger@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
---
arch/s390/kvm/gaccess.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 0ac2d775d4c0..93a757749a6e 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -1518,6 +1518,13 @@ static int _gaccess_do_shadow(struct kvm_s390_mmu_cache *mc, struct gmap *sg,
(!ptep && crste_leaf(*table) && !table->h.i && table->h.p == w->p))
return 0;
+ /* In case of a real address space */
+ if (w->level <= LEVEL_MEM) {
+ l = TABLE_TYPE_PAGE_TABLE;
+ hl = TABLE_TYPE_REGION1;
+ goto real_address_space;
+ }
+
gl = get_level(table, ptep);
/*
@@ -1525,8 +1532,8 @@ static int _gaccess_do_shadow(struct kvm_s390_mmu_cache *mc, struct gmap *sg,
* only the page containing the entry, not the whole table.
*/
for (i = gl ; i >= w->level; i--) {
- rc = gmap_protect_rmap(mc, sg, entries[i - 1].gfn, gpa_to_gfn(saddr),
- entries[i - 1].pfn, i, entries[i - 1].writable);
+ rc = gmap_protect_rmap(mc, sg, entries[i].gfn, gpa_to_gfn(saddr),
+ entries[i].pfn, i + 1, entries[i].writable);
if (rc)
return rc;
if (!sg->parent)
@@ -1542,6 +1549,7 @@ static int _gaccess_do_shadow(struct kvm_s390_mmu_cache *mc, struct gmap *sg,
/* Get the smallest granularity */
l = min3(gl, hl, w->level);
+real_address_space:
flags = DAT_WALK_SPLIT_ALLOC | (uses_skeys(sg->parent) ? DAT_WALK_USES_SKEYS : 0);
/* If necessary, create the shadow mapping */
if (l < gl) {
--
2.53.0
^ permalink raw reply related [flat|nested] 11+ messages in thread

* Re: [PATCH v3 8/9] KVM: s390: vsie: Fix guest page tables protection
2026-03-24 17:43 ` [PATCH v3 8/9] KVM: s390: vsie: Fix guest page tables protection Claudio Imbrenda
@ 2026-03-25 12:30 ` kernel test robot
0 siblings, 0 replies; 11+ messages in thread
From: kernel test robot @ 2026-03-25 12:30 UTC (permalink / raw)
To: Claudio Imbrenda, kvm
Cc: llvm, oe-kbuild-all, linux-kernel, linux-s390, borntraeger,
frankja, nrb, seiden, gra, schlameuss, hca, david
Hi Claudio,
kernel test robot noticed the following build warnings:
[auto build test WARNING on kvm/queue]
[also build test WARNING on kvm/next linus/master v7.0-rc5 next-20260324]
[cannot apply to kvms390/next kvm/linux-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Claudio-Imbrenda/KVM-s390-vsie-Fix-dat_split_ste/20260325-092851
base: https://git.kernel.org/pub/scm/virt/kvm/kvm.git queue
patch link: https://lore.kernel.org/r/20260324174301.232921-9-imbrenda%40linux.ibm.com
patch subject: [PATCH v3 8/9] KVM: s390: vsie: Fix guest page tables protection
config: s390-defconfig (https://download.01.org/0day-ci/archive/20260325/202603252045.f3Juk9sU-lkp@intel.com/config)
compiler: clang version 23.0.0git (https://github.com/llvm/llvm-project 054e11d1a17e5ba88bb1a8ef32fad3346e80b186)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260325/202603252045.f3Juk9sU-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202603252045.f3Juk9sU-lkp@intel.com/
All warnings (new ones prefixed by >>):
>> arch/s390/kvm/gaccess.c:1522:6: warning: variable 'gl' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized]
1522 | if (w->level <= LEVEL_MEM) {
| ^~~~~~~~~~~~~~~~~~~~~
arch/s390/kvm/gaccess.c:1555:10: note: uninitialized use occurs here
1555 | if (l < gl) {
| ^~
arch/s390/kvm/gaccess.c:1522:2: note: remove the 'if' if its condition is always false
1522 | if (w->level <= LEVEL_MEM) {
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
1523 | l = TABLE_TYPE_PAGE_TABLE;
| ~~~~~~~~~~~~~~~~~~~~~~~~~~
1524 | hl = TABLE_TYPE_REGION1;
| ~~~~~~~~~~~~~~~~~~~~~~~~
1525 | goto real_address_space;
| ~~~~~~~~~~~~~~~~~~~~~~~~
1526 | }
| ~
arch/s390/kvm/gaccess.c:1500:22: note: initialize the variable 'gl' to silence this warning
1500 | int flags, i, hl, gl, l, rc;
| ^
| = 0
1 warning generated.
vim +1522 arch/s390/kvm/gaccess.c
1495
1496 static int _gaccess_do_shadow(struct kvm_s390_mmu_cache *mc, struct gmap *sg,
1497 unsigned long saddr, struct pgtwalk *w)
1498 {
1499 struct guest_fault *entries;
1500 int flags, i, hl, gl, l, rc;
1501 union crste *table, *host;
1502 union pte *ptep, *ptep_h;
1503
1504 lockdep_assert_held(&sg->kvm->mmu_lock);
1505 lockdep_assert_held(&sg->parent->children_lock);
1506
1507 entries = get_entries(w);
1508 ptep_h = NULL;
1509 ptep = NULL;
1510
1511 rc = dat_entry_walk(NULL, gpa_to_gfn(saddr), sg->asce, DAT_WALK_ANY, TABLE_TYPE_PAGE_TABLE,
1512 &table, &ptep);
1513 if (rc)
1514 return rc;
1515
1516 /* A race occurred. The shadow mapping is already valid, nothing to do */
1517 if ((ptep && !ptep->h.i && ptep->h.p == w->p) ||
1518 (!ptep && crste_leaf(*table) && !table->h.i && table->h.p == w->p))
1519 return 0;
1520
1521 /* In case of a real address space */
> 1522 if (w->level <= LEVEL_MEM) {
1523 l = TABLE_TYPE_PAGE_TABLE;
1524 hl = TABLE_TYPE_REGION1;
1525 goto real_address_space;
1526 }
1527
1528 gl = get_level(table, ptep);
1529
1530 /*
1531 * Skip levels that are already protected. For each level, protect
1532 * only the page containing the entry, not the whole table.
1533 */
1534 for (i = gl ; i >= w->level; i--) {
1535 rc = gmap_protect_rmap(mc, sg, entries[i].gfn, gpa_to_gfn(saddr),
1536 entries[i].pfn, i + 1, entries[i].writable);
1537 if (rc)
1538 return rc;
1539 if (!sg->parent)
1540 return -EAGAIN;
1541 }
1542
1543 rc = dat_entry_walk(NULL, entries[LEVEL_MEM].gfn, sg->parent->asce, DAT_WALK_LEAF,
1544 TABLE_TYPE_PAGE_TABLE, &host, &ptep_h);
1545 if (rc)
1546 return rc;
1547
1548 hl = get_level(host, ptep_h);
1549 /* Get the smallest granularity */
1550 l = min3(gl, hl, w->level);
1551
1552 real_address_space:
1553 flags = DAT_WALK_SPLIT_ALLOC | (uses_skeys(sg->parent) ? DAT_WALK_USES_SKEYS : 0);
1554 /* If necessary, create the shadow mapping */
1555 if (l < gl) {
1556 rc = dat_entry_walk(mc, gpa_to_gfn(saddr), sg->asce, flags, l, &table, &ptep);
1557 if (rc)
1558 return rc;
1559 }
1560 if (l < hl) {
1561 rc = dat_entry_walk(mc, entries[LEVEL_MEM].gfn, sg->parent->asce,
1562 flags, l, &host, &ptep_h);
1563 if (rc)
1564 return rc;
1565 }
1566
1567 if (KVM_BUG_ON(l > TABLE_TYPE_REGION3, sg->kvm))
1568 return -EFAULT;
1569 if (l == TABLE_TYPE_PAGE_TABLE)
1570 return _do_shadow_pte(sg, saddr, ptep_h, ptep, entries + LEVEL_MEM, w->p);
1571 return _do_shadow_crste(sg, saddr, host, table, entries + LEVEL_MEM, w->p);
1572 }
1573
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v3 9/9] KVM: s390: Fix KVM_S390_VCPU_FAULT ioctl
2026-03-24 17:42 [PATCH v3 0/9] KVM: s390: More memory management fixes Claudio Imbrenda
` (7 preceding siblings ...)
2026-03-24 17:43 ` [PATCH v3 8/9] KVM: s390: vsie: Fix guest page tables protection Claudio Imbrenda
@ 2026-03-24 17:43 ` Claudio Imbrenda
8 siblings, 0 replies; 11+ messages in thread
From: Claudio Imbrenda @ 2026-03-24 17:43 UTC (permalink / raw)
To: kvm
Cc: linux-kernel, linux-s390, borntraeger, frankja, nrb, seiden, gra,
schlameuss, hca, david
A previous commit changed the behaviour of the KVM_S390_VCPU_FAULT
ioctl. The current (wrong) implementation will trigger a guest
addressing exception if the requested address lies outside of a
memslot, unless the VM is UCONTROL.
Restore the previous behaviour by open coding the fault-in logic.
Fixes: 3762e905ec2e ("KVM: s390: use __kvm_faultin_pfn()")
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Acked-by: Christian Borntraeger <borntraeger@linux.ibm.com>
Reviewed-by: Steffen Eiden <seiden@linux.ibm.com>
---
arch/s390/kvm/kvm-s390.c | 18 +++++++++++++++---
1 file changed, 15 insertions(+), 3 deletions(-)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index ebcb0ef8835e..62f04931b54d 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -5520,9 +5520,21 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
}
#endif
case KVM_S390_VCPU_FAULT: {
- idx = srcu_read_lock(&vcpu->kvm->srcu);
- r = vcpu_dat_fault_handler(vcpu, arg, 0);
- srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ gpa_t gaddr = arg;
+
+ scoped_guard(srcu, &vcpu->kvm->srcu) {
+ r = vcpu_ucontrol_translate(vcpu, &gaddr);
+ if (r)
+ break;
+
+ r = kvm_s390_faultin_gfn_simple(vcpu, NULL, gpa_to_gfn(gaddr), false);
+ if (r == PGM_ADDRESSING)
+ r = -EFAULT;
+ if (r <= 0)
+ break;
+ r = -EIO;
+ KVM_BUG_ON(r, vcpu->kvm);
+ }
break;
}
case KVM_ENABLE_CAP:
--
2.53.0
^ permalink raw reply related [flat|nested] 11+ messages in thread