public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: Claudio Imbrenda <imbrenda@linux.ibm.com>
To: kvm@vger.kernel.org
Cc: linux-s390@vger.kernel.org, borntraeger@de.ibm.com,
	frankja@linux.ibm.com, nsg@linux.ibm.com, nrb@linux.ibm.com,
	seiden@linux.ibm.com, schlameuss@linux.ibm.com,
	hca@linux.ibm.com, svens@linux.ibm.com, agordeev@linux.ibm.com,
	david@redhat.com, gerald.schaefer@linux.ibm.com
Subject: [PATCH v2 12/20] KVM: s390: KVM page table management functions: lifecycle management
Date: Wed, 10 Sep 2025 20:07:38 +0200	[thread overview]
Message-ID: <20250910180746.125776-13-imbrenda@linux.ibm.com> (raw)
In-Reply-To: <20250910180746.125776-1-imbrenda@linux.ibm.com>

Add page table management functions to be used for KVM guest (gmap)
page tables.

This patch adds functions to handle memslot creation and destruction,
additional per-pagetable data stored in the PGSTEs, mapping physical
addresses into the gmap, and marking address ranges as prefix.

Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
---
 arch/s390/kvm/dat.c | 227 ++++++++++++++++++++++++++++++++++++++++++++
 arch/s390/kvm/dat.h |  35 +++++++
 2 files changed, 262 insertions(+)

diff --git a/arch/s390/kvm/dat.c b/arch/s390/kvm/dat.c
index f626e8c37770..4249400a9d21 100644
--- a/arch/s390/kvm/dat.c
+++ b/arch/s390/kvm/dat.c
@@ -772,3 +772,230 @@ long dat_reset_skeys(union asce asce, gfn_t start)
 
 	return _dat_walk_gfn_range(start, asce_end(asce), asce, &ops, DAT_WALK_IGN_HOLES, NULL);
 }
+
+static long _dat_slot_pte(union pte *ptep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
+{
+	union crste dummy = { .val = (unsigned long)walk->priv };
+	union pte new_pte, pte = READ_ONCE(*ptep);
+
+	new_pte = _PTE_TOK(dummy.tok.type, dummy.tok.par);
+
+	/* Table entry already in the desired state */
+	if (pte.val == new_pte.val)
+		return 0;
+
+	dat_ptep_xchg(ptep, new_pte, gfn, walk->asce, false);
+	return 0;
+}
+
+static long _dat_slot_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
+{
+	union crste new_crste, crste = READ_ONCE(*crstep);
+
+	new_crste.val = (unsigned long)walk->priv;
+	new_crste.h.tt = crste.h.tt;
+
+	/* Table entry already in the desired state */
+	if (crste.val == new_crste.val)
+		return 0;
+
+	/* This table entry needs to be updated */
+	if (walk->start <= gfn && walk->end >= next) {
+		dat_crstep_xchg_atomic(crstep, crste, new_crste, gfn, walk->asce);
+		/* A lower level table was present, needs to be freed */
+		if (!crste.h.fc && !crste.h.i)
+			dat_free_level(dereference_crste(crste), true);
+		return 0;
+	}
+
+	/* A lower level table is present, things will be handled there */
+	if (!crste.h.fc && !crste.h.i)
+		return 0;
+	/* Split (install a lower level table), and handle things there */
+	return dat_split_crste(crstep, gfn, walk->asce);
+}
+
+static const struct dat_walk_ops dat_slot_ops = {
+	.pte_entry = _dat_slot_pte,
+	.crste_ops = { _dat_slot_crste, _dat_slot_crste, _dat_slot_crste, _dat_slot_crste, },
+};
+
+int dat_set_slot(union asce asce, gfn_t start, gfn_t end, u16 type, u16 param)
+{
+	unsigned long token = _CRSTE_TOK(0, type, param).val;
+
+	return _dat_walk_gfn_range(start, end, asce, &dat_slot_ops,
+				   DAT_WALK_IGN_HOLES | DAT_WALK_ANY, (void *)token);
+}
+
+unsigned long dat_get_ptval(struct page_table *table, struct ptval_param param)
+{
+	union pgste *pgstes = table->pgstes + param.offset;
+	struct page *page = virt_to_page(table);
+	unsigned long res = 0;
+
+	lock_page(page);
+	switch (param.len) {
+	case 3:
+		res = pgstes->val16;
+		pgstes++;
+		fallthrough;
+	case 2:
+		res = res << 16 | pgstes->val16;
+		pgstes++;
+		fallthrough;
+	case 1:
+		res = res << 16 | pgstes->val16;
+		pgstes++;
+		fallthrough;
+	case 0:
+		res = res << 16 | pgstes->val16;
+		break;
+	}
+	unlock_page(page);
+
+	return res;
+}
+
+void dat_set_ptval(struct page_table *table, struct ptval_param param, unsigned long val)
+{
+	union pgste *pgstes = table->pgstes + param.offset;
+	struct page *page = virt_to_page(table);
+
+	lock_page(page);
+	switch (param.len) {
+	case 3:
+		pgstes->val16 = val >> 48;
+		pgstes++;
+		fallthrough;
+	case 2:
+		pgstes->val16 = val >> 32;
+		pgstes++;
+		fallthrough;
+	case 1:
+		pgstes->val16 = val >> 16;
+		pgstes++;
+		fallthrough;
+	case 0:
+		pgstes->val16 = val;
+		break;
+	}
+	unlock_page(page);
+}
+
+static long _dat_test_young_pte(union pte *ptep, gfn_t start, gfn_t end, struct dat_walk *walk)
+{
+	return ptep->s.y;
+}
+
+static long _dat_test_young_crste(union crste *crstep, gfn_t start, gfn_t end,
+				  struct dat_walk *walk)
+{
+	return crstep->h.fc && crstep->s.fc1.y;
+}
+
+static const struct dat_walk_ops test_age_ops = {
+	.pte_entry = _dat_test_young_pte,
+	.pmd_entry = _dat_test_young_crste,
+	.pud_entry = _dat_test_young_crste,
+};
+
+/**
+ * dat_test_age_gfn() - test young
+ * @asce: the ASCE of the page tables to be walked
+ * @start: the first guest frame of the range whose young status is tested
+ * @end: the end of the guest frame range whose young status is tested
+ * Context: called by KVM common code with the kvm mmu write lock held
+ * Return: true if any page in the given range is young, otherwise false.
+ */
+bool dat_test_age_gfn(union asce asce, gfn_t start, gfn_t end)
+{
+	return _dat_walk_gfn_range(start, end, asce, &test_age_ops, 0, NULL) > 0;
+}
+
+int dat_link(kvm_pfn_t pfn, gfn_t gfn, union asce asce, int level, bool w, bool d, bool s, bool sk)
+{
+	union crste oldval, newval;
+	union pte newpte, oldpte;
+	union crste *crstep;
+	union pgste pgste;
+	union pte *ptep;
+	int rc = 0;
+
+	rc = dat_entry_walk(gfn, asce, DAT_WALK_ALLOC_CONTINUE, level, &crstep, &ptep);
+	if (rc)
+		return rc == -EINVAL ? rc : -EAGAIN;
+
+	if (WARN_ON_ONCE(unlikely(get_level(crstep, ptep) > level)))
+		return -EINVAL;
+
+	if (ptep)  {
+		pgste = pgste_get_lock(ptep);
+		oldpte = *ptep;
+		newpte = _pte(pfn, w, d | oldpte.s.d, s);
+		newpte.s.sd = oldpte.s.sd;
+		oldpte.s.sd = 0;
+		if (oldpte.val == _PTE_EMPTY.val || oldpte.h.pfra == pfn)
+			pgste = __dat_ptep_xchg(ptep, pgste, newpte, gfn, asce, sk);
+		else
+			rc = -EAGAIN;
+		pgste_set_unlock(ptep, pgste);
+	} else {
+		oldval = READ_ONCE(*crstep);
+		newval = _crste_fc1(pfn, oldval.h.tt, w, d | oldval.s.fc1.d);
+		newval.s.fc1.sd = oldval.s.fc1.sd;
+		if (oldval.val != _CRSTE_EMPTY(oldval.h.tt).val &&
+		    crste_origin_large(oldval) != crste_origin_large(newval))
+			return -EAGAIN;
+		if (!dat_crstep_xchg_atomic(crstep, oldval, newval, gfn, asce))
+			return -EAGAIN;
+	}
+
+	return rc;
+}
+
+static long dat_set_pn_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
+{
+	union crste crste = READ_ONCE(*crstep);
+	int *n = walk->priv;
+
+	if (!crste.h.fc || crste.h.i || crste.h.p)
+		return 0;
+
+	*n = 2;
+	if (crste.s.fc1.prefix_notif)
+		return 0;
+	crste.s.fc1.prefix_notif = 1;
+	dat_crstep_xchg(crstep, crste, gfn, walk->asce);
+	return 0;
+}
+
+static long dat_set_pn_pte(union pte *ptep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
+{
+	int *n = walk->priv;
+	union pgste pgste;
+
+	pgste = pgste_get_lock(ptep);
+	if (!ptep->h.i && !ptep->h.p) {
+		pgste.prefix_notif = 1;
+		*n += 1;
+	}
+	pgste_set_unlock(ptep, pgste);
+	return 0;
+}
+
+int dat_set_prefix_notif_bit(union asce asce, gfn_t gfn)
+{
+	static const struct dat_walk_ops ops = {
+		.pte_entry = dat_set_pn_pte,
+		.pmd_entry = dat_set_pn_crste,
+		.pud_entry = dat_set_pn_crste,
+	};
+
+	int n = 0;
+
+	_dat_walk_gfn_range(gfn, gfn + 2, asce, &ops, DAT_WALK_IGN_HOLES, &n);
+	if (n != 2)
+		return -EAGAIN;
+	return 0;
+}
diff --git a/arch/s390/kvm/dat.h b/arch/s390/kvm/dat.h
index 40f5c1371ef3..b695eae5d763 100644
--- a/arch/s390/kvm/dat.h
+++ b/arch/s390/kvm/dat.h
@@ -374,6 +374,11 @@ struct dat_walk {
 	void *priv;
 };
 
+struct ptval_param {
+	unsigned char offset : 6;
+	unsigned char len : 2;
+};
+
 static inline union pte _pte(kvm_pfn_t pfn, bool w, bool d, bool s)
 {
 	union pte res = { .val = PFN_PHYS(pfn) };
@@ -413,6 +418,18 @@ static inline union crste _crste_fc1(kvm_pfn_t pfn, int tt, bool w, bool d)
 	return res;
 }
 
+/*
+ *	0	1	2	3	4	5	6	7
+ *	+-------+-------+-------+-------+-------+-------+-------+-------+
+ *  0	|				|	    PGT_ADDR		|
+ *  8	|	 VMADDR		|SPLTCNT|				|
+ * 16	|								|
+ * 24	|								|
+ */
+#define MKPTVAL(o, l) ((struct ptval_param) { .offset = (o), .len = ((l) + 1) / 2 - 1})
+#define PTVAL_PGT_ADDR	MKPTVAL(4, 8)
+#define PTVAL_VMADDR	MKPTVAL(8, 6)
+
 union pgste __must_check __dat_ptep_xchg(union pte *ptep, union pgste pgste, union pte new,
 					 gfn_t gfn, union asce asce, bool has_skeys);
 bool dat_crstep_xchg_atomic(union crste *crstep, union crste old, union crste new, gfn_t gfn,
@@ -434,6 +451,14 @@ int dat_cond_set_storage_key(union asce asce, gfn_t gfn, union skey skey, union
 int dat_reset_reference_bit(union asce asce, gfn_t gfn);
 long dat_reset_skeys(union asce asce, gfn_t start);
 
+unsigned long dat_get_ptval(struct page_table *table, struct ptval_param param);
+void dat_set_ptval(struct page_table *table, struct ptval_param param, unsigned long val);
+
+int dat_set_slot(union asce asce, gfn_t start, gfn_t end, u16 type, u16 param);
+int dat_set_prefix_notif_bit(union asce asce, gfn_t gfn);
+bool dat_test_age_gfn(union asce asce, gfn_t start, gfn_t end);
+int dat_link(kvm_pfn_t pfn, gfn_t gfn, union asce asce, int level, bool w, bool d, bool s, bool sk);
+
 static inline struct crst_table *crste_table_start(union crste *crstep)
 {
 	return (struct crst_table *)ALIGN_DOWN((unsigned long)crstep, _CRST_TABLE_SIZE);
@@ -778,4 +803,14 @@ static inline int get_level(union crste *crstep, union pte *ptep)
 	return ptep ? LEVEL_PTE : crstep->h.tt;
 }
 
+static inline int dat_delete_slot(union asce asce, gfn_t start, unsigned long npages)
+{
+	return dat_set_slot(asce, start, start + npages, _DAT_TOKEN_PIC, PGM_ADDRESSING);
+}
+
+static inline int dat_create_slot(union asce asce, gfn_t start, unsigned long npages)
+{
+	return dat_set_slot(asce, start, start + npages, _DAT_TOKEN_NONE, 0);
+}
+
 #endif /* __KVM_S390_DAT_H */
-- 
2.51.0


  parent reply	other threads:[~2025-09-10 18:08 UTC|newest]

Thread overview: 75+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-09-10 18:07 [PATCH v2 00/20] KVM: s390: gmap rewrite, the real deal Claudio Imbrenda
2025-09-10 18:07 ` [PATCH v2 01/20] KVM: s390: add P bit in table entry bitfields, move union vaddress Claudio Imbrenda
2025-09-10 18:07 ` [PATCH v2 02/20] s390: Move sske_frame() to a header Claudio Imbrenda
2025-09-10 18:07 ` [PATCH v2 03/20] KVM: s390: Add gmap_helper_set_unused() Claudio Imbrenda
2025-09-11  8:38   ` Nico Boehr
2025-09-12  9:17   ` Nina Schoetterl-Glausch
2025-09-15 11:33     ` Claudio Imbrenda
2025-10-27 18:00       ` Nina Schoetterl-Glausch
2025-09-10 18:07 ` [PATCH v2 04/20] KVM: s390: Enable KVM_GENERIC_MMU_NOTIFIER Claudio Imbrenda
2025-09-10 18:07 ` [PATCH v2 05/20] KVM: s390: Add helper functions for fault handling Claudio Imbrenda
2025-09-12 17:56   ` Nina Schoetterl-Glausch
2025-09-15 11:49     ` Claudio Imbrenda
2025-09-18 14:19   ` Alexander Gordeev
2025-09-18 14:46     ` Claudio Imbrenda
2025-09-18 14:41   ` Alexander Gordeev
2025-09-18 15:10     ` Claudio Imbrenda
2025-09-10 18:07 ` [PATCH v2 06/20] KVM: s390: Rename some functions in gaccess.c Claudio Imbrenda
2025-09-10 18:07 ` [PATCH v2 07/20] KVM: s390: KVM-specific bitfields and helper functions Claudio Imbrenda
2025-09-17 12:18   ` Heiko Carstens
2025-09-17 12:51     ` Claudio Imbrenda
2025-09-10 18:07 ` [PATCH v2 08/20] KVM: s390: KVM page table management functions: allocation Claudio Imbrenda
2025-09-11  8:22   ` Janosch Frank
2025-09-11  8:43     ` Claudio Imbrenda
2025-09-16 16:26   ` Heiko Carstens
2025-09-16 16:47     ` Claudio Imbrenda
2025-09-16 17:01       ` Christian Borntraeger
2025-09-16 17:05         ` Claudio Imbrenda
2025-09-16 17:06           ` Christian Borntraeger
2025-09-16 17:36             ` Heiko Carstens
2025-09-17  7:27               ` Heiko Carstens
2025-09-17 11:25                 ` Claudio Imbrenda
2025-09-17 12:30                   ` Heiko Carstens
2025-09-17 13:11                     ` Claudio Imbrenda
2025-09-17 13:26                       ` Christian Borntraeger
2025-09-17 14:00                         ` Claudio Imbrenda
2025-09-17 14:05                           ` Christian Borntraeger
2025-09-17 14:11                             ` Claudio Imbrenda
2025-09-17 17:08                             ` Claudio Imbrenda
2025-09-17 13:31                       ` Heiko Carstens
2025-09-17 14:00                         ` Claudio Imbrenda
2025-09-17 12:12               ` Claudio Imbrenda
2025-09-10 18:07 ` [PATCH v2 09/20] KVM: s390: KVM page table management functions: clear and replace Claudio Imbrenda
2025-09-11 12:57   ` Janosch Frank
2025-09-11 13:19     ` Claudio Imbrenda
2025-09-11 13:27       ` Janosch Frank
2025-09-16 15:56         ` Heiko Carstens
2025-09-16 16:47   ` Heiko Carstens
2025-09-16 17:04     ` Claudio Imbrenda
2025-09-16 17:27       ` Heiko Carstens
2025-09-10 18:07 ` [PATCH v2 10/20] KVM: s390: KVM page table management functions: walks Claudio Imbrenda
2025-09-11 12:56   ` Janosch Frank
2025-09-11 13:14     ` Claudio Imbrenda
2025-09-12  5:47       ` Gerd Bayer
2025-09-16 16:22   ` Heiko Carstens
2025-09-16 16:48     ` Claudio Imbrenda
2025-09-16 17:24       ` Heiko Carstens
2025-09-17 11:14         ` Claudio Imbrenda
2025-09-17 12:55   ` Heiko Carstens
2025-09-17 13:13     ` Claudio Imbrenda
2025-09-17 13:24       ` Heiko Carstens
2025-09-17 14:01         ` Claudio Imbrenda
2025-09-10 18:07 ` [PATCH v2 11/20] KVM: s390: KVM page table management functions: storage keys Claudio Imbrenda
2025-09-10 18:07 ` Claudio Imbrenda [this message]
2025-09-10 18:07 ` [PATCH v2 13/20] KVM: s390: KVM page table management functions: CMMA Claudio Imbrenda
2025-09-10 18:07 ` [PATCH v2 14/20] KVM: s390: New gmap code Claudio Imbrenda
2025-09-10 18:07 ` [PATCH v2 15/20] KVM: s390: Stop using CONFIG_PGSTE Claudio Imbrenda
2025-09-16  7:45   ` Steffen Eiden
2025-09-10 18:07 ` [PATCH v2 16/20] KVM: s390: Switch to new gmap Claudio Imbrenda
2025-09-17 13:20   ` Heiko Carstens
2025-09-10 18:07 ` [PATCH v2 17/20] KVM: s390: Remove gmap from s390/mm Claudio Imbrenda
2025-09-10 18:07 ` [PATCH v2 18/20] KVM: S390: Remove PGSTE code from linux/s390 mm Claudio Imbrenda
2025-09-16  7:30   ` Steffen Eiden
2025-09-16  9:24     ` Claudio Imbrenda
2025-09-10 18:07 ` [PATCH v2 19/20] KVM: s390: Enable 1M pages for gmap Claudio Imbrenda
2025-09-10 18:07 ` [PATCH v2 20/20] KVM: s390: Storage key manipulation IOCTL Claudio Imbrenda

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250910180746.125776-13-imbrenda@linux.ibm.com \
    --to=imbrenda@linux.ibm.com \
    --cc=agordeev@linux.ibm.com \
    --cc=borntraeger@de.ibm.com \
    --cc=david@redhat.com \
    --cc=frankja@linux.ibm.com \
    --cc=gerald.schaefer@linux.ibm.com \
    --cc=hca@linux.ibm.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-s390@vger.kernel.org \
    --cc=nrb@linux.ibm.com \
    --cc=nsg@linux.ibm.com \
    --cc=schlameuss@linux.ibm.com \
    --cc=seiden@linux.ibm.com \
    --cc=svens@linux.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox