From: Claudio Imbrenda <imbrenda@linux.ibm.com>
To: kvm@vger.kernel.org
Cc: linux-s390@vger.kernel.org, borntraeger@de.ibm.com,
frankja@linux.ibm.com, nsg@linux.ibm.com, nrb@linux.ibm.com,
seiden@linux.ibm.com, schlameuss@linux.ibm.com,
hca@linux.ibm.com, svens@linux.ibm.com, agordeev@linux.ibm.com,
david@redhat.com, gerald.schaefer@linux.ibm.com
Subject: [PATCH v2 13/20] KVM: s390: KVM page table management functions: CMMA
Date: Wed, 10 Sep 2025 20:07:39 +0200 [thread overview]
Message-ID: <20250910180746.125776-14-imbrenda@linux.ibm.com> (raw)
In-Reply-To: <20250910180746.125776-1-imbrenda@linux.ibm.com>
Add page table management functions to be used for KVM guest (gmap)
page tables.
This patch adds functions to handle CMMA and the ESSA instruction.
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
---
arch/s390/kvm/dat.c | 259 ++++++++++++++++++++++++++++++++++++++++++++
arch/s390/kvm/dat.h | 27 +++++
2 files changed, 286 insertions(+)
diff --git a/arch/s390/kvm/dat.c b/arch/s390/kvm/dat.c
index 4249400a9d21..bf9c8af1d74a 100644
--- a/arch/s390/kvm/dat.c
+++ b/arch/s390/kvm/dat.c
@@ -999,3 +999,262 @@ int dat_set_prefix_notif_bit(union asce asce, gfn_t gfn)
return -EAGAIN;
return 0;
}
+
+/**
+ * dat_perform_essa() - perform ESSA actions on the PGSTE.
+ * @asce: the asce to operate on.
+ * @gfn: the guest page frame to operate on.
+ * @orc: the specific action to perform, see the ESSA_SET_* macros.
+ * @state: the storage attributes to be returned to the guest.
+ * @dirty: returns whether the function dirtied a previously clean entry.
+ *
+ * Context: Called with kvm->mmu_lock held.
+ *
+ * Return:
+ * * 1 if the page state has been altered and the page is to be added to the CBRL
+ * * 0 if the page state has been altered, but the page is not to be added to the CBRL
+ * * -1 if the page state has not been altered and the page is not to be added to the CBRL
+ */
+int dat_perform_essa(union asce asce, gfn_t gfn, int orc, union essa_state *state, bool *dirty)
+{
+ union crste *crstep;
+ union pgste pgste;
+ union pte *ptep;
+ int res = 0;
+
+ if (dat_entry_walk(gfn, asce, 0, LEVEL_PTE, &crstep, &ptep)) {
+ *state = (union essa_state) { .exception = 1 };
+ return -1;
+ }
+
+ pgste = pgste_get_lock(ptep);
+
+ *state = (union essa_state) {
+ .content = (ptep->h.i << 1) + (ptep->h.i && pgste.zero),
+ .nodat = pgste.nodat,
+ .usage = pgste.usage,
+ };
+
+ switch (orc) {
+ case ESSA_GET_STATE:
+ res = -1;
+ break;
+ case ESSA_SET_STABLE:
+ pgste.usage = PGSTE_GPS_USAGE_STABLE;
+ pgste.nodat = 0;
+ break;
+ case ESSA_SET_UNUSED:
+ pgste.usage = PGSTE_GPS_USAGE_UNUSED;
+ if (ptep->h.i)
+ res = 1;
+ break;
+ case ESSA_SET_VOLATILE:
+ pgste.usage = PGSTE_GPS_USAGE_VOLATILE;
+ if (ptep->h.i)
+ res = 1;
+ break;
+ case ESSA_SET_POT_VOLATILE:
+ if (!ptep->h.i) {
+ pgste.usage = PGSTE_GPS_USAGE_POT_VOLATILE;
+ } else if (pgste.zero) {
+ pgste.usage = PGSTE_GPS_USAGE_VOLATILE;
+ } else if (!pgste.gc) {
+ pgste.usage = PGSTE_GPS_USAGE_VOLATILE;
+ res = 1;
+ }
+ break;
+ case ESSA_SET_STABLE_RESIDENT:
+ pgste.usage = PGSTE_GPS_USAGE_STABLE;
+ /*
+ * Since the resident state can go away any time after this
+ * call, we will not make this page resident. We can revisit
+ * this decision if a guest will ever start using this.
+ */
+ break;
+ case ESSA_SET_STABLE_IF_RESIDENT:
+ if (!ptep->h.i)
+ pgste.usage = PGSTE_GPS_USAGE_STABLE;
+ break;
+ case ESSA_SET_STABLE_NODAT:
+ pgste.usage = PGSTE_GPS_USAGE_STABLE;
+ pgste.nodat = 1;
+ break;
+ default:
+ WARN_ONCE(1, "Invalid ORC!");
+ res = -1;
+ break;
+ }
+ /* If we are discarding a page, set it to logical zero */
+ pgste.zero = res == 1;
+ if (orc > 0) {
+ *dirty = !pgste.cmma_d;
+ pgste.cmma_d = 1;
+ }
+
+ pgste_set_unlock(ptep, pgste);
+
+ return res;
+}
+
+static long dat_reset_cmma_pte(union pte *ptep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
+{
+ union pgste pgste;
+
+ pgste = pgste_get_lock(ptep);
+ pgste.usage = 0;
+ pgste.nodat = 0;
+ pgste.cmma_d = 0;
+ pgste_set_unlock(ptep, pgste);
+ if (need_resched())
+ return next;
+ return 0;
+}
+
+long dat_reset_cmma(union asce asce, gfn_t start)
+{
+ const struct dat_walk_ops dat_reset_cmma_ops = {
+ .pte_entry = dat_reset_cmma_pte,
+ };
+
+ return _dat_walk_gfn_range(start, asce_end(asce), asce, &dat_reset_cmma_ops,
+ DAT_WALK_IGN_HOLES, NULL);
+}
+
+struct dat_get_cmma_state {
+ gfn_t start;
+ gfn_t end;
+ unsigned int count;
+ u8 *values;
+ atomic64_t *remaining;
+};
+
+static long __dat_peek_cmma_pte(union pte *ptep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
+{
+ struct dat_get_cmma_state *state = walk->priv;
+ union pgste pgste;
+
+ pgste = pgste_get_lock(ptep);
+ state->values[gfn - walk->start] = pgste.usage | (pgste.nodat << 6);
+ pgste_set_unlock(ptep, pgste);
+ state->end = next;
+
+ return 0;
+}
+
+static long __dat_peek_cmma_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
+{
+ struct dat_get_cmma_state *state = walk->priv;
+
+ if (crstep->h.i)
+ state->end = min(walk->end, next);
+ return 0;
+}
+
+int dat_peek_cmma(gfn_t start, union asce asce, unsigned int *count, u8 *values)
+{
+ const struct dat_walk_ops ops = {
+ .pte_entry = __dat_peek_cmma_pte,
+ .pmd_entry = __dat_peek_cmma_crste,
+ .pud_entry = __dat_peek_cmma_crste,
+ .p4d_entry = __dat_peek_cmma_crste,
+ .pgd_entry = __dat_peek_cmma_crste,
+ };
+ struct dat_get_cmma_state state = { .values = values, };
+ int rc;
+
+ rc = _dat_walk_gfn_range(start, start + *count, asce, &ops, DAT_WALK_DEFAULT, &state);
+ *count = state.end - start;
+ /* Return success if at least one value was saved, otherwise an error. */
+ return (rc == -EFAULT && *count > 0) ? 0 : rc;
+}
+
+static long __dat_get_cmma_pte(union pte *ptep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
+{
+ struct dat_get_cmma_state *state = walk->priv;
+ union pgste pgste;
+
+ if (state->start != -1) {
+ if ((gfn - state->end) > KVM_S390_MAX_BIT_DISTANCE)
+ return 1;
+ if (gfn - state->start >= state->count)
+ return 1;
+ }
+
+ if (!READ_ONCE(*pgste_of(ptep)).cmma_d)
+ return 0;
+
+ pgste = pgste_get_lock(ptep);
+ if (pgste.cmma_d) {
+ if (state->start == -1)
+ state->start = gfn;
+ pgste.cmma_d = 0;
+ atomic64_dec(state->remaining);
+ state->values[gfn - state->start] = pgste.usage | pgste.nodat << 6;
+ state->end = next;
+ }
+ pgste_set_unlock(ptep, pgste);
+ return 0;
+}
+
+int dat_get_cmma(union asce asce, gfn_t *start, unsigned int *count, u8 *values, atomic64_t *rem)
+{
+ const struct dat_walk_ops ops = { .pte_entry = __dat_get_cmma_pte, };
+ struct dat_get_cmma_state state = {
+ .remaining = rem,
+ .values = values,
+ .count = *count,
+ .start = -1,
+ };
+
+ _dat_walk_gfn_range(*start, asce_end(asce), asce, &ops, DAT_WALK_IGN_HOLES, &state);
+
+ if (state.start == -1) {
+ *count = 0;
+ } else {
+ *count = state.end - state.start;
+ *start = state.start;
+ }
+
+ return 0;
+}
+
+struct dat_set_cmma_state {
+ unsigned long mask;
+ const u8 *bits;
+};
+
+static long __dat_set_cmma_pte(union pte *ptep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
+{
+ struct dat_set_cmma_state *state = walk->priv;
+ union pgste pgste, tmp;
+
+ tmp.val = (state->bits[gfn - walk->start] << 24) & state->mask;
+
+ pgste = pgste_get_lock(ptep);
+ pgste.usage = tmp.usage;
+ pgste.nodat = tmp.nodat;
+ pgste_set_unlock(ptep, pgste);
+
+ return 0;
+}
+
+/*
+ * This function sets the CMMA attributes for the given pages. If the input
+ * buffer has zero length, no action is taken, otherwise the attributes are
+ * set and the mm->context.uses_cmm flag is set.
+ */
+int dat_set_cmma_bits(union asce asce, gfn_t gfn, unsigned long count,
+ unsigned long mask, const uint8_t *bits)
+{
+ const struct dat_walk_ops ops = { .pte_entry = __dat_set_cmma_pte, };
+ struct dat_set_cmma_state state = { .mask = mask, .bits = bits, };
+ union crste *crstep;
+ union pte *ptep;
+ gfn_t cur;
+ int rc;
+
+ for (cur = ALIGN_DOWN(gfn, _PAGE_ENTRIES); cur < gfn + count; cur += _PAGE_ENTRIES)
+ dat_entry_walk(cur, asce, DAT_WALK_ALLOC, LEVEL_PTE, &crstep, &ptep);
+ rc = _dat_walk_gfn_range(gfn, gfn + count, asce, &ops, DAT_WALK_IGN_HOLES, &state);
+ return rc;
+}
diff --git a/arch/s390/kvm/dat.h b/arch/s390/kvm/dat.h
index b695eae5d763..4d0ceeada40f 100644
--- a/arch/s390/kvm/dat.h
+++ b/arch/s390/kvm/dat.h
@@ -18,6 +18,15 @@
#include <asm/pgalloc.h>
#include <asm/dat-bits.h>
+/*
+ * Base address and length must be sent at the start of each block, therefore
+ * it's cheaper to send some clean data, as long as it's less than the size of
+ * two longs.
+ */
+#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
+/* for consistency */
+#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
+
#define _ASCE(x) ((union asce) { .val = (x), })
#define NULL_ASCE _ASCE(0)
@@ -418,6 +427,17 @@ static inline union crste _crste_fc1(kvm_pfn_t pfn, int tt, bool w, bool d)
return res;
}
+union essa_state {
+ unsigned char val;
+ struct {
+ unsigned char : 2;
+ unsigned char nodat : 1;
+ unsigned char exception : 1;
+ unsigned char usage : 2;
+ unsigned char content : 2;
+ };
+};
+
/**
* 0 1 2 3 4 5 6 7
* +-------+-------+-------+-------+-------+-------+-------+-------+
@@ -459,6 +479,13 @@ int dat_set_prefix_notif_bit(union asce asce, gfn_t gfn);
bool dat_test_age_gfn(union asce asce, gfn_t start, gfn_t end);
int dat_link(kvm_pfn_t pfn, gfn_t gfn, union asce asce, int level, bool w, bool d, bool s, bool sk);
+int dat_perform_essa(union asce asce, gfn_t gfn, int orc, union essa_state *state, bool *dirty);
+long dat_reset_cmma(union asce asce, gfn_t start_gfn);
+int dat_peek_cmma(gfn_t start, union asce asce, unsigned int *count, u8 *values);
+int dat_get_cmma(union asce asce, gfn_t *start, unsigned int *count, u8 *values, atomic64_t *rem);
+int dat_set_cmma_bits(union asce asce, gfn_t gfn, unsigned long count, unsigned long mask,
+ const uint8_t *bits);
+
static inline struct crst_table *crste_table_start(union crste *crstep)
{
return (struct crst_table *)ALIGN_DOWN((unsigned long)crstep, _CRST_TABLE_SIZE);
--
2.51.0
next prev parent reply other threads:[~2025-09-10 18:07 UTC|newest]
Thread overview: 75+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-09-10 18:07 [PATCH v2 00/20] KVM: s390: gmap rewrite, the real deal Claudio Imbrenda
2025-09-10 18:07 ` [PATCH v2 01/20] KVM: s390: add P bit in table entry bitfields, move union vaddress Claudio Imbrenda
2025-09-10 18:07 ` [PATCH v2 02/20] s390: Move sske_frame() to a header Claudio Imbrenda
2025-09-10 18:07 ` [PATCH v2 03/20] KVM: s390: Add gmap_helper_set_unused() Claudio Imbrenda
2025-09-11 8:38 ` Nico Boehr
2025-09-12 9:17 ` Nina Schoetterl-Glausch
2025-09-15 11:33 ` Claudio Imbrenda
2025-10-27 18:00 ` Nina Schoetterl-Glausch
2025-09-10 18:07 ` [PATCH v2 04/20] KVM: s390: Enable KVM_GENERIC_MMU_NOTIFIER Claudio Imbrenda
2025-09-10 18:07 ` [PATCH v2 05/20] KVM: s390: Add helper functions for fault handling Claudio Imbrenda
2025-09-12 17:56 ` Nina Schoetterl-Glausch
2025-09-15 11:49 ` Claudio Imbrenda
2025-09-18 14:19 ` Alexander Gordeev
2025-09-18 14:46 ` Claudio Imbrenda
2025-09-18 14:41 ` Alexander Gordeev
2025-09-18 15:10 ` Claudio Imbrenda
2025-09-10 18:07 ` [PATCH v2 06/20] KVM: s390: Rename some functions in gaccess.c Claudio Imbrenda
2025-09-10 18:07 ` [PATCH v2 07/20] KVM: s390: KVM-specific bitfields and helper functions Claudio Imbrenda
2025-09-17 12:18 ` Heiko Carstens
2025-09-17 12:51 ` Claudio Imbrenda
2025-09-10 18:07 ` [PATCH v2 08/20] KVM: s390: KVM page table management functions: allocation Claudio Imbrenda
2025-09-11 8:22 ` Janosch Frank
2025-09-11 8:43 ` Claudio Imbrenda
2025-09-16 16:26 ` Heiko Carstens
2025-09-16 16:47 ` Claudio Imbrenda
2025-09-16 17:01 ` Christian Borntraeger
2025-09-16 17:05 ` Claudio Imbrenda
2025-09-16 17:06 ` Christian Borntraeger
2025-09-16 17:36 ` Heiko Carstens
2025-09-17 7:27 ` Heiko Carstens
2025-09-17 11:25 ` Claudio Imbrenda
2025-09-17 12:30 ` Heiko Carstens
2025-09-17 13:11 ` Claudio Imbrenda
2025-09-17 13:26 ` Christian Borntraeger
2025-09-17 14:00 ` Claudio Imbrenda
2025-09-17 14:05 ` Christian Borntraeger
2025-09-17 14:11 ` Claudio Imbrenda
2025-09-17 17:08 ` Claudio Imbrenda
2025-09-17 13:31 ` Heiko Carstens
2025-09-17 14:00 ` Claudio Imbrenda
2025-09-17 12:12 ` Claudio Imbrenda
2025-09-10 18:07 ` [PATCH v2 09/20] KVM: s390: KVM page table management functions: clear and replace Claudio Imbrenda
2025-09-11 12:57 ` Janosch Frank
2025-09-11 13:19 ` Claudio Imbrenda
2025-09-11 13:27 ` Janosch Frank
2025-09-16 15:56 ` Heiko Carstens
2025-09-16 16:47 ` Heiko Carstens
2025-09-16 17:04 ` Claudio Imbrenda
2025-09-16 17:27 ` Heiko Carstens
2025-09-10 18:07 ` [PATCH v2 10/20] KVM: s390: KVM page table management functions: walks Claudio Imbrenda
2025-09-11 12:56 ` Janosch Frank
2025-09-11 13:14 ` Claudio Imbrenda
2025-09-12 5:47 ` Gerd Bayer
2025-09-16 16:22 ` Heiko Carstens
2025-09-16 16:48 ` Claudio Imbrenda
2025-09-16 17:24 ` Heiko Carstens
2025-09-17 11:14 ` Claudio Imbrenda
2025-09-17 12:55 ` Heiko Carstens
2025-09-17 13:13 ` Claudio Imbrenda
2025-09-17 13:24 ` Heiko Carstens
2025-09-17 14:01 ` Claudio Imbrenda
2025-09-10 18:07 ` [PATCH v2 11/20] KVM: s390: KVM page table management functions: storage keys Claudio Imbrenda
2025-09-10 18:07 ` [PATCH v2 12/20] KVM: s390: KVM page table management functions: lifecycle management Claudio Imbrenda
2025-09-10 18:07 ` Claudio Imbrenda [this message]
2025-09-10 18:07 ` [PATCH v2 14/20] KVM: s390: New gmap code Claudio Imbrenda
2025-09-10 18:07 ` [PATCH v2 15/20] KVM: s390: Stop using CONFIG_PGSTE Claudio Imbrenda
2025-09-16 7:45 ` Steffen Eiden
2025-09-10 18:07 ` [PATCH v2 16/20] KVM: s390: Switch to new gmap Claudio Imbrenda
2025-09-17 13:20 ` Heiko Carstens
2025-09-10 18:07 ` [PATCH v2 17/20] KVM: s390: Remove gmap from s390/mm Claudio Imbrenda
2025-09-10 18:07 ` [PATCH v2 18/20] KVM: S390: Remove PGSTE code from linux/s390 mm Claudio Imbrenda
2025-09-16 7:30 ` Steffen Eiden
2025-09-16 9:24 ` Claudio Imbrenda
2025-09-10 18:07 ` [PATCH v2 19/20] KVM: s390: Enable 1M pages for gmap Claudio Imbrenda
2025-09-10 18:07 ` [PATCH v2 20/20] KVM: s390: Storage key manipulation IOCTL Claudio Imbrenda
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250910180746.125776-14-imbrenda@linux.ibm.com \
--to=imbrenda@linux.ibm.com \
--cc=agordeev@linux.ibm.com \
--cc=borntraeger@de.ibm.com \
--cc=david@redhat.com \
--cc=frankja@linux.ibm.com \
--cc=gerald.schaefer@linux.ibm.com \
--cc=hca@linux.ibm.com \
--cc=kvm@vger.kernel.org \
--cc=linux-s390@vger.kernel.org \
--cc=nrb@linux.ibm.com \
--cc=nsg@linux.ibm.com \
--cc=schlameuss@linux.ibm.com \
--cc=seiden@linux.ibm.com \
--cc=svens@linux.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox