[PATCH v1 0/5] KVM: s390: some cleanup and small fixes

kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH v1 0/5] KVM: s390: some cleanup and small fixes
@ 2025-05-14 16:38 Claudio Imbrenda
  2025-05-14 16:38 ` [PATCH v1 1/5] s390: remove unneeded includes Claudio Imbrenda
                   ` (5 more replies)
  0 siblings, 6 replies; 21+ messages in thread
From: Claudio Imbrenda @ 2025-05-14 16:38 UTC (permalink / raw)
  To: linux-kernel
  Cc: kvm, linux-s390, frankja, borntraeger, seiden, nsg, nrb, david,
	hca, agordeev, svens, gor

This series has some cleanups and small fixes in preparation of the
upcoming series that will finally completely move all guest page table
handling into kvm. The cleaups and fixes in this series are good enough
on their own, hence why they are being sent now.

Claudio Imbrenda (5):
  s390: remove unneeded includes
  KVM: s390: remove unneeded srcu lock
  KVM: s390: refactor some functions in priv.c
  KVM: s390: refactor and split some gmap helpers
  KVM: s390: simplify and move pv code

 MAINTAINERS                          |   2 +
 arch/s390/include/asm/gmap_helpers.h |  18 ++
 arch/s390/include/asm/tlb.h          |   1 +
 arch/s390/include/asm/uv.h           |   1 -
 arch/s390/kernel/uv.c                |  12 +-
 arch/s390/kvm/Makefile               |   2 +-
 arch/s390/kvm/diag.c                 |  11 +-
 arch/s390/kvm/gaccess.c              |   3 +-
 arch/s390/kvm/gmap-vsie.c            |   1 -
 arch/s390/kvm/gmap.c                 | 121 -----------
 arch/s390/kvm/gmap.h                 |  39 ----
 arch/s390/kvm/intercept.c            |  10 +-
 arch/s390/kvm/kvm-s390.c             |   8 +-
 arch/s390/kvm/kvm-s390.h             |  57 ++++++
 arch/s390/kvm/priv.c                 | 292 ++++++++++++---------------
 arch/s390/kvm/pv.c                   |  61 +++++-
 arch/s390/kvm/vsie.c                 |  19 +-
 arch/s390/mm/Makefile                |   2 +
 arch/s390/mm/fault.c                 |   1 -
 arch/s390/mm/gmap.c                  |  47 +----
 arch/s390/mm/gmap_helpers.c          | 266 ++++++++++++++++++++++++
 arch/s390/mm/init.c                  |   1 -
 arch/s390/mm/pgalloc.c               |   2 -
 arch/s390/mm/pgtable.c               |   1 -
 24 files changed, 590 insertions(+), 388 deletions(-)
 create mode 100644 arch/s390/include/asm/gmap_helpers.h
 delete mode 100644 arch/s390/kvm/gmap.c
 delete mode 100644 arch/s390/kvm/gmap.h
 create mode 100644 arch/s390/mm/gmap_helpers.c

-- 
2.49.0


^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH v1 1/5] s390: remove unneeded includes
  2025-05-14 16:38 [PATCH v1 0/5] KVM: s390: some cleanup and small fixes Claudio Imbrenda
@ 2025-05-14 16:38 ` Claudio Imbrenda
  2025-05-15 13:56   ` Christoph Schlameuss
  2025-05-14 16:38 ` [PATCH v1 2/5] KVM: s390: remove unneeded srcu lock Claudio Imbrenda
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 21+ messages in thread
From: Claudio Imbrenda @ 2025-05-14 16:38 UTC (permalink / raw)
  To: linux-kernel
  Cc: kvm, linux-s390, frankja, borntraeger, seiden, nsg, nrb, david,
	hca, agordeev, svens, gor

Many files don't need to include asm/tlb.h or asm/gmap.h.
On the other hand, asm/tlb.h does need to include asm/gmap.h.

Remove all unneeded includes so that asm/tlb.h is not directly used by
s390 arch code anymore. Remove asm/gmap.h from a few other files as
well, so that now only KVM code, mm/gmap.c, and asm/tlb.h include it.

Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
---
 arch/s390/include/asm/tlb.h | 1 +
 arch/s390/include/asm/uv.h  | 1 -
 arch/s390/kvm/intercept.c   | 1 +
 arch/s390/mm/fault.c        | 1 -
 arch/s390/mm/gmap.c         | 1 -
 arch/s390/mm/init.c         | 1 -
 arch/s390/mm/pgalloc.c      | 2 --
 arch/s390/mm/pgtable.c      | 1 -
 8 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index f20601995bb0..56d5f9e0eb2e 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -36,6 +36,7 @@ static inline bool __tlb_remove_folio_pages(struct mmu_gather *tlb,
 
 #include <asm/tlbflush.h>
 #include <asm-generic/tlb.h>
+#include <asm/gmap.h>
 
 /*
  * Release the page cache reference for a pte removed by
diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
index 46fb0ef6f984..eeb2db4783e6 100644
--- a/arch/s390/include/asm/uv.h
+++ b/arch/s390/include/asm/uv.h
@@ -16,7 +16,6 @@
 #include <linux/bug.h>
 #include <linux/sched.h>
 #include <asm/page.h>
-#include <asm/gmap.h>
 #include <asm/asm.h>
 
 #define UVC_CC_OK	0
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index a06a000f196c..b4834bd4d216 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -16,6 +16,7 @@
 #include <asm/irq.h>
 #include <asm/sysinfo.h>
 #include <asm/uv.h>
+#include <asm/gmap.h>
 
 #include "kvm-s390.h"
 #include "gaccess.h"
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index da84ff6770de..3829521450dd 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -40,7 +40,6 @@
 #include <asm/ptrace.h>
 #include <asm/fault.h>
 #include <asm/diag.h>
-#include <asm/gmap.h>
 #include <asm/irq.h>
 #include <asm/facility.h>
 #include <asm/uv.h>
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index a94bd4870c65..4869555ff403 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -24,7 +24,6 @@
 #include <asm/machine.h>
 #include <asm/gmap.h>
 #include <asm/page.h>
-#include <asm/tlb.h>
 
 /*
  * The address is saved in a radix tree directly; NULL would be ambiguous,
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index afa085e8186c..074bf4fb4ce2 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -40,7 +40,6 @@
 #include <asm/kfence.h>
 #include <asm/dma.h>
 #include <asm/abs_lowcore.h>
-#include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
 #include <asm/sclp.h>
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index e3a6f8ae156c..ddab36875370 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -12,8 +12,6 @@
 #include <asm/mmu_context.h>
 #include <asm/page-states.h>
 #include <asm/pgalloc.h>
-#include <asm/gmap.h>
-#include <asm/tlb.h>
 #include <asm/tlbflush.h>
 
 unsigned long *crst_table_alloc(struct mm_struct *mm)
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 9901934284ec..7df70cd8f739 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -20,7 +20,6 @@
 #include <linux/ksm.h>
 #include <linux/mman.h>
 
-#include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
 #include <asm/page-states.h>
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v1 2/5] KVM: s390: remove unneeded srcu lock
  2025-05-14 16:38 [PATCH v1 0/5] KVM: s390: some cleanup and small fixes Claudio Imbrenda
  2025-05-14 16:38 ` [PATCH v1 1/5] s390: remove unneeded includes Claudio Imbrenda
@ 2025-05-14 16:38 ` Claudio Imbrenda
  2025-05-19  8:25   ` Christian Borntraeger
  2025-05-19 14:42   ` Nina Schoetterl-Glausch
  2025-05-14 16:38 ` [PATCH v1 3/5] KVM: s390: refactor some functions in priv.c Claudio Imbrenda
                   ` (3 subsequent siblings)
  5 siblings, 2 replies; 21+ messages in thread
From: Claudio Imbrenda @ 2025-05-14 16:38 UTC (permalink / raw)
  To: linux-kernel
  Cc: kvm, linux-s390, frankja, borntraeger, seiden, nsg, nrb, david,
	hca, agordeev, svens, gor

All paths leading to handle_essa() already hold the kvm->srcu.
Remove unneeded srcu locking from handle_essa().

Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
---
 arch/s390/kvm/priv.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 1a49b89706f8..758cefb5bac7 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -1297,12 +1297,8 @@ static int handle_essa(struct kvm_vcpu *vcpu)
 		/* Retry the ESSA instruction */
 		kvm_s390_retry_instr(vcpu);
 	} else {
-		int srcu_idx;
-
 		mmap_read_lock(vcpu->kvm->mm);
-		srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 		i = __do_essa(vcpu, orc);
-		srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
 		mmap_read_unlock(vcpu->kvm->mm);
 		if (i < 0)
 			return i;
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v1 3/5] KVM: s390: refactor some functions in priv.c
  2025-05-14 16:38 [PATCH v1 0/5] KVM: s390: some cleanup and small fixes Claudio Imbrenda
  2025-05-14 16:38 ` [PATCH v1 1/5] s390: remove unneeded includes Claudio Imbrenda
  2025-05-14 16:38 ` [PATCH v1 2/5] KVM: s390: remove unneeded srcu lock Claudio Imbrenda
@ 2025-05-14 16:38 ` Claudio Imbrenda
  2025-05-20 12:49   ` Nina Schoetterl-Glausch
  2025-05-14 16:38 ` [PATCH v1 4/5] KVM: s390: refactor and split some gmap helpers Claudio Imbrenda
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 21+ messages in thread
From: Claudio Imbrenda @ 2025-05-14 16:38 UTC (permalink / raw)
  To: linux-kernel
  Cc: kvm, linux-s390, frankja, borntraeger, seiden, nsg, nrb, david,
	hca, agordeev, svens, gor

Refactor some functions in priv.c to make them more readable.

handle_{iske,rrbe,sske}: move duplicated checks into a single function.
handle{pfmf,epsw}: improve readability.
handle_lpswe{,y}: merge implementations since they are almost the same.

Use u64_replace_bits() where it makes sense.

Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
---
 arch/s390/kvm/kvm-s390.h |  15 ++
 arch/s390/kvm/priv.c     | 288 ++++++++++++++++++---------------------
 2 files changed, 148 insertions(+), 155 deletions(-)

diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 8d3bbb2dd8d2..f8c32527c4e4 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -22,6 +22,21 @@
 
 #define KVM_S390_UCONTROL_MEMSLOT (KVM_USER_MEM_SLOTS + 0)
 
+static inline bool kvm_s390_is_amode_24(struct kvm_vcpu *vcpu)
+{
+	return psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_24BIT;
+}
+
+static inline bool kvm_s390_is_amode_31(struct kvm_vcpu *vcpu)
+{
+	return psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_31BIT;
+}
+
+static inline bool kvm_s390_is_amode_64(struct kvm_vcpu *vcpu)
+{
+	return psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT;
+}
+
 static inline void kvm_s390_fpu_store(struct kvm_run *run)
 {
 	fpu_stfpc(&run->s.regs.fpc);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 758cefb5bac7..1a26aa591c2e 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -14,6 +14,7 @@
 #include <linux/mm_types.h>
 #include <linux/pgtable.h>
 #include <linux/io.h>
+#include <linux/bitfield.h>
 #include <asm/asm-offsets.h>
 #include <asm/facility.h>
 #include <asm/current.h>
@@ -253,29 +254,50 @@ static int try_handle_skey(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+struct skeys_ops_state {
+	int reg1;
+	int reg2;
+	int rc;
+	unsigned long gaddr;
+};
+
+static bool skeys_common_checks(struct kvm_vcpu *vcpu, struct skeys_ops_state *state, bool abs)
+{
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) {
+		state->rc = kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+		return true;
+	}
+
+	state->rc = try_handle_skey(vcpu);
+	if (state->rc)
+		return true;
+
+	kvm_s390_get_regs_rre(vcpu, &state->reg1, &state->reg2);
+
+	state->gaddr = vcpu->run->s.regs.gprs[state->reg2] & PAGE_MASK;
+	state->gaddr = kvm_s390_logical_to_effective(vcpu, state->gaddr);
+	if (!abs)
+		state->gaddr = kvm_s390_real_to_abs(vcpu, state->gaddr);
+
+	return false;
+}
+
 static int handle_iske(struct kvm_vcpu *vcpu)
 {
-	unsigned long gaddr, vmaddr;
+	struct skeys_ops_state state;
+	unsigned long vmaddr;
 	unsigned char key;
-	int reg1, reg2;
 	bool unlocked;
+	u64 *r1;
 	int rc;
 
 	vcpu->stat.instruction_iske++;
 
-	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
-		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
-
-	rc = try_handle_skey(vcpu);
-	if (rc)
-		return rc != -EAGAIN ? rc : 0;
-
-	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+	if (skeys_common_checks(vcpu, &state, false))
+		return state.rc;
+	r1 = vcpu->run->s.regs.gprs + state.reg1;
 
-	gaddr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
-	gaddr = kvm_s390_logical_to_effective(vcpu, gaddr);
-	gaddr = kvm_s390_real_to_abs(vcpu, gaddr);
-	vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gaddr));
+	vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(state.gaddr));
 	if (kvm_is_error_hva(vmaddr))
 		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 retry:
@@ -296,33 +318,23 @@ static int handle_iske(struct kvm_vcpu *vcpu)
 		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 	if (rc < 0)
 		return rc;
-	vcpu->run->s.regs.gprs[reg1] &= ~0xff;
-	vcpu->run->s.regs.gprs[reg1] |= key;
+	*r1 = u64_replace_bits(*r1, key, 0xff);
 	return 0;
 }
 
 static int handle_rrbe(struct kvm_vcpu *vcpu)
 {
-	unsigned long vmaddr, gaddr;
-	int reg1, reg2;
+	struct skeys_ops_state state;
+	unsigned long vmaddr;
 	bool unlocked;
 	int rc;
 
 	vcpu->stat.instruction_rrbe++;
 
-	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
-		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
-
-	rc = try_handle_skey(vcpu);
-	if (rc)
-		return rc != -EAGAIN ? rc : 0;
-
-	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+	if (skeys_common_checks(vcpu, &state, false))
+		return state.rc;
 
-	gaddr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
-	gaddr = kvm_s390_logical_to_effective(vcpu, gaddr);
-	gaddr = kvm_s390_real_to_abs(vcpu, gaddr);
-	vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gaddr));
+	vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(state.gaddr));
 	if (kvm_is_error_hva(vmaddr))
 		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 retry:
@@ -353,40 +365,30 @@ static int handle_rrbe(struct kvm_vcpu *vcpu)
 static int handle_sske(struct kvm_vcpu *vcpu)
 {
 	unsigned char m3 = vcpu->arch.sie_block->ipb >> 28;
+	struct skeys_ops_state state;
 	unsigned long start, end;
 	unsigned char key, oldkey;
-	int reg1, reg2;
+	bool nq, mr, mc, mb;
 	bool unlocked;
+	u64 *r1, *r2;
 	int rc;
 
 	vcpu->stat.instruction_sske++;
 
-	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
-		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
-
-	rc = try_handle_skey(vcpu);
-	if (rc)
-		return rc != -EAGAIN ? rc : 0;
-
-	if (!test_kvm_facility(vcpu->kvm, 8))
-		m3 &= ~SSKE_MB;
-	if (!test_kvm_facility(vcpu->kvm, 10))
-		m3 &= ~(SSKE_MC | SSKE_MR);
-	if (!test_kvm_facility(vcpu->kvm, 14))
-		m3 &= ~SSKE_NQ;
+	mb = test_kvm_facility(vcpu->kvm, 8) && (m3 & SSKE_MB);
+	mr = test_kvm_facility(vcpu->kvm, 10) && (m3 & SSKE_MR);
+	mc = test_kvm_facility(vcpu->kvm, 10) && (m3 & SSKE_MC);
+	nq = test_kvm_facility(vcpu->kvm, 14) && (m3 & SSKE_NQ);
 
-	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+	/* start already designates an absolute address if MB is set */
+	if (skeys_common_checks(vcpu, &state, mb))
+		return state.rc;
 
-	key = vcpu->run->s.regs.gprs[reg1] & 0xfe;
-	start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
-	start = kvm_s390_logical_to_effective(vcpu, start);
-	if (m3 & SSKE_MB) {
-		/* start already designates an absolute address */
-		end = (start + _SEGMENT_SIZE) & ~(_SEGMENT_SIZE - 1);
-	} else {
-		start = kvm_s390_real_to_abs(vcpu, start);
-		end = start + PAGE_SIZE;
-	}
+	start = state.gaddr;
+	end = mb ? ALIGN(start + 1, _SEGMENT_SIZE) : start + PAGE_SIZE;
+	r1 = vcpu->run->s.regs.gprs + state.reg1;
+	r2 = vcpu->run->s.regs.gprs + state.reg2;
+	key = *r1 & 0xfe;
 
 	while (start != end) {
 		unsigned long vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
@@ -396,9 +398,7 @@ static int handle_sske(struct kvm_vcpu *vcpu)
 			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 
 		mmap_read_lock(current->mm);
-		rc = cond_set_guest_storage_key(current->mm, vmaddr, key, &oldkey,
-						m3 & SSKE_NQ, m3 & SSKE_MR,
-						m3 & SSKE_MC);
+		rc = cond_set_guest_storage_key(current->mm, vmaddr, key, &oldkey, nq, mr, mc);
 
 		if (rc < 0) {
 			rc = fixup_user_fault(current->mm, vmaddr,
@@ -415,23 +415,21 @@ static int handle_sske(struct kvm_vcpu *vcpu)
 		start += PAGE_SIZE;
 	}
 
-	if (m3 & (SSKE_MC | SSKE_MR)) {
-		if (m3 & SSKE_MB) {
+	if (mc || mr) {
+		if (mb) {
 			/* skey in reg1 is unpredictable */
 			kvm_s390_set_psw_cc(vcpu, 3);
 		} else {
 			kvm_s390_set_psw_cc(vcpu, rc);
-			vcpu->run->s.regs.gprs[reg1] &= ~0xff00UL;
-			vcpu->run->s.regs.gprs[reg1] |= (u64) oldkey << 8;
+			*r1 = u64_replace_bits(*r1, oldkey << 8, 0xff00);
 		}
 	}
-	if (m3 & SSKE_MB) {
-		if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT)
-			vcpu->run->s.regs.gprs[reg2] &= ~PAGE_MASK;
-		else
-			vcpu->run->s.regs.gprs[reg2] &= ~0xfffff000UL;
+	if (mb) {
 		end = kvm_s390_logical_to_effective(vcpu, end);
-		vcpu->run->s.regs.gprs[reg2] |= end;
+		if (kvm_s390_is_amode_64(vcpu))
+			*r2 = u64_replace_bits(*r2, end, PAGE_MASK);
+		else
+			*r2 = u64_replace_bits(*r2, end, 0xfffff000);
 	}
 	return 0;
 }
@@ -773,46 +771,28 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-static int handle_lpswe(struct kvm_vcpu *vcpu)
+static int handle_lpswe_y(struct kvm_vcpu *vcpu, bool lpswey)
 {
 	psw_t new_psw;
 	u64 addr;
 	int rc;
 	u8 ar;
 
-	vcpu->stat.instruction_lpswe++;
-
-	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
-		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
-
-	addr = kvm_s390_get_base_disp_s(vcpu, &ar);
-	if (addr & 7)
-		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-	rc = read_guest(vcpu, addr, ar, &new_psw, sizeof(new_psw));
-	if (rc)
-		return kvm_s390_inject_prog_cond(vcpu, rc);
-	vcpu->arch.sie_block->gpsw = new_psw;
-	if (!is_valid_psw(&vcpu->arch.sie_block->gpsw))
-		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-	return 0;
-}
-
-static int handle_lpswey(struct kvm_vcpu *vcpu)
-{
-	psw_t new_psw;
-	u64 addr;
-	int rc;
-	u8 ar;
-
-	vcpu->stat.instruction_lpswey++;
+	if (lpswey)
+		vcpu->stat.instruction_lpswey++;
+	else
+		vcpu->stat.instruction_lpswe++;
 
-	if (!test_kvm_facility(vcpu->kvm, 193))
+	if (lpswey && !test_kvm_facility(vcpu->kvm, 193))
 		return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-	addr = kvm_s390_get_base_disp_siy(vcpu, &ar);
+	if (!lpswey)
+		addr = kvm_s390_get_base_disp_s(vcpu, &ar);
+	else
+		addr = kvm_s390_get_base_disp_siy(vcpu, &ar);
 	if (addr & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
@@ -1034,7 +1014,7 @@ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
 	case 0xb1:
 		return handle_stfl(vcpu);
 	case 0xb2:
-		return handle_lpswe(vcpu);
+		return handle_lpswe_y(vcpu, false);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -1043,42 +1023,50 @@ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
 static int handle_epsw(struct kvm_vcpu *vcpu)
 {
 	int reg1, reg2;
+	u64 *r1, *r2;
 
 	vcpu->stat.instruction_epsw++;
 
 	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+	r1 = vcpu->run->s.regs.gprs + reg1;
+	r2 = vcpu->run->s.regs.gprs + reg2;
 
 	/* This basically extracts the mask half of the psw. */
-	vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000UL;
-	vcpu->run->s.regs.gprs[reg1] |= vcpu->arch.sie_block->gpsw.mask >> 32;
-	if (reg2) {
-		vcpu->run->s.regs.gprs[reg2] &= 0xffffffff00000000UL;
-		vcpu->run->s.regs.gprs[reg2] |=
-			vcpu->arch.sie_block->gpsw.mask & 0x00000000ffffffffUL;
-	}
+	*r1 = u64_replace_bits(*r1, vcpu->arch.sie_block->gpsw.mask >> 32, 0xffffffff);
+	if (reg2)
+		*r2 = u64_replace_bits(*r2, vcpu->arch.sie_block->gpsw.mask, 0xffffffff);
 	return 0;
 }
 
 #define PFMF_RESERVED   0xfffc0101UL
-#define PFMF_SK         0x00020000UL
-#define PFMF_CF         0x00010000UL
-#define PFMF_UI         0x00008000UL
-#define PFMF_FSC        0x00007000UL
-#define PFMF_NQ         0x00000800UL
-#define PFMF_MR         0x00000400UL
-#define PFMF_MC         0x00000200UL
-#define PFMF_KEY        0x000000feUL
+union pfmf_r1 {
+	unsigned long val;
+	struct {
+		unsigned long    :46;
+		unsigned long sk : 1;
+		unsigned long cf : 1;
+		unsigned long ui : 1;
+		unsigned long fsc: 3;
+		unsigned long nq : 1;
+		unsigned long mr : 1;
+		unsigned long mc : 1;
+		unsigned long    : 1;
+		unsigned char skey;
+	} __packed;
+};
+
+static_assert(sizeof(union pfmf_r1) == sizeof(unsigned long));
 
 static int handle_pfmf(struct kvm_vcpu *vcpu)
 {
-	bool mr = false, mc = false, nq;
 	int reg1, reg2;
 	unsigned long start, end;
-	unsigned char key;
+	union pfmf_r1 r1;
 
 	vcpu->stat.instruction_pfmf++;
 
 	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+	r1.val = vcpu->run->s.regs.gprs[reg1];
 
 	if (!test_kvm_facility(vcpu->kvm, 8))
 		return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
@@ -1086,47 +1074,38 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-	if (vcpu->run->s.regs.gprs[reg1] & PFMF_RESERVED)
+	if (r1.val & PFMF_RESERVED)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
 	/* Only provide non-quiescing support if enabled for the guest */
-	if (vcpu->run->s.regs.gprs[reg1] & PFMF_NQ &&
-	    !test_kvm_facility(vcpu->kvm, 14))
+	if (r1.nq && !test_kvm_facility(vcpu->kvm, 14))
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
 	/* Only provide conditional-SSKE support if enabled for the guest */
-	if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK &&
-	    test_kvm_facility(vcpu->kvm, 10)) {
-		mr = vcpu->run->s.regs.gprs[reg1] & PFMF_MR;
-		mc = vcpu->run->s.regs.gprs[reg1] & PFMF_MC;
-	}
+	if (!r1.sk || !test_kvm_facility(vcpu->kvm, 10))
+		r1.mr = r1.mc = 0;
 
-	nq = vcpu->run->s.regs.gprs[reg1] & PFMF_NQ;
-	key = vcpu->run->s.regs.gprs[reg1] & PFMF_KEY;
 	start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
 	start = kvm_s390_logical_to_effective(vcpu, start);
 
-	if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
-		if (kvm_s390_check_low_addr_prot_real(vcpu, start))
-			return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
-	}
+	if (r1.cf && kvm_s390_check_low_addr_prot_real(vcpu, start))
+		return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
 
-	switch (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
-	case 0x00000000:
+	switch (r1.fsc) {
+	case 0:
 		/* only 4k frames specify a real address */
 		start = kvm_s390_real_to_abs(vcpu, start);
-		end = (start + PAGE_SIZE) & ~(PAGE_SIZE - 1);
+		end = ALIGN(start + 1, PAGE_SIZE);
 		break;
-	case 0x00001000:
-		end = (start + _SEGMENT_SIZE) & ~(_SEGMENT_SIZE - 1);
+	case 1:
+		end = ALIGN(start + 1, _SEGMENT_SIZE);
 		break;
-	case 0x00002000:
+	case 2:
 		/* only support 2G frame size if EDAT2 is available and we are
 		   not in 24-bit addressing mode */
-		if (!test_kvm_facility(vcpu->kvm, 78) ||
-		    psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_24BIT)
+		if (!test_kvm_facility(vcpu->kvm, 78) || kvm_s390_is_amode_24(vcpu))
 			return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-		end = (start + _REGION3_SIZE) & ~(_REGION3_SIZE - 1);
+		end = ALIGN(start + 1, _REGION3_SIZE);
 		break;
 	default:
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -1141,19 +1120,17 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
 		if (kvm_is_error_hva(vmaddr))
 			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 
-		if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
-			if (kvm_clear_guest(vcpu->kvm, start, PAGE_SIZE))
-				return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-		}
+		if (r1.cf && kvm_clear_guest(vcpu->kvm, start, PAGE_SIZE))
+			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 
-		if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK) {
+		if (r1.sk) {
 			int rc = kvm_s390_skey_check_enable(vcpu);
 
 			if (rc)
 				return rc;
 			mmap_read_lock(current->mm);
-			rc = cond_set_guest_storage_key(current->mm, vmaddr,
-							key, NULL, nq, mr, mc);
+			rc = cond_set_guest_storage_key(current->mm, vmaddr, r1.skey, NULL,
+							r1.nq, r1.mr, r1.mc);
 			if (rc < 0) {
 				rc = fixup_user_fault(current->mm, vmaddr,
 						      FAULT_FLAG_WRITE, &unlocked);
@@ -1169,14 +1146,14 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
 		}
 		start += PAGE_SIZE;
 	}
-	if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
-		if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT) {
-			vcpu->run->s.regs.gprs[reg2] = end;
-		} else {
-			vcpu->run->s.regs.gprs[reg2] &= ~0xffffffffUL;
-			end = kvm_s390_logical_to_effective(vcpu, end);
-			vcpu->run->s.regs.gprs[reg2] |= end;
-		}
+	if (r1.fsc) {
+		u64 *r2 = vcpu->run->s.regs.gprs + reg2;
+
+		end = kvm_s390_logical_to_effective(vcpu, end);
+		if (kvm_s390_is_amode_64(vcpu))
+			*r2 = u64_replace_bits(*r2, end, PAGE_MASK);
+		else
+			*r2 = u64_replace_bits(*r2, end, 0xfffff000);
 	}
 	return 0;
 }
@@ -1361,8 +1338,9 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
 	reg = reg1;
 	nr_regs = 0;
 	do {
-		vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
-		vcpu->arch.sie_block->gcr[reg] |= ctl_array[nr_regs++];
+		u64 *cr = vcpu->arch.sie_block->gcr + reg;
+
+		*cr = u64_replace_bits(*cr, ctl_array[nr_regs++], 0xffffffff);
 		if (reg == reg3)
 			break;
 		reg = (reg + 1) % 16;
@@ -1489,7 +1467,7 @@ int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
 	case 0x62:
 		return handle_ri(vcpu);
 	case 0x71:
-		return handle_lpswey(vcpu);
+		return handle_lpswe_y(vcpu, true);
 	default:
 		return -EOPNOTSUPP;
 	}
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v1 4/5] KVM: s390: refactor and split some gmap helpers
  2025-05-14 16:38 [PATCH v1 0/5] KVM: s390: some cleanup and small fixes Claudio Imbrenda
                   ` (2 preceding siblings ...)
  2025-05-14 16:38 ` [PATCH v1 3/5] KVM: s390: refactor some functions in priv.c Claudio Imbrenda
@ 2025-05-14 16:38 ` Claudio Imbrenda
  2025-05-21 14:55   ` Nina Schoetterl-Glausch
  2025-05-14 16:38 ` [PATCH v1 5/5] KVM: s390: simplify and move pv code Claudio Imbrenda
  2025-05-14 17:41 ` [PATCH v1 0/5] KVM: s390: some cleanup and small fixes David Hildenbrand
  5 siblings, 1 reply; 21+ messages in thread
From: Claudio Imbrenda @ 2025-05-14 16:38 UTC (permalink / raw)
  To: linux-kernel
  Cc: kvm, linux-s390, frankja, borntraeger, seiden, nsg, nrb, david,
	hca, agordeev, svens, gor

Refactor some gmap functions; move the implementation into a separate
file with only helper functions. The new helper functions work on vm
addresses, leaving all gmap logic in the gmap functions, which mostly
become just wrappers.

The whole gmap handling is going to be moved inside KVM soon, but the
helper functions need to touch core mm functions, and thus need to
stay in the core of kernel.

Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
---
 MAINTAINERS                          |   2 +
 arch/s390/include/asm/gmap_helpers.h |  18 ++
 arch/s390/kvm/diag.c                 |  11 +-
 arch/s390/kvm/kvm-s390.c             |   3 +-
 arch/s390/mm/Makefile                |   2 +
 arch/s390/mm/gmap.c                  |  46 ++---
 arch/s390/mm/gmap_helpers.c          | 266 +++++++++++++++++++++++++++
 7 files changed, 307 insertions(+), 41 deletions(-)
 create mode 100644 arch/s390/include/asm/gmap_helpers.h
 create mode 100644 arch/s390/mm/gmap_helpers.c

diff --git a/MAINTAINERS b/MAINTAINERS
index f21f1dabb5fe..b0a8fb5a254c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13093,12 +13093,14 @@ S:	Supported
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git
 F:	Documentation/virt/kvm/s390*
 F:	arch/s390/include/asm/gmap.h
+F:	arch/s390/include/asm/gmap_helpers.h
 F:	arch/s390/include/asm/kvm*
 F:	arch/s390/include/uapi/asm/kvm*
 F:	arch/s390/include/uapi/asm/uvdevice.h
 F:	arch/s390/kernel/uv.c
 F:	arch/s390/kvm/
 F:	arch/s390/mm/gmap.c
+F:	arch/s390/mm/gmap_helpers.c
 F:	drivers/s390/char/uvdevice.c
 F:	tools/testing/selftests/drivers/s390x/uvdevice/
 F:	tools/testing/selftests/kvm/*/s390/
diff --git a/arch/s390/include/asm/gmap_helpers.h b/arch/s390/include/asm/gmap_helpers.h
new file mode 100644
index 000000000000..1854fe6a8c33
--- /dev/null
+++ b/arch/s390/include/asm/gmap_helpers.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  Helper functions for KVM guest address space mapping code
+ *
+ *    Copyright IBM Corp. 2007, 2025
+ *    Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com>
+ */
+
+#ifndef _ASM_S390_GMAP_HELPERS_H
+#define _ASM_S390_GMAP_HELPERS_H
+
+void __gmap_helper_zap_one(struct mm_struct *mm, unsigned long vmaddr);
+void __gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end);
+void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end);
+void __gmap_helper_set_unused(struct mm_struct *mm, unsigned long vmaddr);
+int gmap_helper_disable_cow_sharing(void);
+
+#endif /* _ASM_S390_GMAP_HELPERS_H */
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 74f73141f9b9..ce8b2f8125c4 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -11,6 +11,7 @@
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 #include <asm/gmap.h>
+#include <asm/gmap_helpers.h>
 #include <asm/virtio-ccw.h>
 #include "kvm-s390.h"
 #include "trace.h"
@@ -37,7 +38,7 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
 	 * fast path (no prefix swap page involved)
 	 */
 	if (end <= prefix || start >= prefix + 2 * PAGE_SIZE) {
-		gmap_discard(vcpu->arch.gmap, start, end);
+		gmap_helper_discard(vcpu->kvm->mm, start, end);
 	} else {
 		/*
 		 * This is slow path.  gmap_discard will check for start
@@ -45,12 +46,12 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
 		 * prefix and let gmap_discard make some of these calls
 		 * NOPs.
 		 */
-		gmap_discard(vcpu->arch.gmap, start, prefix);
+		gmap_helper_discard(vcpu->kvm->mm, start, prefix);
 		if (start <= prefix)
-			gmap_discard(vcpu->arch.gmap, 0, PAGE_SIZE);
+			gmap_helper_discard(vcpu->kvm->mm, 0, PAGE_SIZE);
 		if (end > prefix + PAGE_SIZE)
-			gmap_discard(vcpu->arch.gmap, PAGE_SIZE, 2 * PAGE_SIZE);
-		gmap_discard(vcpu->arch.gmap, prefix + 2 * PAGE_SIZE, end);
+			gmap_helper_discard(vcpu->kvm->mm, PAGE_SIZE, 2 * PAGE_SIZE);
+		gmap_helper_discard(vcpu->kvm->mm, prefix + 2 * PAGE_SIZE, end);
 	}
 	return 0;
 }
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 3f3175193fd7..b904f1bcb363 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -40,6 +40,7 @@
 #include <asm/machine.h>
 #include <asm/stp.h>
 #include <asm/gmap.h>
+#include <asm/gmap_helpers.h>
 #include <asm/nmi.h>
 #include <asm/isc.h>
 #include <asm/sclp.h>
@@ -2674,7 +2675,7 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
 		if (r)
 			break;
 
-		r = s390_disable_cow_sharing();
+		r = gmap_helper_disable_cow_sharing();
 		if (r)
 			break;
 
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index 9726b91fe7e4..bd0401cc7ca5 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -12,3 +12,5 @@ obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
 obj-$(CONFIG_PTDUMP)		+= dump_pagetables.o
 obj-$(CONFIG_PGSTE)		+= gmap.o
 obj-$(CONFIG_PFAULT)		+= pfault.o
+
+obj-$(subst m,y,$(CONFIG_KVM))	+= gmap_helpers.o
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 4869555ff403..17a2a57de3a2 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -22,6 +22,7 @@
 #include <asm/page-states.h>
 #include <asm/pgalloc.h>
 #include <asm/machine.h>
+#include <asm/gmap_helpers.h>
 #include <asm/gmap.h>
 #include <asm/page.h>
 
@@ -619,58 +620,33 @@ EXPORT_SYMBOL(__gmap_link);
  */
 void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
 {
-	struct vm_area_struct *vma;
 	unsigned long vmaddr;
-	spinlock_t *ptl;
-	pte_t *ptep;
+
+	mmap_assert_locked(gmap->mm);
 
 	/* Find the vm address for the guest address */
 	vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
 						   gaddr >> PMD_SHIFT);
 	if (vmaddr) {
 		vmaddr |= gaddr & ~PMD_MASK;
-
-		vma = vma_lookup(gmap->mm, vmaddr);
-		if (!vma || is_vm_hugetlb_page(vma))
-			return;
-
-		/* Get pointer to the page table entry */
-		ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
-		if (likely(ptep)) {
-			ptep_zap_unused(gmap->mm, vmaddr, ptep, 0);
-			pte_unmap_unlock(ptep, ptl);
-		}
+		__gmap_helper_zap_one(gmap->mm, vmaddr);
 	}
 }
 EXPORT_SYMBOL_GPL(__gmap_zap);
 
 void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
 {
-	unsigned long gaddr, vmaddr, size;
-	struct vm_area_struct *vma;
+	unsigned long vmaddr, next, start, end;
 
 	mmap_read_lock(gmap->mm);
-	for (gaddr = from; gaddr < to;
-	     gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
-		/* Find the vm address for the guest address */
-		vmaddr = (unsigned long)
-			radix_tree_lookup(&gmap->guest_to_host,
-					  gaddr >> PMD_SHIFT);
+	for ( ; from < to; from = next) {
+		next = ALIGN(from + 1, PMD_SIZE);
+		vmaddr = (unsigned long)radix_tree_lookup(&gmap->guest_to_host, from >> PMD_SHIFT);
 		if (!vmaddr)
 			continue;
-		vmaddr |= gaddr & ~PMD_MASK;
-		/* Find vma in the parent mm */
-		vma = find_vma(gmap->mm, vmaddr);
-		if (!vma)
-			continue;
-		/*
-		 * We do not discard pages that are backed by
-		 * hugetlbfs, so we don't have to refault them.
-		 */
-		if (is_vm_hugetlb_page(vma))
-			continue;
-		size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
-		zap_page_range_single(vma, vmaddr, size, NULL);
+		start = vmaddr | (from & ~PMD_MASK);
+		end = (vmaddr | (min(to - 1, next - 1) & ~PMD_MASK)) + 1;
+		__gmap_helper_discard(gmap->mm, start, end);
 	}
 	mmap_read_unlock(gmap->mm);
 }
diff --git a/arch/s390/mm/gmap_helpers.c b/arch/s390/mm/gmap_helpers.c
new file mode 100644
index 000000000000..8e66586718f6
--- /dev/null
+++ b/arch/s390/mm/gmap_helpers.c
@@ -0,0 +1,266 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Helper functions for KVM guest address space mapping code
+ *
+ *    Copyright IBM Corp. 2007, 2025
+ *    Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com>
+ *               Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *               David Hildenbrand <david@redhat.com>
+ *               Janosch Frank <frankja@linux.vnet.ibm.com>
+ */
+#include <linux/mm_types.h>
+#include <linux/mmap_lock.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/pagewalk.h>
+#include <linux/ksm.h>
+#include <asm/gmap_helpers.h>
+
+static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
+{
+	if (!non_swap_entry(entry))
+		dec_mm_counter(mm, MM_SWAPENTS);
+	else if (is_migration_entry(entry))
+		dec_mm_counter(mm, mm_counter(pfn_swap_entry_folio(entry)));
+	free_swap_and_cache(entry);
+}
+
+void __gmap_helper_zap_one(struct mm_struct *mm, unsigned long vmaddr)
+{
+	struct vm_area_struct *vma;
+	spinlock_t *ptl;
+	pte_t *ptep;
+
+	mmap_assert_locked(mm);
+
+	/* Find the vm address for the guest address */
+	vma = vma_lookup(mm, vmaddr);
+	if (!vma || is_vm_hugetlb_page(vma))
+		return;
+
+	/* Get pointer to the page table entry */
+	ptep = get_locked_pte(mm, vmaddr, &ptl);
+	if (!likely(ptep))
+		return;
+	if (pte_swap(*ptep))
+		ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep));
+	pte_unmap_unlock(ptep, ptl);
+}
+EXPORT_SYMBOL_GPL(__gmap_helper_zap_one);
+
+void __gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end)
+{
+	struct vm_area_struct *vma;
+	unsigned long next;
+
+	mmap_assert_locked(mm);
+
+	while (vmaddr < end) {
+		vma = find_vma_intersection(mm, vmaddr, end);
+		if (!vma)
+			break;
+		vmaddr = max(vmaddr, vma->vm_start);
+		next = min(end, vma->vm_end);
+		if (!is_vm_hugetlb_page(vma))
+			zap_page_range_single(vma, vmaddr, next - vmaddr, NULL);
+		vmaddr = next;
+	}
+}
+
+void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end)
+{
+	mmap_read_lock(mm);
+	__gmap_helper_discard(mm, vmaddr, end);
+	mmap_read_unlock(mm);
+}
+EXPORT_SYMBOL_GPL(gmap_helper_discard);
+
+static int pmd_lookup(struct mm_struct *mm, unsigned long addr, pmd_t **pmdp)
+{
+	struct vm_area_struct *vma;
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+
+	/* We need a valid VMA, otherwise this is clearly a fault. */
+	vma = vma_lookup(mm, addr);
+	if (!vma)
+		return -EFAULT;
+
+	pgd = pgd_offset(mm, addr);
+	if (!pgd_present(*pgd))
+		return -ENOENT;
+
+	p4d = p4d_offset(pgd, addr);
+	if (!p4d_present(*p4d))
+		return -ENOENT;
+
+	pud = pud_offset(p4d, addr);
+	if (!pud_present(*pud))
+		return -ENOENT;
+
+	/* Large PUDs are not supported yet. */
+	if (pud_leaf(*pud))
+		return -EFAULT;
+
+	*pmdp = pmd_offset(pud, addr);
+	return 0;
+}
+
+void __gmap_helper_set_unused(struct mm_struct *mm, unsigned long vmaddr)
+{
+	spinlock_t *ptl;
+	pmd_t *pmdp;
+	pte_t *ptep;
+
+	mmap_assert_locked(mm);
+
+	if (pmd_lookup(mm, vmaddr, &pmdp))
+		return;
+	ptl = pmd_lock(mm, pmdp);
+	if (!pmd_present(*pmdp) || pmd_leaf(*pmdp)) {
+		spin_unlock(ptl);
+		return;
+	}
+	spin_unlock(ptl);
+
+	ptep = pte_offset_map_lock(mm, pmdp, vmaddr, &ptl);
+	if (!ptep)
+		return;
+	/* The last byte of a pte can be changed freely without ipte */
+	__atomic64_or(_PAGE_UNUSED, (long *)ptep);
+	pte_unmap_unlock(ptep, ptl);
+}
+EXPORT_SYMBOL_GPL(__gmap_helper_set_unused);
+
+static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
+				   unsigned long end, struct mm_walk *walk)
+{
+	unsigned long *found_addr = walk->private;
+
+	/* Return 1 of the page is a zeropage. */
+	if (is_zero_pfn(pte_pfn(*pte))) {
+		/*
+		 * Shared zeropage in e.g., a FS DAX mapping? We cannot do the
+		 * right thing and likely don't care: FAULT_FLAG_UNSHARE
+		 * currently only works in COW mappings, which is also where
+		 * mm_forbids_zeropage() is checked.
+		 */
+		if (!is_cow_mapping(walk->vma->vm_flags))
+			return -EFAULT;
+
+		*found_addr = addr;
+		return 1;
+	}
+	return 0;
+}
+
+static const struct mm_walk_ops find_zeropage_ops = {
+	.pte_entry      = find_zeropage_pte_entry,
+	.walk_lock      = PGWALK_WRLOCK,
+};
+
+/*
+ * Unshare all shared zeropages, replacing them by anonymous pages. Note that
+ * we cannot simply zap all shared zeropages, because this could later
+ * trigger unexpected userfaultfd missing events.
+ *
+ * This must be called after mm->context.allow_cow_sharing was
+ * set to 0, to avoid future mappings of shared zeropages.
+ *
+ * mm contracts with s390, that even if mm were to remove a page table,
+ * and racing with walk_page_range_vma() calling pte_offset_map_lock()
+ * would fail, it will never insert a page table containing empty zero
+ * pages once mm_forbids_zeropage(mm) i.e.
+ * mm->context.allow_cow_sharing is set to 0.
+ */
+static int __gmap_helper_unshare_zeropages(struct mm_struct *mm)
+{
+	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, mm, 0);
+	unsigned long addr;
+	vm_fault_t fault;
+	int rc;
+
+	for_each_vma(vmi, vma) {
+		/*
+		 * We could only look at COW mappings, but it's more future
+		 * proof to catch unexpected zeropages in other mappings and
+		 * fail.
+		 */
+		if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma))
+			continue;
+		addr = vma->vm_start;
+
+retry:
+		rc = walk_page_range_vma(vma, addr, vma->vm_end,
+					 &find_zeropage_ops, &addr);
+		if (rc < 0)
+			return rc;
+		else if (!rc)
+			continue;
+
+		/* addr was updated by find_zeropage_pte_entry() */
+		fault = handle_mm_fault(vma, addr,
+					FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
+					NULL);
+		if (fault & VM_FAULT_OOM)
+			return -ENOMEM;
+		/*
+		 * See break_ksm(): even after handle_mm_fault() returned 0, we
+		 * must start the lookup from the current address, because
+		 * handle_mm_fault() may back out if there's any difficulty.
+		 *
+		 * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but
+		 * maybe they could trigger in the future on concurrent
+		 * truncation. In that case, the shared zeropage would be gone
+		 * and we can simply retry and make progress.
+		 */
+		cond_resched();
+		goto retry;
+	}
+
+	return 0;
+}
+
+static int __gmap_helper_disable_cow_sharing(struct mm_struct *mm)
+{
+	int rc;
+
+	if (!mm->context.allow_cow_sharing)
+		return 0;
+
+	mm->context.allow_cow_sharing = 0;
+
+	/* Replace all shared zeropages by anonymous pages. */
+	rc = __gmap_helper_unshare_zeropages(mm);
+	/*
+	 * Make sure to disable KSM (if enabled for the whole process or
+	 * individual VMAs). Note that nothing currently hinders user space
+	 * from re-enabling it.
+	 */
+	if (!rc)
+		rc = ksm_disable(mm);
+	if (rc)
+		mm->context.allow_cow_sharing = 1;
+	return rc;
+}
+
+/*
+ * Disable most COW-sharing of memory pages for the whole process:
+ * (1) Disable KSM and unmerge/unshare any KSM pages.
+ * (2) Disallow shared zeropages and unshare any zerpages that are mapped.
+ *
+ * Not that we currently don't bother with COW-shared pages that are shared
+ * with parent/child processes due to fork().
+ */
+int gmap_helper_disable_cow_sharing(void)
+{
+	int rc;
+
+	mmap_write_lock(current->mm);
+	rc = __gmap_helper_disable_cow_sharing(current->mm);
+	mmap_write_unlock(current->mm);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_helper_disable_cow_sharing);
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v1 5/5] KVM: s390: simplify and move pv code
  2025-05-14 16:38 [PATCH v1 0/5] KVM: s390: some cleanup and small fixes Claudio Imbrenda
                   ` (3 preceding siblings ...)
  2025-05-14 16:38 ` [PATCH v1 4/5] KVM: s390: refactor and split some gmap helpers Claudio Imbrenda
@ 2025-05-14 16:38 ` Claudio Imbrenda
  2025-05-21 15:42   ` Nina Schoetterl-Glausch
  2025-05-14 17:41 ` [PATCH v1 0/5] KVM: s390: some cleanup and small fixes David Hildenbrand
  5 siblings, 1 reply; 21+ messages in thread
From: Claudio Imbrenda @ 2025-05-14 16:38 UTC (permalink / raw)
  To: linux-kernel
  Cc: kvm, linux-s390, frankja, borntraeger, seiden, nsg, nrb, david,
	hca, agordeev, svens, gor

All functions in kvm/gmap.c fit better in kvm/pv.c instead.
Move and rename them appropriately, then delete the now empty
kvm/gmap.c and kvm/gmap.h.

Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
---
 arch/s390/kernel/uv.c     |  12 ++--
 arch/s390/kvm/Makefile    |   2 +-
 arch/s390/kvm/gaccess.c   |   3 +-
 arch/s390/kvm/gmap-vsie.c |   1 -
 arch/s390/kvm/gmap.c      | 121 --------------------------------------
 arch/s390/kvm/gmap.h      |  39 ------------
 arch/s390/kvm/intercept.c |   9 +--
 arch/s390/kvm/kvm-s390.c  |   5 +-
 arch/s390/kvm/kvm-s390.h  |  42 +++++++++++++
 arch/s390/kvm/pv.c        |  61 ++++++++++++++++++-
 arch/s390/kvm/vsie.c      |  19 +++++-
 11 files changed, 133 insertions(+), 181 deletions(-)
 delete mode 100644 arch/s390/kvm/gmap.c
 delete mode 100644 arch/s390/kvm/gmap.h

diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index 9a5d5be8acf4..644c110287c4 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -135,7 +135,7 @@ int uv_destroy_folio(struct folio *folio)
 {
 	int rc;
 
-	/* See gmap_make_secure(): large folios cannot be secure */
+	/* Large folios cannot be secure */
 	if (unlikely(folio_test_large(folio)))
 		return 0;
 
@@ -184,7 +184,7 @@ int uv_convert_from_secure_folio(struct folio *folio)
 {
 	int rc;
 
-	/* See gmap_make_secure(): large folios cannot be secure */
+	/* Large folios cannot be secure */
 	if (unlikely(folio_test_large(folio)))
 		return 0;
 
@@ -403,15 +403,15 @@ EXPORT_SYMBOL_GPL(make_hva_secure);
 
 /*
  * To be called with the folio locked or with an extra reference! This will
- * prevent gmap_make_secure from touching the folio concurrently. Having 2
- * parallel arch_make_folio_accessible is fine, as the UV calls will become a
- * no-op if the folio is already exported.
+ * prevent kvm_s390_pv_make_secure() from touching the folio concurrently.
+ * Having 2 parallel arch_make_folio_accessible is fine, as the UV calls will
+ * become a no-op if the folio is already exported.
  */
 int arch_make_folio_accessible(struct folio *folio)
 {
 	int rc = 0;
 
-	/* See gmap_make_secure(): large folios cannot be secure */
+	/* Large folios cannot be secure */
 	if (unlikely(folio_test_large(folio)))
 		return 0;
 
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index f0ffe874adc2..9a723c48b05a 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -8,7 +8,7 @@ include $(srctree)/virt/kvm/Makefile.kvm
 ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
 
 kvm-y += kvm-s390.o intercept.o interrupt.o priv.o sigp.o
-kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o gmap.o gmap-vsie.o
+kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o gmap-vsie.o
 
 kvm-$(CONFIG_VFIO_PCI_ZDEV_KVM) += pci.o
 obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index f6fded15633a..e23670e1949c 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -16,9 +16,10 @@
 #include <asm/gmap.h>
 #include <asm/dat-bits.h>
 #include "kvm-s390.h"
-#include "gmap.h"
 #include "gaccess.h"
 
+#define GMAP_SHADOW_FAKE_TABLE 1ULL
+
 /*
  * vaddress union in order to easily decode a virtual address into its
  * region first index, region second index etc. parts.
diff --git a/arch/s390/kvm/gmap-vsie.c b/arch/s390/kvm/gmap-vsie.c
index a6d1dbb04c97..56ef153eb8fe 100644
--- a/arch/s390/kvm/gmap-vsie.c
+++ b/arch/s390/kvm/gmap-vsie.c
@@ -22,7 +22,6 @@
 #include <asm/uv.h>
 
 #include "kvm-s390.h"
-#include "gmap.h"
 
 /**
  * gmap_find_shadow - find a specific asce in the list of shadow tables
diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c
deleted file mode 100644
index 6d8944d1b4a0..000000000000
--- a/arch/s390/kvm/gmap.c
+++ /dev/null
@@ -1,121 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Guest memory management for KVM/s390
- *
- * Copyright IBM Corp. 2008, 2020, 2024
- *
- *    Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com>
- *               Martin Schwidefsky <schwidefsky@de.ibm.com>
- *               David Hildenbrand <david@redhat.com>
- *               Janosch Frank <frankja@linux.vnet.ibm.com>
- */
-
-#include <linux/compiler.h>
-#include <linux/kvm.h>
-#include <linux/kvm_host.h>
-#include <linux/pgtable.h>
-#include <linux/pagemap.h>
-
-#include <asm/lowcore.h>
-#include <asm/gmap.h>
-#include <asm/uv.h>
-
-#include "gmap.h"
-
-/**
- * gmap_make_secure() - make one guest page secure
- * @gmap: the guest gmap
- * @gaddr: the guest address that needs to be made secure
- * @uvcb: the UVCB specifying which operation needs to be performed
- *
- * Context: needs to be called with kvm->srcu held.
- * Return: 0 on success, < 0 in case of error.
- */
-int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
-{
-	struct kvm *kvm = gmap->private;
-	unsigned long vmaddr;
-
-	lockdep_assert_held(&kvm->srcu);
-
-	vmaddr = gfn_to_hva(kvm, gpa_to_gfn(gaddr));
-	if (kvm_is_error_hva(vmaddr))
-		return -EFAULT;
-	return make_hva_secure(gmap->mm, vmaddr, uvcb);
-}
-
-int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr)
-{
-	struct uv_cb_cts uvcb = {
-		.header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
-		.header.len = sizeof(uvcb),
-		.guest_handle = gmap->guest_handle,
-		.gaddr = gaddr,
-	};
-
-	return gmap_make_secure(gmap, gaddr, &uvcb);
-}
-
-/**
- * __gmap_destroy_page() - Destroy a guest page.
- * @gmap: the gmap of the guest
- * @page: the page to destroy
- *
- * An attempt will be made to destroy the given guest page. If the attempt
- * fails, an attempt is made to export the page. If both attempts fail, an
- * appropriate error is returned.
- *
- * Context: must be called holding the mm lock for gmap->mm
- */
-static int __gmap_destroy_page(struct gmap *gmap, struct page *page)
-{
-	struct folio *folio = page_folio(page);
-	int rc;
-
-	/*
-	 * See gmap_make_secure(): large folios cannot be secure. Small
-	 * folio implies FW_LEVEL_PTE.
-	 */
-	if (folio_test_large(folio))
-		return -EFAULT;
-
-	rc = uv_destroy_folio(folio);
-	/*
-	 * Fault handlers can race; it is possible that two CPUs will fault
-	 * on the same secure page. One CPU can destroy the page, reboot,
-	 * re-enter secure mode and import it, while the second CPU was
-	 * stuck at the beginning of the handler. At some point the second
-	 * CPU will be able to progress, and it will not be able to destroy
-	 * the page. In that case we do not want to terminate the process,
-	 * we instead try to export the page.
-	 */
-	if (rc)
-		rc = uv_convert_from_secure_folio(folio);
-
-	return rc;
-}
-
-/**
- * gmap_destroy_page() - Destroy a guest page.
- * @gmap: the gmap of the guest
- * @gaddr: the guest address to destroy
- *
- * An attempt will be made to destroy the given guest page. If the attempt
- * fails, an attempt is made to export the page. If both attempts fail, an
- * appropriate error is returned.
- *
- * Context: may sleep.
- */
-int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
-{
-	struct page *page;
-	int rc = 0;
-
-	mmap_read_lock(gmap->mm);
-	page = gfn_to_page(gmap->private, gpa_to_gfn(gaddr));
-	if (page)
-		rc = __gmap_destroy_page(gmap, page);
-	kvm_release_page_clean(page);
-	mmap_read_unlock(gmap->mm);
-	return rc;
-}
diff --git a/arch/s390/kvm/gmap.h b/arch/s390/kvm/gmap.h
deleted file mode 100644
index c8f031c9ea5f..000000000000
--- a/arch/s390/kvm/gmap.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *  KVM guest address space mapping code
- *
- *    Copyright IBM Corp. 2007, 2016, 2025
- *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
- *               Claudio Imbrenda <imbrenda@linux.ibm.com>
- */
-
-#ifndef ARCH_KVM_S390_GMAP_H
-#define ARCH_KVM_S390_GMAP_H
-
-#define GMAP_SHADOW_FAKE_TABLE 1ULL
-
-int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb);
-int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr);
-int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr);
-struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, int edat_level);
-
-/**
- * gmap_shadow_valid - check if a shadow guest address space matches the
- *                     given properties and is still valid
- * @sg: pointer to the shadow guest address space structure
- * @asce: ASCE for which the shadow table is requested
- * @edat_level: edat level to be used for the shadow translation
- *
- * Returns 1 if the gmap shadow is still valid and matches the given
- * properties, the caller can continue using it. Returns 0 otherwise, the
- * caller has to request a new shadow gmap in this case.
- *
- */
-static inline int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level)
-{
-	if (sg->removed)
-		return 0;
-	return sg->orig_asce == asce && sg->edat_level == edat_level;
-}
-
-#endif
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index b4834bd4d216..a2724c15b6f6 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -22,7 +22,6 @@
 #include "gaccess.h"
 #include "trace.h"
 #include "trace-s390.h"
-#include "gmap.h"
 
 u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu)
 {
@@ -546,7 +545,7 @@ static int handle_pv_uvc(struct kvm_vcpu *vcpu)
 			  guest_uvcb->header.cmd);
 		return 0;
 	}
-	rc = gmap_make_secure(vcpu->arch.gmap, uvcb.gaddr, &uvcb);
+	rc = kvm_s390_pv_make_secure(vcpu->kvm, uvcb.gaddr, &uvcb);
 	/*
 	 * If the unpin did not succeed, the guest will exit again for the UVC
 	 * and we will retry the unpin.
@@ -654,10 +653,8 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
 		break;
 	case ICPT_PV_PREF:
 		rc = 0;
-		gmap_convert_to_secure(vcpu->arch.gmap,
-				       kvm_s390_get_prefix(vcpu));
-		gmap_convert_to_secure(vcpu->arch.gmap,
-				       kvm_s390_get_prefix(vcpu) + PAGE_SIZE);
+		kvm_s390_pv_convert_to_secure(vcpu->kvm, kvm_s390_get_prefix(vcpu));
+		kvm_s390_pv_convert_to_secure(vcpu->kvm, kvm_s390_get_prefix(vcpu) + PAGE_SIZE);
 		break;
 	default:
 		return -EOPNOTSUPP;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index b904f1bcb363..905e025a5084 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -53,7 +53,6 @@
 #include "kvm-s390.h"
 #include "gaccess.h"
 #include "pci.h"
-#include "gmap.h"
 
 #define CREATE_TRACE_POINTS
 #include "trace.h"
@@ -4974,7 +4973,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
 		 * previous protected guest. The old pages need to be destroyed
 		 * so the new guest can use them.
 		 */
-		if (gmap_destroy_page(vcpu->arch.gmap, gaddr)) {
+		if (kvm_s390_pv_destroy_page(vcpu->kvm, gaddr)) {
 			/*
 			 * Either KVM messed up the secure guest mapping or the
 			 * same page is mapped into multiple secure guests.
@@ -4996,7 +4995,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
 		 * guest has not been imported yet. Try to import the page into
 		 * the protected guest.
 		 */
-		rc = gmap_convert_to_secure(vcpu->arch.gmap, gaddr);
+		rc = kvm_s390_pv_convert_to_secure(vcpu->kvm, gaddr);
 		if (rc == -EINVAL)
 			send_sig(SIGSEGV, current, 0);
 		if (rc != -ENXIO)
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index f8c32527c4e4..855f0980dc78 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -323,6 +323,9 @@ int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user,
 				u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc);
 int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user,
 			      u16 *rc, u16 *rrc);
+int kvm_s390_pv_destroy_page(struct kvm *kvm, unsigned long gaddr);
+int kvm_s390_pv_convert_to_secure(struct kvm *kvm, unsigned long gaddr);
+int kvm_s390_pv_make_secure(struct kvm *kvm, unsigned long gaddr, void *uvcb);
 
 static inline u64 kvm_s390_pv_get_handle(struct kvm *kvm)
 {
@@ -334,6 +337,41 @@ static inline u64 kvm_s390_pv_cpu_get_handle(struct kvm_vcpu *vcpu)
 	return vcpu->arch.pv.handle;
 }
 
+/**
+ * __kvm_s390_pv_destroy_page() - Destroy a guest page.
+ * @page: the page to destroy
+ *
+ * An attempt will be made to destroy the given guest page. If the attempt
+ * fails, an attempt is made to export the page. If both attempts fail, an
+ * appropriate error is returned.
+ *
+ * Context: must be called holding the mm lock for gmap->mm
+ */
+static inline int __kvm_s390_pv_destroy_page(struct page *page)
+{
+	struct folio *folio = page_folio(page);
+	int rc;
+
+	/* Large folios cannot be secure. Small folio implies FW_LEVEL_PTE. */
+	if (folio_test_large(folio))
+		return -EFAULT;
+
+	rc = uv_destroy_folio(folio);
+	/*
+	 * Fault handlers can race; it is possible that two CPUs will fault
+	 * on the same secure page. One CPU can destroy the page, reboot,
+	 * re-enter secure mode and import it, while the second CPU was
+	 * stuck at the beginning of the handler. At some point the second
+	 * CPU will be able to progress, and it will not be able to destroy
+	 * the page. In that case we do not want to terminate the process,
+	 * we instead try to export the page.
+	 */
+	if (rc)
+		rc = uv_convert_from_secure_folio(folio);
+
+	return rc;
+}
+
 /* implemented in interrupt.c */
 int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
 void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu);
@@ -413,6 +451,10 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
 				 unsigned long end);
 void kvm_s390_vsie_init(struct kvm *kvm);
 void kvm_s390_vsie_destroy(struct kvm *kvm);
+int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level);
+
+/* implemented in gmap-vsie.c */
+struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, int edat_level);
 
 /* implemented in sigp.c */
 int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index 22c012aa5206..14c330ec8ceb 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -17,7 +17,6 @@
 #include <linux/sched/mm.h>
 #include <linux/mmu_notifier.h>
 #include "kvm-s390.h"
-#include "gmap.h"
 
 bool kvm_s390_pv_is_protected(struct kvm *kvm)
 {
@@ -33,6 +32,64 @@ bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_s390_pv_cpu_is_protected);
 
+/**
+ * kvm_s390_pv_make_secure() - make one guest page secure
+ * @kvm: the guest
+ * @gaddr: the guest address that needs to be made secure
+ * @uvcb: the UVCB specifying which operation needs to be performed
+ *
+ * Context: needs to be called with kvm->srcu held.
+ * Return: 0 on success, < 0 in case of error.
+ */
+int kvm_s390_pv_make_secure(struct kvm *kvm, unsigned long gaddr, void *uvcb)
+{
+	unsigned long vmaddr;
+
+	lockdep_assert_held(&kvm->srcu);
+
+	vmaddr = gfn_to_hva(kvm, gpa_to_gfn(gaddr));
+	if (kvm_is_error_hva(vmaddr))
+		return -EFAULT;
+	return make_hva_secure(kvm->mm, vmaddr, uvcb);
+}
+
+int kvm_s390_pv_convert_to_secure(struct kvm *kvm, unsigned long gaddr)
+{
+	struct uv_cb_cts uvcb = {
+		.header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
+		.header.len = sizeof(uvcb),
+		.guest_handle = kvm_s390_pv_get_handle(kvm),
+		.gaddr = gaddr,
+	};
+
+	return kvm_s390_pv_make_secure(kvm, gaddr, &uvcb);
+}
+
+/**
+ * kvm_s390_pv_destroy_page() - Destroy a guest page.
+ * @kvm: the guest
+ * @gaddr: the guest address to destroy
+ *
+ * An attempt will be made to destroy the given guest page. If the attempt
+ * fails, an attempt is made to export the page. If both attempts fail, an
+ * appropriate error is returned.
+ *
+ * Context: may sleep.
+ */
+int kvm_s390_pv_destroy_page(struct kvm *kvm, unsigned long gaddr)
+{
+	struct page *page;
+	int rc = 0;
+
+	mmap_read_lock(kvm->mm);
+	page = gfn_to_page(kvm, gpa_to_gfn(gaddr));
+	if (page)
+		rc = __kvm_s390_pv_destroy_page(page);
+	kvm_release_page_clean(page);
+	mmap_read_unlock(kvm->mm);
+	return rc;
+}
+
 /**
  * struct pv_vm_to_be_destroyed - Represents a protected VM that needs to
  * be destroyed
@@ -638,7 +695,7 @@ static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak,
 		.tweak[0] = tweak,
 		.tweak[1] = offset,
 	};
-	int ret = gmap_make_secure(kvm->arch.gmap, addr, &uvcb);
+	int ret = kvm_s390_pv_make_secure(kvm, addr, &uvcb);
 	unsigned long vmaddr;
 	bool unlocked;
 
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index a78df3a4f353..13a9661d2b28 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -23,7 +23,6 @@
 #include <asm/facility.h>
 #include "kvm-s390.h"
 #include "gaccess.h"
-#include "gmap.h"
 
 enum vsie_page_flags {
 	VSIE_PAGE_IN_USE = 0,
@@ -68,6 +67,24 @@ struct vsie_page {
 	__u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE];	/* 0x0800 */
 };
 
+/**
+ * gmap_shadow_valid() - check if a shadow guest address space matches the
+ *                       given properties and is still valid
+ * @sg: pointer to the shadow guest address space structure
+ * @asce: ASCE for which the shadow table is requested
+ * @edat_level: edat level to be used for the shadow translation
+ *
+ * Returns 1 if the gmap shadow is still valid and matches the given
+ * properties, the caller can continue using it. Returns 0 otherwise; the
+ * caller has to request a new shadow gmap in this case.
+ */
+int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level)
+{
+	if (sg->removed)
+		return 0;
+	return sg->orig_asce == asce && sg->edat_level == edat_level;
+}
+
 /* trigger a validity icpt for the given scb */
 static int set_validity_icpt(struct kvm_s390_sie_block *scb,
 			     __u16 reason_code)
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH v1 0/5] KVM: s390: some cleanup and small fixes
  2025-05-14 16:38 [PATCH v1 0/5] KVM: s390: some cleanup and small fixes Claudio Imbrenda
                   ` (4 preceding siblings ...)
  2025-05-14 16:38 ` [PATCH v1 5/5] KVM: s390: simplify and move pv code Claudio Imbrenda
@ 2025-05-14 17:41 ` David Hildenbrand
  5 siblings, 0 replies; 21+ messages in thread
From: David Hildenbrand @ 2025-05-14 17:41 UTC (permalink / raw)
  To: Claudio Imbrenda, linux-kernel
  Cc: kvm, linux-s390, frankja, borntraeger, seiden, nsg, nrb, hca,
	agordeev, svens, gor

On 14.05.25 18:38, Claudio Imbrenda wrote:
> This series has some cleanups and small fixes in preparation of the
> upcoming series that will finally completely move all guest page table
> handling into kvm. The cleaups and fixes in this series are good enough
> on their own, hence why they are being sent now.
> 
> Claudio Imbrenda (5):
>    s390: remove unneeded includes
>    KVM: s390: remove unneeded srcu lock
>    KVM: s390: refactor some functions in priv.c
>    KVM: s390: refactor and split some gmap helpers
>    KVM: s390: simplify and move pv code
> 
>   MAINTAINERS                          |   2 +
>   arch/s390/include/asm/gmap_helpers.h |  18 ++
>   arch/s390/include/asm/tlb.h          |   1 +
>   arch/s390/include/asm/uv.h           |   1 -
>   arch/s390/kernel/uv.c                |  12 +-
>   arch/s390/kvm/Makefile               |   2 +-
>   arch/s390/kvm/diag.c                 |  11 +-
>   arch/s390/kvm/gaccess.c              |   3 +-
>   arch/s390/kvm/gmap-vsie.c            |   1 -
>   arch/s390/kvm/gmap.c                 | 121 -----------
>   arch/s390/kvm/gmap.h                 |  39 ----
>   arch/s390/kvm/intercept.c            |  10 +-
>   arch/s390/kvm/kvm-s390.c             |   8 +-
>   arch/s390/kvm/kvm-s390.h             |  57 ++++++
>   arch/s390/kvm/priv.c                 | 292 ++++++++++++---------------
>   arch/s390/kvm/pv.c                   |  61 +++++-
>   arch/s390/kvm/vsie.c                 |  19 +-
>   arch/s390/mm/Makefile                |   2 +
>   arch/s390/mm/fault.c                 |   1 -
>   arch/s390/mm/gmap.c                  |  47 +----
>   arch/s390/mm/gmap_helpers.c          | 266 ++++++++++++++++++++++++
>   arch/s390/mm/init.c                  |   1 -
>   arch/s390/mm/pgalloc.c               |   2 -
>   arch/s390/mm/pgtable.c               |   1 -
>   24 files changed, 590 insertions(+), 388 deletions(-)

Hehe, that's not what I expected from a cleanup. I assume it's mostly 
the "factor out stuff" and new file that adds these LOC?

-- 
Cheers,

David / dhildenb


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v1 1/5] s390: remove unneeded includes
  2025-05-14 16:38 ` [PATCH v1 1/5] s390: remove unneeded includes Claudio Imbrenda
@ 2025-05-15 13:56   ` Christoph Schlameuss
  2025-05-15 15:29     ` Claudio Imbrenda
  0 siblings, 1 reply; 21+ messages in thread
From: Christoph Schlameuss @ 2025-05-15 13:56 UTC (permalink / raw)
  To: Claudio Imbrenda, linux-kernel
  Cc: kvm, linux-s390, frankja, borntraeger, seiden, nsg, nrb, david,
	hca, agordeev, svens, gor

You added one unnecessary import to arch/s390/kvm/intercept.c

With that removed:
Reviewed-by: Christoph Schlameuss <schlameuss@linux.ibm.com>


On Wed May 14, 2025 at 6:38 PM CEST, Claudio Imbrenda wrote:
> Many files don't need to include asm/tlb.h or asm/gmap.h.
> On the other hand, asm/tlb.h does need to include asm/gmap.h.
>
> Remove all unneeded includes so that asm/tlb.h is not directly used by
> s390 arch code anymore. Remove asm/gmap.h from a few other files as
> well, so that now only KVM code, mm/gmap.c, and asm/tlb.h include it.
>
> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> ---
>  arch/s390/include/asm/tlb.h | 1 +
>  arch/s390/include/asm/uv.h  | 1 -
>  arch/s390/kvm/intercept.c   | 1 +
>  arch/s390/mm/fault.c        | 1 -
>  arch/s390/mm/gmap.c         | 1 -
>  arch/s390/mm/init.c         | 1 -
>  arch/s390/mm/pgalloc.c      | 2 --
>  arch/s390/mm/pgtable.c      | 1 -
>  8 files changed, 2 insertions(+), 7 deletions(-)
>
> diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
> index f20601995bb0..56d5f9e0eb2e 100644
> --- a/arch/s390/include/asm/tlb.h
> +++ b/arch/s390/include/asm/tlb.h
> @@ -36,6 +36,7 @@ static inline bool __tlb_remove_folio_pages(struct mmu_gather *tlb,
>  
>  #include <asm/tlbflush.h>
>  #include <asm-generic/tlb.h>
> +#include <asm/gmap.h>
>  
>  /*
>   * Release the page cache reference for a pte removed by
> diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
> index 46fb0ef6f984..eeb2db4783e6 100644
> --- a/arch/s390/include/asm/uv.h
> +++ b/arch/s390/include/asm/uv.h
> @@ -16,7 +16,6 @@
>  #include <linux/bug.h>
>  #include <linux/sched.h>
>  #include <asm/page.h>
> -#include <asm/gmap.h>
>  #include <asm/asm.h>
>  
>  #define UVC_CC_OK	0
> diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
> index a06a000f196c..b4834bd4d216 100644
> --- a/arch/s390/kvm/intercept.c
> +++ b/arch/s390/kvm/intercept.c
> @@ -16,6 +16,7 @@
>  #include <asm/irq.h>
>  #include <asm/sysinfo.h>
>  #include <asm/uv.h>
> +#include <asm/gmap.h>

This import is not needed.

>  
>  #include "kvm-s390.h"
>  #include "gaccess.h"
> diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
> index da84ff6770de..3829521450dd 100644
> --- a/arch/s390/mm/fault.c
> +++ b/arch/s390/mm/fault.c
> @@ -40,7 +40,6 @@
>  #include <asm/ptrace.h>
>  #include <asm/fault.h>
>  #include <asm/diag.h>
> -#include <asm/gmap.h>
>  #include <asm/irq.h>
>  #include <asm/facility.h>
>  #include <asm/uv.h>
> diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
> index a94bd4870c65..4869555ff403 100644
> --- a/arch/s390/mm/gmap.c
> +++ b/arch/s390/mm/gmap.c
> @@ -24,7 +24,6 @@
>  #include <asm/machine.h>
>  #include <asm/gmap.h>
>  #include <asm/page.h>
> -#include <asm/tlb.h>
>  
>  /*
>   * The address is saved in a radix tree directly; NULL would be ambiguous,
> diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
> index afa085e8186c..074bf4fb4ce2 100644
> --- a/arch/s390/mm/init.c
> +++ b/arch/s390/mm/init.c
> @@ -40,7 +40,6 @@
>  #include <asm/kfence.h>
>  #include <asm/dma.h>
>  #include <asm/abs_lowcore.h>
> -#include <asm/tlb.h>
>  #include <asm/tlbflush.h>
>  #include <asm/sections.h>
>  #include <asm/sclp.h>
> diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
> index e3a6f8ae156c..ddab36875370 100644
> --- a/arch/s390/mm/pgalloc.c
> +++ b/arch/s390/mm/pgalloc.c
> @@ -12,8 +12,6 @@
>  #include <asm/mmu_context.h>
>  #include <asm/page-states.h>
>  #include <asm/pgalloc.h>
> -#include <asm/gmap.h>
> -#include <asm/tlb.h>
>  #include <asm/tlbflush.h>
>  
>  unsigned long *crst_table_alloc(struct mm_struct *mm)
> diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
> index 9901934284ec..7df70cd8f739 100644
> --- a/arch/s390/mm/pgtable.c
> +++ b/arch/s390/mm/pgtable.c
> @@ -20,7 +20,6 @@
>  #include <linux/ksm.h>
>  #include <linux/mman.h>
>  
> -#include <asm/tlb.h>
>  #include <asm/tlbflush.h>
>  #include <asm/mmu_context.h>
>  #include <asm/page-states.h>


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v1 1/5] s390: remove unneeded includes
  2025-05-15 13:56   ` Christoph Schlameuss
@ 2025-05-15 15:29     ` Claudio Imbrenda
  0 siblings, 0 replies; 21+ messages in thread
From: Claudio Imbrenda @ 2025-05-15 15:29 UTC (permalink / raw)
  To: Christoph Schlameuss
  Cc: linux-kernel, kvm, linux-s390, frankja, borntraeger, seiden, nsg,
	nrb, david, hca, agordeev, svens, gor

On Thu, 15 May 2025 15:56:44 +0200
"Christoph Schlameuss" <schlameuss@linux.ibm.com> wrote:

> You added one unnecessary import to arch/s390/kvm/intercept.c
> 
> With that removed:
> Reviewed-by: Christoph Schlameuss <schlameuss@linux.ibm.com>
> 
> 
> On Wed May 14, 2025 at 6:38 PM CEST, Claudio Imbrenda wrote:
> > Many files don't need to include asm/tlb.h or asm/gmap.h.
> > On the other hand, asm/tlb.h does need to include asm/gmap.h.
> >
> > Remove all unneeded includes so that asm/tlb.h is not directly used by
> > s390 arch code anymore. Remove asm/gmap.h from a few other files as
> > well, so that now only KVM code, mm/gmap.c, and asm/tlb.h include it.
> >
> > Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> > ---
> >  arch/s390/include/asm/tlb.h | 1 +
> >  arch/s390/include/asm/uv.h  | 1 -
> >  arch/s390/kvm/intercept.c   | 1 +
> >  arch/s390/mm/fault.c        | 1 -
> >  arch/s390/mm/gmap.c         | 1 -
> >  arch/s390/mm/init.c         | 1 -
> >  arch/s390/mm/pgalloc.c      | 2 --
> >  arch/s390/mm/pgtable.c      | 1 -
> >  8 files changed, 2 insertions(+), 7 deletions(-)
> >
> > diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
> > index f20601995bb0..56d5f9e0eb2e 100644
> > --- a/arch/s390/include/asm/tlb.h
> > +++ b/arch/s390/include/asm/tlb.h
> > @@ -36,6 +36,7 @@ static inline bool __tlb_remove_folio_pages(struct mmu_gather *tlb,
> >  
> >  #include <asm/tlbflush.h>
> >  #include <asm-generic/tlb.h>
> > +#include <asm/gmap.h>
> >  
> >  /*
> >   * Release the page cache reference for a pte removed by
> > diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
> > index 46fb0ef6f984..eeb2db4783e6 100644
> > --- a/arch/s390/include/asm/uv.h
> > +++ b/arch/s390/include/asm/uv.h
> > @@ -16,7 +16,6 @@
> >  #include <linux/bug.h>
> >  #include <linux/sched.h>
> >  #include <asm/page.h>
> > -#include <asm/gmap.h>
> >  #include <asm/asm.h>
> >  
> >  #define UVC_CC_OK	0
> > diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
> > index a06a000f196c..b4834bd4d216 100644
> > --- a/arch/s390/kvm/intercept.c
> > +++ b/arch/s390/kvm/intercept.c
> > @@ -16,6 +16,7 @@
> >  #include <asm/irq.h>
> >  #include <asm/sysinfo.h>
> >  #include <asm/uv.h>
> > +#include <asm/gmap.h>  
> 
> This import is not needed.

it is needed here, but it can be removed in patch 5
(I'll fix that for v2)

> 
> >  
> >  #include "kvm-s390.h"
> >  #include "gaccess.h"
> > diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
> > index da84ff6770de..3829521450dd 100644
> > --- a/arch/s390/mm/fault.c
> > +++ b/arch/s390/mm/fault.c
> > @@ -40,7 +40,6 @@
> >  #include <asm/ptrace.h>
> >  #include <asm/fault.h>
> >  #include <asm/diag.h>
> > -#include <asm/gmap.h>
> >  #include <asm/irq.h>
> >  #include <asm/facility.h>
> >  #include <asm/uv.h>
> > diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
> > index a94bd4870c65..4869555ff403 100644
> > --- a/arch/s390/mm/gmap.c
> > +++ b/arch/s390/mm/gmap.c
> > @@ -24,7 +24,6 @@
> >  #include <asm/machine.h>
> >  #include <asm/gmap.h>
> >  #include <asm/page.h>
> > -#include <asm/tlb.h>
> >  
> >  /*
> >   * The address is saved in a radix tree directly; NULL would be ambiguous,
> > diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
> > index afa085e8186c..074bf4fb4ce2 100644
> > --- a/arch/s390/mm/init.c
> > +++ b/arch/s390/mm/init.c
> > @@ -40,7 +40,6 @@
> >  #include <asm/kfence.h>
> >  #include <asm/dma.h>
> >  #include <asm/abs_lowcore.h>
> > -#include <asm/tlb.h>
> >  #include <asm/tlbflush.h>
> >  #include <asm/sections.h>
> >  #include <asm/sclp.h>
> > diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
> > index e3a6f8ae156c..ddab36875370 100644
> > --- a/arch/s390/mm/pgalloc.c
> > +++ b/arch/s390/mm/pgalloc.c
> > @@ -12,8 +12,6 @@
> >  #include <asm/mmu_context.h>
> >  #include <asm/page-states.h>
> >  #include <asm/pgalloc.h>
> > -#include <asm/gmap.h>
> > -#include <asm/tlb.h>
> >  #include <asm/tlbflush.h>
> >  
> >  unsigned long *crst_table_alloc(struct mm_struct *mm)
> > diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
> > index 9901934284ec..7df70cd8f739 100644
> > --- a/arch/s390/mm/pgtable.c
> > +++ b/arch/s390/mm/pgtable.c
> > @@ -20,7 +20,6 @@
> >  #include <linux/ksm.h>
> >  #include <linux/mman.h>
> >  
> > -#include <asm/tlb.h>
> >  #include <asm/tlbflush.h>
> >  #include <asm/mmu_context.h>
> >  #include <asm/page-states.h>  
> 


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v1 2/5] KVM: s390: remove unneeded srcu lock
  2025-05-14 16:38 ` [PATCH v1 2/5] KVM: s390: remove unneeded srcu lock Claudio Imbrenda
@ 2025-05-19  8:25   ` Christian Borntraeger
  2025-05-19 14:42   ` Nina Schoetterl-Glausch
  1 sibling, 0 replies; 21+ messages in thread
From: Christian Borntraeger @ 2025-05-19  8:25 UTC (permalink / raw)
  To: Claudio Imbrenda, linux-kernel
  Cc: kvm, linux-s390, frankja, seiden, nsg, nrb, david, hca, agordeev,
	svens, gor

Am 14.05.25 um 18:38 schrieb Claudio Imbrenda:
> All paths leading to handle_essa() already hold the kvm->srcu.
You could add
since commit 800c1065c320 ("KVM: s390: Lock kvm->srcu at the appropriate places").

> Remove unneeded srcu locking from handle_essa().
> 
> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>

Reviewed-by: Christian Borntraeger <borntraeger@linux.ibm.com>



> ---
>   arch/s390/kvm/priv.c | 4 ----
>   1 file changed, 4 deletions(-)
> 
> diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
> index 1a49b89706f8..758cefb5bac7 100644
> --- a/arch/s390/kvm/priv.c
> +++ b/arch/s390/kvm/priv.c
> @@ -1297,12 +1297,8 @@ static int handle_essa(struct kvm_vcpu *vcpu)
>   		/* Retry the ESSA instruction */
>   		kvm_s390_retry_instr(vcpu);
>   	} else {
> -		int srcu_idx;
> -
>   		mmap_read_lock(vcpu->kvm->mm);
> -		srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
>   		i = __do_essa(vcpu, orc);
> -		srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
>   		mmap_read_unlock(vcpu->kvm->mm);
>   		if (i < 0)
>   			return i;


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v1 2/5] KVM: s390: remove unneeded srcu lock
  2025-05-14 16:38 ` [PATCH v1 2/5] KVM: s390: remove unneeded srcu lock Claudio Imbrenda
  2025-05-19  8:25   ` Christian Borntraeger
@ 2025-05-19 14:42   ` Nina Schoetterl-Glausch
  2025-05-20 14:34     ` Nina Schoetterl-Glausch
  1 sibling, 1 reply; 21+ messages in thread
From: Nina Schoetterl-Glausch @ 2025-05-19 14:42 UTC (permalink / raw)
  To: Claudio Imbrenda, linux-kernel
  Cc: kvm, linux-s390, frankja, borntraeger, seiden, nrb, david, hca,
	agordeev, svens, gor

On Wed, 2025-05-14 at 18:38 +0200, Claudio Imbrenda wrote:
> All paths leading to handle_essa() already hold the kvm->srcu.
> Remove unneeded srcu locking from handle_essa().
> 
> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>

Reviewed-by: Nina Schoetterl-Glausch <nsg@linux.ibm.com>

Why are you removing it tho?
It should be very low cost and it makes the code more robust,
since handle_essa itself ensures that it has the lock.
It is also easier to understand which synchronization the function does.
You could of course add a comment stating that the kvm srcu read side needs
to be held. I think this would be good to have if you really don't want the
srcu_read_lock here.
But then you might also want that documented up the call chain.

> ---
>  arch/s390/kvm/priv.c | 4 ----
>  1 file changed, 4 deletions(-)
> 
> diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
> index 1a49b89706f8..758cefb5bac7 100644
> --- a/arch/s390/kvm/priv.c
> +++ b/arch/s390/kvm/priv.c
> @@ -1297,12 +1297,8 @@ static int handle_essa(struct kvm_vcpu *vcpu)
>  		/* Retry the ESSA instruction */
>  		kvm_s390_retry_instr(vcpu);
>  	} else {
> -		int srcu_idx;
> -
>  		mmap_read_lock(vcpu->kvm->mm);
> -		srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
>  		i = __do_essa(vcpu, orc);
> -		srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
>  		mmap_read_unlock(vcpu->kvm->mm);
>  		if (i < 0)
>  			return i;

-- 
IBM Deutschland Research & Development GmbH
Vorsitzender des Aufsichtsrats: Wolfgang Wendt
Geschäftsführung: David Faller
Sitz der Gesellschaft: Böblingen / Registergericht: Amtsgericht Stuttgart, HRB 243294

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v1 3/5] KVM: s390: refactor some functions in priv.c
  2025-05-14 16:38 ` [PATCH v1 3/5] KVM: s390: refactor some functions in priv.c Claudio Imbrenda
@ 2025-05-20 12:49   ` Nina Schoetterl-Glausch
  2025-05-20 14:39     ` Claudio Imbrenda
  0 siblings, 1 reply; 21+ messages in thread
From: Nina Schoetterl-Glausch @ 2025-05-20 12:49 UTC (permalink / raw)
  To: Claudio Imbrenda, linux-kernel
  Cc: kvm, linux-s390, frankja, borntraeger, seiden, nrb, david, hca,
	agordeev, svens, gor

On Wed, 2025-05-14 at 18:38 +0200, Claudio Imbrenda wrote:
> Refactor some functions in priv.c to make them more readable.
> 
> handle_{iske,rrbe,sske}: move duplicated checks into a single function.
> handle{pfmf,epsw}: improve readability.
> handle_lpswe{,y}: merge implementations since they are almost the same.
> 
> Use u64_replace_bits() where it makes sense.
> 
> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> ---
>  arch/s390/kvm/kvm-s390.h |  15 ++
>  arch/s390/kvm/priv.c     | 288 ++++++++++++++++++---------------------
>  2 files changed, 148 insertions(+), 155 deletions(-)
> 
[...]

> diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
> index 758cefb5bac7..1a26aa591c2e 100644
> --- a/arch/s390/kvm/priv.c
> +++ b/arch/s390/kvm/priv.c
> @@ -14,6 +14,7 @@
>  #include <linux/mm_types.h>
>  #include <linux/pgtable.h>
>  #include <linux/io.h>
> +#include <linux/bitfield.h>
>  #include <asm/asm-offsets.h>
>  #include <asm/facility.h>
>  #include <asm/current.h>
> @@ -253,29 +254,50 @@ static int try_handle_skey(struct kvm_vcpu *vcpu)
>  	return 0;
>  }
>  
> +struct skeys_ops_state {
> +	int reg1;
> +	int reg2;
> +	int rc;
> +	unsigned long gaddr;
> +};
> +
> +static bool skeys_common_checks(struct kvm_vcpu *vcpu, struct skeys_ops_state *state, bool abs)
> +{
> +	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) {
> +		state->rc = kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
> +		return true;
> +	}
> +
> +	state->rc = try_handle_skey(vcpu);
> +	if (state->rc)
> +		return true;
> +
> +	kvm_s390_get_regs_rre(vcpu, &state->reg1, &state->reg2);
> +
> +	state->gaddr = vcpu->run->s.regs.gprs[state->reg2] & PAGE_MASK;
> +	state->gaddr = kvm_s390_logical_to_effective(vcpu, state->gaddr);
> +	if (!abs)
> +		state->gaddr = kvm_s390_real_to_abs(vcpu, state->gaddr);
> +
> +	return false;
> +}

I don't really like this function, IMO it makes the calling functions harder to read.
If it was just a chain of checks it be fine, but with the differing control flow
base on the abs parameter and the complex return value it becomes too complicated.

> +
>  static int handle_iske(struct kvm_vcpu *vcpu)
>  {
> -	unsigned long gaddr, vmaddr;
> +	struct skeys_ops_state state;
> +	unsigned long vmaddr;
>  	unsigned char key;
> -	int reg1, reg2;
>  	bool unlocked;
> +	u64 *r1;
>  	int rc;
>  
>  	vcpu->stat.instruction_iske++;
>  
> -	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
> -		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);

How about a macro INJECT_PGM_ON: INJECT_PGM_ON(kvm_s390_problem_state(vcpu), PGM_PRIVILEGED_OP)


> -
> -	rc = try_handle_skey(vcpu);
> -	if (rc)
> -		return rc != -EAGAIN ? rc : 0;

You are not replicating this behavior, are you?
> -
> -	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);

You could introduce a helper

void _kvm_s390_get_gpr_ptrs_rre(vcpu, u64 **reg1, u64 **reg2)
{
	int r1, r2;

	kvm_s390_get_regs_rre(vcpu, &r1, &r2);
	*reg1 = &vcpu->run->s.regs.gprs[r1];
	*reg2 = &vcpu->run->s.regs.gprs[r2];
}

which would remove some clutter from the original function implementations.

> +	if (skeys_common_checks(vcpu, &state, false))
> +		return state.rc;
> +	r1 = vcpu->run->s.regs.gprs + state.reg1;
>  
> -	gaddr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
> -	gaddr = kvm_s390_logical_to_effective(vcpu, gaddr);
> -	gaddr = kvm_s390_real_to_abs(vcpu, gaddr);
> -	vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gaddr));
> +	vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(state.gaddr));
>  	if (kvm_is_error_hva(vmaddr))
>  		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
>  retry:
> @@ -296,33 +318,23 @@ static int handle_iske(struct kvm_vcpu *vcpu)
>  		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
>  	if (rc < 0)
>  		return rc;
> -	vcpu->run->s.regs.gprs[reg1] &= ~0xff;
> -	vcpu->run->s.regs.gprs[reg1] |= key;
> +	*r1 = u64_replace_bits(*r1, key, 0xff);
>  	return 0;
>  }
>  
> 
[...]

>  retry:
> @@ -353,40 +365,30 @@ static int handle_rrbe(struct kvm_vcpu *vcpu)
>  static int handle_sske(struct kvm_vcpu *vcpu)
>  {
>  	unsigned char m3 = vcpu->arch.sie_block->ipb >> 28;
> +	struct skeys_ops_state state;
>  	unsigned long start, end;
>  	unsigned char key, oldkey;
> -	int reg1, reg2;
> +	bool nq, mr, mc, mb;
>  	bool unlocked;
> +	u64 *r1, *r2;
>  	int rc;
>  
>  	vcpu->stat.instruction_sske++;
>  
> -	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
> -		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
> -
> -	rc = try_handle_skey(vcpu);
> -	if (rc)
> -		return rc != -EAGAIN ? rc : 0;
> -
> -	if (!test_kvm_facility(vcpu->kvm, 8))
> -		m3 &= ~SSKE_MB;
> -	if (!test_kvm_facility(vcpu->kvm, 10))
> -		m3 &= ~(SSKE_MC | SSKE_MR);
> -	if (!test_kvm_facility(vcpu->kvm, 14))
> -		m3 &= ~SSKE_NQ;
> +	mb = test_kvm_facility(vcpu->kvm, 8) && (m3 & SSKE_MB);
> +	mr = test_kvm_facility(vcpu->kvm, 10) && (m3 & SSKE_MR);
> +	mc = test_kvm_facility(vcpu->kvm, 10) && (m3 & SSKE_MC);
> +	nq = test_kvm_facility(vcpu->kvm, 14) && (m3 & SSKE_NQ);

That is indeed much nicer.

>  
> -	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
> +	/* start already designates an absolute address if MB is set */
> +	if (skeys_common_checks(vcpu, &state, mb))
> +		return state.rc;
>  
> -	key = vcpu->run->s.regs.gprs[reg1] & 0xfe;
> -	start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
> -	start = kvm_s390_logical_to_effective(vcpu, start);
> -	if (m3 & SSKE_MB) {
> -		/* start already designates an absolute address */
> -		end = (start + _SEGMENT_SIZE) & ~(_SEGMENT_SIZE - 1);
> -	} else {
> -		start = kvm_s390_real_to_abs(vcpu, start);
> -		end = start + PAGE_SIZE;
> -	}
> +	start = state.gaddr;
> +	end = mb ? ALIGN(start + 1, _SEGMENT_SIZE) : start + PAGE_SIZE;

Alternatively you could do ALIGN_DOWN(start, _SEGMENT_SIZE) + _SEGMENT_SIZE,
which seems a bit easier to read, but it's really minor.

> +	r1 = vcpu->run->s.regs.gprs + state.reg1;
> +	r2 = vcpu->run->s.regs.gprs + state.reg2;
> +	key = *r1 & 0xfe;
>  
>  	while (start != end) {
>  		unsigned long vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
> @@ -396,9 +398,7 @@ static int handle_sske(struct kvm_vcpu *vcpu)
>  			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
>  
>  		mmap_read_lock(current->mm);
> -		rc = cond_set_guest_storage_key(current->mm, vmaddr, key, &oldkey,
> -						m3 & SSKE_NQ, m3 & SSKE_MR,
> -						m3 & SSKE_MC);
> +		rc = cond_set_guest_storage_key(current->mm, vmaddr, key, &oldkey, nq, mr, mc);
>  
>  		if (rc < 0) {
>  			rc = fixup_user_fault(current->mm, vmaddr,
> @@ -415,23 +415,21 @@ static int handle_sske(struct kvm_vcpu *vcpu)
>  		start += PAGE_SIZE;
>  	}
>  
> -	if (m3 & (SSKE_MC | SSKE_MR)) {
> -		if (m3 & SSKE_MB) {
> +	if (mc || mr) {
> +		if (mb) {
>  			/* skey in reg1 is unpredictable */
>  			kvm_s390_set_psw_cc(vcpu, 3);
>  		} else {
>  			kvm_s390_set_psw_cc(vcpu, rc);
> -			vcpu->run->s.regs.gprs[reg1] &= ~0xff00UL;
> -			vcpu->run->s.regs.gprs[reg1] |= (u64) oldkey << 8;
> +			*r1 = u64_replace_bits(*r1, oldkey << 8, 0xff00);

Uh, u64_replace_bits does the shift for you, no?
So it should be u64_replace_bits(*r1, oldkey, 0xff00)

You could also do u64p_replace_bits(r1, oldkey, 0xff00) but I'd actually prefer the assignment
as you do it.

>  		}
>  	}
> -	if (m3 & SSKE_MB) {
> -		if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT)
> -			vcpu->run->s.regs.gprs[reg2] &= ~PAGE_MASK;
> -		else
> -			vcpu->run->s.regs.gprs[reg2] &= ~0xfffff000UL;
> +	if (mb) {
>  		end = kvm_s390_logical_to_effective(vcpu, end);
> -		vcpu->run->s.regs.gprs[reg2] |= end;
> +		if (kvm_s390_is_amode_64(vcpu))
> +			*r2 = u64_replace_bits(*r2, end, PAGE_MASK);
> +		else
> +			*r2 = u64_replace_bits(*r2, end, 0xfffff000);

This does not work because of the implicit shift.
So you need to use gpa_to_gfn(end) instead.
(I think I would prefer using start instead of end, since it better shows
the interruptible nature of the instruction, but start == end if
we get here so ...)

>  	}
>  	return 0;
>  }
> @@ -773,46 +771,28 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
>  	return 0;
>  }
>  
> -static int handle_lpswe(struct kvm_vcpu *vcpu)
> +static int handle_lpswe_y(struct kvm_vcpu *vcpu, bool lpswey)
>  {
>  	psw_t new_psw;
>  	u64 addr;
>  	int rc;
>  	u8 ar;
>  
> -	vcpu->stat.instruction_lpswe++;
> -
> -	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
> -		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
> -
> -	addr = kvm_s390_get_base_disp_s(vcpu, &ar);
> -	if (addr & 7)
> -		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
> -	rc = read_guest(vcpu, addr, ar, &new_psw, sizeof(new_psw));
> -	if (rc)
> -		return kvm_s390_inject_prog_cond(vcpu, rc);
> -	vcpu->arch.sie_block->gpsw = new_psw;
> -	if (!is_valid_psw(&vcpu->arch.sie_block->gpsw))
> -		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
> -	return 0;
> -}
> -
> -static int handle_lpswey(struct kvm_vcpu *vcpu)
> -{
> -	psw_t new_psw;
> -	u64 addr;
> -	int rc;
> -	u8 ar;
> -
> -	vcpu->stat.instruction_lpswey++;
> +	if (lpswey)
> +		vcpu->stat.instruction_lpswey++;
> +	else
> +		vcpu->stat.instruction_lpswe++;
>  
> -	if (!test_kvm_facility(vcpu->kvm, 193))
> +	if (lpswey && !test_kvm_facility(vcpu->kvm, 193))
>  		return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
>  
>  	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
>  		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
>  
> -	addr = kvm_s390_get_base_disp_siy(vcpu, &ar);
> +	if (!lpswey)
> +		addr = kvm_s390_get_base_disp_s(vcpu, &ar);
> +	else
> +		addr = kvm_s390_get_base_disp_siy(vcpu, &ar);
>  	if (addr & 7)
>  		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);

I'd prefer a helper function _do_lpswe_y_swap(struct kvm_vcpu *vcpu, gpa_t addr)

and then just

static int handle_lpswey(struct kvm_vcpu *vcpu)
{
        u64 addr;
        u8 ar;

        vcpu->stat.instruction_lpswey++;

        if (!test_kvm_facility(vcpu->kvm, 193))
                return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);

        addr = kvm_s390_get_base_disp_siy(vcpu, &ar);
	return _do_lpswe_y_swap(vcpu, addr);
}

Makes it easier to read IMO because of the simpler control flow.
>  
> @@ -1034,7 +1014,7 @@ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
>  	case 0xb1:
>  		return handle_stfl(vcpu);
>  	case 0xb2:
> -		return handle_lpswe(vcpu);
> +		return handle_lpswe_y(vcpu, false);
>  	default:
>  		return -EOPNOTSUPP;
>  	}
> @@ -1043,42 +1023,50 @@ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
>  static int handle_epsw(struct kvm_vcpu *vcpu)
>  {
>  	int reg1, reg2;
> +	u64 *r1, *r2;
>  
>  	vcpu->stat.instruction_epsw++;
>  
>  	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
> +	r1 = vcpu->run->s.regs.gprs + reg1;
> +	r2 = vcpu->run->s.regs.gprs + reg2;
>  
>  	/* This basically extracts the mask half of the psw. */
> -	vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000UL;
> -	vcpu->run->s.regs.gprs[reg1] |= vcpu->arch.sie_block->gpsw.mask >> 32;
> -	if (reg2) {
> -		vcpu->run->s.regs.gprs[reg2] &= 0xffffffff00000000UL;
> -		vcpu->run->s.regs.gprs[reg2] |=
> -			vcpu->arch.sie_block->gpsw.mask & 0x00000000ffffffffUL;
> -	}
> +	*r1 = u64_replace_bits(*r1, vcpu->arch.sie_block->gpsw.mask >> 32, 0xffffffff);
> +	if (reg2)
> +		*r2 = u64_replace_bits(*r2, vcpu->arch.sie_block->gpsw.mask, 0xffffffff);

LGTM although I don't hate the original implementation, which is very easy to understand
compared to u64_replace_bits whose implementation is anything but.
It would be nice to make gprs a union, which I think should be fine from a backwards
compatibility point of view. So:

struct kvm_sync_regs {
	__u64 prefix;	/* prefix register */
	union {
		__u64 gprs[16];	/* general purpose registers */
		struct { __u32 h; __u32 l} gprs32[16];
		struct { __u16 hh; __u16 hl; ...} gprs16[16];
		... 
...

But I don't expect you to do the refactor.
You could of course also contribute documentation to bitfield.h :)

>  	return 0;
>  }

[...]

>  static int handle_pfmf(struct kvm_vcpu *vcpu)
>  {

[...]

> -	if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
> -		if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT) {
> -			vcpu->run->s.regs.gprs[reg2] = end;
> -		} else {
> -			vcpu->run->s.regs.gprs[reg2] &= ~0xffffffffUL;
> -			end = kvm_s390_logical_to_effective(vcpu, end);
> -			vcpu->run->s.regs.gprs[reg2] |= end;
> -		}
> +	if (r1.fsc) {
> +		u64 *r2 = vcpu->run->s.regs.gprs + reg2;
> +
> +		end = kvm_s390_logical_to_effective(vcpu, end);
> +		if (kvm_s390_is_amode_64(vcpu))
> +			*r2 = u64_replace_bits(*r2, end, PAGE_MASK);
> +		else
> +			*r2 = u64_replace_bits(*r2, end, 0xfffff000);

Same issue as above regarding the shift.

>  	}
>  	return 0;
>  }
> @@ -1361,8 +1338,9 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
>  	reg = reg1;
>  	nr_regs = 0;
>  	do {
> -		vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
> -		vcpu->arch.sie_block->gcr[reg] |= ctl_array[nr_regs++];
> +		u64 *cr = vcpu->arch.sie_block->gcr + reg;
> +
> +		*cr = u64_replace_bits(*cr, ctl_array[nr_regs++], 0xffffffff);
>  		if (reg == reg3)
>  			break;
>  		reg = (reg + 1) % 16;
> @@ -1489,7 +1467,7 @@ int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
>  	case 0x62:
>  		return handle_ri(vcpu);
>  	case 0x71:
> -		return handle_lpswey(vcpu);
> +		return handle_lpswe_y(vcpu, true);
>  	default:
>  		return -EOPNOTSUPP;
>  	}

-- 
IBM Deutschland Research & Development GmbH
Vorsitzender des Aufsichtsrats: Wolfgang Wendt
Geschäftsführung: David Faller
Sitz der Gesellschaft: Böblingen / Registergericht: Amtsgericht Stuttgart, HRB 243294

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v1 2/5] KVM: s390: remove unneeded srcu lock
  2025-05-19 14:42   ` Nina Schoetterl-Glausch
@ 2025-05-20 14:34     ` Nina Schoetterl-Glausch
  2025-06-27  8:45       ` Christian Borntraeger
  0 siblings, 1 reply; 21+ messages in thread
From: Nina Schoetterl-Glausch @ 2025-05-20 14:34 UTC (permalink / raw)
  To: Claudio Imbrenda, linux-kernel
  Cc: kvm, linux-s390, frankja, borntraeger, seiden, nrb, david, hca,
	agordeev, svens, gor

On Mon, 2025-05-19 at 16:42 +0200, Nina Schoetterl-Glausch wrote:
> On Wed, 2025-05-14 at 18:38 +0200, Claudio Imbrenda wrote:
> > All paths leading to handle_essa() already hold the kvm->srcu.
> > Remove unneeded srcu locking from handle_essa().
> > 
> > Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> 
> Reviewed-by: Nina Schoetterl-Glausch <nsg@linux.ibm.com>
> 
> Why are you removing it tho?
> It should be very low cost and it makes the code more robust,
> since handle_essa itself ensures that it has the lock.
> It is also easier to understand which synchronization the function does.
> You could of course add a comment stating that the kvm srcu read side needs
> to be held. I think this would be good to have if you really don't want the
> srcu_read_lock here.
> But then you might also want that documented up the call chain.

Actually, can we use __must_hold or have some assert?
> 
> > ---
> >  arch/s390/kvm/priv.c | 4 ----
> >  1 file changed, 4 deletions(-)
> > 
> > diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
> > index 1a49b89706f8..758cefb5bac7 100644
> > --- a/arch/s390/kvm/priv.c
> > +++ b/arch/s390/kvm/priv.c
> > @@ -1297,12 +1297,8 @@ static int handle_essa(struct kvm_vcpu *vcpu)
> >  		/* Retry the ESSA instruction */
> >  		kvm_s390_retry_instr(vcpu);
> >  	} else {
> > -		int srcu_idx;
> > -
> >  		mmap_read_lock(vcpu->kvm->mm);
> > -		srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
> >  		i = __do_essa(vcpu, orc);
> > -		srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
> >  		mmap_read_unlock(vcpu->kvm->mm);
> >  		if (i < 0)
> >  			return i;

-- 
IBM Deutschland Research & Development GmbH
Vorsitzender des Aufsichtsrats: Wolfgang Wendt
Geschäftsführung: David Faller
Sitz der Gesellschaft: Böblingen / Registergericht: Amtsgericht Stuttgart, HRB 243294

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v1 3/5] KVM: s390: refactor some functions in priv.c
  2025-05-20 12:49   ` Nina Schoetterl-Glausch
@ 2025-05-20 14:39     ` Claudio Imbrenda
  0 siblings, 0 replies; 21+ messages in thread
From: Claudio Imbrenda @ 2025-05-20 14:39 UTC (permalink / raw)
  To: Nina Schoetterl-Glausch
  Cc: linux-kernel, kvm, linux-s390, frankja, borntraeger, seiden, nrb,
	david, hca, agordeev, svens, gor

On Tue, 20 May 2025 14:49:55 +0200
Nina Schoetterl-Glausch <nsg@linux.ibm.com> wrote:

> On Wed, 2025-05-14 at 18:38 +0200, Claudio Imbrenda wrote:
> > Refactor some functions in priv.c to make them more readable.
> > 
> > handle_{iske,rrbe,sske}: move duplicated checks into a single function.
> > handle{pfmf,epsw}: improve readability.
> > handle_lpswe{,y}: merge implementations since they are almost the same.
> > 
> > Use u64_replace_bits() where it makes sense.
> > 
> > Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> > ---
> >  arch/s390/kvm/kvm-s390.h |  15 ++
> >  arch/s390/kvm/priv.c     | 288 ++++++++++++++++++---------------------
> >  2 files changed, 148 insertions(+), 155 deletions(-)
> >   
> [...]
> 
> > diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
> > index 758cefb5bac7..1a26aa591c2e 100644
> > --- a/arch/s390/kvm/priv.c
> > +++ b/arch/s390/kvm/priv.c
> > @@ -14,6 +14,7 @@
> >  #include <linux/mm_types.h>
> >  #include <linux/pgtable.h>
> >  #include <linux/io.h>
> > +#include <linux/bitfield.h>
> >  #include <asm/asm-offsets.h>
> >  #include <asm/facility.h>
> >  #include <asm/current.h>
> > @@ -253,29 +254,50 @@ static int try_handle_skey(struct kvm_vcpu *vcpu)
> >  	return 0;
> >  }
> >  
> > +struct skeys_ops_state {
> > +	int reg1;
> > +	int reg2;
> > +	int rc;
> > +	unsigned long gaddr;
> > +};
> > +
> > +static bool skeys_common_checks(struct kvm_vcpu *vcpu, struct skeys_ops_state *state, bool abs)
> > +{
> > +	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) {
> > +		state->rc = kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
> > +		return true;
> > +	}
> > +
> > +	state->rc = try_handle_skey(vcpu);
> > +	if (state->rc)
> > +		return true;
> > +
> > +	kvm_s390_get_regs_rre(vcpu, &state->reg1, &state->reg2);
> > +
> > +	state->gaddr = vcpu->run->s.regs.gprs[state->reg2] & PAGE_MASK;
> > +	state->gaddr = kvm_s390_logical_to_effective(vcpu, state->gaddr);
> > +	if (!abs)
> > +		state->gaddr = kvm_s390_real_to_abs(vcpu, state->gaddr);
> > +
> > +	return false;
> > +}  
> 
> I don't really like this function, IMO it makes the calling functions harder to read.
> If it was just a chain of checks it be fine, but with the differing control flow
> base on the abs parameter and the complex return value it becomes too complicated.

I'll try to improve it

> 
> > +
> >  static int handle_iske(struct kvm_vcpu *vcpu)
> >  {
> > -	unsigned long gaddr, vmaddr;
> > +	struct skeys_ops_state state;
> > +	unsigned long vmaddr;
> >  	unsigned char key;
> > -	int reg1, reg2;
> >  	bool unlocked;
> > +	u64 *r1;
> >  	int rc;
> >  
> >  	vcpu->stat.instruction_iske++;
> >  
> > -	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
> > -		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);  
> 
> How about a macro INJECT_PGM_ON: INJECT_PGM_ON(kvm_s390_problem_state(vcpu), PGM_PRIVILEGED_OP)

no, I would like to avoid hiding control flow in a macro

> 
> 
> > -
> > -	rc = try_handle_skey(vcpu);
> > -	if (rc)
> > -		return rc != -EAGAIN ? rc : 0;  
> 
> You are not replicating this behavior, are you?

no, but it's fine, we can afford a useless trip to userspace literally
once in the lifetime of the guest

> > -
> > -	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);  
> 
> You could introduce a helper
> 
> void _kvm_s390_get_gpr_ptrs_rre(vcpu, u64 **reg1, u64 **reg2)
> {
> 	int r1, r2;
> 
> 	kvm_s390_get_regs_rre(vcpu, &r1, &r2);
> 	*reg1 = &vcpu->run->s.regs.gprs[r1];
> 	*reg2 = &vcpu->run->s.regs.gprs[r2];
> }
> 
> which would remove some clutter from the original function implementations.
> 
> > +	if (skeys_common_checks(vcpu, &state, false))
> > +		return state.rc;
> > +	r1 = vcpu->run->s.regs.gprs + state.reg1;
> >  
> > -	gaddr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
> > -	gaddr = kvm_s390_logical_to_effective(vcpu, gaddr);
> > -	gaddr = kvm_s390_real_to_abs(vcpu, gaddr);
> > -	vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gaddr));
> > +	vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(state.gaddr));
> >  	if (kvm_is_error_hva(vmaddr))
> >  		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
> >  retry:
> > @@ -296,33 +318,23 @@ static int handle_iske(struct kvm_vcpu *vcpu)
> >  		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
> >  	if (rc < 0)
> >  		return rc;
> > -	vcpu->run->s.regs.gprs[reg1] &= ~0xff;
> > -	vcpu->run->s.regs.gprs[reg1] |= key;
> > +	*r1 = u64_replace_bits(*r1, key, 0xff);
> >  	return 0;
> >  }
> >  
> >   
> [...]
> 
> >  retry:
> > @@ -353,40 +365,30 @@ static int handle_rrbe(struct kvm_vcpu *vcpu)
> >  static int handle_sske(struct kvm_vcpu *vcpu)
> >  {
> >  	unsigned char m3 = vcpu->arch.sie_block->ipb >> 28;
> > +	struct skeys_ops_state state;
> >  	unsigned long start, end;
> >  	unsigned char key, oldkey;
> > -	int reg1, reg2;
> > +	bool nq, mr, mc, mb;
> >  	bool unlocked;
> > +	u64 *r1, *r2;
> >  	int rc;
> >  
> >  	vcpu->stat.instruction_sske++;
> >  
> > -	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
> > -		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
> > -
> > -	rc = try_handle_skey(vcpu);
> > -	if (rc)
> > -		return rc != -EAGAIN ? rc : 0;
> > -
> > -	if (!test_kvm_facility(vcpu->kvm, 8))
> > -		m3 &= ~SSKE_MB;
> > -	if (!test_kvm_facility(vcpu->kvm, 10))
> > -		m3 &= ~(SSKE_MC | SSKE_MR);
> > -	if (!test_kvm_facility(vcpu->kvm, 14))
> > -		m3 &= ~SSKE_NQ;
> > +	mb = test_kvm_facility(vcpu->kvm, 8) && (m3 & SSKE_MB);
> > +	mr = test_kvm_facility(vcpu->kvm, 10) && (m3 & SSKE_MR);
> > +	mc = test_kvm_facility(vcpu->kvm, 10) && (m3 & SSKE_MC);
> > +	nq = test_kvm_facility(vcpu->kvm, 14) && (m3 & SSKE_NQ);  
> 
> That is indeed much nicer.
> 
> >  
> > -	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
> > +	/* start already designates an absolute address if MB is set */
> > +	if (skeys_common_checks(vcpu, &state, mb))
> > +		return state.rc;
> >  
> > -	key = vcpu->run->s.regs.gprs[reg1] & 0xfe;
> > -	start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
> > -	start = kvm_s390_logical_to_effective(vcpu, start);
> > -	if (m3 & SSKE_MB) {
> > -		/* start already designates an absolute address */
> > -		end = (start + _SEGMENT_SIZE) & ~(_SEGMENT_SIZE - 1);
> > -	} else {
> > -		start = kvm_s390_real_to_abs(vcpu, start);
> > -		end = start + PAGE_SIZE;
> > -	}
> > +	start = state.gaddr;
> > +	end = mb ? ALIGN(start + 1, _SEGMENT_SIZE) : start + PAGE_SIZE;  
> 
> Alternatively you could do ALIGN_DOWN(start, _SEGMENT_SIZE) + _SEGMENT_SIZE,
> which seems a bit easier to read, but it's really minor.
> 
> > +	r1 = vcpu->run->s.regs.gprs + state.reg1;
> > +	r2 = vcpu->run->s.regs.gprs + state.reg2;
> > +	key = *r1 & 0xfe;
> >  
> >  	while (start != end) {
> >  		unsigned long vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
> > @@ -396,9 +398,7 @@ static int handle_sske(struct kvm_vcpu *vcpu)
> >  			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
> >  
> >  		mmap_read_lock(current->mm);
> > -		rc = cond_set_guest_storage_key(current->mm, vmaddr, key, &oldkey,
> > -						m3 & SSKE_NQ, m3 & SSKE_MR,
> > -						m3 & SSKE_MC);
> > +		rc = cond_set_guest_storage_key(current->mm, vmaddr, key, &oldkey, nq, mr, mc);
> >  
> >  		if (rc < 0) {
> >  			rc = fixup_user_fault(current->mm, vmaddr,
> > @@ -415,23 +415,21 @@ static int handle_sske(struct kvm_vcpu *vcpu)
> >  		start += PAGE_SIZE;
> >  	}
> >  
> > -	if (m3 & (SSKE_MC | SSKE_MR)) {
> > -		if (m3 & SSKE_MB) {
> > +	if (mc || mr) {
> > +		if (mb) {
> >  			/* skey in reg1 is unpredictable */
> >  			kvm_s390_set_psw_cc(vcpu, 3);
> >  		} else {
> >  			kvm_s390_set_psw_cc(vcpu, rc);
> > -			vcpu->run->s.regs.gprs[reg1] &= ~0xff00UL;
> > -			vcpu->run->s.regs.gprs[reg1] |= (u64) oldkey << 8;
> > +			*r1 = u64_replace_bits(*r1, oldkey << 8, 0xff00);  
> 
> Uh, u64_replace_bits does the shift for you, no?
> So it should be u64_replace_bits(*r1, oldkey, 0xff00)
> 
> You could also do u64p_replace_bits(r1, oldkey, 0xff00) but I'd actually prefer the assignment
> as you do it.

yeahhhhhh I think I'll completely rewrite those parts using bitfields
and structs / unions

> 
> >  		}
> >  	}
> > -	if (m3 & SSKE_MB) {
> > -		if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT)
> > -			vcpu->run->s.regs.gprs[reg2] &= ~PAGE_MASK;
> > -		else
> > -			vcpu->run->s.regs.gprs[reg2] &= ~0xfffff000UL;
> > +	if (mb) {
> >  		end = kvm_s390_logical_to_effective(vcpu, end);
> > -		vcpu->run->s.regs.gprs[reg2] |= end;
> > +		if (kvm_s390_is_amode_64(vcpu))
> > +			*r2 = u64_replace_bits(*r2, end, PAGE_MASK);
> > +		else
> > +			*r2 = u64_replace_bits(*r2, end, 0xfffff000);  
> 
> This does not work because of the implicit shift.
> So you need to use gpa_to_gfn(end) instead.
> (I think I would prefer using start instead of end, since it better shows
> the interruptible nature of the instruction, but start == end if
> we get here so ...)
> 
> >  	}
> >  	return 0;
> >  }
> > @@ -773,46 +771,28 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
> >  	return 0;
> >  }
> >  
> > -static int handle_lpswe(struct kvm_vcpu *vcpu)
> > +static int handle_lpswe_y(struct kvm_vcpu *vcpu, bool lpswey)
> >  {
> >  	psw_t new_psw;
> >  	u64 addr;
> >  	int rc;
> >  	u8 ar;
> >  
> > -	vcpu->stat.instruction_lpswe++;
> > -
> > -	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
> > -		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
> > -
> > -	addr = kvm_s390_get_base_disp_s(vcpu, &ar);
> > -	if (addr & 7)
> > -		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
> > -	rc = read_guest(vcpu, addr, ar, &new_psw, sizeof(new_psw));
> > -	if (rc)
> > -		return kvm_s390_inject_prog_cond(vcpu, rc);
> > -	vcpu->arch.sie_block->gpsw = new_psw;
> > -	if (!is_valid_psw(&vcpu->arch.sie_block->gpsw))
> > -		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
> > -	return 0;
> > -}
> > -
> > -static int handle_lpswey(struct kvm_vcpu *vcpu)
> > -{
> > -	psw_t new_psw;
> > -	u64 addr;
> > -	int rc;
> > -	u8 ar;
> > -
> > -	vcpu->stat.instruction_lpswey++;
> > +	if (lpswey)
> > +		vcpu->stat.instruction_lpswey++;
> > +	else
> > +		vcpu->stat.instruction_lpswe++;
> >  
> > -	if (!test_kvm_facility(vcpu->kvm, 193))
> > +	if (lpswey && !test_kvm_facility(vcpu->kvm, 193))
> >  		return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
> >  
> >  	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
> >  		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
> >  
> > -	addr = kvm_s390_get_base_disp_siy(vcpu, &ar);
> > +	if (!lpswey)
> > +		addr = kvm_s390_get_base_disp_s(vcpu, &ar);
> > +	else
> > +		addr = kvm_s390_get_base_disp_siy(vcpu, &ar);
> >  	if (addr & 7)
> >  		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);  
> 
> I'd prefer a helper function _do_lpswe_y_swap(struct kvm_vcpu *vcpu, gpa_t addr)
> 
> and then just
> 
> static int handle_lpswey(struct kvm_vcpu *vcpu)
> {
>         u64 addr;
>         u8 ar;
> 
>         vcpu->stat.instruction_lpswey++;
> 
>         if (!test_kvm_facility(vcpu->kvm, 193))
>                 return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
> 
>         addr = kvm_s390_get_base_disp_siy(vcpu, &ar);
> 	return _do_lpswe_y_swap(vcpu, addr);
> }
> 
> Makes it easier to read IMO because of the simpler control flow.

hmmm you have a point

> >  
> > @@ -1034,7 +1014,7 @@ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
> >  	case 0xb1:
> >  		return handle_stfl(vcpu);
> >  	case 0xb2:
> > -		return handle_lpswe(vcpu);
> > +		return handle_lpswe_y(vcpu, false);
> >  	default:
> >  		return -EOPNOTSUPP;
> >  	}
> > @@ -1043,42 +1023,50 @@ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
> >  static int handle_epsw(struct kvm_vcpu *vcpu)
> >  {
> >  	int reg1, reg2;
> > +	u64 *r1, *r2;
> >  
> >  	vcpu->stat.instruction_epsw++;
> >  
> >  	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
> > +	r1 = vcpu->run->s.regs.gprs + reg1;
> > +	r2 = vcpu->run->s.regs.gprs + reg2;
> >  
> >  	/* This basically extracts the mask half of the psw. */
> > -	vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000UL;
> > -	vcpu->run->s.regs.gprs[reg1] |= vcpu->arch.sie_block->gpsw.mask >> 32;
> > -	if (reg2) {
> > -		vcpu->run->s.regs.gprs[reg2] &= 0xffffffff00000000UL;
> > -		vcpu->run->s.regs.gprs[reg2] |=
> > -			vcpu->arch.sie_block->gpsw.mask & 0x00000000ffffffffUL;
> > -	}
> > +	*r1 = u64_replace_bits(*r1, vcpu->arch.sie_block->gpsw.mask >> 32, 0xffffffff);
> > +	if (reg2)
> > +		*r2 = u64_replace_bits(*r2, vcpu->arch.sie_block->gpsw.mask, 0xffffffff);  
> 
> LGTM although I don't hate the original implementation, which is very easy to understand
> compared to u64_replace_bits whose implementation is anything but.

yeah I agree

> It would be nice to make gprs a union, which I think should be fine from a backwards
> compatibility point of view. So:
> 
> struct kvm_sync_regs {
> 	__u64 prefix;	/* prefix register */
> 	union {
> 		__u64 gprs[16];	/* general purpose registers */
> 		struct { __u32 h; __u32 l} gprs32[16];
> 		struct { __u16 hh; __u16 hl; ...} gprs16[16];
> 		... 
> ...
> 
> But I don't expect you to do the refactor.
> You could of course also contribute documentation to bitfield.h :)

ehhhhhhh

> 
> >  	return 0;
> >  }  
> 
> [...]
> 
> >  static int handle_pfmf(struct kvm_vcpu *vcpu)
> >  {  
> 
> [...]
> 
> > -	if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
> > -		if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT) {
> > -			vcpu->run->s.regs.gprs[reg2] = end;
> > -		} else {
> > -			vcpu->run->s.regs.gprs[reg2] &= ~0xffffffffUL;
> > -			end = kvm_s390_logical_to_effective(vcpu, end);
> > -			vcpu->run->s.regs.gprs[reg2] |= end;
> > -		}
> > +	if (r1.fsc) {
> > +		u64 *r2 = vcpu->run->s.regs.gprs + reg2;
> > +
> > +		end = kvm_s390_logical_to_effective(vcpu, end);
> > +		if (kvm_s390_is_amode_64(vcpu))
> > +			*r2 = u64_replace_bits(*r2, end, PAGE_MASK);
> > +		else
> > +			*r2 = u64_replace_bits(*r2, end, 0xfffff000);  
> 
> Same issue as above regarding the shift.
> 
> >  	}
> >  	return 0;
> >  }
> > @@ -1361,8 +1338,9 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
> >  	reg = reg1;
> >  	nr_regs = 0;
> >  	do {
> > -		vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
> > -		vcpu->arch.sie_block->gcr[reg] |= ctl_array[nr_regs++];
> > +		u64 *cr = vcpu->arch.sie_block->gcr + reg;
> > +
> > +		*cr = u64_replace_bits(*cr, ctl_array[nr_regs++], 0xffffffff);
> >  		if (reg == reg3)
> >  			break;
> >  		reg = (reg + 1) % 16;
> > @@ -1489,7 +1467,7 @@ int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
> >  	case 0x62:
> >  		return handle_ri(vcpu);
> >  	case 0x71:
> > -		return handle_lpswey(vcpu);
> > +		return handle_lpswe_y(vcpu, true);
> >  	default:
> >  		return -EOPNOTSUPP;
> >  	}  
> 


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v1 4/5] KVM: s390: refactor and split some gmap helpers
  2025-05-14 16:38 ` [PATCH v1 4/5] KVM: s390: refactor and split some gmap helpers Claudio Imbrenda
@ 2025-05-21 14:55   ` Nina Schoetterl-Glausch
  2025-05-21 15:19     ` Claudio Imbrenda
  0 siblings, 1 reply; 21+ messages in thread
From: Nina Schoetterl-Glausch @ 2025-05-21 14:55 UTC (permalink / raw)
  To: Claudio Imbrenda, linux-kernel
  Cc: kvm, linux-s390, frankja, borntraeger, seiden, nrb, david, hca,
	agordeev, svens, gor

On Wed, 2025-05-14 at 18:38 +0200, Claudio Imbrenda wrote:
> Refactor some gmap functions; move the implementation into a separate
> file with only helper functions. The new helper functions work on vm
> addresses, leaving all gmap logic in the gmap functions, which mostly
> become just wrappers.
> 
> The whole gmap handling is going to be moved inside KVM soon, but the
> helper functions need to touch core mm functions, and thus need to
> stay in the core of kernel.
> 
> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> ---
>  MAINTAINERS                          |   2 +
>  arch/s390/include/asm/gmap_helpers.h |  18 ++
>  arch/s390/kvm/diag.c                 |  11 +-
>  arch/s390/kvm/kvm-s390.c             |   3 +-
>  arch/s390/mm/Makefile                |   2 +
>  arch/s390/mm/gmap.c                  |  46 ++---
>  arch/s390/mm/gmap_helpers.c          | 266 +++++++++++++++++++++++++++
>  7 files changed, 307 insertions(+), 41 deletions(-)
>  create mode 100644 arch/s390/include/asm/gmap_helpers.h
>  create mode 100644 arch/s390/mm/gmap_helpers.c
> 
[...]

> diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
> index 4869555ff403..17a2a57de3a2 100644
> --- a/arch/s390/mm/gmap.c
> +++ b/arch/s390/mm/gmap.c
> 
[...]

>  void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
>  {
> -	unsigned long gaddr, vmaddr, size;
> -	struct vm_area_struct *vma;
> +	unsigned long vmaddr, next, start, end;
>  
>  	mmap_read_lock(gmap->mm);
> -	for (gaddr = from; gaddr < to;
> -	     gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
> -		/* Find the vm address for the guest address */
> -		vmaddr = (unsigned long)
> -			radix_tree_lookup(&gmap->guest_to_host,
> -					  gaddr >> PMD_SHIFT);
> +	for ( ; from < to; from = next) {
> +		next = ALIGN(from + 1, PMD_SIZE);

I think you can use pmd_addr_end(from, to) here...

> +		vmaddr = (unsigned long)radix_tree_lookup(&gmap->guest_to_host, from >> PMD_SHIFT);
>  		if (!vmaddr)
>  			continue;
> -		vmaddr |= gaddr & ~PMD_MASK;
> -		/* Find vma in the parent mm */
> -		vma = find_vma(gmap->mm, vmaddr);
> -		if (!vma)
> -			continue;
> -		/*
> -		 * We do not discard pages that are backed by
> -		 * hugetlbfs, so we don't have to refault them.
> -		 */
> -		if (is_vm_hugetlb_page(vma))
> -			continue;
> -		size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
> -		zap_page_range_single(vma, vmaddr, size, NULL);
> +		start = vmaddr | (from & ~PMD_MASK);
> +		end = (vmaddr | (min(to - 1, next - 1) & ~PMD_MASK)) + 1;

... then simply do:
		end = vmaddr + (next - from);

> +		__gmap_helper_discard(gmap->mm, start, end);
>  	}
>  	mmap_read_unlock(gmap->mm);
>  }
> diff --git a/arch/s390/mm/gmap_helpers.c b/arch/s390/mm/gmap_helpers.c
> new file mode 100644
> index 000000000000..8e66586718f6
> --- /dev/null
> +++ b/arch/s390/mm/gmap_helpers.c
> @@ -0,0 +1,266 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + *  Helper functions for KVM guest address space mapping code
> + *
> + *    Copyright IBM Corp. 2007, 2025
> + *    Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com>
> + *               Martin Schwidefsky <schwidefsky@de.ibm.com>
> + *               David Hildenbrand <david@redhat.com>
> + *               Janosch Frank <frankja@linux.vnet.ibm.com>
> + */
> +#include <linux/mm_types.h>
> +#include <linux/mmap_lock.h>
> +#include <linux/mm.h>
> +#include <linux/hugetlb.h>
> +#include <linux/pagewalk.h>
> +#include <linux/ksm.h>
> +#include <asm/gmap_helpers.h>

Please add documentation to all these functions for those of us that
don't live and breath mm code :)

> +
> +static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
> +{
> +	if (!non_swap_entry(entry))
> +		dec_mm_counter(mm, MM_SWAPENTS);
> +	else if (is_migration_entry(entry))
> +		dec_mm_counter(mm, mm_counter(pfn_swap_entry_folio(entry)));
> +	free_swap_and_cache(entry);
> +}
> +
> +void __gmap_helper_zap_one(struct mm_struct *mm, unsigned long vmaddr)

__gmap_helper_zap_mapping_pte ?

> +{
> +	struct vm_area_struct *vma;
> +	spinlock_t *ptl;
> +	pte_t *ptep;
> +
> +	mmap_assert_locked(mm);
> +
> +	/* Find the vm address for the guest address */
> +	vma = vma_lookup(mm, vmaddr);
> +	if (!vma || is_vm_hugetlb_page(vma))
> +		return;
> +
> +	/* Get pointer to the page table entry */
> +	ptep = get_locked_pte(mm, vmaddr, &ptl);
> +	if (!likely(ptep))

if (unlikely(!ptep)) reads nicer to me.

> +		return;
> +	if (pte_swap(*ptep))
> +		ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep));
> +	pte_unmap_unlock(ptep, ptl);
> +}
> +EXPORT_SYMBOL_GPL(__gmap_helper_zap_one);

Looks reasonable, but I'm not well versed enough in mm code to evaluate
that with confidence.

> +
> +void __gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end)

Maybe call this gmap_helper_discard_nolock or something.

> +{
> +	struct vm_area_struct *vma;
> +	unsigned long next;
> +
> +	mmap_assert_locked(mm);
> +
> +	while (vmaddr < end) {
> +		vma = find_vma_intersection(mm, vmaddr, end);
> +		if (!vma)
> +			break;
> +		vmaddr = max(vmaddr, vma->vm_start);
> +		next = min(end, vma->vm_end);
> +		if (!is_vm_hugetlb_page(vma))
> +			zap_page_range_single(vma, vmaddr, next - vmaddr, NULL);
> +		vmaddr = next;
> +	}
> +}
> +
> +void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end)
> +{
> +	mmap_read_lock(mm);
> +	__gmap_helper_discard(mm, vmaddr, end);
> +	mmap_read_unlock(mm);
> +}
> +EXPORT_SYMBOL_GPL(gmap_helper_discard);
> +
> +static int pmd_lookup(struct mm_struct *mm, unsigned long addr, pmd_t **pmdp)
> +{
> +	struct vm_area_struct *vma;
> +	pgd_t *pgd;
> +	p4d_t *p4d;
> +	pud_t *pud;
> +
> +	/* We need a valid VMA, otherwise this is clearly a fault. */
> +	vma = vma_lookup(mm, addr);
> +	if (!vma)
> +		return -EFAULT;
> +
> +	pgd = pgd_offset(mm, addr);
> +	if (!pgd_present(*pgd))
> +		return -ENOENT;

What about pgd_bad?

> +
> +	p4d = p4d_offset(pgd, addr);
> +	if (!p4d_present(*p4d))
> +		return -ENOENT;
> +
> +	pud = pud_offset(p4d, addr);
> +	if (!pud_present(*pud))
> +		return -ENOENT;
> +
> +	/* Large PUDs are not supported yet. */
> +	if (pud_leaf(*pud))
> +		return -EFAULT;
> +
> +	*pmdp = pmd_offset(pud, addr);
> +	return 0;
> +}
> +
> +void __gmap_helper_set_unused(struct mm_struct *mm, unsigned long vmaddr)

What is this function for? Why do you introduce it now?

> +{
> +	spinlock_t *ptl;
> +	pmd_t *pmdp;
> +	pte_t *ptep;
> +
> +	mmap_assert_locked(mm);
> +
> +	if (pmd_lookup(mm, vmaddr, &pmdp))
> +		return;
> +	ptl = pmd_lock(mm, pmdp);
> +	if (!pmd_present(*pmdp) || pmd_leaf(*pmdp)) {
> +		spin_unlock(ptl);
> +		return;
> +	}
> +	spin_unlock(ptl);
> +
> +	ptep = pte_offset_map_lock(mm, pmdp, vmaddr, &ptl);
> +	if (!ptep)
> +		return;
> +	/* The last byte of a pte can be changed freely without ipte */
> +	__atomic64_or(_PAGE_UNUSED, (long *)ptep);
> +	pte_unmap_unlock(ptep, ptl);
> +}
> +EXPORT_SYMBOL_GPL(__gmap_helper_set_unused);

The stuff below is from arch/s390/mm/gmap.c right?
Are you going to delete it from there?

> +static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
> +				   unsigned long end, struct mm_walk *walk)
> +{

[...]

> +}
> +
> +static const struct mm_walk_ops find_zeropage_ops = {
> +	.pte_entry      = find_zeropage_pte_entry,
> +	.walk_lock      = PGWALK_WRLOCK,
> +};
> +
> +/*
> + * Unshare all shared zeropages, replacing them by anonymous pages. Note that
> + * we cannot simply zap all shared zeropages, because this could later
> + * trigger unexpected userfaultfd missing events.
> + *
> + * This must be called after mm->context.allow_cow_sharing was
> + * set to 0, to avoid future mappings of shared zeropages.
> + *
> + * mm contracts with s390, that even if mm were to remove a page table,
> + * and racing with walk_page_range_vma() calling pte_offset_map_lock()
> + * would fail, it will never insert a page table containing empty zero
> + * pages once mm_forbids_zeropage(mm) i.e.
> + * mm->context.allow_cow_sharing is set to 0.
> + */
> +static int __gmap_helper_unshare_zeropages(struct mm_struct *mm)
> +{

[...]

> +}
> +
> +static int __gmap_helper_disable_cow_sharing(struct mm_struct *mm)
> +{

[...]

> +}
> +
> +/*
> + * Disable most COW-sharing of memory pages for the whole process:
> + * (1) Disable KSM and unmerge/unshare any KSM pages.
> + * (2) Disallow shared zeropages and unshare any zerpages that are mapped.
> + *
> + * Not that we currently don't bother with COW-shared pages that are shared
> + * with parent/child processes due to fork().
> + */
> +int gmap_helper_disable_cow_sharing(void)
> +{

[...]

> +}
> +EXPORT_SYMBOL_GPL(gmap_helper_disable_cow_sharing);

-- 
IBM Deutschland Research & Development GmbH
Vorsitzender des Aufsichtsrats: Wolfgang Wendt
Geschäftsführung: David Faller
Sitz der Gesellschaft: Böblingen / Registergericht: Amtsgericht Stuttgart, HRB 243294

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v1 4/5] KVM: s390: refactor and split some gmap helpers
  2025-05-21 14:55   ` Nina Schoetterl-Glausch
@ 2025-05-21 15:19     ` Claudio Imbrenda
  2025-05-21 15:30       ` Nina Schoetterl-Glausch
  0 siblings, 1 reply; 21+ messages in thread
From: Claudio Imbrenda @ 2025-05-21 15:19 UTC (permalink / raw)
  To: Nina Schoetterl-Glausch
  Cc: linux-kernel, kvm, linux-s390, frankja, borntraeger, seiden, nrb,
	david, hca, agordeev, svens, gor

On Wed, 21 May 2025 16:55:18 +0200
Nina Schoetterl-Glausch <nsg@linux.ibm.com> wrote:

> On Wed, 2025-05-14 at 18:38 +0200, Claudio Imbrenda wrote:
> > Refactor some gmap functions; move the implementation into a separate
> > file with only helper functions. The new helper functions work on vm
> > addresses, leaving all gmap logic in the gmap functions, which mostly
> > become just wrappers.
> > 
> > The whole gmap handling is going to be moved inside KVM soon, but the
> > helper functions need to touch core mm functions, and thus need to
> > stay in the core of kernel.
> > 
> > Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> > ---
> >  MAINTAINERS                          |   2 +
> >  arch/s390/include/asm/gmap_helpers.h |  18 ++
> >  arch/s390/kvm/diag.c                 |  11 +-
> >  arch/s390/kvm/kvm-s390.c             |   3 +-
> >  arch/s390/mm/Makefile                |   2 +
> >  arch/s390/mm/gmap.c                  |  46 ++---
> >  arch/s390/mm/gmap_helpers.c          | 266 +++++++++++++++++++++++++++
> >  7 files changed, 307 insertions(+), 41 deletions(-)
> >  create mode 100644 arch/s390/include/asm/gmap_helpers.h
> >  create mode 100644 arch/s390/mm/gmap_helpers.c
> >   
> [...]
> 
> > diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
> > index 4869555ff403..17a2a57de3a2 100644
> > --- a/arch/s390/mm/gmap.c
> > +++ b/arch/s390/mm/gmap.c
> >   
> [...]
> 
> >  void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
> >  {
> > -	unsigned long gaddr, vmaddr, size;
> > -	struct vm_area_struct *vma;
> > +	unsigned long vmaddr, next, start, end;
> >  
> >  	mmap_read_lock(gmap->mm);
> > -	for (gaddr = from; gaddr < to;
> > -	     gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
> > -		/* Find the vm address for the guest address */
> > -		vmaddr = (unsigned long)
> > -			radix_tree_lookup(&gmap->guest_to_host,
> > -					  gaddr >> PMD_SHIFT);
> > +	for ( ; from < to; from = next) {
> > +		next = ALIGN(from + 1, PMD_SIZE);  
> 
> I think you can use pmd_addr_end(from, to) here...

I guess

> 
> > +		vmaddr = (unsigned long)radix_tree_lookup(&gmap->guest_to_host, from >> PMD_SHIFT);
> >  		if (!vmaddr)
> >  			continue;
> > -		vmaddr |= gaddr & ~PMD_MASK;
> > -		/* Find vma in the parent mm */
> > -		vma = find_vma(gmap->mm, vmaddr);
> > -		if (!vma)
> > -			continue;
> > -		/*
> > -		 * We do not discard pages that are backed by
> > -		 * hugetlbfs, so we don't have to refault them.
> > -		 */
> > -		if (is_vm_hugetlb_page(vma))
> > -			continue;
> > -		size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
> > -		zap_page_range_single(vma, vmaddr, size, NULL);
> > +		start = vmaddr | (from & ~PMD_MASK);
> > +		end = (vmaddr | (min(to - 1, next - 1) & ~PMD_MASK)) + 1;  
> 
> ... then simply do:
> 		end = vmaddr + (next - from);

looks cleaner, yes

> 
> > +		__gmap_helper_discard(gmap->mm, start, end);
> >  	}
> >  	mmap_read_unlock(gmap->mm);
> >  }
> > diff --git a/arch/s390/mm/gmap_helpers.c b/arch/s390/mm/gmap_helpers.c
> > new file mode 100644
> > index 000000000000..8e66586718f6
> > --- /dev/null
> > +++ b/arch/s390/mm/gmap_helpers.c
> > @@ -0,0 +1,266 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + *  Helper functions for KVM guest address space mapping code
> > + *
> > + *    Copyright IBM Corp. 2007, 2025
> > + *    Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com>
> > + *               Martin Schwidefsky <schwidefsky@de.ibm.com>
> > + *               David Hildenbrand <david@redhat.com>
> > + *               Janosch Frank <frankja@linux.vnet.ibm.com>
> > + */
> > +#include <linux/mm_types.h>
> > +#include <linux/mmap_lock.h>
> > +#include <linux/mm.h>
> > +#include <linux/hugetlb.h>
> > +#include <linux/pagewalk.h>
> > +#include <linux/ksm.h>
> > +#include <asm/gmap_helpers.h>  
> 
> Please add documentation to all these functions for those of us that
> don't live and breath mm code :)

eh... yes I think it's a good idea

> 
> > +
> > +static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
> > +{
> > +	if (!non_swap_entry(entry))
> > +		dec_mm_counter(mm, MM_SWAPENTS);
> > +	else if (is_migration_entry(entry))
> > +		dec_mm_counter(mm, mm_counter(pfn_swap_entry_folio(entry)));
> > +	free_swap_and_cache(entry);
> > +}
> > +
> > +void __gmap_helper_zap_one(struct mm_struct *mm, unsigned long vmaddr)  
> 
> __gmap_helper_zap_mapping_pte ?

but I'm not taking a pte as parameter

> 
> > +{
> > +	struct vm_area_struct *vma;
> > +	spinlock_t *ptl;
> > +	pte_t *ptep;
> > +
> > +	mmap_assert_locked(mm);
> > +
> > +	/* Find the vm address for the guest address */
> > +	vma = vma_lookup(mm, vmaddr);
> > +	if (!vma || is_vm_hugetlb_page(vma))
> > +		return;
> > +
> > +	/* Get pointer to the page table entry */
> > +	ptep = get_locked_pte(mm, vmaddr, &ptl);
> > +	if (!likely(ptep))  
> 
> if (unlikely(!ptep)) reads nicer to me.

ok

> 
> > +		return;
> > +	if (pte_swap(*ptep))
> > +		ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep));
> > +	pte_unmap_unlock(ptep, ptl);
> > +}
> > +EXPORT_SYMBOL_GPL(__gmap_helper_zap_one);  
> 
> Looks reasonable, but I'm not well versed enough in mm code to evaluate
> that with confidence.
> 
> > +
> > +void __gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end)  
> 
> Maybe call this gmap_helper_discard_nolock or something.

maybe __gmap_helper_discard_unlocked?

the __ prefix often implies lack of locking

> 
> > +{
> > +	struct vm_area_struct *vma;
> > +	unsigned long next;
> > +
> > +	mmap_assert_locked(mm);
> > +
> > +	while (vmaddr < end) {
> > +		vma = find_vma_intersection(mm, vmaddr, end);
> > +		if (!vma)
> > +			break;
> > +		vmaddr = max(vmaddr, vma->vm_start);
> > +		next = min(end, vma->vm_end);
> > +		if (!is_vm_hugetlb_page(vma))
> > +			zap_page_range_single(vma, vmaddr, next - vmaddr, NULL);
> > +		vmaddr = next;
> > +	}
> > +}
> > +
> > +void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end)
> > +{
> > +	mmap_read_lock(mm);
> > +	__gmap_helper_discard(mm, vmaddr, end);
> > +	mmap_read_unlock(mm);
> > +}
> > +EXPORT_SYMBOL_GPL(gmap_helper_discard);
> > +
> > +static int pmd_lookup(struct mm_struct *mm, unsigned long addr, pmd_t **pmdp)
> > +{
> > +	struct vm_area_struct *vma;
> > +	pgd_t *pgd;
> > +	p4d_t *p4d;
> > +	pud_t *pud;
> > +
> > +	/* We need a valid VMA, otherwise this is clearly a fault. */
> > +	vma = vma_lookup(mm, addr);
> > +	if (!vma)
> > +		return -EFAULT;
> > +
> > +	pgd = pgd_offset(mm, addr);
> > +	if (!pgd_present(*pgd))
> > +		return -ENOENT;  
> 
> What about pgd_bad?

I don't know, this code is copied verbatim from mm/gmap.c

> 
> > +
> > +	p4d = p4d_offset(pgd, addr);
> > +	if (!p4d_present(*p4d))
> > +		return -ENOENT;
> > +
> > +	pud = pud_offset(p4d, addr);
> > +	if (!pud_present(*pud))
> > +		return -ENOENT;
> > +
> > +	/* Large PUDs are not supported yet. */
> > +	if (pud_leaf(*pud))
> > +		return -EFAULT;
> > +
> > +	*pmdp = pmd_offset(pud, addr);
> > +	return 0;
> > +}
> > +
> > +void __gmap_helper_set_unused(struct mm_struct *mm, unsigned long vmaddr)  
> 
> What is this function for? Why do you introduce it now?

this is for cmma handling, I'm introducing it now because it needs to
be in this file and I would like to put all the stuff in at once.
I will not need to touch this file anymore if I get this in now.

> 
> > +{
> > +	spinlock_t *ptl;
> > +	pmd_t *pmdp;
> > +	pte_t *ptep;
> > +
> > +	mmap_assert_locked(mm);
> > +
> > +	if (pmd_lookup(mm, vmaddr, &pmdp))
> > +		return;
> > +	ptl = pmd_lock(mm, pmdp);
> > +	if (!pmd_present(*pmdp) || pmd_leaf(*pmdp)) {
> > +		spin_unlock(ptl);
> > +		return;
> > +	}
> > +	spin_unlock(ptl);
> > +
> > +	ptep = pte_offset_map_lock(mm, pmdp, vmaddr, &ptl);
> > +	if (!ptep)
> > +		return;
> > +	/* The last byte of a pte can be changed freely without ipte */
> > +	__atomic64_or(_PAGE_UNUSED, (long *)ptep);
> > +	pte_unmap_unlock(ptep, ptl);
> > +}
> > +EXPORT_SYMBOL_GPL(__gmap_helper_set_unused);  
> 
> The stuff below is from arch/s390/mm/gmap.c right?
> Are you going to delete it from there?

not in this series, but the next series will remove mm/gmap.c altogether

> 
> > +static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
> > +				   unsigned long end, struct mm_walk *walk)
> > +{  
> 
> [...]
> 
> > +}
> > +
> > +static const struct mm_walk_ops find_zeropage_ops = {
> > +	.pte_entry      = find_zeropage_pte_entry,
> > +	.walk_lock      = PGWALK_WRLOCK,
> > +};
> > +
> > +/*
> > + * Unshare all shared zeropages, replacing them by anonymous pages. Note that
> > + * we cannot simply zap all shared zeropages, because this could later
> > + * trigger unexpected userfaultfd missing events.
> > + *
> > + * This must be called after mm->context.allow_cow_sharing was
> > + * set to 0, to avoid future mappings of shared zeropages.
> > + *
> > + * mm contracts with s390, that even if mm were to remove a page table,
> > + * and racing with walk_page_range_vma() calling pte_offset_map_lock()
> > + * would fail, it will never insert a page table containing empty zero
> > + * pages once mm_forbids_zeropage(mm) i.e.
> > + * mm->context.allow_cow_sharing is set to 0.
> > + */
> > +static int __gmap_helper_unshare_zeropages(struct mm_struct *mm)
> > +{  
> 
> [...]
> 
> > +}
> > +
> > +static int __gmap_helper_disable_cow_sharing(struct mm_struct *mm)
> > +{  
> 
> [...]
> 
> > +}
> > +
> > +/*
> > + * Disable most COW-sharing of memory pages for the whole process:
> > + * (1) Disable KSM and unmerge/unshare any KSM pages.
> > + * (2) Disallow shared zeropages and unshare any zerpages that are mapped.
> > + *
> > + * Not that we currently don't bother with COW-shared pages that are shared
> > + * with parent/child processes due to fork().
> > + */
> > +int gmap_helper_disable_cow_sharing(void)
> > +{  
> 
> [...]
> 
> > +}
> > +EXPORT_SYMBOL_GPL(gmap_helper_disable_cow_sharing);  
> 


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v1 4/5] KVM: s390: refactor and split some gmap helpers
  2025-05-21 15:19     ` Claudio Imbrenda
@ 2025-05-21 15:30       ` Nina Schoetterl-Glausch
  2025-05-21 15:41         ` Claudio Imbrenda
  0 siblings, 1 reply; 21+ messages in thread
From: Nina Schoetterl-Glausch @ 2025-05-21 15:30 UTC (permalink / raw)
  To: Claudio Imbrenda
  Cc: linux-kernel, kvm, linux-s390, frankja, borntraeger, seiden, nrb,
	david, hca, agordeev, svens, gor

On Wed, 2025-05-21 at 17:19 +0200, Claudio Imbrenda wrote:
> On Wed, 21 May 2025 16:55:18 +0200
> Nina Schoetterl-Glausch <nsg@linux.ibm.com> wrote:
> 
> > On Wed, 2025-05-14 at 18:38 +0200, Claudio Imbrenda wrote:
> > > Refactor some gmap functions; move the implementation into a separate
> > > file with only helper functions. The new helper functions work on vm
> > > addresses, leaving all gmap logic in the gmap functions, which mostly
> > > become just wrappers.
> > > 
> > > The whole gmap handling is going to be moved inside KVM soon, but the
> > > helper functions need to touch core mm functions, and thus need to
> > > stay in the core of kernel.
> > > 
> > > Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> > > ---
> > >  MAINTAINERS                          |   2 +
> > >  arch/s390/include/asm/gmap_helpers.h |  18 ++
> > >  arch/s390/kvm/diag.c                 |  11 +-
> > >  arch/s390/kvm/kvm-s390.c             |   3 +-
> > >  arch/s390/mm/Makefile                |   2 +
> > >  arch/s390/mm/gmap.c                  |  46 ++---
> > >  arch/s390/mm/gmap_helpers.c          | 266 +++++++++++++++++++++++++++
> > >  7 files changed, 307 insertions(+), 41 deletions(-)
> > >  create mode 100644 arch/s390/include/asm/gmap_helpers.h
> > >  create mode 100644 arch/s390/mm/gmap_helpers.c
> > >   
[...]

> > > +void __gmap_helper_zap_one(struct mm_struct *mm, unsigned long vmaddr)  
> > 
> > __gmap_helper_zap_mapping_pte ?
> 
> but I'm not taking a pte as parameter

The pte being zapped is the one mapping vmaddr, right?
> 
> > 
> > > +{
> > > +	struct vm_area_struct *vma;
> > > +	spinlock_t *ptl;
> > > +	pte_t *ptep;
> > > +
> > > +	mmap_assert_locked(mm);
> > > +
> > > +	/* Find the vm address for the guest address */
> > > +	vma = vma_lookup(mm, vmaddr);
> > > +	if (!vma || is_vm_hugetlb_page(vma))
> > > +		return;
> > > +
> > > +	/* Get pointer to the page table entry */
> > > +	ptep = get_locked_pte(mm, vmaddr, &ptl);
> > > +	if (!likely(ptep))  
> > 
> > if (unlikely(!ptep)) reads nicer to me.
> 
> ok
> 
> > 
> > > +		return;
> > > +	if (pte_swap(*ptep))
> > > +		ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep));
> > > +	pte_unmap_unlock(ptep, ptl);
> > > +}
> > > +EXPORT_SYMBOL_GPL(__gmap_helper_zap_one);  
> > 
> > Looks reasonable, but I'm not well versed enough in mm code to evaluate
> > that with confidence.
> > 
> > > +
> > > +void __gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end)  
> > 
> > Maybe call this gmap_helper_discard_nolock or something.
> 
> maybe __gmap_helper_discard_unlocked?
> 
> the __ prefix often implies lack of locking

_nolock *definitely* implies it :P

[...]

> > 
> > The stuff below is from arch/s390/mm/gmap.c right?
> > Are you going to delete it from there?
> 
> not in this series, but the next series will remove mm/gmap.c altogether

Can't you do it with this one?


[...]
-- 
IBM Deutschland Research & Development GmbH
Vorsitzender des Aufsichtsrats: Wolfgang Wendt
Geschäftsführung: David Faller
Sitz der Gesellschaft: Böblingen / Registergericht: Amtsgericht Stuttgart, HRB 243294

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v1 4/5] KVM: s390: refactor and split some gmap helpers
  2025-05-21 15:30       ` Nina Schoetterl-Glausch
@ 2025-05-21 15:41         ` Claudio Imbrenda
  2025-05-21 15:46           ` Nina Schoetterl-Glausch
  0 siblings, 1 reply; 21+ messages in thread
From: Claudio Imbrenda @ 2025-05-21 15:41 UTC (permalink / raw)
  To: Nina Schoetterl-Glausch
  Cc: linux-kernel, kvm, linux-s390, frankja, borntraeger, seiden, nrb,
	david, hca, agordeev, svens, gor

On Wed, 21 May 2025 17:30:00 +0200
Nina Schoetterl-Glausch <nsg@linux.ibm.com> wrote:

> On Wed, 2025-05-21 at 17:19 +0200, Claudio Imbrenda wrote:
> > On Wed, 21 May 2025 16:55:18 +0200
> > Nina Schoetterl-Glausch <nsg@linux.ibm.com> wrote:
> >   
> > > On Wed, 2025-05-14 at 18:38 +0200, Claudio Imbrenda wrote:  
> > > > Refactor some gmap functions; move the implementation into a separate
> > > > file with only helper functions. The new helper functions work on vm
> > > > addresses, leaving all gmap logic in the gmap functions, which mostly
> > > > become just wrappers.
> > > > 
> > > > The whole gmap handling is going to be moved inside KVM soon, but the
> > > > helper functions need to touch core mm functions, and thus need to
> > > > stay in the core of kernel.
> > > > 
> > > > Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> > > > ---
> > > >  MAINTAINERS                          |   2 +
> > > >  arch/s390/include/asm/gmap_helpers.h |  18 ++
> > > >  arch/s390/kvm/diag.c                 |  11 +-
> > > >  arch/s390/kvm/kvm-s390.c             |   3 +-
> > > >  arch/s390/mm/Makefile                |   2 +
> > > >  arch/s390/mm/gmap.c                  |  46 ++---
> > > >  arch/s390/mm/gmap_helpers.c          | 266 +++++++++++++++++++++++++++
> > > >  7 files changed, 307 insertions(+), 41 deletions(-)
> > > >  create mode 100644 arch/s390/include/asm/gmap_helpers.h
> > > >  create mode 100644 arch/s390/mm/gmap_helpers.c
> > > >     
> [...]
> 
> > > > +void __gmap_helper_zap_one(struct mm_struct *mm, unsigned long vmaddr)    
> > > 
> > > __gmap_helper_zap_mapping_pte ?  
> > 
> > but I'm not taking a pte as parameter  
> 
> The pte being zapped is the one mapping vmaddr, right?

I don't know, _pte kinda sounds to me as the function would be taking a
pte as parameter

> >   
> > >   
> > > > +{
> > > > +	struct vm_area_struct *vma;
> > > > +	spinlock_t *ptl;
> > > > +	pte_t *ptep;
> > > > +
> > > > +	mmap_assert_locked(mm);
> > > > +
> > > > +	/* Find the vm address for the guest address */
> > > > +	vma = vma_lookup(mm, vmaddr);
> > > > +	if (!vma || is_vm_hugetlb_page(vma))
> > > > +		return;
> > > > +
> > > > +	/* Get pointer to the page table entry */
> > > > +	ptep = get_locked_pte(mm, vmaddr, &ptl);
> > > > +	if (!likely(ptep))    
> > > 
> > > if (unlikely(!ptep)) reads nicer to me.  
> > 
> > ok
> >   
> > >   
> > > > +		return;
> > > > +	if (pte_swap(*ptep))
> > > > +		ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep));
> > > > +	pte_unmap_unlock(ptep, ptl);
> > > > +}
> > > > +EXPORT_SYMBOL_GPL(__gmap_helper_zap_one);    
> > > 
> > > Looks reasonable, but I'm not well versed enough in mm code to evaluate
> > > that with confidence.
> > >   
> > > > +
> > > > +void __gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end)    
> > > 
> > > Maybe call this gmap_helper_discard_nolock or something.  
> > 
> > maybe __gmap_helper_discard_unlocked?
> > 
> > the __ prefix often implies lack of locking  
> 
> _nolock *definitely* implies it :P
> 
> [...]
> 
> > > 
> > > The stuff below is from arch/s390/mm/gmap.c right?
> > > Are you going to delete it from there?  
> > 
> > not in this series, but the next series will remove mm/gmap.c altogether  
> 
> Can't you do it with this one?

if you mean removing mm/gmap.c, no. I would need to push the whole gmap
rewrite series, which is not ready yet.

if you mean removing the redundant functions... I guess I could

> 
> 
> [...]


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v1 5/5] KVM: s390: simplify and move pv code
  2025-05-14 16:38 ` [PATCH v1 5/5] KVM: s390: simplify and move pv code Claudio Imbrenda
@ 2025-05-21 15:42   ` Nina Schoetterl-Glausch
  0 siblings, 0 replies; 21+ messages in thread
From: Nina Schoetterl-Glausch @ 2025-05-21 15:42 UTC (permalink / raw)
  To: Claudio Imbrenda, linux-kernel
  Cc: kvm, linux-s390, frankja, borntraeger, seiden, nrb, david, hca,
	agordeev, svens, gor

On Wed, 2025-05-14 at 18:38 +0200, Claudio Imbrenda wrote:
> All functions in kvm/gmap.c fit better in kvm/pv.c instead.
> Move and rename them appropriately, then delete the now empty
> kvm/gmap.c and kvm/gmap.h.
> 
> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>

Reviewed-by: Nina Schoetterl-Glausch <nsg@linux.ibm.com>

[...]

-- 
IBM Deutschland Research & Development GmbH
Vorsitzender des Aufsichtsrats: Wolfgang Wendt
Geschäftsführung: David Faller
Sitz der Gesellschaft: Böblingen / Registergericht: Amtsgericht Stuttgart, HRB 243294

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v1 4/5] KVM: s390: refactor and split some gmap helpers
  2025-05-21 15:41         ` Claudio Imbrenda
@ 2025-05-21 15:46           ` Nina Schoetterl-Glausch
  0 siblings, 0 replies; 21+ messages in thread
From: Nina Schoetterl-Glausch @ 2025-05-21 15:46 UTC (permalink / raw)
  To: Claudio Imbrenda
  Cc: linux-kernel, kvm, linux-s390, frankja, borntraeger, seiden, nrb,
	david, hca, agordeev, svens, gor

On Wed, 2025-05-21 at 17:41 +0200, Claudio Imbrenda wrote:
> On Wed, 21 May 2025 17:30:00 +0200
> Nina Schoetterl-Glausch <nsg@linux.ibm.com> wrote:
> 
> > On Wed, 2025-05-21 at 17:19 +0200, Claudio Imbrenda wrote:
> > > On Wed, 21 May 2025 16:55:18 +0200
> > > Nina Schoetterl-Glausch <nsg@linux.ibm.com> wrote:
> > >   
> > > > On Wed, 2025-05-14 at 18:38 +0200, Claudio Imbrenda wrote:  
> > > > > Refactor some gmap functions; move the implementation into a separate
> > > > > file with only helper functions. The new helper functions work on vm
> > > > > addresses, leaving all gmap logic in the gmap functions, which mostly
> > > > > become just wrappers.
> > > > > 
> > > > > The whole gmap handling is going to be moved inside KVM soon, but the
> > > > > helper functions need to touch core mm functions, and thus need to
> > > > > stay in the core of kernel.
> > > > > 
> > > > > Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> > > > > ---
> > > > >  MAINTAINERS                          |   2 +
> > > > >  arch/s390/include/asm/gmap_helpers.h |  18 ++
> > > > >  arch/s390/kvm/diag.c                 |  11 +-
> > > > >  arch/s390/kvm/kvm-s390.c             |   3 +-
> > > > >  arch/s390/mm/Makefile                |   2 +
> > > > >  arch/s390/mm/gmap.c                  |  46 ++---
> > > > >  arch/s390/mm/gmap_helpers.c          | 266 +++++++++++++++++++++++++++
> > > > >  7 files changed, 307 insertions(+), 41 deletions(-)
> > > > >  create mode 100644 arch/s390/include/asm/gmap_helpers.h
> > > > >  create mode 100644 arch/s390/mm/gmap_helpers.c
> > > > >     
> > [...]
> > 
> > > > > +void __gmap_helper_zap_one(struct mm_struct *mm, unsigned long vmaddr)    
> > > > 
> > > > __gmap_helper_zap_mapping_pte ?  
> > > 
> > > but I'm not taking a pte as parameter  
> > 
> > The pte being zapped is the one mapping vmaddr, right?
> 
> I don't know, _pte kinda sounds to me as the function would be taking a
> pte as parameter

__gmap_helper_zap_pte_mapping_addr ?

IMO __gmap_helper_zap_one is rather vague. Zap one? Which one?

[...]

> > > 
> > > > The stuff below is from arch/s390/mm/gmap.c right?
> > > > Are you going to delete it from there?  
> > > 
> > > not in this series, but the next series will remove mm/gmap.c altogether  
> > 
> > Can't you do it with this one?
> 
> if you mean removing mm/gmap.c, no. I would need to push the whole gmap
> rewrite series, which is not ready yet.
> 
> if you mean removing the redundant functions... I guess I could

The latter. I think that would be cleaner.
> 
> > 
> > 
> > [...]

-- 
IBM Deutschland Research & Development GmbH
Vorsitzender des Aufsichtsrats: Wolfgang Wendt
Geschäftsführung: David Faller
Sitz der Gesellschaft: Böblingen / Registergericht: Amtsgericht Stuttgart, HRB 243294

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v1 2/5] KVM: s390: remove unneeded srcu lock
  2025-05-20 14:34     ` Nina Schoetterl-Glausch
@ 2025-06-27  8:45       ` Christian Borntraeger
  0 siblings, 0 replies; 21+ messages in thread
From: Christian Borntraeger @ 2025-06-27  8:45 UTC (permalink / raw)
  To: Nina Schoetterl-Glausch, Claudio Imbrenda, linux-kernel
  Cc: kvm, linux-s390, frankja, seiden, nrb, david, hca, agordeev,
	svens, gor



Am 20.05.25 um 16:34 schrieb Nina Schoetterl-Glausch:
> On Mon, 2025-05-19 at 16:42 +0200, Nina Schoetterl-Glausch wrote:
>> On Wed, 2025-05-14 at 18:38 +0200, Claudio Imbrenda wrote:
>>> All paths leading to handle_essa() already hold the kvm->srcu.
>>> Remove unneeded srcu locking from handle_essa().
>>>
>>> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
>>
>> Reviewed-by: Nina Schoetterl-Glausch <nsg@linux.ibm.com>
>>
>> Why are you removing it tho?
>> It should be very low cost and it makes the code more robust,
>> since handle_essa itself ensures that it has the lock.
>> It is also easier to understand which synchronization the function does.
>> You could of course add a comment stating that the kvm srcu read side needs
>> to be held. I think this would be good to have if you really don't want the
>> srcu_read_lock here.
>> But then you might also want that documented up the call chain.
> 
> Actually, can we use __must_hold or have some assert?

Yes, that might be the best way.


^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2025-06-27  8:45 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-05-14 16:38 [PATCH v1 0/5] KVM: s390: some cleanup and small fixes Claudio Imbrenda
2025-05-14 16:38 ` [PATCH v1 1/5] s390: remove unneeded includes Claudio Imbrenda
2025-05-15 13:56   ` Christoph Schlameuss
2025-05-15 15:29     ` Claudio Imbrenda
2025-05-14 16:38 ` [PATCH v1 2/5] KVM: s390: remove unneeded srcu lock Claudio Imbrenda
2025-05-19  8:25   ` Christian Borntraeger
2025-05-19 14:42   ` Nina Schoetterl-Glausch
2025-05-20 14:34     ` Nina Schoetterl-Glausch
2025-06-27  8:45       ` Christian Borntraeger
2025-05-14 16:38 ` [PATCH v1 3/5] KVM: s390: refactor some functions in priv.c Claudio Imbrenda
2025-05-20 12:49   ` Nina Schoetterl-Glausch
2025-05-20 14:39     ` Claudio Imbrenda
2025-05-14 16:38 ` [PATCH v1 4/5] KVM: s390: refactor and split some gmap helpers Claudio Imbrenda
2025-05-21 14:55   ` Nina Schoetterl-Glausch
2025-05-21 15:19     ` Claudio Imbrenda
2025-05-21 15:30       ` Nina Schoetterl-Glausch
2025-05-21 15:41         ` Claudio Imbrenda
2025-05-21 15:46           ` Nina Schoetterl-Glausch
2025-05-14 16:38 ` [PATCH v1 5/5] KVM: s390: simplify and move pv code Claudio Imbrenda
2025-05-21 15:42   ` Nina Schoetterl-Glausch
2025-05-14 17:41 ` [PATCH v1 0/5] KVM: s390: some cleanup and small fixes David Hildenbrand

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).