All of lore.kernel.org
 help / color / mirror / Atom feed
From: Marc Zyngier <maz@kernel.org>
To: kvmarm@lists.linux.dev, kvm@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org
Cc: Alexandru Elisei <alexandru.elisei@arm.com>,
	Andre Przywara <andre.przywara@arm.com>,
	Chase Conklin <chase.conklin@arm.com>,
	Christoffer Dall <christoffer.dall@arm.com>,
	Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com>,
	Darren Hart <darren@os.amperecomputing.com>,
	Jintack Lim <jintack@cs.columbia.edu>,
	Russell King <rmk+kernel@armlinux.org.uk>,
	Miguel Luis <miguel.luis@oracle.com>,
	James Morse <james.morse@arm.com>,
	Suzuki K Poulose <suzuki.poulose@arm.com>,
	Oliver Upton <oliver.upton@linux.dev>,
	Zenghui Yu <yuzenghui@huawei.com>
Subject: [PATCH v10 27/59] KVM: arm64: nv: Handle shadow stage 2 page faults
Date: Mon, 15 May 2023 18:30:31 +0100	[thread overview]
Message-ID: <20230515173103.1017669-28-maz@kernel.org> (raw)
In-Reply-To: <20230515173103.1017669-1-maz@kernel.org>

If we are faulting on a shadow stage 2 translation, we first walk the
guest hypervisor's stage 2 page table to see if it has a mapping. If
not, we inject a stage 2 page fault to the virtual EL2. Otherwise, we
create a mapping in the shadow stage 2 page table.

Note that we have to deal with two IPAs when we got a shadow stage 2
page fault. One is the address we faulted on, and is in the L2 guest
phys space. The other is from the guest stage-2 page table walk, and is
in the L1 guest phys space.  To differentiate them, we rename variables
so that fault_ipa is used for the former and ipa is used for the latter.

Co-developed-by: Christoffer Dall <christoffer.dall@linaro.org>
Co-developed-by: Jintack Lim <jintack.lim@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Jintack Lim <jintack.lim@linaro.org>
[maz: rewrote this multiple times...]
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/include/asm/kvm_emulate.h |  6 ++
 arch/arm64/include/asm/kvm_nested.h  | 19 ++++++
 arch/arm64/kvm/mmu.c                 | 89 ++++++++++++++++++++++++----
 arch/arm64/kvm/nested.c              | 48 +++++++++++++++
 4 files changed, 152 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index c64980d33c8b..f4ae0c61fa1c 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -652,4 +652,10 @@ static inline bool vcpu_has_feature(struct kvm_vcpu *vcpu, int feature)
 	return test_bit(feature, vcpu->arch.features);
 }
 
+static inline bool kvm_is_shadow_s2_fault(struct kvm_vcpu *vcpu)
+{
+	return (vcpu->arch.hw_mmu != &vcpu->kvm->arch.mmu &&
+		vcpu->arch.hw_mmu->nested_stage2_enabled);
+}
+
 #endif /* __ARM64_KVM_EMULATE_H__ */
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index 5306f7ca5e07..d2cee4faa387 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -76,8 +76,27 @@ struct kvm_s2_trans {
 	u64 upper_attr;
 };
 
+static inline phys_addr_t kvm_s2_trans_output(struct kvm_s2_trans *trans)
+{
+	return trans->output;
+}
+
+static inline unsigned long kvm_s2_trans_size(struct kvm_s2_trans *trans)
+{
+	return trans->block_size;
+}
+
+static inline u32 kvm_s2_trans_esr(struct kvm_s2_trans *trans)
+{
+	return trans->esr;
+}
+
 extern int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
 			      struct kvm_s2_trans *result);
+extern int kvm_s2_handle_perm_fault(struct kvm_vcpu *vcpu,
+				    struct kvm_s2_trans *trans);
+extern int kvm_inject_s2_fault(struct kvm_vcpu *vcpu, u64 esr_el2);
+int handle_wfx_nested(struct kvm_vcpu *vcpu, bool is_wfe);
 
 extern bool forward_smc_trap(struct kvm_vcpu *vcpu);
 extern bool __check_nv_sr_forward(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 3268936eec40..bc6f004d4ffb 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1253,14 +1253,16 @@ static bool kvm_vma_mte_allowed(struct vm_area_struct *vma)
 }
 
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
-			  struct kvm_memory_slot *memslot, unsigned long hva,
-			  unsigned long fault_status)
+			  struct kvm_s2_trans *nested,
+			  struct kvm_memory_slot *memslot,
+			  unsigned long hva, unsigned long fault_status)
 {
 	int ret = 0;
 	bool write_fault, writable, force_pte = false;
 	bool exec_fault, mte_allowed;
 	bool device = false;
 	unsigned long mmu_seq;
+	phys_addr_t ipa = fault_ipa;
 	struct kvm *kvm = vcpu->kvm;
 	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
 	struct vm_area_struct *vma;
@@ -1345,10 +1347,38 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	}
 
 	vma_pagesize = 1UL << vma_shift;
+
+	if (nested) {
+		unsigned long max_map_size;
+
+		max_map_size = force_pte ? PUD_SIZE : PAGE_SIZE;
+
+		ipa = kvm_s2_trans_output(nested);
+
+		/*
+		 * If we're about to create a shadow stage 2 entry, then we
+		 * can only create a block mapping if the guest stage 2 page
+		 * table uses at least as big a mapping.
+		 */
+		max_map_size = min(kvm_s2_trans_size(nested), max_map_size);
+
+		/*
+		 * Be careful that if the mapping size falls between
+		 * two host sizes, take the smallest of the two.
+		 */
+		if (max_map_size >= PMD_SIZE && max_map_size < PUD_SIZE)
+			max_map_size = PMD_SIZE;
+		else if (max_map_size >= PAGE_SIZE && max_map_size < PMD_SIZE)
+			max_map_size = PAGE_SIZE;
+
+		force_pte = (max_map_size == PAGE_SIZE);
+		vma_pagesize = min(vma_pagesize, (long)max_map_size);
+	}
+
 	if (vma_pagesize == PMD_SIZE || vma_pagesize == PUD_SIZE)
 		fault_ipa &= ~(vma_pagesize - 1);
 
-	gfn = fault_ipa >> PAGE_SHIFT;
+	gfn = ipa >> PAGE_SHIFT;
 	mte_allowed = kvm_vma_mte_allowed(vma);
 
 	/* Don't use the VMA after the unlock -- it may have vanished */
@@ -1499,8 +1529,10 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
  */
 int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
 {
+	struct kvm_s2_trans nested_trans, *nested = NULL;
 	unsigned long fault_status;
-	phys_addr_t fault_ipa;
+	phys_addr_t fault_ipa; /* The address we faulted on */
+	phys_addr_t ipa; /* Always the IPA in the L1 guest phys space */
 	struct kvm_memory_slot *memslot;
 	unsigned long hva;
 	bool is_iabt, write_fault, writable;
@@ -1509,7 +1541,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
 
 	fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
 
-	fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
+	ipa = fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
 	is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
 
 	if (fault_status == ESR_ELx_FSC_FAULT) {
@@ -1550,6 +1582,12 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
 	if (fault_status != ESR_ELx_FSC_FAULT &&
 	    fault_status != ESR_ELx_FSC_PERM &&
 	    fault_status != ESR_ELx_FSC_ACCESS) {
+		/*
+		 * We must never see an address size fault on shadow stage 2
+		 * page table walk, because we would have injected an addr
+		 * size fault when we walked the nested s2 page and not
+		 * create the shadow entry.
+		 */
 		kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
 			kvm_vcpu_trap_get_class(vcpu),
 			(unsigned long)kvm_vcpu_trap_get_fault(vcpu),
@@ -1559,7 +1597,37 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
 
 	idx = srcu_read_lock(&vcpu->kvm->srcu);
 
-	gfn = fault_ipa >> PAGE_SHIFT;
+	/*
+	 * We may have faulted on a shadow stage 2 page table if we are
+	 * running a nested guest.  In this case, we have to resolve the L2
+	 * IPA to the L1 IPA first, before knowing what kind of memory should
+	 * back the L1 IPA.
+	 *
+	 * If the shadow stage 2 page table walk faults, then we simply inject
+	 * this to the guest and carry on.
+	 */
+	if (kvm_is_shadow_s2_fault(vcpu)) {
+		u32 esr;
+
+		ret = kvm_walk_nested_s2(vcpu, fault_ipa, &nested_trans);
+		if (ret) {
+			esr = kvm_s2_trans_esr(&nested_trans);
+			kvm_inject_s2_fault(vcpu, esr);
+			goto out_unlock;
+		}
+
+		ret = kvm_s2_handle_perm_fault(vcpu, &nested_trans);
+		if (ret) {
+			esr = kvm_s2_trans_esr(&nested_trans);
+			kvm_inject_s2_fault(vcpu, esr);
+			goto out_unlock;
+		}
+
+		ipa = kvm_s2_trans_output(&nested_trans);
+		nested = &nested_trans;
+	}
+
+	gfn = ipa >> PAGE_SHIFT;
 	memslot = gfn_to_memslot(vcpu->kvm, gfn);
 	hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
 	write_fault = kvm_is_write_fault(vcpu);
@@ -1603,13 +1671,13 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
 		 * faulting VA. This is always 12 bits, irrespective
 		 * of the page size.
 		 */
-		fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
-		ret = io_mem_abort(vcpu, fault_ipa);
+		ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
+		ret = io_mem_abort(vcpu, ipa);
 		goto out_unlock;
 	}
 
 	/* Userspace should not be able to register out-of-bounds IPAs */
-	VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->arch.hw_mmu));
+	VM_BUG_ON(ipa >= kvm_phys_size(vcpu->arch.hw_mmu));
 
 	if (fault_status == ESR_ELx_FSC_ACCESS) {
 		handle_access_fault(vcpu, fault_ipa);
@@ -1617,7 +1685,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
 		goto out_unlock;
 	}
 
-	ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
+	ret = user_mem_abort(vcpu, fault_ipa, nested,
+			     memslot, hva, fault_status);
 	if (ret == 0)
 		ret = 1;
 out:
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 9ab06055cd35..2538e0c632f2 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -111,6 +111,15 @@ static u32 compute_fsc(int level, u32 fsc)
 	return fsc | (level & 0x3);
 }
 
+static int esr_s2_fault(struct kvm_vcpu *vcpu, int level, u32 fsc)
+{
+	u32 esr;
+
+	esr = kvm_vcpu_get_esr(vcpu) & ~ESR_ELx_FSC;
+	esr |= compute_fsc(level, fsc);
+	return esr;
+}
+
 static int check_base_s2_limits(struct s2_walk_info *wi,
 				int level, int input_size, int stride)
 {
@@ -475,6 +484,45 @@ void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu)
 	}
 }
 
+/*
+ * Returns non-zero if permission fault is handled by injecting it to the next
+ * level hypervisor.
+ */
+int kvm_s2_handle_perm_fault(struct kvm_vcpu *vcpu, struct kvm_s2_trans *trans)
+{
+	unsigned long fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
+	bool forward_fault = false;
+
+	trans->esr = 0;
+
+	if (fault_status != ESR_ELx_FSC_PERM)
+		return 0;
+
+	if (kvm_vcpu_trap_is_iabt(vcpu)) {
+		forward_fault = (trans->upper_attr & BIT(54));
+	} else {
+		bool write_fault = kvm_is_write_fault(vcpu);
+
+		forward_fault = ((write_fault && !trans->writable) ||
+				 (!write_fault && !trans->readable));
+	}
+
+	if (forward_fault) {
+		trans->esr = esr_s2_fault(vcpu, trans->level, ESR_ELx_FSC_PERM);
+		return 1;
+	}
+
+	return 0;
+}
+
+int kvm_inject_s2_fault(struct kvm_vcpu *vcpu, u64 esr_el2)
+{
+	vcpu_write_sys_reg(vcpu, vcpu->arch.fault.far_el2, FAR_EL2);
+	vcpu_write_sys_reg(vcpu, vcpu->arch.fault.hpfar_el2, HPFAR_EL2);
+
+	return kvm_inject_nested_sync(vcpu, esr_el2);
+}
+
 void kvm_arch_flush_shadow_all(struct kvm *kvm)
 {
 	int i;
-- 
2.34.1


WARNING: multiple messages have this Message-ID (diff)
From: Marc Zyngier <maz@kernel.org>
To: kvmarm@lists.linux.dev, kvm@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org
Cc: Alexandru Elisei <alexandru.elisei@arm.com>,
	Andre Przywara <andre.przywara@arm.com>,
	Chase Conklin <chase.conklin@arm.com>,
	Christoffer Dall <christoffer.dall@arm.com>,
	Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com>,
	Darren Hart <darren@os.amperecomputing.com>,
	Jintack Lim <jintack@cs.columbia.edu>,
	Russell King <rmk+kernel@armlinux.org.uk>,
	Miguel Luis <miguel.luis@oracle.com>,
	James Morse <james.morse@arm.com>,
	Suzuki K Poulose <suzuki.poulose@arm.com>,
	Oliver Upton <oliver.upton@linux.dev>,
	Zenghui Yu <yuzenghui@huawei.com>
Subject: [PATCH v10 27/59] KVM: arm64: nv: Handle shadow stage 2 page faults
Date: Mon, 15 May 2023 18:30:31 +0100	[thread overview]
Message-ID: <20230515173103.1017669-28-maz@kernel.org> (raw)
In-Reply-To: <20230515173103.1017669-1-maz@kernel.org>

If we are faulting on a shadow stage 2 translation, we first walk the
guest hypervisor's stage 2 page table to see if it has a mapping. If
not, we inject a stage 2 page fault to the virtual EL2. Otherwise, we
create a mapping in the shadow stage 2 page table.

Note that we have to deal with two IPAs when we got a shadow stage 2
page fault. One is the address we faulted on, and is in the L2 guest
phys space. The other is from the guest stage-2 page table walk, and is
in the L1 guest phys space.  To differentiate them, we rename variables
so that fault_ipa is used for the former and ipa is used for the latter.

Co-developed-by: Christoffer Dall <christoffer.dall@linaro.org>
Co-developed-by: Jintack Lim <jintack.lim@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Jintack Lim <jintack.lim@linaro.org>
[maz: rewrote this multiple times...]
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/include/asm/kvm_emulate.h |  6 ++
 arch/arm64/include/asm/kvm_nested.h  | 19 ++++++
 arch/arm64/kvm/mmu.c                 | 89 ++++++++++++++++++++++++----
 arch/arm64/kvm/nested.c              | 48 +++++++++++++++
 4 files changed, 152 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index c64980d33c8b..f4ae0c61fa1c 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -652,4 +652,10 @@ static inline bool vcpu_has_feature(struct kvm_vcpu *vcpu, int feature)
 	return test_bit(feature, vcpu->arch.features);
 }
 
+static inline bool kvm_is_shadow_s2_fault(struct kvm_vcpu *vcpu)
+{
+	return (vcpu->arch.hw_mmu != &vcpu->kvm->arch.mmu &&
+		vcpu->arch.hw_mmu->nested_stage2_enabled);
+}
+
 #endif /* __ARM64_KVM_EMULATE_H__ */
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index 5306f7ca5e07..d2cee4faa387 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -76,8 +76,27 @@ struct kvm_s2_trans {
 	u64 upper_attr;
 };
 
+static inline phys_addr_t kvm_s2_trans_output(struct kvm_s2_trans *trans)
+{
+	return trans->output;
+}
+
+static inline unsigned long kvm_s2_trans_size(struct kvm_s2_trans *trans)
+{
+	return trans->block_size;
+}
+
+static inline u32 kvm_s2_trans_esr(struct kvm_s2_trans *trans)
+{
+	return trans->esr;
+}
+
 extern int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
 			      struct kvm_s2_trans *result);
+extern int kvm_s2_handle_perm_fault(struct kvm_vcpu *vcpu,
+				    struct kvm_s2_trans *trans);
+extern int kvm_inject_s2_fault(struct kvm_vcpu *vcpu, u64 esr_el2);
+int handle_wfx_nested(struct kvm_vcpu *vcpu, bool is_wfe);
 
 extern bool forward_smc_trap(struct kvm_vcpu *vcpu);
 extern bool __check_nv_sr_forward(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 3268936eec40..bc6f004d4ffb 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1253,14 +1253,16 @@ static bool kvm_vma_mte_allowed(struct vm_area_struct *vma)
 }
 
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
-			  struct kvm_memory_slot *memslot, unsigned long hva,
-			  unsigned long fault_status)
+			  struct kvm_s2_trans *nested,
+			  struct kvm_memory_slot *memslot,
+			  unsigned long hva, unsigned long fault_status)
 {
 	int ret = 0;
 	bool write_fault, writable, force_pte = false;
 	bool exec_fault, mte_allowed;
 	bool device = false;
 	unsigned long mmu_seq;
+	phys_addr_t ipa = fault_ipa;
 	struct kvm *kvm = vcpu->kvm;
 	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
 	struct vm_area_struct *vma;
@@ -1345,10 +1347,38 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	}
 
 	vma_pagesize = 1UL << vma_shift;
+
+	if (nested) {
+		unsigned long max_map_size;
+
+		max_map_size = force_pte ? PUD_SIZE : PAGE_SIZE;
+
+		ipa = kvm_s2_trans_output(nested);
+
+		/*
+		 * If we're about to create a shadow stage 2 entry, then we
+		 * can only create a block mapping if the guest stage 2 page
+		 * table uses at least as big a mapping.
+		 */
+		max_map_size = min(kvm_s2_trans_size(nested), max_map_size);
+
+		/*
+		 * Be careful that if the mapping size falls between
+		 * two host sizes, take the smallest of the two.
+		 */
+		if (max_map_size >= PMD_SIZE && max_map_size < PUD_SIZE)
+			max_map_size = PMD_SIZE;
+		else if (max_map_size >= PAGE_SIZE && max_map_size < PMD_SIZE)
+			max_map_size = PAGE_SIZE;
+
+		force_pte = (max_map_size == PAGE_SIZE);
+		vma_pagesize = min(vma_pagesize, (long)max_map_size);
+	}
+
 	if (vma_pagesize == PMD_SIZE || vma_pagesize == PUD_SIZE)
 		fault_ipa &= ~(vma_pagesize - 1);
 
-	gfn = fault_ipa >> PAGE_SHIFT;
+	gfn = ipa >> PAGE_SHIFT;
 	mte_allowed = kvm_vma_mte_allowed(vma);
 
 	/* Don't use the VMA after the unlock -- it may have vanished */
@@ -1499,8 +1529,10 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
  */
 int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
 {
+	struct kvm_s2_trans nested_trans, *nested = NULL;
 	unsigned long fault_status;
-	phys_addr_t fault_ipa;
+	phys_addr_t fault_ipa; /* The address we faulted on */
+	phys_addr_t ipa; /* Always the IPA in the L1 guest phys space */
 	struct kvm_memory_slot *memslot;
 	unsigned long hva;
 	bool is_iabt, write_fault, writable;
@@ -1509,7 +1541,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
 
 	fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
 
-	fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
+	ipa = fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
 	is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
 
 	if (fault_status == ESR_ELx_FSC_FAULT) {
@@ -1550,6 +1582,12 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
 	if (fault_status != ESR_ELx_FSC_FAULT &&
 	    fault_status != ESR_ELx_FSC_PERM &&
 	    fault_status != ESR_ELx_FSC_ACCESS) {
+		/*
+		 * We must never see an address size fault on shadow stage 2
+		 * page table walk, because we would have injected an addr
+		 * size fault when we walked the nested s2 page and not
+		 * create the shadow entry.
+		 */
 		kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
 			kvm_vcpu_trap_get_class(vcpu),
 			(unsigned long)kvm_vcpu_trap_get_fault(vcpu),
@@ -1559,7 +1597,37 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
 
 	idx = srcu_read_lock(&vcpu->kvm->srcu);
 
-	gfn = fault_ipa >> PAGE_SHIFT;
+	/*
+	 * We may have faulted on a shadow stage 2 page table if we are
+	 * running a nested guest.  In this case, we have to resolve the L2
+	 * IPA to the L1 IPA first, before knowing what kind of memory should
+	 * back the L1 IPA.
+	 *
+	 * If the shadow stage 2 page table walk faults, then we simply inject
+	 * this to the guest and carry on.
+	 */
+	if (kvm_is_shadow_s2_fault(vcpu)) {
+		u32 esr;
+
+		ret = kvm_walk_nested_s2(vcpu, fault_ipa, &nested_trans);
+		if (ret) {
+			esr = kvm_s2_trans_esr(&nested_trans);
+			kvm_inject_s2_fault(vcpu, esr);
+			goto out_unlock;
+		}
+
+		ret = kvm_s2_handle_perm_fault(vcpu, &nested_trans);
+		if (ret) {
+			esr = kvm_s2_trans_esr(&nested_trans);
+			kvm_inject_s2_fault(vcpu, esr);
+			goto out_unlock;
+		}
+
+		ipa = kvm_s2_trans_output(&nested_trans);
+		nested = &nested_trans;
+	}
+
+	gfn = ipa >> PAGE_SHIFT;
 	memslot = gfn_to_memslot(vcpu->kvm, gfn);
 	hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
 	write_fault = kvm_is_write_fault(vcpu);
@@ -1603,13 +1671,13 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
 		 * faulting VA. This is always 12 bits, irrespective
 		 * of the page size.
 		 */
-		fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
-		ret = io_mem_abort(vcpu, fault_ipa);
+		ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
+		ret = io_mem_abort(vcpu, ipa);
 		goto out_unlock;
 	}
 
 	/* Userspace should not be able to register out-of-bounds IPAs */
-	VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->arch.hw_mmu));
+	VM_BUG_ON(ipa >= kvm_phys_size(vcpu->arch.hw_mmu));
 
 	if (fault_status == ESR_ELx_FSC_ACCESS) {
 		handle_access_fault(vcpu, fault_ipa);
@@ -1617,7 +1685,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
 		goto out_unlock;
 	}
 
-	ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
+	ret = user_mem_abort(vcpu, fault_ipa, nested,
+			     memslot, hva, fault_status);
 	if (ret == 0)
 		ret = 1;
 out:
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 9ab06055cd35..2538e0c632f2 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -111,6 +111,15 @@ static u32 compute_fsc(int level, u32 fsc)
 	return fsc | (level & 0x3);
 }
 
+static int esr_s2_fault(struct kvm_vcpu *vcpu, int level, u32 fsc)
+{
+	u32 esr;
+
+	esr = kvm_vcpu_get_esr(vcpu) & ~ESR_ELx_FSC;
+	esr |= compute_fsc(level, fsc);
+	return esr;
+}
+
 static int check_base_s2_limits(struct s2_walk_info *wi,
 				int level, int input_size, int stride)
 {
@@ -475,6 +484,45 @@ void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu)
 	}
 }
 
+/*
+ * Returns non-zero if permission fault is handled by injecting it to the next
+ * level hypervisor.
+ */
+int kvm_s2_handle_perm_fault(struct kvm_vcpu *vcpu, struct kvm_s2_trans *trans)
+{
+	unsigned long fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
+	bool forward_fault = false;
+
+	trans->esr = 0;
+
+	if (fault_status != ESR_ELx_FSC_PERM)
+		return 0;
+
+	if (kvm_vcpu_trap_is_iabt(vcpu)) {
+		forward_fault = (trans->upper_attr & BIT(54));
+	} else {
+		bool write_fault = kvm_is_write_fault(vcpu);
+
+		forward_fault = ((write_fault && !trans->writable) ||
+				 (!write_fault && !trans->readable));
+	}
+
+	if (forward_fault) {
+		trans->esr = esr_s2_fault(vcpu, trans->level, ESR_ELx_FSC_PERM);
+		return 1;
+	}
+
+	return 0;
+}
+
+int kvm_inject_s2_fault(struct kvm_vcpu *vcpu, u64 esr_el2)
+{
+	vcpu_write_sys_reg(vcpu, vcpu->arch.fault.far_el2, FAR_EL2);
+	vcpu_write_sys_reg(vcpu, vcpu->arch.fault.hpfar_el2, HPFAR_EL2);
+
+	return kvm_inject_nested_sync(vcpu, esr_el2);
+}
+
 void kvm_arch_flush_shadow_all(struct kvm *kvm)
 {
 	int i;
-- 
2.34.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

  parent reply	other threads:[~2023-05-15 17:46 UTC|newest]

Thread overview: 189+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-15 17:30 [PATCH v10 00/59] KVM: arm64: ARMv8.3/8.4 Nested Virtualization support Marc Zyngier
2023-05-15 17:30 ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 01/59] KVM: arm64: Move VTCR_EL2 into struct s2_mmu Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 02/59] arm64: Add missing Set/Way CMO encodings Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 03/59] arm64: Add missing VA " Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-06-05 17:46   ` Eric Auger
2023-06-05 17:46     ` Eric Auger
2023-05-15 17:30 ` [PATCH v10 04/59] arm64: Add missing ERXMISCx_EL1 encodings Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-06-05 17:47   ` Eric Auger
2023-06-05 17:47     ` Eric Auger
2023-05-15 17:30 ` [PATCH v10 05/59] arm64: Add missing DC ZVA/GVA/GZVA encodings Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-06-05 17:47   ` Eric Auger
2023-06-05 17:47     ` Eric Auger
2023-05-15 17:30 ` [PATCH v10 06/59] arm64: Add TLBI operation encodings Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-06-06  9:33   ` Eric Auger
2023-06-06  9:33     ` Eric Auger
2023-05-15 17:30 ` [PATCH v10 07/59] arm64: Add AT " Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-06-14 15:08   ` Eric Auger
2023-06-14 15:08     ` Eric Auger
2023-05-15 17:30 ` [PATCH v10 08/59] KVM: arm64: Add missing HCR_EL2 trap bits Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-06-14 15:08   ` Eric Auger
2023-06-14 15:08     ` Eric Auger
2023-05-15 17:30 ` [PATCH v10 09/59] KVM: arm64: nv: Add trap forwarding infrastructure Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-07-13 14:29   ` Eric Auger
2023-07-13 14:29     ` Eric Auger
2023-07-14 10:53     ` Marc Zyngier
2023-07-14 10:53       ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 10/59] KVM: arm64: nv: Add trap forwarding for HCR_EL2 Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 11/59] KVM: arm64: nv: Expose FEAT_EVT to nested guests Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 12/59] KVM: arm64: nv: Add trap forwarding for MDCR_EL2 Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 13/59] KVM: arm64: nv: Add trap forwarding for CNTHCTL_EL2 Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 14/59] KVM: arm64: nv: Add non-VHE-EL2->EL1 translation helpers Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 15/59] KVM: arm64: nv: Handle virtual EL2 registers in vcpu_read/write_sys_reg() Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 16/59] KVM: arm64: nv: Handle SPSR_EL2 specially Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 17/59] KVM: arm64: nv: Handle HCR_EL2.E2H specially Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 18/59] KVM: arm64: nv: Save/Restore vEL2 sysregs Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 19/59] KVM: arm64: nv: Trap EL1 VM register accesses in virtual EL2 Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 20/59] KVM: arm64: nv: Trap CPACR_EL1 access " Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 21/59] KVM: arm64: nv: Respect virtual HCR_EL2.TWX setting Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 22/59] KVM: arm64: nv: Respect virtual CPTR_EL2.{TFP,FPEN} settings Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 23/59] KVM: arm64: nv: Respect virtual HCR_EL2.{NV,TSC) settings Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 24/59] KVM: arm64: nv: Configure HCR_EL2 for nested virtualization Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 25/59] KVM: arm64: nv: Support multiple nested Stage-2 mmu structures Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 26/59] KVM: arm64: nv: Implement nested Stage-2 page table walk logic Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` Marc Zyngier [this message]
2023-05-15 17:30   ` [PATCH v10 27/59] KVM: arm64: nv: Handle shadow stage 2 page faults Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 28/59] KVM: arm64: nv: Restrict S2 RD/WR permissions to match the guest's Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 29/59] KVM: arm64: nv: Unmap/flush shadow stage 2 page tables Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-09-14 13:10   ` Ganapatrao Kulkarni
2023-09-14 13:10     ` Ganapatrao Kulkarni
2023-09-14 13:37     ` Marc Zyngier
2023-09-14 13:37       ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 30/59] KVM: arm64: nv: Set a handler for the system instruction traps Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 31/59] KVM: arm64: nv: Trap and emulate AT instructions from virtual EL2 Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 32/59] KVM: arm64: nv: Trap and emulate TLBI " Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 33/59] KVM: arm64: nv: Fold guest's HCR_EL2 configuration into the host's Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 34/59] KVM: arm64: nv: Hide RAS from nested guests Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 35/59] KVM: arm64: nv: Add handling of EL2-specific timer registers Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 36/59] KVM: arm64: nv: Load timer before the GIC Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 37/59] KVM: arm64: nv: Nested GICv3 Support Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 38/59] KVM: arm64: nv: Don't load the GICv4 context on entering a nested guest Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 39/59] KVM: arm64: nv: vgic: Emulate the HW bit in software Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 40/59] KVM: arm64: nv: vgic: Allow userland to set VGIC maintenance IRQ Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 41/59] KVM: arm64: nv: Implement maintenance interrupt forwarding Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 42/59] KVM: arm64: nv: Deal with broken VGIC on maintenance interrupt delivery Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 43/59] KVM: arm64: nv: Allow userspace to request KVM_ARM_VCPU_NESTED_VIRT Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 44/59] KVM: arm64: nv: Add handling of FEAT_TTL TLB invalidation Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 45/59] KVM: arm64: nv: Invalidate TLBs based on shadow S2 TTL-like information Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 46/59] KVM: arm64: nv: Tag shadow S2 entries with nested level Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 47/59] KVM: arm64: nv: Add include containing the VNCR_EL2 offsets Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 48/59] KVM: arm64: nv: Map VNCR-capable registers to a separate page Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 49/59] KVM: arm64: nv: Move nested vgic state into the sysreg file Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 50/59] KVM: arm64: Add FEAT_NV2 cpu feature Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 51/59] KVM: arm64: nv: Sync nested timer state with FEAT_NV2 Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 52/59] KVM: arm64: nv: Fold GICv3 host trapping requirements into guest setup Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 53/59] KVM: arm64: nv: Publish emulated timer interrupt state in the in-memory state Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 54/59] KVM: arm64: nv: Allocate VNCR page when required Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:30 ` [PATCH v10 55/59] KVM: arm64: nv: Enable ARMv8.4-NV support Marc Zyngier
2023-05-15 17:30   ` Marc Zyngier
2023-05-15 17:31 ` [PATCH v10 56/59] KVM: arm64: nv: Fast-track 'InHost' exception returns Marc Zyngier
2023-05-15 17:31   ` Marc Zyngier
2023-05-15 17:31 ` [PATCH v10 57/59] KVM: arm64: nv: Fast-track EL1 TLBIs for VHE guests Marc Zyngier
2023-05-15 17:31   ` Marc Zyngier
2023-05-15 17:31 ` [PATCH v10 58/59] KVM: arm64: nv: Use FEAT_ECV to trap access to EL0 timers Marc Zyngier
2023-05-15 17:31   ` Marc Zyngier
2023-05-15 17:31 ` [PATCH v10 59/59] KVM: arm64: nv: Accelerate EL0 timer read accesses when FEAT_ECV is on Marc Zyngier
2023-05-15 17:31   ` Marc Zyngier
2023-05-16 16:53 ` [PATCH v10 00/59] KVM: arm64: ARMv8.3/8.4 Nested Virtualization support Eric Auger
2023-05-16 16:53   ` Eric Auger
2023-05-16 18:47   ` Marc Zyngier
2023-05-16 18:47     ` Marc Zyngier
2023-05-16 20:28   ` Marc Zyngier
2023-05-16 20:28     ` Marc Zyngier
2023-05-17  8:59     ` Eric Auger
2023-05-17  8:59       ` Eric Auger
2023-05-17 14:12       ` Marc Zyngier
2023-05-17 14:12         ` Marc Zyngier
2023-06-06  9:33         ` Eric Auger
2023-06-06  9:33           ` Eric Auger
2023-06-06 16:30           ` Marc Zyngier
2023-06-06 16:30             ` Marc Zyngier
2023-06-07 16:39             ` Eric Auger
2023-06-07 16:39               ` Eric Auger
2023-06-06 17:52           ` Miguel Luis
2023-06-06 17:52             ` Miguel Luis
2023-06-07 16:40             ` Eric Auger
2023-06-07 16:40               ` Eric Auger
2023-06-10  8:25               ` Miguel Luis
2023-06-10  8:25                 ` Miguel Luis
2023-06-11 16:28                 ` Eric Auger
2023-06-05 11:28 ` Eric Auger
2023-06-05 11:28   ` Eric Auger
2023-06-06  7:30   ` Marc Zyngier
2023-06-06  7:30     ` Marc Zyngier
2023-06-06  9:29     ` Eric Auger
2023-06-06  9:29       ` Eric Auger
2023-06-06 16:22       ` Marc Zyngier
2023-06-06 16:22         ` Marc Zyngier
2023-06-07 16:30         ` Eric Auger
2023-06-07 16:30           ` Eric Auger
2023-06-28  6:45 ` Ganapatrao Kulkarni
2023-06-28  6:45   ` Ganapatrao Kulkarni
2023-06-29  7:03   ` Marc Zyngier
2023-06-29  7:03     ` Marc Zyngier
2023-07-04 12:31     ` Ganapatrao Kulkarni
2023-07-04 12:31       ` Ganapatrao Kulkarni
2023-07-07  9:46       ` Ganapatrao Kulkarni
2023-07-07  9:46         ` Ganapatrao Kulkarni
2023-07-11 11:56         ` Ganapatrao Kulkarni
2023-07-11 11:56           ` Ganapatrao Kulkarni
2023-07-11 12:30           ` Marc Zyngier
2023-07-11 12:30             ` Marc Zyngier
2023-07-10 12:56     ` Miguel Luis
2023-07-10 12:56       ` Miguel Luis
2023-07-18 10:29       ` Miguel Luis
2023-07-18 10:29         ` Miguel Luis

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230515173103.1017669-28-maz@kernel.org \
    --to=maz@kernel.org \
    --cc=alexandru.elisei@arm.com \
    --cc=andre.przywara@arm.com \
    --cc=chase.conklin@arm.com \
    --cc=christoffer.dall@arm.com \
    --cc=darren@os.amperecomputing.com \
    --cc=gankulkarni@os.amperecomputing.com \
    --cc=james.morse@arm.com \
    --cc=jintack@cs.columbia.edu \
    --cc=kvm@vger.kernel.org \
    --cc=kvmarm@lists.linux.dev \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=miguel.luis@oracle.com \
    --cc=oliver.upton@linux.dev \
    --cc=rmk+kernel@armlinux.org.uk \
    --cc=suzuki.poulose@arm.com \
    --cc=yuzenghui@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.