linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Yu Zhao <yuzhao@google.com>
To: Andrew Morton <akpm@linux-foundation.org>,
	Paolo Bonzini <pbonzini@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>,
	Michael Larabel <michael@michaellarabel.com>,
	kvmarm@lists.linux.dev,  kvm@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org,
	 linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	 linuxppc-dev@lists.ozlabs.org, x86@kernel.org,
	linux-mm@google.com,  Yu Zhao <yuzhao@google.com>
Subject: [PATCH mm-unstable v1 4/5] kvm/powerpc: add kvm_arch_test_clear_young()
Date: Thu, 16 Feb 2023 21:12:29 -0700	[thread overview]
Message-ID: <20230217041230.2417228-5-yuzhao@google.com> (raw)
In-Reply-To: <20230217041230.2417228-1-yuzhao@google.com>

This patch adds kvm_arch_test_clear_young() for the vast majority of
VMs that are not nested and run on hardware with Radix MMU enabled.

It relies on two techniques, RCU and cmpxchg, to safely test and clear
the accessed bit without taking the MMU lock. The former protects KVM
page tables from being freed while the latter clears the accessed bit
atomically against both the hardware and other software page table
walkers.

Signed-off-by: Yu Zhao <yuzhao@google.com>
---
 arch/powerpc/include/asm/kvm_host.h    | 18 ++++++
 arch/powerpc/include/asm/kvm_ppc.h     | 14 +----
 arch/powerpc/kvm/book3s.c              |  7 +++
 arch/powerpc/kvm/book3s.h              |  2 +
 arch/powerpc/kvm/book3s_64_mmu_radix.c | 78 +++++++++++++++++++++++++-
 arch/powerpc/kvm/book3s_hv.c           | 10 ++--
 6 files changed, 110 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index caea15dcb91d..996850029ce0 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -886,4 +886,22 @@ static inline void kvm_arch_exit(void) {}
 static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
 
+static inline int kvmppc_radix_possible(void)
+{
+	return cpu_has_feature(CPU_FTR_ARCH_300) && radix_enabled();
+}
+
+static inline bool kvmhv_on_pseries(void)
+{
+	return IS_ENABLED(CONFIG_PPC_PSERIES) && !cpu_has_feature(CPU_FTR_HVMODE);
+}
+
+/* see the comments on the generic kvm_arch_has_test_clear_young() */
+#define kvm_arch_has_test_clear_young kvm_arch_has_test_clear_young
+static inline bool kvm_arch_has_test_clear_young(void)
+{
+	return IS_ENABLED(CONFIG_KVM) && IS_ENABLED(CONFIG_KVM_BOOK3S_HV_POSSIBLE) &&
+	       kvmppc_radix_possible() && !kvmhv_on_pseries();
+}
+
 #endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index eae9619b6190..0bb772fc12b1 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -277,6 +277,8 @@ struct kvmppc_ops {
 	bool (*unmap_gfn_range)(struct kvm *kvm, struct kvm_gfn_range *range);
 	bool (*age_gfn)(struct kvm *kvm, struct kvm_gfn_range *range);
 	bool (*test_age_gfn)(struct kvm *kvm, struct kvm_gfn_range *range);
+	bool (*test_clear_young)(struct kvm *kvm, struct kvm_gfn_range *range,
+				 gfn_t lsb_gfn, unsigned long *bitmap);
 	bool (*set_spte_gfn)(struct kvm *kvm, struct kvm_gfn_range *range);
 	void (*free_memslot)(struct kvm_memory_slot *slot);
 	int (*init_vm)(struct kvm *kvm);
@@ -580,18 +582,6 @@ static inline bool kvm_hv_mode_active(void)		{ return false; }
 
 #endif
 
-#ifdef CONFIG_PPC_PSERIES
-static inline bool kvmhv_on_pseries(void)
-{
-	return !cpu_has_feature(CPU_FTR_HVMODE);
-}
-#else
-static inline bool kvmhv_on_pseries(void)
-{
-	return false;
-}
-#endif
-
 #ifdef CONFIG_KVM_XICS
 static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
 {
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 6d525285dbe8..f4cf330e3e81 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -877,6 +877,13 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 	return kvm->arch.kvm_ops->test_age_gfn(kvm, range);
 }
 
+bool kvm_arch_test_clear_young(struct kvm *kvm, struct kvm_gfn_range *range,
+			       gfn_t lsb_gfn, unsigned long *bitmap)
+{
+	return kvm->arch.kvm_ops->test_clear_young &&
+	       kvm->arch.kvm_ops->test_clear_young(kvm, range, lsb_gfn, bitmap);
+}
+
 bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
 	return kvm->arch.kvm_ops->set_spte_gfn(kvm, range);
diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h
index 58391b4b32ed..fe9cac423817 100644
--- a/arch/powerpc/kvm/book3s.h
+++ b/arch/powerpc/kvm/book3s.h
@@ -12,6 +12,8 @@ extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
 extern bool kvm_unmap_gfn_range_hv(struct kvm *kvm, struct kvm_gfn_range *range);
 extern bool kvm_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range);
 extern bool kvm_test_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range);
+extern bool kvmhv_test_clear_young(struct kvm *kvm, struct kvm_gfn_range *range,
+				   gfn_t lsb_gfn, unsigned long *bitmap);
 extern bool kvm_set_spte_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range);
 
 extern int kvmppc_mmu_init_pr(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 9d3743ca16d5..8476646c554c 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -1083,6 +1083,78 @@ bool kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
 	return ref;
 }
 
+bool kvmhv_test_clear_young(struct kvm *kvm, struct kvm_gfn_range *range,
+			    gfn_t lsb_gfn, unsigned long *bitmap)
+{
+	bool success;
+	gfn_t gfn = range->start;
+
+	if (WARN_ON_ONCE(!kvm_arch_has_test_clear_young()))
+		return false;
+
+	/*
+	 * This function relies on two techniques, RCU and cmpxchg, to safely
+	 * test and clear the accessed bit without taking the MMU lock. The
+	 * former protects KVM page tables from being freed while the latter
+	 * clears the accessed bit atomically against both the hardware and
+	 * other software page table walkers.
+	 */
+	rcu_read_lock();
+
+	success = kvm_is_radix(kvm);
+	if (!success)
+		goto unlock;
+
+	/*
+	 * case 1:  this function          kvmppc_switch_mmu_to_hpt()
+	 *
+	 *          rcu_read_lock()
+	 *          test kvm_is_radix()    kvm->arch.radix = 0
+	 *          use kvm->arch.pgtable
+	 *          rcu_read_unlock()
+	 *                                 synchronize_rcu()
+	 *                                 kvmppc_free_radix()
+	 *
+	 *
+	 * case 2:  this function          kvmppc_switch_mmu_to_radix()
+	 *
+	 *                                 kvmppc_init_vm_radix()
+	 *                                 smp_wmb()
+	 *          test kvm_is_radix()    kvm->arch.radix = 1
+	 *          smp_rmb()
+	 *          use kvm->arch.pgtable
+	 */
+	smp_rmb();
+
+	while (gfn < range->end) {
+		pte_t *ptep;
+		pte_t old, new;
+		unsigned int shift;
+
+		ptep = find_kvm_secondary_pte_unlocked(kvm, gfn * PAGE_SIZE, &shift);
+		if (!ptep)
+			goto next;
+
+		VM_WARN_ON_ONCE(!page_count(virt_to_page(ptep)));
+
+		old = READ_ONCE(*ptep);
+		if (!pte_present(old) || !pte_young(old))
+			goto next;
+
+		new = pte_mkold(old);
+
+		/* see the comments on the generic kvm_arch_has_test_clear_young() */
+		if (__test_and_change_bit(lsb_gfn - gfn, bitmap))
+			pte_xchg(ptep, old, new);
+next:
+		gfn += shift ? BIT(shift - PAGE_SHIFT) : 1;
+	}
+unlock:
+	rcu_read_unlock();
+
+	return success;
+}
+
 /* Returns the number of PAGE_SIZE pages that are dirty */
 static int kvm_radix_test_clear_dirty(struct kvm *kvm,
 				struct kvm_memory_slot *memslot, int pagenum)
@@ -1464,13 +1536,15 @@ int kvmppc_radix_init(void)
 {
 	unsigned long size = sizeof(void *) << RADIX_PTE_INDEX_SIZE;
 
-	kvm_pte_cache = kmem_cache_create("kvm-pte", size, size, 0, pte_ctor);
+	kvm_pte_cache = kmem_cache_create("kvm-pte", size, size,
+					  SLAB_TYPESAFE_BY_RCU, pte_ctor);
 	if (!kvm_pte_cache)
 		return -ENOMEM;
 
 	size = sizeof(void *) << RADIX_PMD_INDEX_SIZE;
 
-	kvm_pmd_cache = kmem_cache_create("kvm-pmd", size, size, 0, pmd_ctor);
+	kvm_pmd_cache = kmem_cache_create("kvm-pmd", size, size,
+					  SLAB_TYPESAFE_BY_RCU, pmd_ctor);
 	if (!kvm_pmd_cache) {
 		kmem_cache_destroy(kvm_pte_cache);
 		return -ENOMEM;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 6ba68dd6190b..17b415661282 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -5242,6 +5242,8 @@ int kvmppc_switch_mmu_to_hpt(struct kvm *kvm)
 	spin_lock(&kvm->mmu_lock);
 	kvm->arch.radix = 0;
 	spin_unlock(&kvm->mmu_lock);
+	/* see the comments in kvmhv_test_clear_young() */
+	synchronize_rcu();
 	kvmppc_free_radix(kvm);
 
 	lpcr = LPCR_VPM1;
@@ -5266,6 +5268,8 @@ int kvmppc_switch_mmu_to_radix(struct kvm *kvm)
 	if (err)
 		return err;
 	kvmppc_rmap_reset(kvm);
+	/* see the comments in kvmhv_test_clear_young() */
+	smp_wmb();
 	/* Mutual exclusion with kvm_unmap_gfn_range etc. */
 	spin_lock(&kvm->mmu_lock);
 	kvm->arch.radix = 1;
@@ -6165,6 +6169,7 @@ static struct kvmppc_ops kvm_ops_hv = {
 	.unmap_gfn_range = kvm_unmap_gfn_range_hv,
 	.age_gfn = kvm_age_gfn_hv,
 	.test_age_gfn = kvm_test_age_gfn_hv,
+	.test_clear_young = kvmhv_test_clear_young,
 	.set_spte_gfn = kvm_set_spte_gfn_hv,
 	.free_memslot = kvmppc_core_free_memslot_hv,
 	.init_vm =  kvmppc_core_init_vm_hv,
@@ -6225,11 +6230,6 @@ static int kvm_init_subcore_bitmap(void)
 	return 0;
 }
 
-static int kvmppc_radix_possible(void)
-{
-	return cpu_has_feature(CPU_FTR_ARCH_300) && radix_enabled();
-}
-
 static int kvmppc_book3s_init_hv(void)
 {
 	int r;
-- 
2.39.2.637.g21b0678d19-goog



  parent reply	other threads:[~2023-02-17  4:12 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-02-17  4:12 [PATCH mm-unstable v1 0/5] mm/kvm: lockless accessed bit harvest Yu Zhao
2023-02-17  4:12 ` [PATCH mm-unstable v1 1/5] mm/kvm: add mmu_notifier_test_clear_young() Yu Zhao
2023-02-23 17:13   ` Sean Christopherson
2023-02-23 17:40     ` Yu Zhao
2023-02-23 21:12       ` Sean Christopherson
2023-02-23 17:34   ` Sean Christopherson
2023-02-17  4:12 ` [PATCH mm-unstable v1 2/5] kvm/x86: add kvm_arch_test_clear_young() Yu Zhao
2023-02-17  4:19   ` Yu Zhao
2023-02-17 16:27   ` Sean Christopherson
2023-02-23  5:58     ` Yu Zhao
2023-02-23 17:09       ` Sean Christopherson
2023-02-23 17:27         ` Yu Zhao
2023-02-23 18:23           ` Sean Christopherson
2023-02-23 18:34             ` Yu Zhao
2023-02-23 18:47               ` Sean Christopherson
2023-02-23 19:02                 ` Yu Zhao
2023-02-23 19:21                   ` Sean Christopherson
2023-02-23 19:25                     ` Yu Zhao
2023-02-17  4:12 ` [PATCH mm-unstable v1 3/5] kvm/arm64: " Yu Zhao
2023-02-17  4:21   ` Yu Zhao
2023-02-17  9:00     ` Marc Zyngier
2023-02-23  3:58       ` Yu Zhao
2023-02-23  9:03         ` Marc Zyngier
2023-02-23  9:18           ` Yu Zhao
2023-02-17  9:09   ` Oliver Upton
2023-02-17 16:00     ` Sean Christopherson
2023-02-23  5:25       ` Yu Zhao
2023-02-23  4:43     ` Yu Zhao
2023-02-17  4:12 ` Yu Zhao [this message]
2023-02-17  4:24   ` [PATCH mm-unstable v1 4/5] kvm/powerpc: " Yu Zhao
2023-02-17  4:12 ` [PATCH mm-unstable v1 5/5] mm: multi-gen LRU: use mmu_notifier_test_clear_young() Yu Zhao
2023-02-23 17:43   ` Sean Christopherson
2023-02-23 18:08     ` Yu Zhao
2023-02-23 19:11       ` Sean Christopherson
2023-02-23 19:36         ` Yu Zhao
2023-02-23 19:58           ` Sean Christopherson
2023-02-23 20:09             ` Yu Zhao
2023-02-23 20:28               ` Sean Christopherson
2023-02-23 20:48                 ` Yu Zhao

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230217041230.2417228-5-yuzhao@google.com \
    --to=yuzhao@google.com \
    --cc=akpm@linux-foundation.org \
    --cc=corbet@lwn.net \
    --cc=kvm@vger.kernel.org \
    --cc=kvmarm@lists.linux.dev \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@google.com \
    --cc=linux-mm@kvack.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=michael@michaellarabel.com \
    --cc=pbonzini@redhat.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).