From: James Houghton <jthoughton@google.com>
To: Sean Christopherson <seanjc@google.com>,
Paolo Bonzini <pbonzini@redhat.com>
Cc: David Matlack <dmatlack@google.com>,
David Rientjes <rientjes@google.com>,
James Houghton <jthoughton@google.com>,
Marc Zyngier <maz@kernel.org>,
Oliver Upton <oliver.upton@linux.dev>,
Wei Xu <weixugc@google.com>, Yu Zhao <yuzhao@google.com>,
Axel Rasmussen <axelrasmussen@google.com>,
kvm@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH v9 08/11] KVM: x86/mmu: Refactor low level rmap helpers to prep for walking w/o mmu_lock
Date: Tue, 4 Feb 2025 00:40:35 +0000 [thread overview]
Message-ID: <20250204004038.1680123-9-jthoughton@google.com> (raw)
In-Reply-To: <20250204004038.1680123-1-jthoughton@google.com>
From: Sean Christopherson <seanjc@google.com>
Refactor the pte_list and rmap code to always read and write rmap_head->val
exactly once, e.g. by collecting changes in a local variable and then
propagating those changes back to rmap_head->val as appropriate. This will
allow implementing a per-rmap rwlock (of sorts) by adding a LOCKED bit into
the rmap value alongside the MANY bit.
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: James Houghton <jthoughton@google.com>
---
arch/x86/kvm/mmu/mmu.c | 83 +++++++++++++++++++++++++-----------------
1 file changed, 50 insertions(+), 33 deletions(-)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index f75779d8d6fd..a24cf8ddca7f 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -864,21 +864,24 @@ static struct kvm_memory_slot *gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu
static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
struct kvm_rmap_head *rmap_head)
{
+ unsigned long old_val, new_val;
struct pte_list_desc *desc;
int count = 0;
- if (!rmap_head->val) {
- rmap_head->val = (unsigned long)spte;
- } else if (!(rmap_head->val & KVM_RMAP_MANY)) {
+ old_val = rmap_head->val;
+
+ if (!old_val) {
+ new_val = (unsigned long)spte;
+ } else if (!(old_val & KVM_RMAP_MANY)) {
desc = kvm_mmu_memory_cache_alloc(cache);
- desc->sptes[0] = (u64 *)rmap_head->val;
+ desc->sptes[0] = (u64 *)old_val;
desc->sptes[1] = spte;
desc->spte_count = 2;
desc->tail_count = 0;
- rmap_head->val = (unsigned long)desc | KVM_RMAP_MANY;
+ new_val = (unsigned long)desc | KVM_RMAP_MANY;
++count;
} else {
- desc = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY);
+ desc = (struct pte_list_desc *)(old_val & ~KVM_RMAP_MANY);
count = desc->tail_count + desc->spte_count;
/*
@@ -887,21 +890,25 @@ static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
*/
if (desc->spte_count == PTE_LIST_EXT) {
desc = kvm_mmu_memory_cache_alloc(cache);
- desc->more = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY);
+ desc->more = (struct pte_list_desc *)(old_val & ~KVM_RMAP_MANY);
desc->spte_count = 0;
desc->tail_count = count;
- rmap_head->val = (unsigned long)desc | KVM_RMAP_MANY;
+ new_val = (unsigned long)desc | KVM_RMAP_MANY;
+ } else {
+ new_val = old_val;
}
desc->sptes[desc->spte_count++] = spte;
}
+
+ rmap_head->val = new_val;
+
return count;
}
-static void pte_list_desc_remove_entry(struct kvm *kvm,
- struct kvm_rmap_head *rmap_head,
+static void pte_list_desc_remove_entry(struct kvm *kvm, unsigned long *rmap_val,
struct pte_list_desc *desc, int i)
{
- struct pte_list_desc *head_desc = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY);
+ struct pte_list_desc *head_desc = (struct pte_list_desc *)(*rmap_val & ~KVM_RMAP_MANY);
int j = head_desc->spte_count - 1;
/*
@@ -928,9 +935,9 @@ static void pte_list_desc_remove_entry(struct kvm *kvm,
* head at the next descriptor, i.e. the new head.
*/
if (!head_desc->more)
- rmap_head->val = 0;
+ *rmap_val = 0;
else
- rmap_head->val = (unsigned long)head_desc->more | KVM_RMAP_MANY;
+ *rmap_val = (unsigned long)head_desc->more | KVM_RMAP_MANY;
mmu_free_pte_list_desc(head_desc);
}
@@ -938,24 +945,26 @@ static void pte_list_remove(struct kvm *kvm, u64 *spte,
struct kvm_rmap_head *rmap_head)
{
struct pte_list_desc *desc;
+ unsigned long rmap_val;
int i;
- if (KVM_BUG_ON_DATA_CORRUPTION(!rmap_head->val, kvm))
- return;
+ rmap_val = rmap_head->val;
+ if (KVM_BUG_ON_DATA_CORRUPTION(!rmap_val, kvm))
+ goto out;
- if (!(rmap_head->val & KVM_RMAP_MANY)) {
- if (KVM_BUG_ON_DATA_CORRUPTION((u64 *)rmap_head->val != spte, kvm))
- return;
+ if (!(rmap_val & KVM_RMAP_MANY)) {
+ if (KVM_BUG_ON_DATA_CORRUPTION((u64 *)rmap_val != spte, kvm))
+ goto out;
- rmap_head->val = 0;
+ rmap_val = 0;
} else {
- desc = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY);
+ desc = (struct pte_list_desc *)(rmap_val & ~KVM_RMAP_MANY);
while (desc) {
for (i = 0; i < desc->spte_count; ++i) {
if (desc->sptes[i] == spte) {
- pte_list_desc_remove_entry(kvm, rmap_head,
+ pte_list_desc_remove_entry(kvm, &rmap_val,
desc, i);
- return;
+ goto out;
}
}
desc = desc->more;
@@ -963,6 +972,9 @@ static void pte_list_remove(struct kvm *kvm, u64 *spte,
KVM_BUG_ON_DATA_CORRUPTION(true, kvm);
}
+
+out:
+ rmap_head->val = rmap_val;
}
static void kvm_zap_one_rmap_spte(struct kvm *kvm,
@@ -977,17 +989,19 @@ static bool kvm_zap_all_rmap_sptes(struct kvm *kvm,
struct kvm_rmap_head *rmap_head)
{
struct pte_list_desc *desc, *next;
+ unsigned long rmap_val;
int i;
- if (!rmap_head->val)
+ rmap_val = rmap_head->val;
+ if (!rmap_val)
return false;
- if (!(rmap_head->val & KVM_RMAP_MANY)) {
- mmu_spte_clear_track_bits(kvm, (u64 *)rmap_head->val);
+ if (!(rmap_val & KVM_RMAP_MANY)) {
+ mmu_spte_clear_track_bits(kvm, (u64 *)rmap_val);
goto out;
}
- desc = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY);
+ desc = (struct pte_list_desc *)(rmap_val & ~KVM_RMAP_MANY);
for (; desc; desc = next) {
for (i = 0; i < desc->spte_count; i++)
@@ -1003,14 +1017,15 @@ static bool kvm_zap_all_rmap_sptes(struct kvm *kvm,
unsigned int pte_list_count(struct kvm_rmap_head *rmap_head)
{
+ unsigned long rmap_val = rmap_head->val;
struct pte_list_desc *desc;
- if (!rmap_head->val)
+ if (!rmap_val)
return 0;
- else if (!(rmap_head->val & KVM_RMAP_MANY))
+ else if (!(rmap_val & KVM_RMAP_MANY))
return 1;
- desc = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY);
+ desc = (struct pte_list_desc *)(rmap_val & ~KVM_RMAP_MANY);
return desc->tail_count + desc->spte_count;
}
@@ -1053,6 +1068,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
*/
struct rmap_iterator {
/* private fields */
+ struct rmap_head *head;
struct pte_list_desc *desc; /* holds the sptep if not NULL */
int pos; /* index of the sptep */
};
@@ -1067,18 +1083,19 @@ struct rmap_iterator {
static u64 *rmap_get_first(struct kvm_rmap_head *rmap_head,
struct rmap_iterator *iter)
{
+ unsigned long rmap_val = rmap_head->val;
u64 *sptep;
- if (!rmap_head->val)
+ if (!rmap_val)
return NULL;
- if (!(rmap_head->val & KVM_RMAP_MANY)) {
+ if (!(rmap_val & KVM_RMAP_MANY)) {
iter->desc = NULL;
- sptep = (u64 *)rmap_head->val;
+ sptep = (u64 *)rmap_val;
goto out;
}
- iter->desc = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY);
+ iter->desc = (struct pte_list_desc *)(rmap_val & ~KVM_RMAP_MANY);
iter->pos = 0;
sptep = iter->desc->sptes[iter->pos];
out:
--
2.48.1.362.g079036d154-goog
next prev parent reply other threads:[~2025-02-04 0:41 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-02-04 0:40 [PATCH v9 00/11] KVM: x86/mmu: Age sptes locklessly James Houghton
2025-02-04 0:40 ` [PATCH v9 01/11] KVM: Rename kvm_handle_hva_range() James Houghton
2025-02-04 0:40 ` [PATCH v9 02/11] KVM: Add lockless memslot walk to KVM James Houghton
2025-02-14 15:26 ` Sean Christopherson
2025-02-14 19:27 ` James Houghton
2025-02-04 0:40 ` [PATCH v9 03/11] KVM: x86/mmu: Factor out spte atomic bit clearing routine James Houghton
2025-02-04 0:40 ` [PATCH v9 04/11] KVM: x86/mmu: Relax locking for kvm_test_age_gfn() and kvm_age_gfn() James Houghton
2025-02-12 22:07 ` Sean Christopherson
2025-02-13 0:25 ` James Houghton
2025-02-04 0:40 ` [PATCH v9 05/11] KVM: x86/mmu: Rename spte_has_volatile_bits() to spte_needs_atomic_write() James Houghton
2025-02-12 22:09 ` Sean Christopherson
2025-02-13 0:26 ` James Houghton
2025-02-04 0:40 ` [PATCH v9 06/11] KVM: x86/mmu: Skip shadow MMU test_young if TDP MMU reports page as young James Houghton
2025-02-04 0:40 ` [PATCH v9 07/11] KVM: x86/mmu: Only check gfn age in shadow MMU if indirect_shadow_pages > 0 James Houghton
2025-02-04 0:40 ` James Houghton [this message]
2025-02-04 0:40 ` [PATCH v9 09/11] KVM: x86/mmu: Add infrastructure to allow walking rmaps outside of mmu_lock James Houghton
2025-02-04 0:40 ` [PATCH v9 10/11] KVM: x86/mmu: Add support for lockless walks of rmap SPTEs James Houghton
2025-02-04 0:40 ` [PATCH v9 11/11] KVM: x86/mmu: Support rmap walks without holding mmu_lock when aging gfns James Houghton
2025-02-15 0:50 ` [PATCH v9 00/11] KVM: x86/mmu: Age sptes locklessly Sean Christopherson
2025-02-18 19:29 ` Maxim Levitsky
2025-02-19 1:13 ` Sean Christopherson
2025-02-19 18:56 ` James Houghton
2025-02-25 22:00 ` Maxim Levitsky
2025-02-26 0:50 ` Sean Christopherson
2025-02-26 18:39 ` Maxim Levitsky
2025-02-27 0:51 ` Sean Christopherson
2025-02-27 1:54 ` Maxim Levitsky
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250204004038.1680123-9-jthoughton@google.com \
--to=jthoughton@google.com \
--cc=axelrasmussen@google.com \
--cc=dmatlack@google.com \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=maz@kernel.org \
--cc=oliver.upton@linux.dev \
--cc=pbonzini@redhat.com \
--cc=rientjes@google.com \
--cc=seanjc@google.com \
--cc=weixugc@google.com \
--cc=yuzhao@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox