From: Paolo Bonzini <pbonzini@redhat.com>
To: linux-kernel@vger.kernel.org, kvm@vger.kernel.org
Cc: jon@nutanix.com, mtosatti@redhat.com
Subject: [PATCH 05/22] KVM: x86/mmu: introduce struct kvm_pagewalk
Date: Mon, 11 May 2026 11:06:31 -0400
Message-ID: <20260511150648.685374-6-pbonzini@redhat.com>
In-Reply-To: <20260511150648.685374-1-pbonzini@redhat.com>

In preparation for separating walking and building of page tables,
introduce a dummy struct kvm_pagewalk and pass it around instead of
its containing kvm_mmu to functions that do not build the page tables.
Outermost functions retrieve the mmu via container_of, while internal
functions can pass around the struct kvm_pagewalk pointer. x86.c is
still (mostly) oblivious to the existence of struct kvm_pagewalk.  There
are only a couple of exceptions for now, handled here for simplicity,
but the plan is for KVM code to use struct kvm_pagewalk whenever it
deals with guest page tables.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
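Note (illustration only, not part of the patch): the following is a minimal,
standalone sketch of the embedding-plus-container_of idiom this series relies
on.  Callees take only the inner struct, and helpers that still need the
outer structure recover it with container_of().  All names here (pagewalk,
mmu_like, walk_level, root_level) are invented for the example.

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/*
 * Empty for now; walk-related fields would migrate here over time.
 * (Empty structs are a GNU C extension, as used in the kernel.)
 */
struct pagewalk {
};

struct mmu_like {
	struct pagewalk w;	/* embedded, like the 'w' member added to kvm_mmu below */
	int root_level;
};

/* "Walk-only" helper: takes the inner type, recovers the outer on demand. */
static int walk_level(struct pagewalk *w)
{
	struct mmu_like *mmu = container_of(w, struct mmu_like, w);

	return mmu->root_level;
}

int main(void)
{
	struct mmu_like mmu = { .root_level = 4 };

	/* Callers pass &mmu.w, mirroring the &vcpu->arch.mmu->w changes below. */
	printf("level = %d\n", walk_level(&mmu.w));
	return 0;
}
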
arch/x86/include/asm/kvm_host.h | 7 +++++-
arch/x86/kvm/hyperv.c | 2 +-
arch/x86/kvm/mmu.h | 19 +++++++++------
arch/x86/kvm/mmu/mmu.c | 2 +-
arch/x86/kvm/mmu/paging_tmpl.h | 43 +++++++++++++++++++--------------
arch/x86/kvm/x86.c | 4 +--
6 files changed, 46 insertions(+), 31 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2c8096ceb072..a7f89e832a52 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -473,10 +473,15 @@ struct kvm_page_fault;
/*
* x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit,
- * and 2-level 32-bit). The kvm_mmu structure abstracts the details of the
+ * and 2-level 32-bit). The kvm_pagewalk structure abstracts the details of the
* current mmu mode.
*/
+struct kvm_pagewalk {
+};
+
struct kvm_mmu {
+ struct kvm_pagewalk w;
+
unsigned long (*get_guest_pgd)(struct kvm_vcpu *vcpu);
u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index);
int (*page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index a374fd64a76a..a6e7d6f85409 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -2041,7 +2041,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
* read with kvm_read_guest().
*/
if (!hc->fast) {
- hc->ingpa = kvm_translate_gpa(vcpu, vcpu->arch.walk_mmu, hc->ingpa,
+ hc->ingpa = kvm_translate_gpa(vcpu, &vcpu->arch.walk_mmu->w, hc->ingpa,
PFERR_GUEST_FINAL_MASK, NULL, 0);
if (unlikely(hc->ingpa == INVALID_GPA))
return HV_STATUS_INVALID_HYPERCALL_INPUT;
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index ddf4e467c071..3f8ac193a1e6 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -169,21 +169,22 @@ static inline void kvm_mmu_load_pgd(struct kvm_vcpu *vcpu)
}
static inline void kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu,
- struct kvm_mmu *mmu)
+ struct kvm_pagewalk *w)
{
/*
* When EPT is enabled, KVM may passthrough CR0.WP to the guest, i.e.
- * @mmu's snapshot of CR0.WP and thus all related paging metadata may
+ * @w's snapshot of CR0.WP and thus all related paging metadata may
* be stale. Refresh CR0.WP and the metadata on-demand when checking
* for permission faults. Exempt nested MMUs, i.e. MMUs for shadowing
* nEPT and nNPT, as CR0.WP is ignored in both cases. Note, KVM does
* need to refresh nested_mmu, a.k.a. the walker used to translate L2
* GVAs to GPAs, as that "MMU" needs to honor L2's CR0.WP.
*/
- if (!tdp_enabled || mmu == &vcpu->arch.guest_mmu)
+ if (!tdp_enabled || w == &vcpu->arch.guest_mmu.w)
return;
- __kvm_mmu_refresh_passthrough_bits(vcpu, mmu);
+ __kvm_mmu_refresh_passthrough_bits(vcpu,
+ container_of(w, struct kvm_mmu, w));
}
/*
@@ -194,10 +195,12 @@ static inline void kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu,
* Return zero if the access does not fault; return the page fault error code
* if the access faults.
*/
-static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_pagewalk *w,
unsigned pte_access, unsigned pte_pkey,
u64 access)
{
+ struct kvm_mmu *mmu = container_of(w, struct kvm_mmu, w);
+
/* strip nested paging fault error codes */
unsigned int pfec = access;
unsigned long rflags = kvm_x86_call(get_rflags)(vcpu);
@@ -220,7 +223,7 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
u32 errcode = PFERR_PRESENT_MASK;
bool fault;
- kvm_mmu_refresh_passthrough_bits(vcpu, mmu);
+ kvm_mmu_refresh_passthrough_bits(vcpu, w);
fault = (mmu->permissions[index] >> pte_access) & 1;
@@ -301,12 +304,12 @@ static inline void kvm_update_page_stats(struct kvm *kvm, int level, int count)
}
static inline gpa_t kvm_translate_gpa(struct kvm_vcpu *vcpu,
- struct kvm_mmu *mmu,
+ struct kvm_pagewalk *w,
gpa_t gpa, u64 access,
struct x86_exception *exception,
u64 pte_access)
{
- if (mmu != &vcpu->arch.nested_mmu)
+ if (w != &vcpu->arch.nested_mmu.w)
return gpa;
return kvm_x86_ops.nested_ops->translate_nested_gpa(vcpu, gpa, access,
exception,
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index f8aa7eda661e..42b7397a1845 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4354,7 +4354,7 @@ static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
* user-mode address if CR0.PG=0. Therefore *include* ACC_USER_MASK in
* the last argument to kvm_translate_gpa (which NPT does not use).
*/
- return kvm_translate_gpa(vcpu, mmu, vaddr, access | PFERR_GUEST_FINAL_MASK,
+ return kvm_translate_gpa(vcpu, &mmu->w, vaddr, access | PFERR_GUEST_FINAL_MASK,
exception, ACC_ALL);
}
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 07100bbfc270..ab1aebf2f73c 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -106,9 +106,10 @@ static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl)
return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT;
}
-static inline void FNAME(protect_clean_gpte)(struct kvm_mmu *mmu, unsigned *access,
+static inline void FNAME(protect_clean_gpte)(struct kvm_pagewalk *w, unsigned *access,
unsigned gpte)
{
+ struct kvm_mmu __maybe_unused *mmu = container_of(w, struct kvm_mmu, w);
unsigned mask;
/* dirty bit is not supported, so no need to track it */
@@ -147,8 +148,10 @@ static bool FNAME(is_bad_mt_xwr)(struct rsvd_bits_validate *rsvd_check, u64 gpte
#endif
}
-static bool FNAME(is_rsvd_bits_set)(struct kvm_mmu *mmu, u64 gpte, int level)
+static bool FNAME(is_rsvd_bits_set)(struct kvm_pagewalk *w, u64 gpte, int level)
{
+ struct kvm_mmu *mmu = container_of(w, struct kvm_mmu, w);
+
return __is_rsvd_bits_set(&mmu->guest_rsvd_check, gpte, level) ||
FNAME(is_bad_mt_xwr)(&mmu->guest_rsvd_check, gpte);
}
@@ -165,7 +168,7 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
!(gpte & PT_GUEST_ACCESSED_MASK))
goto no_present;
- if (FNAME(is_rsvd_bits_set)(vcpu->arch.mmu, gpte, PG_LEVEL_4K))
+ if (FNAME(is_rsvd_bits_set)(&vcpu->arch.mmu->w, gpte, PG_LEVEL_4K))
goto no_present;
return false;
@@ -206,10 +209,11 @@ static inline unsigned FNAME(gpte_access)(u64 gpte)
}
static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
- struct kvm_mmu *mmu,
+ struct kvm_pagewalk *w,
struct guest_walker *walker,
gpa_t addr, int write_fault)
{
+ struct kvm_mmu __maybe_unused *mmu = container_of(w, struct kvm_mmu, w);
unsigned level, index;
pt_element_t pte, orig_pte;
pt_element_t __user *ptep_user;
@@ -278,9 +282,11 @@ static inline unsigned FNAME(gpte_pkeys)(struct kvm_vcpu *vcpu, u64 gpte)
return pkeys;
}
-static inline bool FNAME(is_last_gpte)(struct kvm_mmu *mmu,
+static inline bool FNAME(is_last_gpte)(struct kvm_pagewalk *w,
unsigned int level, unsigned int gpte)
{
+ struct kvm_mmu __maybe_unused *mmu = container_of(w, struct kvm_mmu, w);
+
/*
* For EPT and PAE paging (both variants), bit 7 is either reserved at
* all level or indicates a huge page (ignoring CR3/EPTP). In either
@@ -311,9 +317,10 @@ static inline bool FNAME(is_last_gpte)(struct kvm_mmu *mmu,
* Fetch a guest pte for a guest virtual address, or for an L2's GPA.
*/
static int FNAME(walk_addr_generic)(struct guest_walker *walker,
- struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+ struct kvm_vcpu *vcpu, struct kvm_pagewalk *w,
gpa_t addr, u64 access)
{
+ struct kvm_mmu *mmu = container_of(w, struct kvm_mmu, w);
int ret;
pt_element_t pte;
pt_element_t __user *ptep_user;
@@ -387,7 +394,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
walker->table_gfn[walker->level - 1] = table_gfn;
walker->pte_gpa[walker->level - 1] = pte_gpa;
- real_gpa = kvm_translate_gpa(vcpu, mmu, gfn_to_gpa(table_gfn),
+ real_gpa = kvm_translate_gpa(vcpu, w, gfn_to_gpa(table_gfn),
nested_access | PFERR_GUEST_PAGE_MASK,
&walker->fault, 0);
@@ -429,7 +436,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
if (unlikely(!FNAME(is_present_gpte)(mmu, pte)))
goto error;
- if (unlikely(FNAME(is_rsvd_bits_set)(mmu, pte, walker->level))) {
+ if (unlikely(FNAME(is_rsvd_bits_set)(w, pte, walker->level))) {
errcode = PFERR_RSVD_MASK | PFERR_PRESENT_MASK;
goto error;
}
@@ -438,14 +445,14 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
/* Convert to ACC_*_MASK flags for struct guest_walker. */
walker->pt_access[walker->level - 1] = FNAME(gpte_access)(pt_access ^ walk_nx_mask);
- } while (!FNAME(is_last_gpte)(mmu, walker->level, pte));
+ } while (!FNAME(is_last_gpte)(w, walker->level, pte));
pte_pkey = FNAME(gpte_pkeys)(vcpu, pte);
accessed_dirty = have_ad ? pte_access & PT_GUEST_ACCESSED_MASK : 0;
/* Convert to ACC_*_MASK flags for struct guest_walker. */
walker->pte_access = FNAME(gpte_access)(pte_access ^ walk_nx_mask);
- errcode = permission_fault(vcpu, mmu, walker->pte_access, pte_pkey, access);
+ errcode = permission_fault(vcpu, w, walker->pte_access, pte_pkey, access);
if (unlikely(errcode))
goto error;
@@ -457,7 +464,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
gfn += pse36_gfn_delta(pte);
#endif
- real_gpa = kvm_translate_gpa(vcpu, mmu, gfn_to_gpa(gfn),
+ real_gpa = kvm_translate_gpa(vcpu, w, gfn_to_gpa(gfn),
access | PFERR_GUEST_FINAL_MASK,
&walker->fault, walker->pte_access);
if (real_gpa == INVALID_GPA)
@@ -466,7 +473,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
walker->gfn = real_gpa >> PAGE_SHIFT;
if (!write_fault)
- FNAME(protect_clean_gpte)(mmu, &walker->pte_access, pte);
+ FNAME(protect_clean_gpte)(w, &walker->pte_access, pte);
else
/*
* On a write fault, fold the dirty bit into accessed_dirty.
@@ -477,7 +484,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
(PT_GUEST_DIRTY_SHIFT - PT_GUEST_ACCESSED_SHIFT);
if (unlikely(!accessed_dirty)) {
- ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker,
+ ret = FNAME(update_accessed_dirty_bits)(vcpu, w, walker,
addr, write_fault);
if (unlikely(ret < 0))
goto error;
@@ -539,7 +546,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
}
#endif
walker->fault.address = addr;
- walker->fault.nested_page_fault = mmu != vcpu->arch.walk_mmu;
+ walker->fault.nested_page_fault = w != &vcpu->arch.walk_mmu->w;
walker->fault.async_page_fault = false;
trace_kvm_mmu_walker_error(walker->fault.error_code);
@@ -549,7 +556,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
static int FNAME(walk_addr)(struct guest_walker *walker,
struct kvm_vcpu *vcpu, gpa_t addr, u64 access)
{
- return FNAME(walk_addr_generic)(walker, vcpu, vcpu->arch.mmu, addr,
+ return FNAME(walk_addr_generic)(walker, vcpu, &vcpu->arch.mmu->w, addr,
access);
}
@@ -565,7 +572,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
gfn = gpte_to_gfn(gpte);
pte_access = sp->role.access & FNAME(gpte_access)(gpte);
- FNAME(protect_clean_gpte)(vcpu->arch.mmu, &pte_access, gpte);
+ FNAME(protect_clean_gpte)(&vcpu->arch.mmu->w, &pte_access, gpte);
return kvm_mmu_prefetch_sptes(vcpu, gfn, spte, 1, pte_access);
}
@@ -895,7 +902,7 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
WARN_ON_ONCE((addr >> 32) && mmu == vcpu->arch.walk_mmu);
#endif
- r = FNAME(walk_addr_generic)(&walker, vcpu, mmu, addr, access);
+ r = FNAME(walk_addr_generic)(&walker, vcpu, &mmu->w, addr, access);
if (r) {
gpa = gfn_to_gpa(walker.gfn);
@@ -945,7 +952,7 @@ static int FNAME(sync_spte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, int
gfn = gpte_to_gfn(gpte);
pte_access = sp->role.access;
pte_access &= FNAME(gpte_access)(gpte);
- FNAME(protect_clean_gpte)(vcpu->arch.mmu, &pte_access, gpte);
+ FNAME(protect_clean_gpte)(&vcpu->arch.mmu->w, &pte_access, gpte);
if (sync_mmio_spte(vcpu, &sp->spt[i], gfn, pte_access))
return 0;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index efe54a9c887a..fca4c4adaa43 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1071,7 +1071,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
* If the MMU is nested, CR3 holds an L2 GPA and needs to be translated
* to an L1 GPA.
*/
- real_gpa = kvm_translate_gpa(vcpu, mmu, gfn_to_gpa(pdpt_gfn),
+ real_gpa = kvm_translate_gpa(vcpu, &mmu->w, gfn_to_gpa(pdpt_gfn),
PFERR_USER_MASK | PFERR_WRITE_MASK |
PFERR_GUEST_PAGE_MASK, NULL, 0);
if (real_gpa == INVALID_GPA)
@@ -8090,7 +8090,7 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
* shadow page table for L2 guest.
*/
if (vcpu_match_mmio_gva(vcpu, gva) && (!is_paging(vcpu) ||
- !permission_fault(vcpu, vcpu->arch.walk_mmu,
+ !permission_fault(vcpu, &vcpu->arch.walk_mmu->w,
vcpu->arch.mmio_access, 0, access))) {
*gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
(gva & (PAGE_SIZE - 1));
--
2.52.0
Thread overview: 23+ messages
2026-05-11 15:06 [RFC PATCH 00/22] KVM: apply chainsaw to struct kvm_mmu Paolo Bonzini
2026-05-11 15:06 ` [PATCH 01/22] KVM: x86: remove nested_mmu from mmu_is_nested() Paolo Bonzini
2026-05-11 15:06 ` [PATCH 02/22] KVM: x86: move pdptrs out of the MMU Paolo Bonzini
2026-05-11 15:06 ` [PATCH 03/22] KVM: x86: check that kvm_handle_invpcid is only invoked with shadow paging Paolo Bonzini
2026-05-11 15:06 ` [PATCH 04/22] KVM: x86/hyperv: remove unnecessary mmu_is_nested() check Paolo Bonzini
2026-05-11 15:06 ` Paolo Bonzini [this message]
2026-05-11 15:06 ` [PATCH 06/22] KVM: x86/mmu: move get_guest_pgd to struct kvm_pagewalk Paolo Bonzini
2026-05-11 15:06 ` [PATCH 07/22] KVM: x86/mmu: move gva_to_gpa " Paolo Bonzini
2026-05-11 15:06 ` [PATCH 08/22] KVM: x86/mmu: move get_pdptr " Paolo Bonzini
2026-05-11 15:06 ` [PATCH 09/22] KVM: x86/mmu: move inject_page_fault " Paolo Bonzini
2026-05-11 15:06 ` [PATCH 10/22] KVM: x86/mmu: move CPU-related fields " Paolo Bonzini
2026-05-11 15:06 ` [PATCH 11/22] KVM: x86/mmu: change CPU-role accessor fields to take " Paolo Bonzini
2026-05-11 15:06 ` [PATCH 12/22] KVM: x86/mmu: move remaining permission fields to " Paolo Bonzini
2026-05-11 15:06 ` [PATCH 13/22] KVM: x86/mmu: pass struct kvm_pagewalk to kvm_mmu_invalidate_addr Paolo Bonzini
2026-05-11 15:06 ` [PATCH 14/22] KVM: x86/mmu: change walk_mmu to struct kvm_pagewalk Paolo Bonzini
2026-05-11 15:06 ` [PATCH 15/22] KVM: x86/mmu: change nested_mmu.w to nested_cpu_walk Paolo Bonzini
2026-05-11 15:06 ` [PATCH 16/22] KVM: x86/mmu: make cpu_walk a value Paolo Bonzini
2026-05-11 15:06 ` [PATCH 17/22] KVM: x86/mmu: pull struct kvm_pagewalk out of struct kvm_mmu Paolo Bonzini
2026-05-11 15:06 ` [PATCH 18/22] KVM: x86/mmu: cleanup functions that initialize shadow MMU Paolo Bonzini
2026-05-11 15:06 ` [PATCH 19/22] KVM: x86/mmu: pull page format to a new struct Paolo Bonzini
2026-05-11 15:06 ` [PATCH 20/22] KVM: x86/mmu: merge struct rsvd_bits_validate into struct kvm_page_format Paolo Bonzini
2026-05-11 15:06 ` [PATCH 21/22] KVM: x86/mmu: parameterize update_permission_bitmask() Paolo Bonzini
2026-05-11 15:06 ` [PATCH 22/22] KVM: x86/mmu: use kvm_page_format to test SPTEs Paolo Bonzini