public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: Rick Edgecombe <rick.p.edgecombe@intel.com>
To: seanjc@google.com, pbonzini@redhat.com, yan.y.zhao@intel.com,
	kai.huang@intel.com, kvm@vger.kernel.org, kas@kernel.org
Cc: linux-kernel@vger.kernel.org, x86@kernel.org,
	dave.hansen@intel.com, rick.p.edgecombe@intel.com
Subject: [PATCH 08/17] KVM: TDX: Drop kvm_x86_ops.link_external_spt(), use .set_external_spte() for all
Date: Fri, 27 Mar 2026 13:14:12 -0700	[thread overview]
Message-ID: <20260327201421.2824383-9-rick.p.edgecombe@intel.com> (raw)
In-Reply-To: <20260327201421.2824383-1-rick.p.edgecombe@intel.com>

From: Sean Christopherson <seanjc@google.com>

Drop the dedicated .link_external_spt() for linking non-leaf S-EPT pages,
and instead funnel everything through .set_external_spte().  Using separate
hooks doesn't help prevent TDP MMU details from bleeding into TDX, and vice
versa; to the contrary, dedicated callbacks will result in _more_ pollution
when hugepage support is added, e.g. will require the TDP MMU to know
details about the splitting rules for TDX that aren't all that relevant to
the TDP MMU.

Ideally, KVM would provide a single pair of hooks to set S-EPT entries,
one hook for setting SPTEs under write-lock and another for setting SPTEs
under read-lock (e.g. to ensure the entire operation is "atomic", to allow
for failure, etc.).  Sadly, TDX's requirement that all child S-EPT entries
are removed before the parent makes that impractical: the TDP MMU
deliberately prunes non-leaf SPTEs and _then_ processes their children, thus
making it quite important for the TDP MMU to differentiate between zapping
leaf and non-leaf S-EPT entries.

However, that's the _only_ case that's truly special, and even that case
could be shoehorned into a single hook; it just wouldn't be a net
positive.

Signed-off-by: Sean Christopherson <seanjc@google.com>
[add in trivial feedback]
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
---
v6:
 - rename external_spt->sept_pt (Rick, Yan)
---
 arch/x86/include/asm/kvm-x86-ops.h |  1 -
 arch/x86/include/asm/kvm_host.h    |  3 --
 arch/x86/kvm/mmu/tdp_mmu.c         | 31 +--------------
 arch/x86/kvm/vmx/tdx.c             | 61 ++++++++++++++++++++----------
 4 files changed, 43 insertions(+), 53 deletions(-)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 31d5c5d58ae6..bced6d938702 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -95,7 +95,6 @@ KVM_X86_OP_OPTIONAL_RET0(set_tss_addr)
 KVM_X86_OP_OPTIONAL_RET0(set_identity_map_addr)
 KVM_X86_OP_OPTIONAL_RET0(get_mt_mask)
 KVM_X86_OP(load_mmu_pgd)
-KVM_X86_OP_OPTIONAL_RET0(link_external_spt)
 KVM_X86_OP_OPTIONAL_RET0(set_external_spte)
 KVM_X86_OP_OPTIONAL_RET0(free_external_spt)
 KVM_X86_OP_OPTIONAL(remove_external_spte)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d3bdc9828133..1139bd89f0cf 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1876,9 +1876,6 @@ struct kvm_x86_ops {
 	void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, hpa_t root_hpa,
 			     int root_level);
 
-	/* Update external mapping with page table link. */
-	int (*link_external_spt)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
-				void *external_spt);
 	/* Update the external page table from spte getting set. */
 	int (*set_external_spte)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
 				 u64 mirror_spte);
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index db16e81b9701..6dc08fe22841 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -494,42 +494,13 @@ static void handle_removed_pt(struct kvm *kvm, tdp_ptep_t pt, bool shared)
 	call_rcu(&sp->rcu_head, tdp_mmu_free_sp_rcu_callback);
 }
 
-static void *get_external_spt(gfn_t gfn, u64 new_spte, int level)
-{
-	if (is_shadow_present_pte(new_spte) && !is_last_spte(new_spte, level)) {
-		struct kvm_mmu_page *sp = spte_to_child_sp(new_spte);
-
-		WARN_ON_ONCE(sp->role.level + 1 != level);
-		WARN_ON_ONCE(sp->gfn != gfn);
-		return sp->external_spt;
-	}
-
-	return NULL;
-}
-
 static int __must_check set_external_spte_present(struct kvm *kvm,
 						  gfn_t gfn, u64 old_spte,
 						  u64 new_spte, int level)
 {
-	bool is_present = is_shadow_present_pte(new_spte);
-	bool is_leaf = is_present && is_last_spte(new_spte, level);
-	int ret = 0;
-
 	lockdep_assert_held(&kvm->mmu_lock);
 
-	/*
-	 * Use different call to either set up middle level
-	 * external page table, or leaf.
-	 */
-	if (is_leaf) {
-		ret = kvm_x86_call(set_external_spte)(kvm, gfn, level, new_spte);
-	} else {
-		void *external_spt = get_external_spt(gfn, new_spte, level);
-
-		KVM_BUG_ON(!external_spt, kvm);
-		ret = kvm_x86_call(link_external_spt)(kvm, gfn, level, external_spt);
-	}
-	return ret;
+	return kvm_x86_call(set_external_spte)(kvm, gfn, level, new_spte);
 }
 
 /**
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 01e070ec10fd..92a846b91bac 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -1654,18 +1654,58 @@ static int tdx_mem_page_aug(struct kvm *kvm, gfn_t gfn,
 	return 0;
 }
 
+static struct page *tdx_spte_to_sept_pt(struct kvm *kvm, gfn_t gfn,
+					     u64 new_spte, enum pg_level level)
+{
+	struct kvm_mmu_page *sp = spte_to_child_sp(new_spte);
+
+	if (KVM_BUG_ON(!sp->external_spt, kvm) ||
+	    KVM_BUG_ON(sp->role.level + 1 != level, kvm) ||
+	    KVM_BUG_ON(sp->gfn != gfn, kvm))
+		return NULL;
+
+	return virt_to_page(sp->external_spt);
+}
+
+static int tdx_sept_link_private_spt(struct kvm *kvm, gfn_t gfn,
+				     enum pg_level level, u64 mirror_spte)
+{
+	gpa_t gpa = gfn_to_gpa(gfn);
+	u64 err, entry, level_state;
+	struct page *sept_pt;
+
+	sept_pt = tdx_spte_to_sept_pt(kvm, gfn, mirror_spte, level);
+	if (!sept_pt)
+		return -EIO;
+
+	err = tdh_mem_sept_add(&to_kvm_tdx(kvm)->td, gpa, level, sept_pt,
+			       &entry, &level_state);
+	if (unlikely(tdx_operand_busy(err)))
+		return -EBUSY;
+
+	if (TDX_BUG_ON_2(err, TDH_MEM_SEPT_ADD, entry, level_state, kvm))
+		return -EIO;
+
+	return 0;
+}
+
 static int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn,
 				     enum pg_level level, u64 mirror_spte)
 {
 	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
 	kvm_pfn_t pfn = spte_to_pfn(mirror_spte);
 
+	if (KVM_BUG_ON(!is_shadow_present_pte(mirror_spte), kvm))
+		return -EIO;
+
+	if (!is_last_spte(mirror_spte, level))
+		return tdx_sept_link_private_spt(kvm, gfn, level, mirror_spte);
+
 	/* TODO: handle large pages. */
 	if (KVM_BUG_ON(level != PG_LEVEL_4K, kvm))
 		return -EIO;
 
-	WARN_ON_ONCE(!is_shadow_present_pte(mirror_spte) ||
-		     (mirror_spte & VMX_EPT_RWX_MASK) != VMX_EPT_RWX_MASK);
+	WARN_ON_ONCE((mirror_spte & VMX_EPT_RWX_MASK) != VMX_EPT_RWX_MASK);
 
 	/*
 	 * Ensure pre_fault_allowed is read by kvm_arch_vcpu_pre_fault_memory()
@@ -1685,23 +1725,7 @@ static int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn,
 	return tdx_mem_page_aug(kvm, gfn, level, pfn);
 }
 
-static int tdx_sept_link_private_spt(struct kvm *kvm, gfn_t gfn,
-				     enum pg_level level, void *private_spt)
-{
-	gpa_t gpa = gfn_to_gpa(gfn);
-	struct page *page = virt_to_page(private_spt);
-	u64 err, entry, level_state;
 
-	err = tdh_mem_sept_add(&to_kvm_tdx(kvm)->td, gpa, level, page, &entry,
-			       &level_state);
-	if (unlikely(tdx_operand_busy(err)))
-		return -EBUSY;
-
-	if (TDX_BUG_ON_2(err, TDH_MEM_SEPT_ADD, entry, level_state, kvm))
-		return -EIO;
-
-	return 0;
-}
 
 /*
  * Ensure shared and private EPTs to be flushed on all vCPUs.
@@ -3413,7 +3437,6 @@ int __init tdx_hardware_setup(void)
 
 	vt_x86_ops.vm_size = max_t(unsigned int, vt_x86_ops.vm_size, sizeof(struct kvm_tdx));
 
-	vt_x86_ops.link_external_spt = tdx_sept_link_private_spt;
 	vt_x86_ops.set_external_spte = tdx_sept_set_private_spte;
 	vt_x86_ops.free_external_spt = tdx_sept_free_private_spt;
 	vt_x86_ops.remove_external_spte = tdx_sept_remove_private_spte;
-- 
2.53.0


  parent reply	other threads:[~2026-03-27 20:14 UTC|newest]

Thread overview: 67+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-27 20:14 [PATCH 00/17] TDX MMU refactors Rick Edgecombe
2026-03-27 20:14 ` [PATCH 01/17] x86/tdx: Use pg_level in TDX APIs, not the TDX-Module's 0-based level Rick Edgecombe
2026-03-27 20:14 ` [PATCH 02/17] KVM: x86/mmu: Update iter->old_spte if cmpxchg64 on mirror SPTE "fails" Rick Edgecombe
2026-03-31  9:47   ` Huang, Kai
2026-03-31  9:17     ` Yan Zhao
2026-03-31  9:59       ` Huang, Kai
2026-03-31  9:22         ` Yan Zhao
2026-03-31 10:14           ` Huang, Kai
2026-03-27 20:14 ` [PATCH 03/17] KVM: TDX: Account all non-transient page allocations for per-TD structures Rick Edgecombe
2026-03-27 20:14 ` [PATCH 04/17] KVM: x86: Make "external SPTE" ops that can fail RET0 static calls Rick Edgecombe
2026-03-27 20:14 ` [PATCH 05/17] KVM: x86/tdp_mmu: Drop zapping KVM_BUG_ON() set_external_spte_present() Rick Edgecombe
2026-03-27 20:14 ` [PATCH 06/17] KVM: x86/tdp_mmu: Morph the !is_frozen_spte() check into a KVM_MMU_WARN_ON() Rick Edgecombe
2026-03-30  5:00   ` Yan Zhao
2026-03-31 16:37     ` Edgecombe, Rick P
2026-04-02  1:06       ` Yan Zhao
2026-04-02 19:21         ` Sean Christopherson
2026-04-03  2:47           ` Yan Zhao
2026-03-27 20:14 ` [PATCH 07/17] KVM: x86/tdp_mmu: Centralize updates to present external PTEs Rick Edgecombe
2026-03-30  6:14   ` Yan Zhao
2026-04-01 23:45     ` Edgecombe, Rick P
2026-04-02  1:59       ` Yan Zhao
2026-04-02 23:10         ` Edgecombe, Rick P
2026-04-02 23:28           ` Sean Christopherson
2026-04-03  9:05             ` Yan Zhao
2026-04-04  0:15               ` Edgecombe, Rick P
2026-04-07  8:34                 ` Yan Zhao
2026-04-07 17:21                   ` Edgecombe, Rick P
2026-04-08  1:23                     ` Yan Zhao
2026-04-03  9:08           ` Yan Zhao
2026-03-31 10:09   ` Huang, Kai
2026-04-01 23:58     ` Edgecombe, Rick P
2026-04-02 23:21       ` Sean Christopherson
2026-04-01  8:34   ` Yan Zhao
2026-04-02 23:46     ` Edgecombe, Rick P
2026-04-03 10:33       ` Yan Zhao
2026-04-08  1:50         ` Yan Zhao
2026-04-08 10:47   ` Binbin Wu
2026-03-27 20:14 ` Rick Edgecombe [this message]
2026-03-30  6:28   ` [PATCH 08/17] KVM: TDX: Drop kvm_x86_ops.link_external_spt(), use .set_external_spte() for all Yan Zhao
2026-03-27 20:14 ` [PATCH 09/17] KVM: TDX: Add helper to handle mapping leaf SPTE into S-EPT Rick Edgecombe
2026-03-30  6:43   ` Yan Zhao
2026-04-01 23:59     ` Edgecombe, Rick P
2026-03-27 20:14 ` [PATCH 10/17] KVM: TDX: Move set_external_spte_present() assert into TDX code Rick Edgecombe
2026-03-31 10:30   ` Huang, Kai
2026-04-02  0:00     ` Edgecombe, Rick P
2026-03-31 10:34   ` Huang, Kai
2026-03-27 20:14 ` [PATCH 11/17] KVM: x86/mmu: Fold set_external_spte_present() into its sole caller Rick Edgecombe
2026-03-31 10:36   ` Huang, Kai
2026-04-01  7:41   ` Yan Zhao
2026-03-27 20:14 ` [PATCH 12/17] KVM: x86/mmu: Plumb the old_spte into kvm_x86_ops.set_external_spte() Rick Edgecombe
2026-03-27 20:14 ` [PATCH 13/17] KVM: TDX: Hoist tdx_sept_remove_private_spte() above set_private_spte() Rick Edgecombe
2026-03-31 10:42   ` Huang, Kai
2026-04-02  0:04     ` Edgecombe, Rick P
2026-03-27 20:14 ` [PATCH 14/17] KVM: x86/mmu: Remove KVM_BUG_ON() that checks lock when removing PTs Rick Edgecombe
2026-03-30  7:01   ` Yan Zhao
2026-03-31 10:46     ` Huang, Kai
2026-04-02  0:08       ` Edgecombe, Rick P
2026-04-02  2:04         ` Yan Zhao
2026-03-27 20:14 ` [PATCH 15/17] KVM: TDX: Handle removal of leaf SPTEs in .set_private_spte() Rick Edgecombe
2026-03-27 20:14 ` [PATCH 16/17] KVM: x86: Move error handling inside free_external_spt() Rick Edgecombe
2026-04-09  2:08   ` Binbin Wu
2026-03-27 20:14 ` [PATCH 17/17] KVM: TDX: Move external page table freeing to TDX code Rick Edgecombe
2026-03-30  7:49   ` Yan Zhao
2026-04-02  0:17     ` Edgecombe, Rick P
2026-04-02  2:16       ` Yan Zhao
2026-04-02  2:17         ` Yan Zhao
2026-03-31 11:02   ` Huang, Kai

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260327201421.2824383-9-rick.p.edgecombe@intel.com \
    --to=rick.p.edgecombe@intel.com \
    --cc=dave.hansen@intel.com \
    --cc=kai.huang@intel.com \
    --cc=kas@kernel.org \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=seanjc@google.com \
    --cc=x86@kernel.org \
    --cc=yan.y.zhao@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox