All of lore.kernel.org
 help / color / mirror / Atom feed
From: Rick Edgecombe <rick.p.edgecombe@intel.com>
To: seanjc@google.com, pbonzini@redhat.com, yan.y.zhao@intel.com,
	kai.huang@intel.com, kvm@vger.kernel.org, kas@kernel.org
Cc: linux-kernel@vger.kernel.org, x86@kernel.org,
	dave.hansen@intel.com, rick.p.edgecombe@intel.com
Subject: [PATCH 15/17] KVM: TDX: Handle removal of leaf SPTEs in .set_private_spte()
Date: Fri, 27 Mar 2026 13:14:19 -0700	[thread overview]
Message-ID: <20260327201421.2824383-16-rick.p.edgecombe@intel.com> (raw)
In-Reply-To: <20260327201421.2824383-1-rick.p.edgecombe@intel.com>

From: Sean Christopherson <seanjc@google.com>

Drop kvm_x86_ops.remove_external_spte(), and instead handling the removal
of leaf SPTEs in the S-EPT (a.k.a. external root) in .set_private_spte().
This will allow extending tdx_sept_set_private_spte() to support splitting
a huge S-EPT entry without needing yet another kvm_x86_ops hook.

Bug the VM if the callback fails, as redundant KVM_BUG_ON() calls are
benign (the WARN will fire if and only if the VM isn't already bugged) and
handle_changed_spte() is most definitely not prepared to handle failure.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
---
 arch/x86/include/asm/kvm-x86-ops.h |  1 -
 arch/x86/include/asm/kvm_host.h    |  3 ---
 arch/x86/kvm/mmu/tdp_mmu.c         | 33 +-----------------------------
 arch/x86/kvm/vmx/tdx.c             | 22 ++++++++++++--------
 4 files changed, 14 insertions(+), 45 deletions(-)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index bced6d938702..ed348c6dd445 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -97,7 +97,6 @@ KVM_X86_OP_OPTIONAL_RET0(get_mt_mask)
 KVM_X86_OP(load_mmu_pgd)
 KVM_X86_OP_OPTIONAL_RET0(set_external_spte)
 KVM_X86_OP_OPTIONAL_RET0(free_external_spt)
-KVM_X86_OP_OPTIONAL(remove_external_spte)
 KVM_X86_OP(has_wbinvd_exit)
 KVM_X86_OP(get_l2_tsc_offset)
 KVM_X86_OP(get_l2_tsc_multiplier)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 808d2c7ea546..09588e797e4b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1884,9 +1884,6 @@ struct kvm_x86_ops {
 	int (*free_external_spt)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
 				 void *external_spt);
 
-	/* Update external page table from spte getting removed, and flush TLB. */
-	void (*remove_external_spte)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
-				     u64 mirror_spte);
 
 	bool (*has_wbinvd_exit)(void);
 
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 5dc9633c866e..806788bdecce 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -359,25 +359,6 @@ static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
 	spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
 }
 
-static void remove_external_spte(struct kvm *kvm, gfn_t gfn, u64 old_spte,
-				 int level)
-{
-	/*
-	 * External (TDX) SPTEs are limited to PG_LEVEL_4K, and external
-	 * PTs are removed in a special order, involving free_external_spt().
-	 * But remove_external_spte() will be called on non-leaf PTEs via
-	 * __tdp_mmu_zap_root(), so avoid the error the former would return
-	 * in this case.
-	 */
-	if (!is_last_spte(old_spte, level))
-		return;
-
-	/* Zapping leaf spte is allowed only when write lock is held. */
-	lockdep_assert_held_write(&kvm->mmu_lock);
-
-	kvm_x86_call(remove_external_spte)(kvm, gfn, level, old_spte);
-}
-
 /**
  * handle_removed_pt() - handle a page table removed from the TDP structure
  *
@@ -472,9 +453,6 @@ static void handle_removed_pt(struct kvm *kvm, tdp_ptep_t pt, bool shared)
 							  FROZEN_SPTE, level);
 		}
 		handle_changed_spte(kvm, sp, gfn, old_spte, FROZEN_SPTE, level, shared);
-
-		if (is_mirror_sp(sp))
-			remove_external_spte(kvm, gfn, old_spte, level);
 	}
 
 	if (is_mirror_sp(sp) &&
@@ -590,7 +568,7 @@ static int __handle_changed_spte(struct kvm *kvm, struct kvm_mmu_page *sp,
 	if (was_present && !was_leaf &&
 	    (is_leaf || !is_present || WARN_ON_ONCE(pfn_changed))) {
 		handle_removed_pt(kvm, spte_to_child_pt(old_spte, level), shared);
-	} else if (is_mirror_sp(sp) && is_present) {
+	} else if (is_mirror_sp(sp)) {
 		r = kvm_x86_call(set_external_spte)(kvm, gfn, old_spte,
 						    new_spte, level);
 		if (r)
@@ -737,15 +715,6 @@ static u64 tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep,
 
 	handle_changed_spte(kvm, sp, gfn, old_spte, new_spte, level, false);
 
-	/*
-	 * Users that do non-atomic setting of PTEs don't operate on mirror
-	 * roots, so don't handle it and bug the VM if it's seen.
-	 */
-	if (is_mirror_sptep(sptep)) {
-		KVM_BUG_ON(is_shadow_present_pte(new_spte), kvm);
-		remove_external_spte(kvm, gfn, old_spte, level);
-	}
-
 	return old_spte;
 }
 
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 5a1a6610a98f..bfbadba8bc08 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -1788,10 +1788,10 @@ static int tdx_sept_free_private_spt(struct kvm *kvm, gfn_t gfn,
 	return tdx_reclaim_page(virt_to_page(private_spt));
 }
 
-static void tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
-					 enum pg_level level, u64 mirror_spte)
+static int tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
+					enum pg_level level, u64 old_spte)
 {
-	struct page *page = pfn_to_page(spte_to_pfn(mirror_spte));
+	struct page *page = pfn_to_page(spte_to_pfn(old_spte));
 	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
 	gpa_t gpa = gfn_to_gpa(gfn);
 	u64 err, entry, level_state;
@@ -1804,16 +1804,16 @@ static void tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
 	 * there can't be anything populated in the private EPT.
 	 */
 	if (KVM_BUG_ON(!is_hkid_assigned(to_kvm_tdx(kvm)), kvm))
-		return;
+		return -EIO;
 
 	/* TODO: handle large pages. */
 	if (KVM_BUG_ON(level != PG_LEVEL_4K, kvm))
-		return;
+		return -EIO;
 
 	err = tdh_do_no_vcpus(tdh_mem_range_block, kvm, &kvm_tdx->td, gpa,
 			      level, &entry, &level_state);
 	if (TDX_BUG_ON_2(err, TDH_MEM_RANGE_BLOCK, entry, level_state, kvm))
-		return;
+		return -EIO;
 
 	/*
 	 * TDX requires TLB tracking before dropping private page.  Do
@@ -1829,18 +1829,22 @@ static void tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
 	err = tdh_do_no_vcpus(tdh_mem_page_remove, kvm, &kvm_tdx->td, gpa,
 			      level, &entry, &level_state);
 	if (TDX_BUG_ON_2(err, TDH_MEM_PAGE_REMOVE, entry, level_state, kvm))
-		return;
+		return -EIO;
 
 	err = tdh_phymem_page_wbinvd_hkid((u16)kvm_tdx->hkid, page);
 	if (TDX_BUG_ON(err, TDH_PHYMEM_PAGE_WBINVD, kvm))
-		return;
+		return -EIO;
 
 	tdx_quirk_reset_page(page);
+	return 0;
 }
 
 static int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn, u64 old_spte,
 				     u64 new_spte, enum pg_level level)
 {
+	if (is_shadow_present_pte(old_spte))
+		return tdx_sept_remove_private_spte(kvm, gfn, level, old_spte);
+
 	if (KVM_BUG_ON(!is_shadow_present_pte(new_spte), kvm))
 		return -EIO;
 
@@ -3445,7 +3449,7 @@ int __init tdx_hardware_setup(void)
 
 	vt_x86_ops.set_external_spte = tdx_sept_set_private_spte;
 	vt_x86_ops.free_external_spt = tdx_sept_free_private_spt;
-	vt_x86_ops.remove_external_spte = tdx_sept_remove_private_spte;
+
 	vt_x86_ops.protected_apic_has_interrupt = tdx_protected_apic_has_interrupt;
 	return 0;
 
-- 
2.53.0


  parent reply	other threads:[~2026-03-27 20:14 UTC|newest]

Thread overview: 67+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-27 20:14 [PATCH 00/17] TDX MMU refactors Rick Edgecombe
2026-03-27 20:14 ` [PATCH 01/17] x86/tdx: Use pg_level in TDX APIs, not the TDX-Module's 0-based level Rick Edgecombe
2026-03-27 20:14 ` [PATCH 02/17] KVM: x86/mmu: Update iter->old_spte if cmpxchg64 on mirror SPTE "fails" Rick Edgecombe
2026-03-31  9:47   ` Huang, Kai
2026-03-31  9:17     ` Yan Zhao
2026-03-31  9:59       ` Huang, Kai
2026-03-31  9:22         ` Yan Zhao
2026-03-31 10:14           ` Huang, Kai
2026-03-27 20:14 ` [PATCH 03/17] KVM: TDX: Account all non-transient page allocations for per-TD structures Rick Edgecombe
2026-03-27 20:14 ` [PATCH 04/17] KVM: x86: Make "external SPTE" ops that can fail RET0 static calls Rick Edgecombe
2026-03-27 20:14 ` [PATCH 05/17] KVM: x86/tdp_mmu: Drop zapping KVM_BUG_ON() set_external_spte_present() Rick Edgecombe
2026-03-27 20:14 ` [PATCH 06/17] KVM: x86/tdp_mmu: Morph the !is_frozen_spte() check into a KVM_MMU_WARN_ON() Rick Edgecombe
2026-03-30  5:00   ` Yan Zhao
2026-03-31 16:37     ` Edgecombe, Rick P
2026-04-02  1:06       ` Yan Zhao
2026-04-02 19:21         ` Sean Christopherson
2026-04-03  2:47           ` Yan Zhao
2026-03-27 20:14 ` [PATCH 07/17] KVM: x86/tdp_mmu: Centralize updates to present external PTEs Rick Edgecombe
2026-03-30  6:14   ` Yan Zhao
2026-04-01 23:45     ` Edgecombe, Rick P
2026-04-02  1:59       ` Yan Zhao
2026-04-02 23:10         ` Edgecombe, Rick P
2026-04-02 23:28           ` Sean Christopherson
2026-04-03  9:05             ` Yan Zhao
2026-04-04  0:15               ` Edgecombe, Rick P
2026-04-07  8:34                 ` Yan Zhao
2026-04-07 17:21                   ` Edgecombe, Rick P
2026-04-08  1:23                     ` Yan Zhao
2026-04-03  9:08           ` Yan Zhao
2026-03-31 10:09   ` Huang, Kai
2026-04-01 23:58     ` Edgecombe, Rick P
2026-04-02 23:21       ` Sean Christopherson
2026-04-01  8:34   ` Yan Zhao
2026-04-02 23:46     ` Edgecombe, Rick P
2026-04-03 10:33       ` Yan Zhao
2026-04-08  1:50         ` Yan Zhao
2026-04-08 10:47   ` Binbin Wu
2026-03-27 20:14 ` [PATCH 08/17] KVM: TDX: Drop kvm_x86_ops.link_external_spt(), use .set_external_spte() for all Rick Edgecombe
2026-03-30  6:28   ` Yan Zhao
2026-03-27 20:14 ` [PATCH 09/17] KVM: TDX: Add helper to handle mapping leaf SPTE into S-EPT Rick Edgecombe
2026-03-30  6:43   ` Yan Zhao
2026-04-01 23:59     ` Edgecombe, Rick P
2026-03-27 20:14 ` [PATCH 10/17] KVM: TDX: Move set_external_spte_present() assert into TDX code Rick Edgecombe
2026-03-31 10:30   ` Huang, Kai
2026-04-02  0:00     ` Edgecombe, Rick P
2026-03-31 10:34   ` Huang, Kai
2026-03-27 20:14 ` [PATCH 11/17] KVM: x86/mmu: Fold set_external_spte_present() into its sole caller Rick Edgecombe
2026-03-31 10:36   ` Huang, Kai
2026-04-01  7:41   ` Yan Zhao
2026-03-27 20:14 ` [PATCH 12/17] KVM: x86/mmu: Plumb the old_spte into kvm_x86_ops.set_external_spte() Rick Edgecombe
2026-03-27 20:14 ` [PATCH 13/17] KVM: TDX: Hoist tdx_sept_remove_private_spte() above set_private_spte() Rick Edgecombe
2026-03-31 10:42   ` Huang, Kai
2026-04-02  0:04     ` Edgecombe, Rick P
2026-03-27 20:14 ` [PATCH 14/17] KVM: x86/mmu: Remove KVM_BUG_ON() that checks lock when removing PTs Rick Edgecombe
2026-03-30  7:01   ` Yan Zhao
2026-03-31 10:46     ` Huang, Kai
2026-04-02  0:08       ` Edgecombe, Rick P
2026-04-02  2:04         ` Yan Zhao
2026-03-27 20:14 ` Rick Edgecombe [this message]
2026-03-27 20:14 ` [PATCH 16/17] KVM: x86: Move error handling inside free_external_spt() Rick Edgecombe
2026-04-09  2:08   ` Binbin Wu
2026-03-27 20:14 ` [PATCH 17/17] KVM: TDX: Move external page table freeing to TDX code Rick Edgecombe
2026-03-30  7:49   ` Yan Zhao
2026-04-02  0:17     ` Edgecombe, Rick P
2026-04-02  2:16       ` Yan Zhao
2026-04-02  2:17         ` Yan Zhao
2026-03-31 11:02   ` Huang, Kai

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260327201421.2824383-16-rick.p.edgecombe@intel.com \
    --to=rick.p.edgecombe@intel.com \
    --cc=dave.hansen@intel.com \
    --cc=kai.huang@intel.com \
    --cc=kas@kernel.org \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=seanjc@google.com \
    --cc=x86@kernel.org \
    --cc=yan.y.zhao@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.