linux-coco.lists.linux.dev archive mirror
 help / color / mirror / Atom feed
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
To: pbonzini@redhat.com, seanjc@google.com, dave.hansen@linux.intel.com
Cc: rick.p.edgecombe@intel.com, isaku.yamahata@intel.com,
	kai.huang@intel.com, yan.y.zhao@intel.com, chao.gao@intel.com,
	tglx@linutronix.de, mingo@redhat.com, bp@alien8.de,
	kvm@vger.kernel.org, x86@kernel.org, linux-coco@lists.linux.dev,
	linux-kernel@vger.kernel.org,
	"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Subject: [PATCHv2 04/12] x86/virt/tdx: Add tdx_alloc/free_page() helpers
Date: Mon,  9 Jun 2025 22:13:32 +0300	[thread overview]
Message-ID: <20250609191340.2051741-5-kirill.shutemov@linux.intel.com> (raw)
In-Reply-To: <20250609191340.2051741-1-kirill.shutemov@linux.intel.com>

The new helpers allocate and free pages that can be used for a TDs.

Besides page allocation and freeing, these helpers also take care about
managing PAMT memory, if kernel runs on a platform with Dynamic PAMT
supported.

tdx_pamt_get()/put() helpers take care of PAMT allocation/freeing and
its refcounting.

PAMT memory is allocated when refcount for the 2M range crosses from 0
to 1 and gets freed back on when it is dropped to zero. These
transitions can happen concurrently and pamt_lock spinlock serializes
them.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 arch/x86/include/asm/tdx.h       |   3 +
 arch/x86/include/asm/tdx_errno.h |   6 +
 arch/x86/virt/vmx/tdx/tdx.c      | 205 +++++++++++++++++++++++++++++++
 arch/x86/virt/vmx/tdx/tdx.h      |   2 +
 4 files changed, 216 insertions(+)

diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index 583d6fe66821..d9a77147412f 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -115,6 +115,9 @@ int tdx_guest_keyid_alloc(void);
 u32 tdx_get_nr_guest_keyids(void);
 void tdx_guest_keyid_free(unsigned int keyid);
 
+struct page *tdx_alloc_page(void);
+void tdx_free_page(struct page *page);
+
 struct tdx_td {
 	/* TD root structure: */
 	struct page *tdr_page;
diff --git a/arch/x86/include/asm/tdx_errno.h b/arch/x86/include/asm/tdx_errno.h
index d418934176e2..0b3332c2d6b2 100644
--- a/arch/x86/include/asm/tdx_errno.h
+++ b/arch/x86/include/asm/tdx_errno.h
@@ -18,6 +18,7 @@
 #define TDX_PREVIOUS_TLB_EPOCH_BUSY		0x8000020100000000ULL
 #define TDX_RND_NO_ENTROPY			0x8000020300000000ULL
 #define TDX_PAGE_METADATA_INCORRECT		0xC000030000000000ULL
+#define TDX_HPA_RANGE_NOT_FREE			0xC000030400000000ULL
 #define TDX_VCPU_NOT_ASSOCIATED			0x8000070200000000ULL
 #define TDX_KEY_GENERATION_FAILED		0x8000080000000000ULL
 #define TDX_KEY_STATE_INCORRECT			0xC000081100000000ULL
@@ -86,5 +87,10 @@ static inline bool tdx_operand_busy(u64 err)
 {
 	return tdx_status(err) == TDX_OPERAND_BUSY;
 }
+
+static inline bool tdx_hpa_range_not_free(u64 err)
+{
+	return tdx_status(err) == TDX_HPA_RANGE_NOT_FREE;
+}
 #endif /* __ASSEMBLER__ */
 #endif /* _X86_TDX_ERRNO_H */
diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
index ad9d7a30989d..c514c60e8c8d 100644
--- a/arch/x86/virt/vmx/tdx/tdx.c
+++ b/arch/x86/virt/vmx/tdx/tdx.c
@@ -2000,3 +2000,208 @@ u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, struct page *page)
 	return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args);
 }
 EXPORT_SYMBOL_GPL(tdh_phymem_page_wbinvd_hkid);
+
+static int tdx_nr_pamt_pages(void)
+{
+	if (!tdx_supports_dynamic_pamt(&tdx_sysinfo))
+		return 0;
+
+	return tdx_sysinfo.tdmr.pamt_4k_entry_size * PTRS_PER_PTE / PAGE_SIZE;
+}
+
+static u64 tdh_phymem_pamt_add(unsigned long hpa,
+			       struct list_head *pamt_pages)
+{
+	struct tdx_module_args args = {
+		.rcx = hpa,
+	};
+	struct page *page;
+	u64 *p;
+
+	WARN_ON_ONCE(!IS_ALIGNED(hpa & PAGE_MASK, PMD_SIZE));
+
+	p = &args.rdx;
+	list_for_each_entry(page, pamt_pages, lru) {
+		*p = page_to_phys(page);
+		p++;
+	}
+
+	return seamcall(TDH_PHYMEM_PAMT_ADD, &args);
+}
+
+static u64 tdh_phymem_pamt_remove(unsigned long hpa,
+				  struct list_head *pamt_pages)
+{
+	struct tdx_module_args args = {
+		.rcx = hpa,
+	};
+	struct page *page;
+	u64 *p, ret;
+
+	WARN_ON_ONCE(!IS_ALIGNED(hpa & PAGE_MASK, PMD_SIZE));
+
+	ret = seamcall_ret(TDH_PHYMEM_PAMT_REMOVE, &args);
+	if (ret)
+		return ret;
+
+	p = &args.rdx;
+	for (int i = 0; i < tdx_nr_pamt_pages(); i++) {
+		page = phys_to_page(*p);
+		list_add(&page->lru, pamt_pages);
+		p++;
+	}
+
+	return ret;
+}
+
+static DEFINE_SPINLOCK(pamt_lock);
+
+static void tdx_free_pamt_pages(struct list_head *pamt_pages)
+{
+	struct page *page;
+
+	while ((page = list_first_entry_or_null(pamt_pages, struct page, lru))) {
+		list_del(&page->lru);
+		__free_page(page);
+	}
+}
+
+static int tdx_alloc_pamt_pages(struct list_head *pamt_pages)
+{
+	for (int i = 0; i < tdx_nr_pamt_pages(); i++) {
+		struct page *page = alloc_page(GFP_KERNEL);
+		if (!page)
+			goto fail;
+		list_add(&page->lru, pamt_pages);
+	}
+	return 0;
+fail:
+	tdx_free_pamt_pages(pamt_pages);
+	return -ENOMEM;
+}
+
+static int tdx_pamt_add(atomic_t *pamt_refcount, unsigned long hpa,
+			struct list_head *pamt_pages)
+{
+	u64 err;
+
+	guard(spinlock)(&pamt_lock);
+
+	hpa = ALIGN_DOWN(hpa, PMD_SIZE);
+
+	/* Lost race to other tdx_pamt_add() */
+	if (atomic_read(pamt_refcount) != 0) {
+		atomic_inc(pamt_refcount);
+		return 1;
+	}
+
+	err = tdh_phymem_pamt_add(hpa | TDX_PS_2M, pamt_pages);
+
+	/*
+	 * tdx_hpa_range_not_free() is true if current task won race
+	 * against tdx_pamt_put().
+	 */
+	if (err && !tdx_hpa_range_not_free(err)) {
+		pr_err("TDH_PHYMEM_PAMT_ADD failed: %#llx\n", err);
+		return -EIO;
+	}
+
+	atomic_set(pamt_refcount, 1);
+
+	if (tdx_hpa_range_not_free(err))
+		return 1;
+
+	return 0;
+}
+
+static int tdx_pamt_get(struct page *page, enum pg_level level)
+{
+	unsigned long hpa = page_to_phys(page);
+	atomic_t *pamt_refcount;
+	LIST_HEAD(pamt_pages);
+	int ret;
+
+	if (!tdx_supports_dynamic_pamt(&tdx_sysinfo))
+		return 0;
+
+	if (level != PG_LEVEL_4K)
+		return 0;
+
+	pamt_refcount = tdx_get_pamt_refcount(hpa);
+	WARN_ON_ONCE(atomic_read(pamt_refcount) < 0);
+
+	if (atomic_inc_not_zero(pamt_refcount))
+		return 0;
+
+	if (tdx_alloc_pamt_pages(&pamt_pages))
+		return -ENOMEM;
+
+	ret = tdx_pamt_add(pamt_refcount, hpa, &pamt_pages);
+	if (ret)
+		tdx_free_pamt_pages(&pamt_pages);
+
+	return ret >= 0 ? 0 : ret;
+}
+
+static void tdx_pamt_put(struct page *page, enum pg_level level)
+{
+	unsigned long hpa = page_to_phys(page);
+	atomic_t *pamt_refcount;
+	LIST_HEAD(pamt_pages);
+	u64 err;
+
+	if (!tdx_supports_dynamic_pamt(&tdx_sysinfo))
+		return;
+
+	if (level != PG_LEVEL_4K)
+		return;
+
+	hpa = ALIGN_DOWN(hpa, PMD_SIZE);
+
+	pamt_refcount = tdx_get_pamt_refcount(hpa);
+	if (!atomic_dec_and_test(pamt_refcount))
+		return;
+
+	scoped_guard(spinlock, &pamt_lock) {
+		/* Lost race against tdx_pamt_add()? */
+		if (atomic_read(pamt_refcount) != 0)
+			return;
+
+		err = tdh_phymem_pamt_remove(hpa | TDX_PS_2M, &pamt_pages);
+
+		if (err) {
+			atomic_inc(pamt_refcount);
+			pr_err("TDH_PHYMEM_PAMT_REMOVE failed: %#llx\n", err);
+			return;
+		}
+	}
+
+	tdx_free_pamt_pages(&pamt_pages);
+}
+
+struct page *tdx_alloc_page(void)
+{
+	struct page *page;
+
+	page = alloc_page(GFP_KERNEL);
+	if (!page)
+		return NULL;
+
+	if (tdx_pamt_get(page, PG_LEVEL_4K)) {
+		__free_page(page);
+		return NULL;
+	}
+
+	return page;
+}
+EXPORT_SYMBOL_GPL(tdx_alloc_page);
+
+void tdx_free_page(struct page *page)
+{
+	if (!page)
+		return;
+
+	tdx_pamt_put(page, PG_LEVEL_4K);
+	__free_page(page);
+}
+EXPORT_SYMBOL_GPL(tdx_free_page);
diff --git a/arch/x86/virt/vmx/tdx/tdx.h b/arch/x86/virt/vmx/tdx/tdx.h
index 82bb82be8567..46c4214b79fb 100644
--- a/arch/x86/virt/vmx/tdx/tdx.h
+++ b/arch/x86/virt/vmx/tdx/tdx.h
@@ -46,6 +46,8 @@
 #define TDH_PHYMEM_PAGE_WBINVD		41
 #define TDH_VP_WR			43
 #define TDH_SYS_CONFIG			45
+#define TDH_PHYMEM_PAMT_ADD		58
+#define TDH_PHYMEM_PAMT_REMOVE		59
 
 /*
  * SEAMCALL leaf:
-- 
2.47.2


  parent reply	other threads:[~2025-06-09 19:13 UTC|newest]

Thread overview: 90+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-06-09 19:13 [PATCHv2 00/12] TDX: Enable Dynamic PAMT Kirill A. Shutemov
2025-06-09 19:13 ` [PATCHv2 01/12] x86/tdx: Consolidate TDX error handling Kirill A. Shutemov
2025-06-25 17:58   ` Dave Hansen
2025-06-25 20:58     ` Edgecombe, Rick P
2025-06-25 21:27       ` Sean Christopherson
2025-06-25 21:46         ` Edgecombe, Rick P
2025-06-26  9:25         ` kirill.shutemov
2025-06-26 14:46           ` Dave Hansen
2025-06-26 15:51             ` Sean Christopherson
2025-06-26 16:59               ` Dave Hansen
2025-06-27 10:42                 ` kirill.shutemov
2025-07-30 18:32                 ` Edgecombe, Rick P
2025-07-31 23:31                   ` Sean Christopherson
2025-07-31 23:46                     ` Edgecombe, Rick P
2025-07-31 23:53                       ` Sean Christopherson
2025-08-01 15:03                         ` Edgecombe, Rick P
2025-08-06 15:19                           ` Sean Christopherson
2025-06-26  0:05     ` Huang, Kai
2025-07-30 18:33       ` Edgecombe, Rick P
2025-06-09 19:13 ` [PATCHv2 02/12] x86/virt/tdx: Allocate page bitmap for Dynamic PAMT Kirill A. Shutemov
2025-06-25 18:06   ` Dave Hansen
2025-06-26  9:25     ` Kirill A. Shutemov
2025-07-31  1:06     ` Edgecombe, Rick P
2025-07-31  4:10       ` Huang, Kai
2025-06-26 11:08   ` Huang, Kai
2025-06-27 10:42     ` kirill.shutemov
2025-06-09 19:13 ` [PATCHv2 03/12] x86/virt/tdx: Allocate reference counters for PAMT memory Kirill A. Shutemov
2025-06-25 19:26   ` Dave Hansen
2025-06-27 11:27     ` Kirill A. Shutemov
2025-06-27 14:03       ` Dave Hansen
2025-06-26  0:53   ` Huang, Kai
2025-06-26  4:48     ` Huang, Kai
2025-06-27 11:35     ` kirill.shutemov
2025-06-09 19:13 ` Kirill A. Shutemov [this message]
2025-06-10  2:36   ` [PATCHv2 04/12] x86/virt/tdx: Add tdx_alloc/free_page() helpers Chao Gao
2025-06-10 14:51     ` [PATCHv2.1 " Kirill A. Shutemov
2025-06-25 18:01       ` Dave Hansen
2025-06-25 20:09     ` [PATCHv2 " Dave Hansen
2025-06-26  0:46       ` Chao Gao
2025-06-25 20:02   ` Dave Hansen
2025-06-27 13:00     ` Kirill A. Shutemov
2025-06-27  7:49   ` Adrian Hunter
2025-06-27 13:03     ` Kirill A. Shutemov
2025-06-09 19:13 ` [PATCHv2 05/12] KVM: TDX: Allocate PAMT memory in __tdx_td_init() Kirill A. Shutemov
2025-06-09 19:13 ` [PATCHv2 06/12] KVM: TDX: Allocate PAMT memory in tdx_td_vcpu_init() Kirill A. Shutemov
2025-06-09 19:13 ` [PATCHv2 07/12] KVM: TDX: Preallocate PAMT pages to be used in page fault path Kirill A. Shutemov
2025-06-26 11:21   ` Huang, Kai
2025-07-10  1:34   ` Edgecombe, Rick P
2025-07-10  7:49     ` kirill.shutemov
2025-06-09 19:13 ` [PATCHv2 08/12] KVM: TDX: Handle PAMT allocation in " Kirill A. Shutemov
2025-06-12 12:19   ` Chao Gao
2025-06-12 13:05     ` [PATCHv2.1 " Kirill A. Shutemov
2025-06-25 22:38   ` [PATCHv2 " Edgecombe, Rick P
2025-07-09 14:29     ` kirill.shutemov
2025-07-10  1:33   ` Edgecombe, Rick P
2025-07-10  8:45     ` kirill.shutemov
2025-08-21 19:21   ` Sagi Shahar
2025-08-21 19:35     ` Edgecombe, Rick P
2025-08-21 19:53       ` Sagi Shahar
2025-06-09 19:13 ` [PATCHv2 09/12] KVM: TDX: Reclaim PAMT memory Kirill A. Shutemov
2025-06-09 19:13 ` [PATCHv2 10/12] [NOT-FOR-UPSTREAM] x86/virt/tdx: Account PAMT memory and print it in /proc/meminfo Kirill A. Shutemov
2025-06-09 19:13 ` [PATCHv2 11/12] x86/virt/tdx: Enable Dynamic PAMT Kirill A. Shutemov
2025-06-09 19:13 ` [PATCHv2 12/12] Documentation/x86: Add documentation for TDX's " Kirill A. Shutemov
2025-06-25 13:25 ` [PATCHv2 00/12] TDX: Enable " Kirill A. Shutemov
2025-06-25 22:49 ` Edgecombe, Rick P
2025-06-27 13:05   ` kirill.shutemov
2025-08-08 23:18 ` Edgecombe, Rick P
2025-08-11  6:31   ` kas
2025-08-11 22:30     ` Edgecombe, Rick P
2025-08-12  2:02       ` Sean Christopherson
2025-08-12  2:31         ` Vishal Annapurve
2025-08-12  8:04           ` kas
2025-08-12 15:12             ` Edgecombe, Rick P
2025-08-12 16:15               ` Sean Christopherson
2025-08-12 18:39                 ` Edgecombe, Rick P
2025-08-12 22:00                   ` Vishal Annapurve
2025-08-12 23:34                     ` Edgecombe, Rick P
2025-08-13  0:18                       ` Vishal Annapurve
2025-08-13  0:51                         ` Edgecombe, Rick P
2025-08-12 18:44                 ` Vishal Annapurve
2025-08-13  8:09                 ` Kiryl Shutsemau
2025-08-13  7:49               ` Kiryl Shutsemau
2025-08-12  8:03         ` kas
2025-08-13 22:43         ` Edgecombe, Rick P
2025-08-13 23:31           ` Dave Hansen
2025-08-14  0:14             ` Edgecombe, Rick P
2025-08-14 10:55               ` Kiryl Shutsemau
2025-08-15  1:03                 ` Edgecombe, Rick P
2025-08-20 15:31                   ` Sean Christopherson
2025-08-20 16:35                     ` Edgecombe, Rick P

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250609191340.2051741-5-kirill.shutemov@linux.intel.com \
    --to=kirill.shutemov@linux.intel.com \
    --cc=bp@alien8.de \
    --cc=chao.gao@intel.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=isaku.yamahata@intel.com \
    --cc=kai.huang@intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-coco@lists.linux.dev \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=rick.p.edgecombe@intel.com \
    --cc=seanjc@google.com \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    --cc=yan.y.zhao@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).