linux-coco.lists.linux.dev archive mirror
 help / color / mirror / Atom feed
From: Dave Hansen <dave.hansen@intel.com>
To: Rick Edgecombe <rick.p.edgecombe@intel.com>,
	kas@kernel.org, bp@alien8.de, chao.gao@intel.com,
	dave.hansen@linux.intel.com, isaku.yamahata@intel.com,
	kai.huang@intel.com, kvm@vger.kernel.org,
	linux-coco@lists.linux.dev, linux-kernel@vger.kernel.org,
	mingo@redhat.com, pbonzini@redhat.com, seanjc@google.com,
	tglx@linutronix.de, x86@kernel.org, yan.y.zhao@intel.com,
	vannapurve@google.com
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Subject: Re: [PATCH v3 07/16] x86/virt/tdx: Add tdx_alloc/free_page() helpers
Date: Tue, 30 Sep 2025 08:25:40 -0700	[thread overview]
Message-ID: <355ad607-52ed-42cc-9a48-63aaa49f4c68@intel.com> (raw)
In-Reply-To: <20250918232224.2202592-8-rick.p.edgecombe@intel.com>

On 9/18/25 16:22, Rick Edgecombe wrote:
...
> +/*
> + * The TDX spec treats the registers like an array, as they are ordered
> + * in the struct. The array size is limited by the number or registers,
> + * so define the max size it could be for worst case allocations and sanity
> + * checking.
> + */
> +#define MAX_DPAMT_ARG_SIZE (sizeof(struct tdx_module_args) - \
> +			    offsetof(struct tdx_module_args, rdx))
> +
> +/*
> + * Treat struct the registers like an array that starts at RDX, per
> + * TDX spec. Do some sanitychecks, and return an indexable type.
> + */
> +static u64 *dpamt_args_array_ptr(struct tdx_module_args *args)
> +{
> +	WARN_ON_ONCE(tdx_dpamt_entry_pages() > MAX_DPAMT_ARG_SIZE);
> +
> +	/*
> +	 * FORTIFY_SOUCE could inline this and complain when callers copy
> +	 * across fields, which is exactly what this is supposed to be
> +	 * used for. Obfuscate it.
> +	 */
> +	return (u64 *)((u8 *)args + offsetof(struct tdx_module_args, rdx));
> +}

There are a lot of ways to to all of this jazz to alias an array over
the top of a bunch of named structure fields. My worry about this
approach is that it intentionally tries to hide the underlying type from
the compiler.

It could be done with a bunch of union/struct voodoo like 'struct page':

struct tdx_module_args {
	u64 rcx;
	union {
		struct {
			u64 rdx;
			u64 r8;
			u64 r9;
			...
		};
		u64 array[FOO];
	};
}

Or a separate structure:

struct tdx_module_array_args {
	u64 rcx;
	u64 array[FOO];
};

So that you could do something simpler:

u64 *dpamt_args_array_ptr(struct tdx_module_args *args)
{
	return ((struct tdx_module_array_args *)args)->array;
}

Along with one of these somewhere:

BUILD_BUG_ON(sizeof(struct tdx_module_array_args) !=
	     sizeof(struct tdx_module_array));

I personally find the offsetof() tricks to be harder to follow than
either of those.

> +static int alloc_pamt_array(u64 *pa_array)
> +{
> +	struct page *page;
> +
> +	for (int i = 0; i < tdx_dpamt_entry_pages(); i++) {
> +		page = alloc_page(GFP_KERNEL);
> +		if (!page)
> +			return -ENOMEM;
> +		pa_array[i] = page_to_phys(page);
> +	}
> +
> +	return 0;
> +}
> +
> +static void free_pamt_array(u64 *pa_array)
> +{
> +	for (int i = 0; i < tdx_dpamt_entry_pages(); i++) {
> +		if (!pa_array[i])
> +			break;
> +
> +		reset_tdx_pages(pa_array[i], PAGE_SIZE);

One nit: this reset is unnecessary in the error cases here where the
array never gets handed to the TDX module. Right?

> +		/*
> +		 * It might have come from 'prealloc', but this is an error
> +		 * path. Don't be fancy, just free them. TDH.PHYMEM.PAMT.ADD
> +		 * only modifies RAX, so the encoded array is still in place.
> +		 */
> +		__free_page(phys_to_page(pa_array[i]));
> +	}
> +}
> +
> +/*
> + * Add PAMT memory for the given HPA. Return's negative error code
> + * for kernel side error conditions (-ENOMEM) and 1 for TDX Module
> + * error. In the case of TDX module error, the return code is stored
> + * in tdx_err.
> + */
> +static u64 tdh_phymem_pamt_add(unsigned long hpa, u64 *pamt_pa_array)
> +{
> +	struct tdx_module_args args = {
> +		.rcx = hpa,
> +	};
> +	u64 *args_array = dpamt_args_array_ptr(&args);
> +
> +	WARN_ON_ONCE(!IS_ALIGNED(hpa & PAGE_MASK, PMD_SIZE));
> +
> +	/* Copy PAMT page PA's into the struct per the TDX ABI */
> +	memcpy(args_array, pamt_pa_array,
> +	       tdx_dpamt_entry_pages() * sizeof(*args_array));

This uses 'sizeof(*args_array)'.

> +	return seamcall(TDH_PHYMEM_PAMT_ADD, &args);
> +}
> +
> +/* Remove PAMT memory for the given HPA */
> +static u64 tdh_phymem_pamt_remove(unsigned long hpa, u64 *pamt_pa_array)
> +{
> +	struct tdx_module_args args = {
> +		.rcx = hpa,
> +	};
> +	u64 *args_array = dpamt_args_array_ptr(&args);
> +	u64 ret;
> +
> +	WARN_ON_ONCE(!IS_ALIGNED(hpa & PAGE_MASK, PMD_SIZE));
> +
> +	ret = seamcall_ret(TDH_PHYMEM_PAMT_REMOVE, &args);
> +	if (ret)
> +		return ret;
> +
> +	/* Copy PAMT page PA's out of the struct per the TDX ABI */
> +	memcpy(pamt_pa_array, args_array,
> +	       tdx_dpamt_entry_pages() * sizeof(u64));

While this one is sizeof(u64).

Could we make it consistent, please?


> +/* Serializes adding/removing PAMT memory */
> +static DEFINE_SPINLOCK(pamt_lock);
> +
> +/* Bump PAMT refcount for the given page and allocate PAMT memory if needed */
> +int tdx_pamt_get(struct page *page)
> +{
> +	unsigned long hpa = ALIGN_DOWN(page_to_phys(page), PMD_SIZE);
> +	u64 pamt_pa_array[MAX_DPAMT_ARG_SIZE];
> +	atomic_t *pamt_refcount;
> +	u64 tdx_status;
> +	int ret;
> +
> +	if (!tdx_supports_dynamic_pamt(&tdx_sysinfo))
> +		return 0;
> +
> +	ret = alloc_pamt_array(pamt_pa_array);
> +	if (ret)
> +		return ret;
> +
> +	pamt_refcount = tdx_find_pamt_refcount(hpa);
> +
> +	scoped_guard(spinlock, &pamt_lock) {
> +		if (atomic_read(pamt_refcount))
> +			goto out_free;
> +
> +		tdx_status = tdh_phymem_pamt_add(hpa | TDX_PS_2M, pamt_pa_array);
> +
> +		if (IS_TDX_SUCCESS(tdx_status)) {
> +			atomic_inc(pamt_refcount);
> +		} else {
> +			pr_err("TDH_PHYMEM_PAMT_ADD failed: %#llx\n", tdx_status);
> +			goto out_free;
> +		}

I'm feeling like the states here are under-commented.

	1. PAMT already allocated
	2. 'pamt_pa_array' consumed, bump the refcount
	3. TDH_PHYMEM_PAMT_ADD failed

#1 and #3 need to free the allocation.

Could we add comments to that effect, please?

> +	}

This might get easier to read if the pr_err() gets dumped in
tdh_phymem_pamt_add() instead.

> +	return ret;
> +out_free:
> +	free_pamt_array(pamt_pa_array);
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(tdx_pamt_get);
> +
> +/*
> + * Drop PAMT refcount for the given page and free PAMT memory if it is no
> + * longer needed.
> + */
> +void tdx_pamt_put(struct page *page)
> +{
> +	unsigned long hpa = ALIGN_DOWN(page_to_phys(page), PMD_SIZE);
> +	u64 pamt_pa_array[MAX_DPAMT_ARG_SIZE];
> +	atomic_t *pamt_refcount;
> +	u64 tdx_status;
> +
> +	if (!tdx_supports_dynamic_pamt(&tdx_sysinfo))
> +		return;
> +
> +	hpa = ALIGN_DOWN(hpa, PMD_SIZE);
> +
> +	pamt_refcount = tdx_find_pamt_refcount(hpa);
> +
> +	scoped_guard(spinlock, &pamt_lock) {
> +		if (!atomic_read(pamt_refcount))
> +			return;
> +
> +		tdx_status = tdh_phymem_pamt_remove(hpa | TDX_PS_2M, pamt_pa_array);
> +
> +		if (IS_TDX_SUCCESS(tdx_status)) {
> +			atomic_dec(pamt_refcount);
> +		} else {
> +			pr_err("TDH_PHYMEM_PAMT_REMOVE failed: %#llx\n", tdx_status);
> +			return;
> +		}
> +	}
> +
> +	free_pamt_array(pamt_pa_array);
> +}
> +EXPORT_SYMBOL_GPL(tdx_pamt_put);

It feels like there's some magic in terms of how the entire contents of
pamt_pa_array[] get zeroed so that this ends up being safe.

Could that get commented, please?

> +/* Allocate a page and make sure it is backed by PAMT memory */

This comment is giving the "what" but is weak on the "why". Could we add
this?

	This ensures that the page can be used as TDX private
	memory and obtain TDX protections.

> +struct page *tdx_alloc_page(void)
> +{
> +	struct page *page;
> +
> +	page = alloc_page(GFP_KERNEL);
> +	if (!page)
> +		return NULL;
> +
> +	if (tdx_pamt_get(page)) {
> +		__free_page(page);
> +		return NULL;
> +	}
> +
> +	return page;
> +}
> +EXPORT_SYMBOL_GPL(tdx_alloc_page);
> +
> +/* Free a page and release its PAMT memory */

Also:

	After this, the page is can no longer be protected by TDX.

> +void tdx_free_page(struct page *page)
> +{
> +	if (!page)
> +		return;
> +
> +	tdx_pamt_put(page);
> +	__free_page(page);
> +}
> +EXPORT_SYMBOL_GPL(tdx_free_page);
> diff --git a/arch/x86/virt/vmx/tdx/tdx.h b/arch/x86/virt/vmx/tdx/tdx.h
> index 82bb82be8567..46c4214b79fb 100644
> --- a/arch/x86/virt/vmx/tdx/tdx.h
> +++ b/arch/x86/virt/vmx/tdx/tdx.h
> @@ -46,6 +46,8 @@
>  #define TDH_PHYMEM_PAGE_WBINVD		41
>  #define TDH_VP_WR			43
>  #define TDH_SYS_CONFIG			45
> +#define TDH_PHYMEM_PAMT_ADD		58
> +#define TDH_PHYMEM_PAMT_REMOVE		59
>  
>  /*
>   * SEAMCALL leaf:


  parent reply	other threads:[~2025-09-30 15:25 UTC|newest]

Thread overview: 97+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-09-18 23:22 [PATCH v3 00/16] TDX: Enable Dynamic PAMT Rick Edgecombe
2025-09-18 23:22 ` [PATCH v3 01/16] x86/tdx: Move all TDX error defines into <asm/shared/tdx_errno.h> Rick Edgecombe
2025-09-19  1:29   ` Huang, Kai
2025-09-25 23:23     ` Edgecombe, Rick P
2025-09-25 23:32       ` Huang, Kai
2025-09-23  5:49   ` Binbin Wu
2025-09-25 23:09     ` Edgecombe, Rick P
2025-09-26  5:36       ` Binbin Wu
2025-09-26  4:52   ` Xiaoyao Li
2025-09-26 19:53     ` Edgecombe, Rick P
2025-09-18 23:22 ` [PATCH v3 02/16] x86/tdx: Add helpers to check return status codes Rick Edgecombe
2025-09-19  1:26   ` Huang, Kai
2025-09-25 23:27     ` Edgecombe, Rick P
2025-09-23  6:19   ` Binbin Wu
2025-09-25 23:24     ` Edgecombe, Rick P
2025-09-26  6:32   ` Xiaoyao Li
2025-09-26 21:27     ` Edgecombe, Rick P
2025-09-18 23:22 ` [PATCH v3 03/16] x86/virt/tdx: Simplify tdmr_get_pamt_sz() Rick Edgecombe
2025-09-19  0:50   ` Huang, Kai
2025-09-19 19:26     ` Edgecombe, Rick P
2025-09-29 11:44     ` Xiaoyao Li
2025-09-29 17:47       ` Edgecombe, Rick P
2025-09-18 23:22 ` [PATCH v3 04/16] x86/virt/tdx: Allocate page bitmap for Dynamic PAMT Rick Edgecombe
2025-09-23  7:15   ` Binbin Wu
2025-09-25 23:28     ` Edgecombe, Rick P
2025-09-26  8:41   ` Xiaoyao Li
2025-09-26 21:57     ` Edgecombe, Rick P
2025-09-26 22:06       ` Dave Hansen
2025-10-06 19:34       ` Edgecombe, Rick P
2025-09-18 23:22 ` [PATCH v3 05/16] x86/virt/tdx: Allocate reference counters for PAMT memory Rick Edgecombe
2025-09-23  7:45   ` Binbin Wu
2025-09-29 17:41     ` Edgecombe, Rick P
2025-09-29 18:08       ` Dave Hansen
2025-09-30  1:04         ` Edgecombe, Rick P
2025-09-18 23:22 ` [PATCH v3 06/16] x86/virt/tdx: Improve PAMT refcounters allocation for sparse memory Rick Edgecombe
2025-09-19  7:25   ` Huang, Kai
2025-09-23  9:38     ` Binbin Wu
2025-09-24  6:50       ` Huang, Kai
2025-09-24  8:57         ` Binbin Wu
2025-10-01  0:32           ` Edgecombe, Rick P
2025-10-01 10:40             ` Huang, Kai
2025-10-01 19:00               ` Edgecombe, Rick P
2025-10-01 20:49                 ` Huang, Kai
2025-10-15  1:35   ` Huang, Kai
2025-09-18 23:22 ` [PATCH v3 07/16] x86/virt/tdx: Add tdx_alloc/free_page() helpers Rick Edgecombe
2025-09-22 11:27   ` Huang, Kai
2025-09-26 22:41     ` Edgecombe, Rick P
2025-09-29  7:56   ` Yan Zhao
2025-09-29 17:19     ` Edgecombe, Rick P
2025-09-30 14:03   ` Xiaoyao Li
2025-09-30 17:38     ` Dave Hansen
2025-09-30 17:47     ` Edgecombe, Rick P
2025-09-30 15:25   ` Dave Hansen [this message]
2025-09-30 17:00     ` Edgecombe, Rick P
2025-09-18 23:22 ` [PATCH v3 08/16] x86/virt/tdx: Optimize " Rick Edgecombe
2025-09-19  9:39   ` Kiryl Shutsemau
2025-09-24  6:15   ` Binbin Wu
2025-09-18 23:22 ` [PATCH v3 09/16] KVM: TDX: Allocate PAMT memory for TD control structures Rick Edgecombe
2025-09-18 23:22 ` [PATCH v3 10/16] KVM: TDX: Allocate PAMT memory for vCPU " Rick Edgecombe
2025-09-18 23:22 ` [PATCH v3 11/16] KVM: TDX: Add x86 ops for external spt cache Rick Edgecombe
2025-09-19  9:44   ` Kiryl Shutsemau
2025-09-23  7:03   ` Yan Zhao
2025-09-26 22:10     ` Edgecombe, Rick P
2025-09-28  8:35       ` Yan Zhao
2025-09-24  7:58   ` Binbin Wu
2025-09-30  1:02   ` Yan Zhao
2025-09-30 17:54     ` Edgecombe, Rick P
2025-09-18 23:22 ` [PATCH v3 12/16] x86/virt/tdx: Add helpers to allow for pre-allocating pages Rick Edgecombe
2025-09-19  9:55   ` Kiryl Shutsemau
2025-10-01 19:48     ` Edgecombe, Rick P
2025-09-22 11:20   ` Huang, Kai
2025-09-26 23:47     ` Edgecombe, Rick P
2025-09-28 22:56       ` Huang, Kai
2025-09-29 12:10         ` Huang, Kai
2025-09-26  1:44   ` Yan Zhao
2025-09-26 22:05     ` Edgecombe, Rick P
2025-09-28  1:40       ` Yan Zhao
2025-09-26 15:19   ` Dave Hansen
2025-09-26 15:49     ` Edgecombe, Rick P
2025-09-18 23:22 ` [PATCH v3 13/16] KVM: TDX: Handle PAMT allocation in fault path Rick Edgecombe
2025-09-30  1:09   ` Yan Zhao
2025-09-30 18:11     ` Edgecombe, Rick P
2025-09-18 23:22 ` [PATCH v3 14/16] KVM: TDX: Reclaim PAMT memory Rick Edgecombe
2025-09-18 23:22 ` [PATCH v3 15/16] x86/virt/tdx: Enable Dynamic PAMT Rick Edgecombe
2025-09-18 23:22 ` [PATCH v3 16/16] Documentation/x86: Add documentation for TDX's " Rick Edgecombe
2025-09-26  2:28 ` [PATCH v3 00/16] TDX: Enable " Yan Zhao
2025-09-26 14:09   ` Dave Hansen
2025-09-26 16:02     ` Edgecombe, Rick P
2025-09-26 16:11       ` Dave Hansen
2025-09-26 19:00         ` Edgecombe, Rick P
2025-09-26 19:03           ` Dave Hansen
2025-09-26 19:52             ` Edgecombe, Rick P
2025-09-28  1:34           ` Yan Zhao
2025-09-29 11:17             ` Kiryl Shutsemau
2025-09-29 16:22               ` Dave Hansen
2025-09-29 16:58                 ` Edgecombe, Rick P
2025-09-30 18:29                   ` Edgecombe, Rick P

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=355ad607-52ed-42cc-9a48-63aaa49f4c68@intel.com \
    --to=dave.hansen@intel.com \
    --cc=bp@alien8.de \
    --cc=chao.gao@intel.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=isaku.yamahata@intel.com \
    --cc=kai.huang@intel.com \
    --cc=kas@kernel.org \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-coco@lists.linux.dev \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=rick.p.edgecombe@intel.com \
    --cc=seanjc@google.com \
    --cc=tglx@linutronix.de \
    --cc=vannapurve@google.com \
    --cc=x86@kernel.org \
    --cc=yan.y.zhao@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).