Linux Confidential Computing Development
 help / color / mirror / Atom feed
* Re: [PATCH v7 5/6] firmware: smccc: arm-cca-guest: Bind the TSM provider to an SMCCC device
From: Suzuki K Poulose @ 2026-06-11 17:06 UTC (permalink / raw)
  To: Aneesh Kumar K.V (Arm), linux-coco, linux-arm-kernel,
	linux-kernel
  Cc: Catalin Marinas, Greg KH, Jeremy Linton, Jonathan Cameron,
	Lorenzo Pieralisi, Mark Rutland, Sudeep Holla, Will Deacon,
	Steven Price, Andre Przywara
In-Reply-To: <20260611130429.295516-6-aneesh.kumar@kernel.org>

On 11/06/2026 14:04, Aneesh Kumar K.V (Arm) wrote:
> The Arm CCA guest TSM provider currently binds through the arm-cca-dev
> platform device. Like arm-smccc-trng, this device is not an independent
> platform resource; it is a software representation of the RSI firmware
> service discovered through SMCCC.
> 
> Move RSI discovery into the SMCCC firmware driver. When the SMCCC conduit
> is SMC and if RSI ABI version call is supported, create an arm-rsi-dev
> SMCCC device. Convert the Arm CCA guest TSM provider to an SMCCC driver so
> it binds to that discovered RSI service and keeps module autoloading
> through the SMCCC device id table.
> 
> Keep the old arm-cca-dev platform-device registration for now. Userspace
> has used that device as a Realm-guest indicator, so removing it is left to
> a follow-up patch that adds a replacement sysfs ABI.
> 
> Signed-off-by: Aneesh Kumar K.V (Arm) <aneesh.kumar@kernel.org>
> ---
>   arch/arm64/include/asm/rsi.h              |  2 -
>   arch/arm64/kernel/rsi.c                   |  2 +-
>   drivers/firmware/smccc/smccc.c            |  7 +++
>   drivers/virt/coco/arm-cca-guest/Kconfig   |  1 +
>   drivers/virt/coco/arm-cca-guest/arm-cca.c | 56 +++++++++++------------
>   include/linux/arm-smccc-rsi.h             |  2 +
>   6 files changed, 39 insertions(+), 31 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/rsi.h b/arch/arm64/include/asm/rsi.h
> index 88b50d660e85..5f9c8623183d 100644
> --- a/arch/arm64/include/asm/rsi.h
> +++ b/arch/arm64/include/asm/rsi.h
> @@ -10,8 +10,6 @@
>   #include <linux/jump_label.h>
>   #include <asm/rsi_cmds.h>
>   
> -#define RSI_PDEV_NAME "arm-cca-dev"
> -
>   DECLARE_STATIC_KEY_FALSE(rsi_present);
>   
>   void __init arm64_rsi_init(void);
> diff --git a/arch/arm64/kernel/rsi.c b/arch/arm64/kernel/rsi.c
> index 92160f2e57ff..da440f71bb64 100644
> --- a/arch/arm64/kernel/rsi.c
> +++ b/arch/arm64/kernel/rsi.c
> @@ -161,7 +161,7 @@ void __init arm64_rsi_init(void)
>   }
>   
>   static struct platform_device rsi_dev = {
> -	.name = RSI_PDEV_NAME,
> +	.name = "arm-cca-dev",
>   	.id = PLATFORM_DEVID_NONE
>   };
>   
> diff --git a/drivers/firmware/smccc/smccc.c b/drivers/firmware/smccc/smccc.c
> index a47696f3a5de..7127af3dbe5c 100644
> --- a/drivers/firmware/smccc/smccc.c
> +++ b/drivers/firmware/smccc/smccc.c
> @@ -10,6 +10,7 @@
>   #include <linux/arm-smccc.h>
>   #include <linux/kernel.h>
>   #include <linux/arm-smccc-bus.h>
> +#include <linux/arm-smccc-rsi.h>
>   
>   #include <asm/archrandom.h>
>   
> @@ -94,6 +95,12 @@ static const struct smccc_device_info smccc_devices[] __initconst = {
>   		.requires_smc   = false,
>   		.device_name    = "arm-smccc-trng",
>   	},
> +
> +	{
> +		.func_id        = SMC_RSI_ABI_VERSION,
> +		.requires_smc   = true,
> +		.device_name    = RSI_DEV_NAME,
> +	},
>   };
>   
>   static bool __init smccc_probe_smccc_device(const struct smccc_device_info *smccc_dev)
> diff --git a/drivers/virt/coco/arm-cca-guest/Kconfig b/drivers/virt/coco/arm-cca-guest/Kconfig
> index 3f0f013f03f1..ad7538750c5a 100644
> --- a/drivers/virt/coco/arm-cca-guest/Kconfig
> +++ b/drivers/virt/coco/arm-cca-guest/Kconfig
> @@ -1,6 +1,7 @@
>   config ARM_CCA_GUEST
>   	tristate "Arm CCA Guest driver"
>   	depends on ARM64
> +	depends on HAVE_ARM_SMCCC_DISCOVERY
>   	select TSM_REPORTS
>   	help
>   	  The driver provides userspace interface to request and
> diff --git a/drivers/virt/coco/arm-cca-guest/arm-cca.c b/drivers/virt/coco/arm-cca-guest/arm-cca.c
> index 0bbd1fa53ee4..4f9289ccf498 100644
> --- a/drivers/virt/coco/arm-cca-guest/arm-cca.c
> +++ b/drivers/virt/coco/arm-cca-guest/arm-cca.c
> @@ -4,6 +4,7 @@
>    */
>   
>   #include <linux/arm-smccc.h>
> +#include <linux/arm-smccc-bus.h>
>   #include <linux/cc_platform.h>
>   #include <linux/kernel.h>
>   #include <linux/mod_devicetable.h>
> @@ -189,16 +190,12 @@ static const struct tsm_report_ops arm_cca_tsm_report_ops = {
>   	.report_new = arm_cca_report_new,
>   };
>   
> -/**
> - * arm_cca_guest_init - Register with the Trusted Security Module (TSM)
> - * interface.
> - *
> - * Return:
> - * * %0        - Registered successfully with the TSM interface.
> - * * %-ENODEV  - The execution context is not an Arm Realm.
> - * * %-EBUSY   - Already registered.
> - */
> -static int __init arm_cca_guest_init(void)
> +static void unregister_cca_tsm_report(void *data)
> +{
> +	tsm_report_unregister(&arm_cca_tsm_report_ops);
> +}
> +
> +static int cca_tsm_probe(struct arm_smccc_device *sdev)
>   {
>   	int ret;
>   
> @@ -206,30 +203,33 @@ static int __init arm_cca_guest_init(void)
>   		return -ENODEV;
>   
>   	ret = tsm_report_register(&arm_cca_tsm_report_ops, NULL);
> -	if (ret < 0)
> -		pr_err("Error %d registering with TSM\n", ret);
> +	if (ret < 0) {
> +		dev_err_probe(&sdev->dev, ret, "Error registering with TSM\n");
> +		return ret;
> +	}
>   
> -	return ret;
> -}
> -module_init(arm_cca_guest_init);
> +	ret = devm_add_action_or_reset(&sdev->dev, unregister_cca_tsm_report,
> +				       NULL);
> +	if (ret < 0) {
> +		dev_err_probe(&sdev->dev, ret, "Error registering devm action\n");
> +		return ret;
> +	}
>   
> -/**
> - * arm_cca_guest_exit - unregister with the Trusted Security Module (TSM)
> - * interface.
> - */
> -static void __exit arm_cca_guest_exit(void)
> -{
> -	tsm_report_unregister(&arm_cca_tsm_report_ops);
> +	return 0;
>   }
> -module_exit(arm_cca_guest_exit);
>   
> -/* modalias, so userspace can autoload this module when RSI is available */
> -static const struct platform_device_id arm_cca_match[] __maybe_unused = {
> -	{ RSI_PDEV_NAME, 0},
> -	{ }
> +static const struct arm_smccc_device_id cca_tsm_id_table[] = {
> +	{ .name = RSI_DEV_NAME },
> +	{}
>   };
> +MODULE_DEVICE_TABLE(arm_smccc, cca_tsm_id_table);
>   
> -MODULE_DEVICE_TABLE(platform, arm_cca_match);
> +static struct arm_smccc_driver cca_tsm_driver = {
> +	.name = KBUILD_MODNAME,
> +	.probe = cca_tsm_probe,
> +	.id_table = cca_tsm_id_table,
> +};
> +module_arm_smccc_driver(cca_tsm_driver);
>   MODULE_AUTHOR("Sami Mujawar <sami.mujawar@arm.com>");
>   MODULE_DESCRIPTION("Arm CCA Guest TSM Driver");
>   MODULE_LICENSE("GPL");
> diff --git a/include/linux/arm-smccc-rsi.h b/include/linux/arm-smccc-rsi.h
> index fddb77986f70..ae663aa8fd7f 100644
> --- a/include/linux/arm-smccc-rsi.h
> +++ b/include/linux/arm-smccc-rsi.h
> @@ -8,6 +8,8 @@
>   
>   #include <linux/arm-smccc.h>
>   
> +#define RSI_DEV_NAME "arm-rsi-dev"

This shouldn't be here? It is not part of the SMCCC RSI standard, but
a Linux thing. Maybe in drivers/firmware/../rsi.h?

Rest looks fine.

Suzuki


> +
>   /*
>    * This file describes the Realm Services Interface (RSI) Application Binary
>    * Interface (ABI) for SMC calls made from within the Realm to the RMM and


^ permalink raw reply

* Re: [RFC PATCH 06/15] x86/virt/tdx: Initialize Quoting extension during bringup
From: Adrian Hunter @ 2026-06-11 16:22 UTC (permalink / raw)
  To: Xu Yilun, kas, djbw, rick.p.edgecombe, x86, peter.fang
  Cc: linux-coco, linux-kernel, kvm, sohil.mehta, yilun.xu, baolu.lu,
	zhenzhong.duan, xiaoyao.li
In-Reply-To: <20260522034128.3144354-7-yilun.xu@linux.intel.com>

On 22/05/2026 06:41, Xu Yilun wrote:
> From: Peter Fang <peter.fang@intel.com>
> 
> Initialize the Quoting extension and fetch its metadata during TDX
> bringup.
> 
> Because Quoting is an optional TDX feature, do not let its
> initialization failures cause TDX bringup to fail.

Is there a reason Linux needs to support TDX with failed Quote
extension initialization?

> +static void tdx_quote_init(void)
> +{
> +	struct tdx_module_args args = {};
> +	u64 r;
> +
> +	do {
> +		r = seamcall(TDH_QUOTE_INIT, &args);
> +	} while (r == TDX_INTERRUPTED_RESUMABLE);
> +
> +	if (r)

Elsewhere it tends to be:

	if (r != TDX_SUCCESS)

> +		return;
> +
> +	/* Quoting metadata is valid only after initialization */
> +	get_tdx_sys_info_quote(&tdx_sysinfo.quote);

^ permalink raw reply

* Re: [RFC PATCH 05/15] x86/virt/tdx: Move tdx_tdr_pa() up in the file
From: Adrian Hunter @ 2026-06-11 16:21 UTC (permalink / raw)
  To: Xu Yilun, kas, djbw, rick.p.edgecombe, x86, peter.fang
  Cc: linux-coco, linux-kernel, kvm, sohil.mehta, yilun.xu, baolu.lu,
	zhenzhong.duan, xiaoyao.li
In-Reply-To: <20260522034128.3144354-6-yilun.xu@linux.intel.com>

On 22/05/2026 06:41, Xu Yilun wrote:
> From: Peter Fang <peter.fang@intel.com>
> 
> Move the tdx_tdr_pa() in preparation for upcoming changes to use them

them -> it


^ permalink raw reply

* Re: [PATCH v7 3/6] firmware: smccc: Move RSI definitions to include/linux
From: Suzuki K Poulose @ 2026-06-11 16:04 UTC (permalink / raw)
  To: Aneesh Kumar K.V (Arm), linux-coco, linux-arm-kernel,
	linux-kernel
  Cc: Catalin Marinas, Greg KH, Jeremy Linton, Jonathan Cameron,
	Lorenzo Pieralisi, Mark Rutland, Sudeep Holla, Will Deacon,
	Steven Price, Andre Przywara
In-Reply-To: <20260611130429.295516-4-aneesh.kumar@kernel.org>

On 11/06/2026 14:04, Aneesh Kumar K.V (Arm) wrote:
> The RSI SMCCC function IDs describe a firmware ABI and are not arm64
> architecture specific definitions. Follow-up changes need to use them from
> non-arch code, including drivers/firmware/smccc and the Arm CCA guest
> driver.
> 
> Move the RSI SMCCC definitions from arch/arm64/include/asm/ to
> include/linux/ so they can be shared with the driver code. This also
> keeps the firmware interface outside architecture code, as requested [1].

Could we please also mention that the "wrappers" used only by drivers
are moved accordingly?

> 
> [1] https://lore.kernel.org/all/agsNO9cc7H-b0H8L@willie-the-truck
> 
> Signed-off-by: Aneesh Kumar K.V (Arm) <aneesh.kumar@kernel.org>
> ---
>   arch/arm64/include/asm/rsi_cmds.h             | 74 +---------------
>   .../virt/coco/arm-cca-guest/arm-cca-guest.c   |  2 +
>   drivers/virt/coco/arm-cca-guest/rsi.h         | 84 +++++++++++++++++++
>   .../linux/arm-smccc-rsi.h                     |  6 +-
>   4 files changed, 90 insertions(+), 76 deletions(-)
>   create mode 100644 drivers/virt/coco/arm-cca-guest/rsi.h
>   rename arch/arm64/include/asm/rsi_smc.h => include/linux/arm-smccc-rsi.h (98%)
> 
> diff --git a/arch/arm64/include/asm/rsi_cmds.h b/arch/arm64/include/asm/rsi_cmds.h
> index 2c8763876dfb..633123a4e5d5 100644
> --- a/arch/arm64/include/asm/rsi_cmds.h
> +++ b/arch/arm64/include/asm/rsi_cmds.h
> @@ -8,10 +8,9 @@
>   
>   #include <linux/arm-smccc.h>
>   #include <linux/string.h>
> +#include <linux/arm-smccc-rsi.h>

super minor nit: Please keep them in the alphabetical order.

With that:

Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>

Suzuki


>   #include <asm/memory.h>
>   
> -#include <asm/rsi_smc.h>
> -
>   #define RSI_GRANULE_SHIFT		12
>   #define RSI_GRANULE_SIZE		(_AC(1, UL) << RSI_GRANULE_SHIFT)
>   
> @@ -88,75 +87,4 @@ static inline long rsi_set_addr_range_state(phys_addr_t start,
>   	return res.a0;
>   }
>   
> -/**
> - * rsi_attestation_token_init - Initialise the operation to retrieve an
> - * attestation token.
> - *
> - * @challenge:	The challenge data to be used in the attestation token
> - *		generation.
> - * @size:	Size of the challenge data in bytes.
> - *
> - * Initialises the attestation token generation and returns an upper bound
> - * on the attestation token size that can be used to allocate an adequate
> - * buffer. The caller is expected to subsequently call
> - * rsi_attestation_token_continue() to retrieve the attestation token data on
> - * the same CPU.
> - *
> - * Returns:
> - *  On success, returns the upper limit of the attestation report size.
> - *  Otherwise, -EINVAL
> - */
> -static inline long
> -rsi_attestation_token_init(const u8 *challenge, unsigned long size)
> -{
> -	struct arm_smccc_1_2_regs regs = { 0 };
> -
> -	/* The challenge must be at least 32bytes and at most 64bytes */
> -	if (!challenge || size < 32 || size > 64)
> -		return -EINVAL;
> -
> -	regs.a0 = SMC_RSI_ATTESTATION_TOKEN_INIT;
> -	memcpy(&regs.a1, challenge, size);
> -	arm_smccc_1_2_smc(&regs, &regs);
> -
> -	if (regs.a0 == RSI_SUCCESS)
> -		return regs.a1;
> -
> -	return -EINVAL;
> -}
> -
> -/**
> - * rsi_attestation_token_continue - Continue the operation to retrieve an
> - * attestation token.
> - *
> - * @granule: {I}PA of the Granule to which the token will be written.
> - * @offset:  Offset within Granule to start of buffer in bytes.
> - * @size:    The size of the buffer.
> - * @len:     The number of bytes written to the buffer.
> - *
> - * Retrieves up to a RSI_GRANULE_SIZE worth of token data per call. The caller
> - * is expected to call rsi_attestation_token_init() before calling this
> - * function to retrieve the attestation token.
> - *
> - * Return:
> - * * %RSI_SUCCESS     - Attestation token retrieved successfully.
> - * * %RSI_INCOMPLETE  - Token generation is not complete.
> - * * %RSI_ERROR_INPUT - A parameter was not valid.
> - * * %RSI_ERROR_STATE - Attestation not in progress.
> - */
> -static inline unsigned long rsi_attestation_token_continue(phys_addr_t granule,
> -							   unsigned long offset,
> -							   unsigned long size,
> -							   unsigned long *len)
> -{
> -	struct arm_smccc_res res;
> -
> -	arm_smccc_1_1_invoke(SMC_RSI_ATTESTATION_TOKEN_CONTINUE,
> -			     granule, offset, size, 0, &res);
> -
> -	if (len)
> -		*len = res.a1;
> -	return res.a0;
> -}
> -
>   #endif /* __ASM_RSI_CMDS_H */
> diff --git a/drivers/virt/coco/arm-cca-guest/arm-cca-guest.c b/drivers/virt/coco/arm-cca-guest/arm-cca-guest.c
> index 66d00b6ceb78..8b6854e7a188 100644
> --- a/drivers/virt/coco/arm-cca-guest/arm-cca-guest.c
> +++ b/drivers/virt/coco/arm-cca-guest/arm-cca-guest.c
> @@ -14,6 +14,8 @@
>   
>   #include <asm/rsi.h>
>   
> +#include "rsi.h"
> +
>   /**
>    * struct arm_cca_token_info - a descriptor for the token buffer.
>    * @challenge:		Pointer to the challenge data
> diff --git a/drivers/virt/coco/arm-cca-guest/rsi.h b/drivers/virt/coco/arm-cca-guest/rsi.h
> new file mode 100644
> index 000000000000..f7303f4bce17
> --- /dev/null
> +++ b/drivers/virt/coco/arm-cca-guest/rsi.h
> @@ -0,0 +1,84 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Copyright (C) 2026 ARM Ltd.
> + */
> +
> +#ifndef _VIRT_COCO_RSI_H_
> +#define _VIRT_COCO_RSI_H_
> +
> +#include <linux/arm-smccc-rsi.h>
> +
> +/**
> + * rsi_attestation_token_init - Initialise the operation to retrieve an
> + * attestation token.
> + *
> + * @challenge:	The challenge data to be used in the attestation token
> + *		generation.
> + * @size:	Size of the challenge data in bytes.
> + *
> + * Initialises the attestation token generation and returns an upper bound
> + * on the attestation token size that can be used to allocate an adequate
> + * buffer. The caller is expected to subsequently call
> + * rsi_attestation_token_continue() to retrieve the attestation token data on
> + * the same CPU.
> + *
> + * Returns:
> + *  On success, returns the upper limit of the attestation report size.
> + *  Otherwise, -EINVAL
> + */
> +static inline long
> +rsi_attestation_token_init(const u8 *challenge, unsigned long size)
> +{
> +	struct arm_smccc_1_2_regs regs = { 0 };
> +
> +	/* The challenge must be at least 32bytes and at most 64bytes */
> +	if (!challenge || size < 32 || size > 64)
> +		return -EINVAL;
> +
> +	regs.a0 = SMC_RSI_ATTESTATION_TOKEN_INIT;
> +	memcpy(&regs.a1, challenge, size);
> +	arm_smccc_1_2_smc(&regs, &regs);
> +
> +	if (regs.a0 == RSI_SUCCESS)
> +		return regs.a1;
> +
> +	return -EINVAL;
> +}
> +
> +/**
> + * rsi_attestation_token_continue - Continue the operation to retrieve an
> + * attestation token.
> + *
> + * @granule: {I}PA of the Granule to which the token will be written.
> + * @offset:  Offset within Granule to start of buffer in bytes.
> + * @size:    The size of the buffer.
> + * @len:     The number of bytes written to the buffer.
> + *
> + * Retrieves up to a RSI_GRANULE_SIZE worth of token data per call. The caller
> + * is expected to call rsi_attestation_token_init() before calling this
> + * function to retrieve the attestation token.
> + *
> + * Return:
> + * * %RSI_SUCCESS     - Attestation token retrieved successfully.
> + * * %RSI_INCOMPLETE  - Token generation is not complete.
> + * * %RSI_ERROR_INPUT - A parameter was not valid.
> + * * %RSI_ERROR_STATE - Attestation not in progress.
> + */
> +static inline unsigned long rsi_attestation_token_continue(phys_addr_t granule,
> +							   unsigned long offset,
> +							   unsigned long size,
> +							   unsigned long *len)
> +{
> +	struct arm_smccc_res res;
> +
> +	arm_smccc_1_1_invoke(SMC_RSI_ATTESTATION_TOKEN_CONTINUE,
> +			     granule, offset, size, 0, &res);
> +
> +	if (len)
> +		*len = res.a1;
> +	return res.a0;
> +}
> +
> +
> +
> +#endif
> diff --git a/arch/arm64/include/asm/rsi_smc.h b/include/linux/arm-smccc-rsi.h
> similarity index 98%
> rename from arch/arm64/include/asm/rsi_smc.h
> rename to include/linux/arm-smccc-rsi.h
> index e19253f96c94..fddb77986f70 100644
> --- a/arch/arm64/include/asm/rsi_smc.h
> +++ b/include/linux/arm-smccc-rsi.h
> @@ -3,8 +3,8 @@
>    * Copyright (C) 2023 ARM Ltd.
>    */
>   
> -#ifndef __ASM_RSI_SMC_H_
> -#define __ASM_RSI_SMC_H_
> +#ifndef __LINUX_ARM_SMCCC_RSI_H_
> +#define __LINUX_ARM_SMCCC_RSI_H_
>   
>   #include <linux/arm-smccc.h>
>   
> @@ -190,4 +190,4 @@ struct realm_config {
>    */
>   #define SMC_RSI_HOST_CALL			SMC_RSI_FID(0x199)
>   
> -#endif /* __ASM_RSI_SMC_H_ */
> +#endif /* __LINUX_ARM_SMCCC_RSI_H_ */


^ permalink raw reply

* Re: [PATCH v7 00/42] guest_memfd: In-place conversion support
From: Sean Christopherson @ 2026-06-11 15:46 UTC (permalink / raw)
  To: Ackerley Tng
  Cc: Ackerley Tng via B4 Relay, aik, andrew.jones, binbin.wu, brauner,
	chao.p.peng, david, ira.weiny, jmattson, jthoughton, michael.roth,
	oupton, pankaj.gupta, qperret, rick.p.edgecombe, rientjes,
	shivankg, steven.price, tabba, willy, wyihan, yan.y.zhao,
	forkloop, pratyush, suzuki.poulose, aneesh.kumar, liam,
	Paolo Bonzini, Thomas Gleixner, Ingo Molnar, Borislav Petkov,
	Dave Hansen, x86, H. Peter Anvin, Steven Rostedt,
	Masami Hiramatsu, Mathieu Desnoyers, Jonathan Corbet, Shuah Khan,
	Shuah Khan, Vishal Annapurve, Andrew Morton, Chris Li,
	Kairui Song, Kemeng Shi, Nhat Pham, Baoquan He, Barry Song,
	Axel Rasmussen, Yuanchu Xie, Wei Xu, Youngjun Park, Qi Zheng,
	Shakeel Butt, Kiryl Shutsemau, Jason Gunthorpe, Vlastimil Babka,
	kvm, linux-kernel, linux-trace-kernel, linux-doc, linux-kselftest,
	linux-mm, linux-coco
In-Reply-To: <CAEvNRgF31BzyFyVUa7tDJ=qJ-8ws2kxfNjLxmV=OxKSqhaOiPw@mail.gmail.com>

On Wed, Jun 10, 2026, Ackerley Tng wrote:
> Sean Christopherson <seanjc@google.com> writes:
> 
> > On Thu, Jun 04, 2026, Ackerley Tng wrote:
> >> Sean Christopherson <seanjc@google.com> writes:
> >> >> + KVM: selftests: Test conversion with elevated page refcount
> >> >>     + Askar pointed out that soon vmsplice may not pin pages. Should I
> >> >>       pin pages through CONFIG_GUP_TEST like in [2]? I prefer not to
> >> >>       take a dependency on CONFIG_GUP_TEST.
> >> >
> >> > I'm not exactly excited about taking a dependency on CONFIG_GUP_TEST either, but
> >> > it probably is the least awful choice.  E.g. KVM also pins pages in certain flows,
> >> > but we're _also_ actively working to remove the need to pin.
> >> >
> >> > Hmm, maybe IORING_REGISTER_PBUF_RING?  AFAICT, it's almost literally a "pin user
> >> > memory" syscall.
> >> >
> >>
> >> Hmm that takes a dependency on io_uring, which isn't always compiled
> >> in. Between CONFIG_IO_URING and CONFIG_GUP_TEST, I'd rather
> >> CONFIG_GUP_TEST.
> >
> > Or try both?  If it's not a ridiculous amount of work.
> 
> CONFIG_GUP_TEST was tried in [1]
> 
> [1] https://lore.kernel.org/all/baa8838f623102931e755cf34c86314b305af49c.1747264138.git.ackerleytng@google.com/
> 
> It looks like this
> 
>   static void pin_pages(void *vaddr, uint64_t size)
>   {
>   	const struct pin_longterm_test args = {
>   		.addr = (uint64_t)vaddr,
>   		.size = size,
>   		.flags = PIN_LONGTERM_TEST_FLAG_USE_WRITE,
>   	};
> 
>   	gup_test_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
>   	TEST_REQUIRE(gup_test_fd > 0);

Use __open_path_or_exit().  I also think it makes sense to make these available
to all KVM selftests, there are probably other testcases that could utilize page
pinning.

>   	TEST_ASSERT_EQ(ioctl(gup_test_fd, PIN_LONGTERM_TEST_START, &args), 0);
>   }
> 
>   static void unpin_pages(void)
>   {
>   	TEST_ASSERT_EQ(ioctl(gup_test_fd, PIN_LONGTERM_TEST_STOP), 0);
>   }
> 
> So in the test I'll call pin_pages(), then try to convert, see that it
> fails with EAGAIN and reports the expected error_offset, then I call
> unpin_pages(), then I convert again and expect success.
> 
> Are you uncomfortable with the CONFIG_GUP_TEST interface?

No, my concern is/was the potential for leaking pages if the test fails/crashes,
but it looks like gup_test_release() ensures all pins are dropped when the file is
released, so that should be a non-issue.

> What would you like me to try with CONFIG_IO_URING? I'm thinking that the
> main difference between the two is just down to which non-default CONFIG
> option we want to take for guest_memfd tests.

^ permalink raw reply

* [PATCH RFC 3/3] KVM: selftests: exercise guest_memfd folio migration
From: Shivank Garg @ 2026-06-11 13:05 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle), Jan Kara, Andrew Morton, Vlastimil Babka,
	Suren Baghdasaryan, Michal Hocko, Brendan Jackman,
	Johannes Weiner, Zi Yan, David Hildenbrand, Matthew Brost,
	Joshua Hahn, Rakie Kim, Byungchul Park, Gregory Price, Ying Huang,
	Alistair Popple, Paolo Bonzini, Shuah Khan, Chao Peng,
	Nikunj A Dadhania, Ira Weiny, Michael Roth, Pankaj Gupta,
	Ackerley Tng, Fuad Tabba, Sean Christopherson, Vishal Annapurve,
	Nikita Kalyazin, Patrick Roy, Pratik Sampat, Ashish Kalra
  Cc: linux-fsdevel, linux-coco, linux-mm, linux-kernel, kvm,
	linux-kselftest, Shivank Garg
In-Reply-To: <20260611-shivank-gmem-migrate-v1-0-2d266bfc6f95@amd.com>

Add a migration test to guest_memfd_test, run for the
MMAP | INIT_SHARED configuration on systems with at least two NUMA
nodes (skipped otherwise).

Migrate every folio from node 0 to node 1 with move_pages(2) and
check both the resulting node and the data. Migrate them back and
re-check the data.

Signed-off-by: Shivank Garg <shivankg@amd.com>
---
 tools/testing/selftests/kvm/guest_memfd_test.c | 77 ++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)

diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c
index 832ef4dfb99faa4411af847d21eb426c34342434..04931d3add46cb117fe5b093ed48f838cb124542 100644
--- a/tools/testing/selftests/kvm/guest_memfd_test.c
+++ b/tools/testing/selftests/kvm/guest_memfd_test.c
@@ -76,6 +76,82 @@ static void test_mmap_supported(int fd, size_t total_size)
 	kvm_munmap(mem, total_size);
 }
 
+/*
+ * Each page is filled with a distinct byte (its index). Check every byte to
+ * verify that the data is intact after migration.
+ */
+static void verify_page(const char *page, int page_idx, size_t size,
+			const char *when)
+{
+	char expected = (char)(page_idx & 0xff);
+	size_t off;
+
+	for (off = 0; off < size; off++)
+		TEST_ASSERT(page[off] == expected,
+			    "Page %d corrupted at offset %zu %s", page_idx, off, when);
+}
+
+static void test_migrate_folio(int fd, size_t total_size)
+{
+	const unsigned long nodemask_0 = 1; /* nid: 0 */
+	unsigned long maxnode = BITS_PER_TYPE(nodemask_0);
+	int page_count = total_size / page_size;
+	void **addr;
+	int *status, *nodes;
+	char *mem;
+	int i;
+
+	if (!is_multi_numa_node_system())
+		return;
+
+	mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
+
+	addr = calloc(page_count, sizeof(*addr));
+	status = calloc(page_count, sizeof(*status));
+	nodes = calloc(page_count, sizeof(*nodes));
+	TEST_ASSERT(addr && status && nodes, "Failed to allocate page arrays");
+
+	/* Allocate all folios on node 0 and fill each with a known pattern. */
+	kvm_mbind(mem, total_size, MPOL_BIND, &nodemask_0, maxnode, 0);
+	for (i = 0; i < page_count; i++) {
+		memset(mem + i * page_size, (char)(i & 0xff), page_size);
+		addr[i] = mem + i * page_size;
+	}
+
+	kvm_move_pages(0, page_count, addr, NULL, status, 0);
+	for (i = 0; i < page_count; i++)
+		TEST_ASSERT(status[i] == 0, "Page %d should be on node 0", i);
+
+	/* Migrate node 0 -> 1, then check both the location and the data. */
+	for (i = 0; i < page_count; i++)
+		nodes[i] = 1;
+	kvm_move_pages(0, page_count, addr, nodes, status, MPOL_MF_MOVE);
+
+	kvm_move_pages(0, page_count, addr, NULL, status, 0);
+	for (i = 0; i < page_count; i++)
+		TEST_ASSERT(status[i] == 1,
+			    "Page %d should be on node 1 after migration", i);
+	for (i = 0; i < page_count; i++)
+		verify_page(mem + i * page_size, i, page_size, "after migration");
+
+	/* Migrate back node 1 -> 0, then re-check the location and the data. */
+	for (i = 0; i < page_count; i++)
+		nodes[i] = 0;
+	kvm_move_pages(0, page_count, addr, nodes, status, MPOL_MF_MOVE);
+
+	kvm_move_pages(0, page_count, addr, NULL, status, 0);
+	for (i = 0; i < page_count; i++)
+		TEST_ASSERT(status[i] == 0,
+			    "Page %d should be on node 0 after round-trip", i);
+	for (i = 0; i < page_count; i++)
+		verify_page(mem + i * page_size, i, page_size, "after round-trip");
+
+	free(addr);
+	free(status);
+	free(nodes);
+	kvm_munmap(mem, total_size);
+}
+
 static void test_mbind(int fd, size_t total_size)
 {
 	const unsigned long nodemask_0 = 1; /* nid: 0 */
@@ -434,6 +510,7 @@ static void __test_guest_memfd(struct kvm_vm *vm, u64 flags)
 			gmem_test(fault_overflow, vm, flags);
 			gmem_test(numa_allocation, vm, flags);
 			__gmem_test(collapse, vm, flags, pmd_size);
+			gmem_test(migrate_folio, vm, flags);
 		} else {
 			gmem_test(fault_private, vm, flags);
 		}

-- 
2.43.0


^ permalink raw reply related

* [PATCH RFC 2/3] KVM: guest_memfd: support folio migration for non-confidential VMs
From: Shivank Garg @ 2026-06-11 13:05 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle), Jan Kara, Andrew Morton, Vlastimil Babka,
	Suren Baghdasaryan, Michal Hocko, Brendan Jackman,
	Johannes Weiner, Zi Yan, David Hildenbrand, Matthew Brost,
	Joshua Hahn, Rakie Kim, Byungchul Park, Gregory Price, Ying Huang,
	Alistair Popple, Paolo Bonzini, Shuah Khan, Chao Peng,
	Nikunj A Dadhania, Ira Weiny, Michael Roth, Pankaj Gupta,
	Ackerley Tng, Fuad Tabba, Sean Christopherson, Vishal Annapurve,
	Nikita Kalyazin, Patrick Roy, Pratik Sampat, Ashish Kalra
  Cc: linux-fsdevel, linux-coco, linux-mm, linux-kernel, kvm,
	linux-kselftest, Shivank Garg
In-Reply-To: <20260611-shivank-gmem-migrate-v1-0-2d266bfc6f95@amd.com>

guest_memfd folios are currently marked unmovable, so the kernel
cannot perform NUMA-balancing, memory compaction, etc.
This is unavoidable for confidential VMs (SEV-SNP, TDX),
since memory is encrypted and copying it needs firmware assistance.
However, for non-confidential VMs (like firecracker), we can migrate
the folios.

Mark non-confidential VMs as movable and implement
kvm_gmem_migrate_folio() using filemap_migrate_folio().

This lays the groundwork for migrating confidential guest_memfd
later. Once the firmware-assisted copying support is available,
those VMs can be made movable. The confidential folio content can
be copied separately, and the destination folio can be marked with
FOLIO_CONTENT_COPIED so __migrate_folio() skips the host-side
folio_mc_copy().

Signed-off-by: Shivank Garg <shivankg@amd.com>
---
 virt/kvm/guest_memfd.c | 50 +++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 45 insertions(+), 5 deletions(-)

diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 806a42f0e031a1c7729f53c786316d2502532553..e4470106fc7792f328bce5275419683328c8b4ab 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -487,13 +487,45 @@ static struct file_operations kvm_gmem_fops = {
 	.fallocate	= kvm_gmem_fallocate,
 };
 
+#ifdef CONFIG_MIGRATION
 static int kvm_gmem_migrate_folio(struct address_space *mapping,
 				  struct folio *dst, struct folio *src,
 				  enum migrate_mode mode)
 {
-	WARN_ON_ONCE(1);
-	return -EINVAL;
+	struct inode *inode = mapping->host;
+	pgoff_t start, end;
+	int ret;
+
+	if (!filemap_invalidate_trylock_shared(mapping))
+		return -EAGAIN;
+
+	start = src->index;
+	end = start + folio_nr_pages(src);
+
+	kvm_gmem_invalidate_begin(inode, start, end);
+
+	/*
+	 * For non-confidential guest_memfd the folio is host-readable,
+	 * so filemap_migrate_folio() can copy the contents itself via
+	 * folio_mc_copy().
+	 *
+	 * This is also the hook point for confidential VMs (SEV-SNP, TDX) once
+	 * they are made movable: the host cannot copy encrypted/private memory,
+	 * so a firmware-assisted copy would run here.
+	 * Idea: https://lore.kernel.org/r/20260428155043.39251-8-shivankg@amd.com
+	 * Mark the @dst->migrate_info field with FOLIO_CONTENT_COPIED, so
+	 * __migrate_folio() skips folio_mc_copy() for confidential VMs.
+	 */
+	ret = filemap_migrate_folio(mapping, dst, src, mode);
+
+	kvm_gmem_invalidate_end(inode, start, end);
+
+	filemap_invalidate_unlock_shared(mapping);
+	return ret;
 }
+#else
+#define kvm_gmem_migrate_folio NULL
+#endif
 
 static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *folio)
 {
@@ -592,9 +624,17 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
 	inode->i_size = size;
 	mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
 	mapping_set_inaccessible(inode->i_mapping);
-	mapping_set_unmovable(inode->i_mapping);
-	/* Unmovable mappings are supposed to be marked unevictable as well. */
-	WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping));
+
+	/*
+	 * Confidential VMs (SEV-SNP, TDX) bind encryption to the physical
+	 * address and require firmware assisted copy, so their folios cannot
+	 * be migrated yet.
+	 */
+	if (kvm_arch_has_private_mem(kvm)) {
+		mapping_set_unmovable(inode->i_mapping);
+		/* Unmovable mappings are supposed to be marked unevictable as well. */
+		WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping));
+	}
 
 	GMEM_I(inode)->flags = flags;
 

-- 
2.43.0


^ permalink raw reply related

* [PATCH RFC 1/3] mm: split AS_UNMOVABLE back out of AS_INACCESSIBLE
From: Shivank Garg @ 2026-06-11 13:05 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle), Jan Kara, Andrew Morton, Vlastimil Babka,
	Suren Baghdasaryan, Michal Hocko, Brendan Jackman,
	Johannes Weiner, Zi Yan, David Hildenbrand, Matthew Brost,
	Joshua Hahn, Rakie Kim, Byungchul Park, Gregory Price, Ying Huang,
	Alistair Popple, Paolo Bonzini, Shuah Khan, Chao Peng,
	Nikunj A Dadhania, Ira Weiny, Michael Roth, Pankaj Gupta,
	Ackerley Tng, Fuad Tabba, Sean Christopherson, Vishal Annapurve,
	Nikita Kalyazin, Patrick Roy, Pratik Sampat, Ashish Kalra
  Cc: linux-fsdevel, linux-coco, linux-mm, linux-kernel, kvm,
	linux-kselftest, Shivank Garg
In-Reply-To: <20260611-shivank-gmem-migrate-v1-0-2d266bfc6f95@amd.com>

Commit 27e6a24a4cf3 ("mm, virt: merge AS_UNMOVABLE and AS_INACCESSIBLE")
folded the two flags into one, on the grounds that guest_memfd was the
only user and always set both. But the two flags were added for
different reasons and guard different things:

  AS_UNMOVABLE (0003e2a41468) marks a mapping whose folios cannot be
  migrated.

  AS_INACCESSIBLE (c72ceafbd12c) marks a mapping whose contents must
  not be directly R/W accessed. Its only job is to stop
  truncate_inode_partial_folio() from zeroing the folio.

The merge assumed unmovable and inaccessible were the same thing.
This cannot express a mapping that is inaccessible yet still movable,
which is exactly what guest_memfd wants.

Reintroduce AS_UNMOVABLE and restore the original split: truncate keeps
checking AS_INACCESSIBLE, while migration and compaction go back to
checking AS_UNMOVABLE.

Currently guest_memfd sets both, so the resulting flags and behaviour
are unchanged. This is a preparatory change to support folio migration for
non-confidential guest_memfd VMs.

Signed-off-by: Shivank Garg <shivankg@amd.com>
---
 include/linux/pagemap.h | 24 ++++++++++++++++++++----
 mm/compaction.c         | 12 ++++++------
 mm/migrate.c            |  2 +-
 virt/kvm/guest_memfd.c  |  1 +
 4 files changed, 28 insertions(+), 11 deletions(-)

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 31a848485ad9d9850d37185418349b89e6efe420..17f5abfa6e7be97c0dcb634346f21ce076798495 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -210,6 +210,7 @@ enum mapping_flags {
 	AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM = 9,
 	AS_KERNEL_FILE = 10,	/* mapping for a fake kernel file that shouldn't
 				   account usage to user cgroups */
+	AS_UNMOVABLE = 11,	/* The mapping cannot be moved, ever */
 	/* Bits 16-25 are used for FOLIO_ORDER */
 	AS_FOLIO_ORDER_BITS = 5,
 	AS_FOLIO_ORDER_MIN = 16,
@@ -322,11 +323,10 @@ static inline void mapping_clear_stable_writes(struct address_space *mapping)
 static inline void mapping_set_inaccessible(struct address_space *mapping)
 {
 	/*
-	 * It's expected inaccessible mappings are also unevictable. Compaction
-	 * migrate scanner (isolate_migratepages_block()) relies on this to
-	 * reduce page locking.
+	 * The mapping's contents must not be accessed by the CPU through
+	 * the kernel direct map or other internal paths (e.g. zeroing of
+	 * pages during truncation).
 	 */
-	set_bit(AS_UNEVICTABLE, &mapping->flags);
 	set_bit(AS_INACCESSIBLE, &mapping->flags);
 }
 
@@ -335,6 +335,22 @@ static inline bool mapping_inaccessible(const struct address_space *mapping)
 	return test_bit(AS_INACCESSIBLE, &mapping->flags);
 }
 
+static inline void mapping_set_unmovable(struct address_space *mapping)
+{
+	/*
+	 * It's expected unmovable mappings are also unevictable. Compaction
+	 * migrate scanner (isolate_migratepages_block()) relies on this to
+	 * reduce page locking.
+	 */
+	set_bit(AS_UNEVICTABLE, &mapping->flags);
+	set_bit(AS_UNMOVABLE, &mapping->flags);
+}
+
+static inline bool mapping_unmovable(const struct address_space *mapping)
+{
+	return test_bit(AS_UNMOVABLE, &mapping->flags);
+}
+
 static inline void mapping_set_writeback_may_deadlock_on_reclaim(struct address_space *mapping)
 {
 	set_bit(AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM, &mapping->flags);
diff --git a/mm/compaction.c b/mm/compaction.c
index 3648ce22c80728b894cffce502d8caa3e4532406..8262f08c01ff407eff8732ffe1d0eb4de469eaf2 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1133,22 +1133,22 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 		if (((mode & ISOLATE_ASYNC_MIGRATE) && is_dirty) ||
 		    (mapping && is_unevictable)) {
 			bool migrate_dirty = true;
-			bool is_inaccessible;
+			bool is_unmovable;
 
 			/*
 			 * Only folios without mappings or that have
 			 * a ->migrate_folio callback are possible to migrate
 			 * without blocking.
 			 *
-			 * Folios from inaccessible mappings are not migratable.
+			 * Folios from unmovable mappings are not migratable.
 			 *
 			 * However, we can be racing with truncation, which can
 			 * free the mapping that we need to check. Truncation
 			 * holds the folio lock until after the folio is removed
 			 * from the page so holding it ourselves is sufficient.
 			 *
-			 * To avoid locking the folio just to check inaccessible,
-			 * assume every inaccessible folio is also unevictable,
+			 * To avoid locking the folio just to check unmovable,
+			 * assume every unmovable folio is also unevictable,
 			 * which is a cheaper test.  If our assumption goes
 			 * wrong, it's not a correctness bug, just potentially
 			 * wasted cycles.
@@ -1161,9 +1161,9 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 				migrate_dirty = !mapping ||
 						mapping->a_ops->migrate_folio;
 			}
-			is_inaccessible = mapping && mapping_inaccessible(mapping);
+			is_unmovable = mapping && mapping_unmovable(mapping);
 			folio_unlock(folio);
-			if (!migrate_dirty || is_inaccessible)
+			if (!migrate_dirty || is_unmovable)
 				goto isolate_fail_put;
 		}
 
diff --git a/mm/migrate.c b/mm/migrate.c
index 8a64291ab5b44c401e1e0356bf39588e7b5d7b0d..c81b3900b5afd150681d973484e71982a8936221 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1100,7 +1100,7 @@ static int move_to_new_folio(struct folio *dst, struct folio *src,
 
 	if (!mapping)
 		rc = migrate_folio(mapping, dst, src, mode);
-	else if (mapping_inaccessible(mapping))
+	else if (mapping_unmovable(mapping))
 		rc = -EOPNOTSUPP;
 	else if (mapping->a_ops->migrate_folio)
 		/*
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 69c9d6d546b287b4f75ef69868259c082ca50933..806a42f0e031a1c7729f53c786316d2502532553 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -592,6 +592,7 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
 	inode->i_size = size;
 	mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
 	mapping_set_inaccessible(inode->i_mapping);
+	mapping_set_unmovable(inode->i_mapping);
 	/* Unmovable mappings are supposed to be marked unevictable as well. */
 	WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping));
 

-- 
2.43.0


^ permalink raw reply related

* [PATCH RFC 0/3] KVM: guest_memfd: folio migration for non-confidential VMs
From: Shivank Garg @ 2026-06-11 13:05 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle), Jan Kara, Andrew Morton, Vlastimil Babka,
	Suren Baghdasaryan, Michal Hocko, Brendan Jackman,
	Johannes Weiner, Zi Yan, David Hildenbrand, Matthew Brost,
	Joshua Hahn, Rakie Kim, Byungchul Park, Gregory Price, Ying Huang,
	Alistair Popple, Paolo Bonzini, Shuah Khan, Chao Peng,
	Nikunj A Dadhania, Ira Weiny, Michael Roth, Pankaj Gupta,
	Ackerley Tng, Fuad Tabba, Sean Christopherson, Vishal Annapurve,
	Nikita Kalyazin, Patrick Roy, Pratik Sampat, Ashish Kalra
  Cc: linux-fsdevel, linux-coco, linux-mm, linux-kernel, kvm,
	linux-kselftest, Shivank Garg

guest_memfd folios are currently marked unmovable, so the kernel cannot
perform NUMA-balancing, memory compaction, etc. This is unavoidable for
confidential VMs (SEV-SNP, TDX), since memory is encrypted and copying it
needs firmware assistance. However, for non-confidential VMs (like
Firecracker), we can migrate the folios.

This series enables folio migration for non-confidential guest_memfd and
also lays the groundwork for migrating confidential guest_memfd later.
Once firmware-assisted copying support is available, those VMs can be
made movable, the confidential folio content can be copied separately,
and the destination folio marked with FOLIO_CONTENT_COPIED so
__migrate_folio() skips the host-side folio_mc_copy().

Testing
-------
Host: 7.1-rc7 + this, 2 NUMA nodes

- KVM selftest: allocate folios on node 0, migrate them to node 1 and
  back, and verify the resulting NUMA node and the folio contents at
  each step.

- Firecracker [1]: booted a microVM backed by guest_memfd. While the
  guest was running, forced host-side migration of its folios via
  migratepages(8) and explicit move_pages(2) of guest_memfd
  pages. Verify with /proc/firecracker_pid/numa_maps.

[1] https://github.com/firecracker-microvm/firecracker/tree/feature/secret-hiding
    and change builder.rs to remove GUEST_MEMFD_FLAG_NO_DIRECT_MAP from
    vm.create_guest_memfd()

Best regards,
Shivank

Signed-off-by: Shivank Garg <shivankg@amd.com>
---
Shivank Garg (3):
      mm: split AS_UNMOVABLE back out of AS_INACCESSIBLE
      KVM: guest_memfd: support folio migration for non-confidential VMs
      KVM: selftests: exercise guest_memfd folio migration

 include/linux/pagemap.h                        | 24 ++++++--
 mm/compaction.c                                | 12 ++--
 mm/migrate.c                                   |  2 +-
 tools/testing/selftests/kvm/guest_memfd_test.c | 77 ++++++++++++++++++++++++++
 virt/kvm/guest_memfd.c                         | 49 ++++++++++++++--
 5 files changed, 149 insertions(+), 15 deletions(-)
---
base-commit: 4549871118cf616eecdd2d939f78e3b9e1dddc48
change-id: 20260611-shivank-gmem-migrate-8c1c519b30a6

Best regards,
-- 
Shivank Garg <shivankg@amd.com>


^ permalink raw reply

* [PATCH v7 6/6] coco: guest: arm64: Replace dummy CCA device with sysfs ABI
From: Aneesh Kumar K.V (Arm) @ 2026-06-11 13:04 UTC (permalink / raw)
  To: linux-coco, linux-arm-kernel, linux-kernel
  Cc: Aneesh Kumar K.V (Arm), Catalin Marinas, Greg KH, Jeremy Linton,
	Jonathan Cameron, Lorenzo Pieralisi, Mark Rutland, Sudeep Holla,
	Will Deacon, Steven Price, Suzuki K Poulose, Andre Przywara
In-Reply-To: <20260611130429.295516-1-aneesh.kumar@kernel.org>

The SMCCC firmware driver now creates the arm-smccc platform device and
instantiates the arm-rsi-dev SMCCC device once the RSI ABI is discovered.
The arm64-specific arm-cca-dev platform device stub is therefore no longer
needed.

However, userspace has used the arm-cca-dev platform device to detect Arm
CCA Realm guests [1]. Removing it without a replacement would break that
detection and would also leave userspace depending on kernel device-model
details.

Add /sys/firmware/cca/realm_guest as a stable, architecture-provided ABI
for detecting whether the kernel is running as an Arm CCA Realm guest. The
file returns 1 in Realm world and 0 otherwise, similar to the existing s390
/sys/firmware/uv/prot_virt_guest interface for protected virtualization
guests.

Remove the dummy arm-cca-dev registration now that userspace has a
dedicated CCA Realm guest indicator, and document the new ABI in
Documentation/ABI/testing/sysfs-firmware-cca.

[1] https://lore.kernel.org/all/4a7d84b2-2ec4-4773-a2d5-7b63d5c683cf@arm.com

Signed-off-by: Aneesh Kumar K.V (Arm) <aneesh.kumar@kernel.org>
---
 Documentation/ABI/testing/sysfs-firmware-cca | 10 +++++
 arch/arm64/kernel/rsi.c                      | 39 +++++++++++++++-----
 2 files changed, 39 insertions(+), 10 deletions(-)
 create mode 100644 Documentation/ABI/testing/sysfs-firmware-cca

diff --git a/Documentation/ABI/testing/sysfs-firmware-cca b/Documentation/ABI/testing/sysfs-firmware-cca
new file mode 100644
index 000000000000..bf177d636b92
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-firmware-cca
@@ -0,0 +1,10 @@
+What:		/sys/firmware/cca/realm_guest
+Date:		May 2026
+Contact:	Linux ARM Kernel Mailing list <linux-arm-kernel@lists.infradead.org>
+Description:	Read-only. Indicates whether the kernel is running as an
+		Arm Confidential Compute Architecture (CCA) Realm guest.
+
+		The value is one of:
+
+		0: the kernel is not running as a Realm guest
+		1: the kernel is running as a Realm guest
diff --git a/arch/arm64/kernel/rsi.c b/arch/arm64/kernel/rsi.c
index da440f71bb64..a333029ddf08 100644
--- a/arch/arm64/kernel/rsi.c
+++ b/arch/arm64/kernel/rsi.c
@@ -9,6 +9,8 @@
 #include <linux/swiotlb.h>
 #include <linux/cc_platform.h>
 #include <linux/platform_device.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
 
 #include <asm/io.h>
 #include <asm/mem_encrypt.h>
@@ -16,6 +18,7 @@
 #include <asm/rsi.h>
 
 static struct realm_config config;
+static struct kobject *cca_kobj;
 
 unsigned long prot_ns_shared;
 EXPORT_SYMBOL(prot_ns_shared);
@@ -160,17 +163,33 @@ void __init arm64_rsi_init(void)
 	static_branch_enable(&rsi_present);
 }
 
-static struct platform_device rsi_dev = {
-	.name = "arm-cca-dev",
-	.id = PLATFORM_DEVID_NONE
+static ssize_t cca_is_realm_guest(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%d\n", is_realm_world());
+}
+
+static struct kobj_attribute cca_realm_guest =
+	__ATTR(realm_guest, 0444, cca_is_realm_guest, NULL);
+
+static const struct attribute *cca_realm_attrs[] = {
+	&cca_realm_guest.attr,
+	NULL,
 };
 
-static int __init arm64_create_dummy_rsi_dev(void)
+static int __init realm_sysfs_init(void)
 {
-	if (is_realm_world() &&
-	    platform_device_register(&rsi_dev))
-		pr_err("failed to register rsi platform device\n");
-	return 0;
-}
+	int ret;
+
+	cca_kobj = kobject_create_and_add("cca", firmware_kobj);
+	if (!cca_kobj)
+		return -ENOMEM;
 
-arch_initcall(arm64_create_dummy_rsi_dev)
+	ret = sysfs_create_files(cca_kobj, cca_realm_attrs);
+	if (!ret)
+		return 0;
+
+	kobject_put(cca_kobj);
+	return ret;
+}
+device_initcall(realm_sysfs_init);
-- 
2.43.0


^ permalink raw reply related

* [PATCH v7 5/6] firmware: smccc: arm-cca-guest: Bind the TSM provider to an SMCCC device
From: Aneesh Kumar K.V (Arm) @ 2026-06-11 13:04 UTC (permalink / raw)
  To: linux-coco, linux-arm-kernel, linux-kernel
  Cc: Aneesh Kumar K.V (Arm), Catalin Marinas, Greg KH, Jeremy Linton,
	Jonathan Cameron, Lorenzo Pieralisi, Mark Rutland, Sudeep Holla,
	Will Deacon, Steven Price, Suzuki K Poulose, Andre Przywara
In-Reply-To: <20260611130429.295516-1-aneesh.kumar@kernel.org>

The Arm CCA guest TSM provider currently binds through the arm-cca-dev
platform device. Like arm-smccc-trng, this device is not an independent
platform resource; it is a software representation of the RSI firmware
service discovered through SMCCC.

Move RSI discovery into the SMCCC firmware driver. When the SMCCC conduit
is SMC and the RSI ABI version call is supported, create an arm-rsi-dev
SMCCC device. Convert the Arm CCA guest TSM provider to an SMCCC driver so
it binds to that discovered RSI service and keeps module autoloading
through the SMCCC device id table.

Keep the old arm-cca-dev platform-device registration for now. Userspace
has used that device as a Realm-guest indicator, so removing it is left to
a follow-up patch that adds a replacement sysfs ABI.

Signed-off-by: Aneesh Kumar K.V (Arm) <aneesh.kumar@kernel.org>
---
 arch/arm64/include/asm/rsi.h              |  2 -
 arch/arm64/kernel/rsi.c                   |  2 +-
 drivers/firmware/smccc/smccc.c            |  7 +++
 drivers/virt/coco/arm-cca-guest/Kconfig   |  1 +
 drivers/virt/coco/arm-cca-guest/arm-cca.c | 56 +++++++++++------------
 include/linux/arm-smccc-rsi.h             |  2 +
 6 files changed, 39 insertions(+), 31 deletions(-)

diff --git a/arch/arm64/include/asm/rsi.h b/arch/arm64/include/asm/rsi.h
index 88b50d660e85..5f9c8623183d 100644
--- a/arch/arm64/include/asm/rsi.h
+++ b/arch/arm64/include/asm/rsi.h
@@ -10,8 +10,6 @@
 #include <linux/jump_label.h>
 #include <asm/rsi_cmds.h>
 
-#define RSI_PDEV_NAME "arm-cca-dev"
-
 DECLARE_STATIC_KEY_FALSE(rsi_present);
 
 void __init arm64_rsi_init(void);
diff --git a/arch/arm64/kernel/rsi.c b/arch/arm64/kernel/rsi.c
index 92160f2e57ff..da440f71bb64 100644
--- a/arch/arm64/kernel/rsi.c
+++ b/arch/arm64/kernel/rsi.c
@@ -161,7 +161,7 @@ void __init arm64_rsi_init(void)
 }
 
 static struct platform_device rsi_dev = {
-	.name = RSI_PDEV_NAME,
+	.name = "arm-cca-dev",
 	.id = PLATFORM_DEVID_NONE
 };
 
diff --git a/drivers/firmware/smccc/smccc.c b/drivers/firmware/smccc/smccc.c
index a47696f3a5de..7127af3dbe5c 100644
--- a/drivers/firmware/smccc/smccc.c
+++ b/drivers/firmware/smccc/smccc.c
@@ -10,6 +10,7 @@
 #include <linux/arm-smccc.h>
 #include <linux/kernel.h>
 #include <linux/arm-smccc-bus.h>
+#include <linux/arm-smccc-rsi.h>
 
 #include <asm/archrandom.h>
 
@@ -94,6 +95,12 @@ static const struct smccc_device_info smccc_devices[] __initconst = {
 		.requires_smc   = false,
 		.device_name    = "arm-smccc-trng",
 	},
+
+	{
+		.func_id        = SMC_RSI_ABI_VERSION,
+		.requires_smc   = true,
+		.device_name    = RSI_DEV_NAME,
+	},
 };
 
 static bool __init smccc_probe_smccc_device(const struct smccc_device_info *smccc_dev)
diff --git a/drivers/virt/coco/arm-cca-guest/Kconfig b/drivers/virt/coco/arm-cca-guest/Kconfig
index 3f0f013f03f1..ad7538750c5a 100644
--- a/drivers/virt/coco/arm-cca-guest/Kconfig
+++ b/drivers/virt/coco/arm-cca-guest/Kconfig
@@ -1,6 +1,7 @@
 config ARM_CCA_GUEST
 	tristate "Arm CCA Guest driver"
 	depends on ARM64
+	depends on HAVE_ARM_SMCCC_DISCOVERY
 	select TSM_REPORTS
 	help
 	  The driver provides userspace interface to request and
diff --git a/drivers/virt/coco/arm-cca-guest/arm-cca.c b/drivers/virt/coco/arm-cca-guest/arm-cca.c
index 0bbd1fa53ee4..4f9289ccf498 100644
--- a/drivers/virt/coco/arm-cca-guest/arm-cca.c
+++ b/drivers/virt/coco/arm-cca-guest/arm-cca.c
@@ -4,6 +4,7 @@
  */
 
 #include <linux/arm-smccc.h>
+#include <linux/arm-smccc-bus.h>
 #include <linux/cc_platform.h>
 #include <linux/kernel.h>
 #include <linux/mod_devicetable.h>
@@ -189,16 +190,12 @@ static const struct tsm_report_ops arm_cca_tsm_report_ops = {
 	.report_new = arm_cca_report_new,
 };
 
-/**
- * arm_cca_guest_init - Register with the Trusted Security Module (TSM)
- * interface.
- *
- * Return:
- * * %0        - Registered successfully with the TSM interface.
- * * %-ENODEV  - The execution context is not an Arm Realm.
- * * %-EBUSY   - Already registered.
- */
-static int __init arm_cca_guest_init(void)
+static void unregister_cca_tsm_report(void *data)
+{
+	tsm_report_unregister(&arm_cca_tsm_report_ops);
+}
+
+static int cca_tsm_probe(struct arm_smccc_device *sdev)
 {
 	int ret;
 
@@ -206,30 +203,33 @@ static int __init arm_cca_guest_init(void)
 		return -ENODEV;
 
 	ret = tsm_report_register(&arm_cca_tsm_report_ops, NULL);
-	if (ret < 0)
-		pr_err("Error %d registering with TSM\n", ret);
+	if (ret < 0) {
+		dev_err_probe(&sdev->dev, ret, "Error registering with TSM\n");
+		return ret;
+	}
 
-	return ret;
-}
-module_init(arm_cca_guest_init);
+	ret = devm_add_action_or_reset(&sdev->dev, unregister_cca_tsm_report,
+				       NULL);
+	if (ret < 0) {
+		dev_err_probe(&sdev->dev, ret, "Error registering devm action\n");
+		return ret;
+	}
 
-/**
- * arm_cca_guest_exit - unregister with the Trusted Security Module (TSM)
- * interface.
- */
-static void __exit arm_cca_guest_exit(void)
-{
-	tsm_report_unregister(&arm_cca_tsm_report_ops);
+	return 0;
 }
-module_exit(arm_cca_guest_exit);
 
-/* modalias, so userspace can autoload this module when RSI is available */
-static const struct platform_device_id arm_cca_match[] __maybe_unused = {
-	{ RSI_PDEV_NAME, 0},
-	{ }
+static const struct arm_smccc_device_id cca_tsm_id_table[] = {
+	{ .name = RSI_DEV_NAME },
+	{}
 };
+MODULE_DEVICE_TABLE(arm_smccc, cca_tsm_id_table);
 
-MODULE_DEVICE_TABLE(platform, arm_cca_match);
+static struct arm_smccc_driver cca_tsm_driver = {
+	.name = KBUILD_MODNAME,
+	.probe = cca_tsm_probe,
+	.id_table = cca_tsm_id_table,
+};
+module_arm_smccc_driver(cca_tsm_driver);
 MODULE_AUTHOR("Sami Mujawar <sami.mujawar@arm.com>");
 MODULE_DESCRIPTION("Arm CCA Guest TSM Driver");
 MODULE_LICENSE("GPL");
diff --git a/include/linux/arm-smccc-rsi.h b/include/linux/arm-smccc-rsi.h
index fddb77986f70..ae663aa8fd7f 100644
--- a/include/linux/arm-smccc-rsi.h
+++ b/include/linux/arm-smccc-rsi.h
@@ -8,6 +8,8 @@
 
 #include <linux/arm-smccc.h>
 
+#define RSI_DEV_NAME "arm-rsi-dev"
+
 /*
  * This file describes the Realm Services Interface (RSI) Application Binary
  * Interface (ABI) for SMC calls made from within the Realm to the RMM and
-- 
2.43.0


^ permalink raw reply related

* [PATCH v7 4/6] virt: coco: arm-cca-guest: Rename TSM report source file
From: Aneesh Kumar K.V (Arm) @ 2026-06-11 13:04 UTC (permalink / raw)
  To: linux-coco, linux-arm-kernel, linux-kernel
  Cc: Aneesh Kumar K.V (Arm), Catalin Marinas, Greg KH, Jeremy Linton,
	Jonathan Cameron, Lorenzo Pieralisi, Mark Rutland, Sudeep Holla,
	Will Deacon, Steven Price, Suzuki K Poulose, Andre Przywara
In-Reply-To: <20260611130429.295516-1-aneesh.kumar@kernel.org>

The Arm CCA guest driver currently only implements TSM report support, but
follow-up changes will add more TSM-related functionality to the same
module.

Rename arm-cca-guest.c to arm-cca.c and build it as an object of the
arm-cca-guest module. This leaves room for the module to grow additional
source files.

Signed-off-by: Aneesh Kumar K.V (Arm) <aneesh.kumar@kernel.org>
---
 drivers/virt/coco/arm-cca-guest/Makefile                    | 2 ++
 .../virt/coco/arm-cca-guest/{arm-cca-guest.c => arm-cca.c}  | 6 +++---
 2 files changed, 5 insertions(+), 3 deletions(-)
 rename drivers/virt/coco/arm-cca-guest/{arm-cca-guest.c => arm-cca.c} (97%)

diff --git a/drivers/virt/coco/arm-cca-guest/Makefile b/drivers/virt/coco/arm-cca-guest/Makefile
index 69eeba08e98a..778146148515 100644
--- a/drivers/virt/coco/arm-cca-guest/Makefile
+++ b/drivers/virt/coco/arm-cca-guest/Makefile
@@ -1,2 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_ARM_CCA_GUEST) += arm-cca-guest.o
+
+arm-cca-guest-y += arm-cca.o
diff --git a/drivers/virt/coco/arm-cca-guest/arm-cca-guest.c b/drivers/virt/coco/arm-cca-guest/arm-cca.c
similarity index 97%
rename from drivers/virt/coco/arm-cca-guest/arm-cca-guest.c
rename to drivers/virt/coco/arm-cca-guest/arm-cca.c
index 8b6854e7a188..0bbd1fa53ee4 100644
--- a/drivers/virt/coco/arm-cca-guest/arm-cca-guest.c
+++ b/drivers/virt/coco/arm-cca-guest/arm-cca.c
@@ -184,7 +184,7 @@ static int arm_cca_report_new(struct tsm_report *report, void *data)
 	return ret;
 }
 
-static const struct tsm_report_ops arm_cca_tsm_ops = {
+static const struct tsm_report_ops arm_cca_tsm_report_ops = {
 	.name = KBUILD_MODNAME,
 	.report_new = arm_cca_report_new,
 };
@@ -205,7 +205,7 @@ static int __init arm_cca_guest_init(void)
 	if (!is_realm_world())
 		return -ENODEV;
 
-	ret = tsm_report_register(&arm_cca_tsm_ops, NULL);
+	ret = tsm_report_register(&arm_cca_tsm_report_ops, NULL);
 	if (ret < 0)
 		pr_err("Error %d registering with TSM\n", ret);
 
@@ -219,7 +219,7 @@ module_init(arm_cca_guest_init);
  */
 static void __exit arm_cca_guest_exit(void)
 {
-	tsm_report_unregister(&arm_cca_tsm_ops);
+	tsm_report_unregister(&arm_cca_tsm_report_ops);
 }
 module_exit(arm_cca_guest_exit);
 
-- 
2.43.0


^ permalink raw reply related

* [PATCH v7 3/6] firmware: smccc: Move RSI definitions to include/linux
From: Aneesh Kumar K.V (Arm) @ 2026-06-11 13:04 UTC (permalink / raw)
  To: linux-coco, linux-arm-kernel, linux-kernel
  Cc: Aneesh Kumar K.V (Arm), Catalin Marinas, Greg KH, Jeremy Linton,
	Jonathan Cameron, Lorenzo Pieralisi, Mark Rutland, Sudeep Holla,
	Will Deacon, Steven Price, Suzuki K Poulose, Andre Przywara
In-Reply-To: <20260611130429.295516-1-aneesh.kumar@kernel.org>

The RSI SMCCC function IDs describe a firmware ABI and are not arm64
architecture specific definitions. Follow-up changes need to use them from
non-arch code, including drivers/firmware/smccc and the Arm CCA guest
driver.

Move the RSI SMCCC definitions from arch/arm64/include/asm/ to
include/linux/ so they can be shared with the driver code. This also
keeps the firmware interface outside architecture code, as requested [1].

[1] https://lore.kernel.org/all/agsNO9cc7H-b0H8L@willie-the-truck

Signed-off-by: Aneesh Kumar K.V (Arm) <aneesh.kumar@kernel.org>
---
 arch/arm64/include/asm/rsi_cmds.h             | 74 +---------------
 .../virt/coco/arm-cca-guest/arm-cca-guest.c   |  2 +
 drivers/virt/coco/arm-cca-guest/rsi.h         | 84 +++++++++++++++++++
 .../linux/arm-smccc-rsi.h                     |  6 +-
 4 files changed, 90 insertions(+), 76 deletions(-)
 create mode 100644 drivers/virt/coco/arm-cca-guest/rsi.h
 rename arch/arm64/include/asm/rsi_smc.h => include/linux/arm-smccc-rsi.h (98%)

diff --git a/arch/arm64/include/asm/rsi_cmds.h b/arch/arm64/include/asm/rsi_cmds.h
index 2c8763876dfb..633123a4e5d5 100644
--- a/arch/arm64/include/asm/rsi_cmds.h
+++ b/arch/arm64/include/asm/rsi_cmds.h
@@ -8,10 +8,9 @@
 
 #include <linux/arm-smccc.h>
 #include <linux/string.h>
+#include <linux/arm-smccc-rsi.h>
 #include <asm/memory.h>
 
-#include <asm/rsi_smc.h>
-
 #define RSI_GRANULE_SHIFT		12
 #define RSI_GRANULE_SIZE		(_AC(1, UL) << RSI_GRANULE_SHIFT)
 
@@ -88,75 +87,4 @@ static inline long rsi_set_addr_range_state(phys_addr_t start,
 	return res.a0;
 }
 
-/**
- * rsi_attestation_token_init - Initialise the operation to retrieve an
- * attestation token.
- *
- * @challenge:	The challenge data to be used in the attestation token
- *		generation.
- * @size:	Size of the challenge data in bytes.
- *
- * Initialises the attestation token generation and returns an upper bound
- * on the attestation token size that can be used to allocate an adequate
- * buffer. The caller is expected to subsequently call
- * rsi_attestation_token_continue() to retrieve the attestation token data on
- * the same CPU.
- *
- * Returns:
- *  On success, returns the upper limit of the attestation report size.
- *  Otherwise, -EINVAL
- */
-static inline long
-rsi_attestation_token_init(const u8 *challenge, unsigned long size)
-{
-	struct arm_smccc_1_2_regs regs = { 0 };
-
-	/* The challenge must be at least 32bytes and at most 64bytes */
-	if (!challenge || size < 32 || size > 64)
-		return -EINVAL;
-
-	regs.a0 = SMC_RSI_ATTESTATION_TOKEN_INIT;
-	memcpy(&regs.a1, challenge, size);
-	arm_smccc_1_2_smc(&regs, &regs);
-
-	if (regs.a0 == RSI_SUCCESS)
-		return regs.a1;
-
-	return -EINVAL;
-}
-
-/**
- * rsi_attestation_token_continue - Continue the operation to retrieve an
- * attestation token.
- *
- * @granule: {I}PA of the Granule to which the token will be written.
- * @offset:  Offset within Granule to start of buffer in bytes.
- * @size:    The size of the buffer.
- * @len:     The number of bytes written to the buffer.
- *
- * Retrieves up to a RSI_GRANULE_SIZE worth of token data per call. The caller
- * is expected to call rsi_attestation_token_init() before calling this
- * function to retrieve the attestation token.
- *
- * Return:
- * * %RSI_SUCCESS     - Attestation token retrieved successfully.
- * * %RSI_INCOMPLETE  - Token generation is not complete.
- * * %RSI_ERROR_INPUT - A parameter was not valid.
- * * %RSI_ERROR_STATE - Attestation not in progress.
- */
-static inline unsigned long rsi_attestation_token_continue(phys_addr_t granule,
-							   unsigned long offset,
-							   unsigned long size,
-							   unsigned long *len)
-{
-	struct arm_smccc_res res;
-
-	arm_smccc_1_1_invoke(SMC_RSI_ATTESTATION_TOKEN_CONTINUE,
-			     granule, offset, size, 0, &res);
-
-	if (len)
-		*len = res.a1;
-	return res.a0;
-}
-
 #endif /* __ASM_RSI_CMDS_H */
diff --git a/drivers/virt/coco/arm-cca-guest/arm-cca-guest.c b/drivers/virt/coco/arm-cca-guest/arm-cca-guest.c
index 66d00b6ceb78..8b6854e7a188 100644
--- a/drivers/virt/coco/arm-cca-guest/arm-cca-guest.c
+++ b/drivers/virt/coco/arm-cca-guest/arm-cca-guest.c
@@ -14,6 +14,8 @@
 
 #include <asm/rsi.h>
 
+#include "rsi.h"
+
 /**
  * struct arm_cca_token_info - a descriptor for the token buffer.
  * @challenge:		Pointer to the challenge data
diff --git a/drivers/virt/coco/arm-cca-guest/rsi.h b/drivers/virt/coco/arm-cca-guest/rsi.h
new file mode 100644
index 000000000000..f7303f4bce17
--- /dev/null
+++ b/drivers/virt/coco/arm-cca-guest/rsi.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2026 ARM Ltd.
+ */
+
+#ifndef _VIRT_COCO_RSI_H_
+#define _VIRT_COCO_RSI_H_
+
+#include <linux/arm-smccc-rsi.h>
+
+/**
+ * rsi_attestation_token_init - Initialise the operation to retrieve an
+ * attestation token.
+ *
+ * @challenge:	The challenge data to be used in the attestation token
+ *		generation.
+ * @size:	Size of the challenge data in bytes.
+ *
+ * Initialises the attestation token generation and returns an upper bound
+ * on the attestation token size that can be used to allocate an adequate
+ * buffer. The caller is expected to subsequently call
+ * rsi_attestation_token_continue() to retrieve the attestation token data on
+ * the same CPU.
+ *
+ * Returns:
+ *  On success, returns the upper limit of the attestation report size.
+ *  Otherwise, -EINVAL
+ */
+static inline long
+rsi_attestation_token_init(const u8 *challenge, unsigned long size)
+{
+	struct arm_smccc_1_2_regs regs = { 0 };
+
+	/* The challenge must be at least 32bytes and at most 64bytes */
+	if (!challenge || size < 32 || size > 64)
+		return -EINVAL;
+
+	regs.a0 = SMC_RSI_ATTESTATION_TOKEN_INIT;
+	memcpy(&regs.a1, challenge, size);
+	arm_smccc_1_2_smc(&regs, &regs);
+
+	if (regs.a0 == RSI_SUCCESS)
+		return regs.a1;
+
+	return -EINVAL;
+}
+
+/**
+ * rsi_attestation_token_continue - Continue the operation to retrieve an
+ * attestation token.
+ *
+ * @granule: {I}PA of the Granule to which the token will be written.
+ * @offset:  Offset within Granule to start of buffer in bytes.
+ * @size:    The size of the buffer.
+ * @len:     The number of bytes written to the buffer.
+ *
+ * Retrieves up to a RSI_GRANULE_SIZE worth of token data per call. The caller
+ * is expected to call rsi_attestation_token_init() before calling this
+ * function to retrieve the attestation token.
+ *
+ * Return:
+ * * %RSI_SUCCESS     - Attestation token retrieved successfully.
+ * * %RSI_INCOMPLETE  - Token generation is not complete.
+ * * %RSI_ERROR_INPUT - A parameter was not valid.
+ * * %RSI_ERROR_STATE - Attestation not in progress.
+ */
+static inline unsigned long rsi_attestation_token_continue(phys_addr_t granule,
+							   unsigned long offset,
+							   unsigned long size,
+							   unsigned long *len)
+{
+	struct arm_smccc_res res;
+
+	arm_smccc_1_1_invoke(SMC_RSI_ATTESTATION_TOKEN_CONTINUE,
+			     granule, offset, size, 0, &res);
+
+	if (len)
+		*len = res.a1;
+	return res.a0;
+}
+
+
+
+#endif
diff --git a/arch/arm64/include/asm/rsi_smc.h b/include/linux/arm-smccc-rsi.h
similarity index 98%
rename from arch/arm64/include/asm/rsi_smc.h
rename to include/linux/arm-smccc-rsi.h
index e19253f96c94..fddb77986f70 100644
--- a/arch/arm64/include/asm/rsi_smc.h
+++ b/include/linux/arm-smccc-rsi.h
@@ -3,8 +3,8 @@
  * Copyright (C) 2023 ARM Ltd.
  */
 
-#ifndef __ASM_RSI_SMC_H_
-#define __ASM_RSI_SMC_H_
+#ifndef __LINUX_ARM_SMCCC_RSI_H_
+#define __LINUX_ARM_SMCCC_RSI_H_
 
 #include <linux/arm-smccc.h>
 
@@ -190,4 +190,4 @@ struct realm_config {
  */
 #define SMC_RSI_HOST_CALL			SMC_RSI_FID(0x199)
 
-#endif /* __ASM_RSI_SMC_H_ */
+#endif /* __LINUX_ARM_SMCCC_RSI_H_ */
-- 
2.43.0


^ permalink raw reply related

* [PATCH v7 2/6] firmware: hwrng: arm_smccc_trng: Register as an SMCCC device
From: Aneesh Kumar K.V (Arm) @ 2026-06-11 13:04 UTC (permalink / raw)
  To: linux-coco, linux-arm-kernel, linux-kernel
  Cc: Aneesh Kumar K.V (Arm), Catalin Marinas, Greg KH, Jeremy Linton,
	Jonathan Cameron, Lorenzo Pieralisi, Mark Rutland, Sudeep Holla,
	Will Deacon, Steven Price, Suzuki K Poulose, Andre Przywara
In-Reply-To: <20260611130429.295516-1-aneesh.kumar@kernel.org>

The SMCCC TRNG interface is a firmware-provided SMCCC service rather than a
standalone platform device. Now that the SMCCC core has an SMCCC bus,
create an arm-smccc-trng device for the discovered TRNG service and convert
the hwrng driver to an SMCCC driver.

The SMCCC id table preserves module autoloading for systems where the TRNG
driver is built as a module.

The sysfs device path changes from the old smccc_trng platform-device path
to an arm-smccc device path. No known userspace dependency on the old path
was found; a Debian Code Search lookup for the existing platform-device
name/path did not find any users.

Signed-off-by: Aneesh Kumar K.V (Arm) <aneesh.kumar@kernel.org>
---
 arch/arm64/include/asm/archrandom.h     |  2 +-
 drivers/char/hw_random/arm_smccc_trng.c | 32 +++++++++-----
 drivers/firmware/smccc/smccc.c          | 58 +++++++++++++++++++++----
 3 files changed, 71 insertions(+), 21 deletions(-)

diff --git a/arch/arm64/include/asm/archrandom.h b/arch/arm64/include/asm/archrandom.h
index 8babfbe31f95..7605dd81bd1e 100644
--- a/arch/arm64/include/asm/archrandom.h
+++ b/arch/arm64/include/asm/archrandom.h
@@ -12,7 +12,7 @@
 
 extern bool smccc_trng_available;
 
-static inline bool __init smccc_probe_trng(void)
+static inline bool smccc_probe_trng(void)
 {
 	struct arm_smccc_res res;
 
diff --git a/drivers/char/hw_random/arm_smccc_trng.c b/drivers/char/hw_random/arm_smccc_trng.c
index dcb8e7f37f25..8f7f9d830cf2 100644
--- a/drivers/char/hw_random/arm_smccc_trng.c
+++ b/drivers/char/hw_random/arm_smccc_trng.c
@@ -16,8 +16,10 @@
 #include <linux/device.h>
 #include <linux/hw_random.h>
 #include <linux/module.h>
-#include <linux/platform_device.h>
 #include <linux/arm-smccc.h>
+#include <linux/arm-smccc-bus.h>
+
+#include <asm/archrandom.h>
 
 #ifdef CONFIG_ARM64
 #define ARM_SMCCC_TRNG_RND	ARM_SMCCC_TRNG_RND64
@@ -94,29 +96,37 @@ static int smccc_trng_read(struct hwrng *rng, void *data, size_t max, bool wait)
 	return copied;
 }
 
-static int smccc_trng_probe(struct platform_device *pdev)
+static int smccc_trng_probe(struct arm_smccc_device *sdev)
 {
 	struct hwrng *trng;
 
-	trng = devm_kzalloc(&pdev->dev, sizeof(*trng), GFP_KERNEL);
+	/* validate the minimum version requirement */
+	if (!smccc_probe_trng())
+		return -ENODEV;
+
+	trng = devm_kzalloc(&sdev->dev, sizeof(*trng), GFP_KERNEL);
 	if (!trng)
 		return -ENOMEM;
 
 	trng->name = "smccc_trng";
 	trng->read = smccc_trng_read;
 
-	return devm_hwrng_register(&pdev->dev, trng);
+	return devm_hwrng_register(&sdev->dev, trng);
 }
 
-static struct platform_driver smccc_trng_driver = {
-	.driver = {
-		.name		= "smccc_trng",
-	},
-	.probe		= smccc_trng_probe,
+static const struct arm_smccc_device_id smccc_trng_id_table[] = {
+	{ .name = "arm-smccc-trng" },
+	{}
+};
+MODULE_DEVICE_TABLE(arm_smccc, smccc_trng_id_table);
+
+static struct arm_smccc_driver smccc_trng_driver = {
+	.name	  = KBUILD_MODNAME,
+	.probe	  = smccc_trng_probe,
+	.id_table = smccc_trng_id_table,
 };
-module_platform_driver(smccc_trng_driver);
+module_arm_smccc_driver(smccc_trng_driver);
 
-MODULE_ALIAS("platform:smccc_trng");
 MODULE_AUTHOR("Andre Przywara");
 MODULE_DESCRIPTION("Arm SMCCC TRNG firmware interface support");
 MODULE_LICENSE("GPL");
diff --git a/drivers/firmware/smccc/smccc.c b/drivers/firmware/smccc/smccc.c
index bdee057db2fd..a47696f3a5de 100644
--- a/drivers/firmware/smccc/smccc.c
+++ b/drivers/firmware/smccc/smccc.c
@@ -9,7 +9,8 @@
 #include <linux/init.h>
 #include <linux/arm-smccc.h>
 #include <linux/kernel.h>
-#include <linux/platform_device.h>
+#include <linux/arm-smccc-bus.h>
+
 #include <asm/archrandom.h>
 
 static u32 smccc_version = ARM_SMCCC_VERSION_1_0;
@@ -81,16 +82,55 @@ bool arm_smccc_hypervisor_has_uuid(const uuid_t *hyp_uuid)
 }
 EXPORT_SYMBOL_GPL(arm_smccc_hypervisor_has_uuid);
 
+struct smccc_device_info {
+	u32 func_id;
+	bool requires_smc;
+	const char *device_name;
+};
+
+static const struct smccc_device_info smccc_devices[] __initconst = {
+	{
+		.func_id        = ARM_SMCCC_TRNG_VERSION,
+		.requires_smc   = false,
+		.device_name    = "arm-smccc-trng",
+	},
+};
+
+static bool __init smccc_probe_smccc_device(const struct smccc_device_info *smccc_dev)
+{
+	unsigned long ret;
+	struct arm_smccc_res res;
+
+	if (smccc_conduit == SMCCC_CONDUIT_NONE)
+		return false;
+
+	if (smccc_dev->requires_smc && smccc_conduit != SMCCC_CONDUIT_SMC)
+		return false;
+
+	arm_smccc_1_1_invoke(smccc_dev->func_id, &res);
+	ret = res.a0;
+
+	if ((s32)ret == SMCCC_RET_NOT_SUPPORTED)
+		return false;
+
+	return true;
+}
+
 static int __init smccc_devices_init(void)
 {
-	struct platform_device *pdev;
-
-	if (smccc_trng_available) {
-		pdev = platform_device_register_simple("smccc_trng", -1,
-						       NULL, 0);
-		if (IS_ERR(pdev))
-			pr_err("smccc_trng: could not register device: %ld\n",
-			       PTR_ERR(pdev));
+	struct arm_smccc_device *sdev;
+	const struct smccc_device_info *smccc_dev;
+
+	for (int i = 0; i < ARRAY_SIZE(smccc_devices); i++) {
+		smccc_dev = &smccc_devices[i];
+
+		if (!smccc_probe_smccc_device(smccc_dev))
+			continue;
+
+		sdev = arm_smccc_device_register(smccc_dev->device_name);
+		if (IS_ERR(sdev))
+			pr_err("%s: could not register device: %ld\n",
+			       smccc_dev->device_name, PTR_ERR(sdev));
 	}
 
 	return 0;
-- 
2.43.0


^ permalink raw reply related

* [PATCH v7 1/6] firmware: smccc: Add an Arm SMCCC bus
From: Aneesh Kumar K.V (Arm) @ 2026-06-11 13:04 UTC (permalink / raw)
  To: linux-coco, linux-arm-kernel, linux-kernel
  Cc: Aneesh Kumar K.V (Arm), Catalin Marinas, Greg KH, Jeremy Linton,
	Jonathan Cameron, Lorenzo Pieralisi, Mark Rutland, Sudeep Holla,
	Will Deacon, Steven Price, Suzuki K Poulose, Andre Przywara
In-Reply-To: <20260611130429.295516-1-aneesh.kumar@kernel.org>

SMCCC-discovered firmware services are currently represented by separate
platform devices, such as smccc_trng and arm-cca-dev. Those devices do not
represent independent DT/ACPI-described platform resources; they are
features of the SMCCC firmware interface.

Add an Arm SMCCC bus for services discovered through the SMCCC firmware
interface. The bus provides SMCCC device and driver registration helpers,
name-based matching, modalias generation, and a sysfs modalias attribute so
SMCCC service drivers can bind to discovered firmware services and autoload
as modules.

Follow-up changes can then register SMCCC firmware services as arm-smccc
devices instead of creating independent per-feature platform devices.

Based on the arm_ffa code.

Signed-off-by: Aneesh Kumar K.V (Arm) <aneesh.kumar@kernel.org>
---
 drivers/firmware/smccc/Makefile   |   2 +-
 drivers/firmware/smccc/bus.c      | 164 ++++++++++++++++++++++++++++++
 include/linux/arm-smccc-bus.h     |  49 +++++++++
 include/linux/mod_devicetable.h   |  13 +++
 scripts/mod/devicetable-offsets.c |   3 +
 scripts/mod/file2alias.c          |   8 ++
 6 files changed, 238 insertions(+), 1 deletion(-)
 create mode 100644 drivers/firmware/smccc/bus.c
 create mode 100644 include/linux/arm-smccc-bus.h

diff --git a/drivers/firmware/smccc/Makefile b/drivers/firmware/smccc/Makefile
index 40d19144a860..68bbff1407b8 100644
--- a/drivers/firmware/smccc/Makefile
+++ b/drivers/firmware/smccc/Makefile
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
 #
-obj-$(CONFIG_HAVE_ARM_SMCCC_DISCOVERY)	+= smccc.o kvm_guest.o
+obj-$(CONFIG_HAVE_ARM_SMCCC_DISCOVERY)	+= bus.o smccc.o kvm_guest.o
 obj-$(CONFIG_ARM_SMCCC_SOC_ID)	+= soc_id.o
diff --git a/drivers/firmware/smccc/bus.c b/drivers/firmware/smccc/bus.c
new file mode 100644
index 000000000000..fe7e893130ce
--- /dev/null
+++ b/drivers/firmware/smccc/bus.c
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2026 Arm Limited
+ */
+
+#include <linux/arm-smccc-bus.h>
+#include <linux/idr.h>
+#include <linux/slab.h>
+
+static DEFINE_IDA(arm_smccc_bus_id);
+
+static int arm_smccc_bus_match(struct device *dev,
+		const struct device_driver *drv)
+{
+	const struct arm_smccc_device_id *id_table;
+	struct arm_smccc_device *smccc_dev = to_arm_smccc_device(dev);
+
+	id_table = to_arm_smccc_driver(drv)->id_table;
+	if (!id_table)
+		return 0;
+
+	while (id_table->name[0]) {
+		if (!strcmp(smccc_dev->name, id_table->name))
+			return 1;
+		id_table++;
+	}
+
+	return 0;
+}
+
+static int arm_smccc_bus_probe(struct device *dev)
+{
+	struct arm_smccc_driver *smccc_drv = to_arm_smccc_driver(dev->driver);
+
+	return smccc_drv->probe(to_arm_smccc_device(dev));
+}
+
+static void arm_smccc_bus_remove(struct device *dev)
+{
+	struct arm_smccc_driver *smcc_drv = to_arm_smccc_driver(dev->driver);
+
+	if (smcc_drv->remove)
+		smcc_drv->remove(to_arm_smccc_device(dev));
+}
+
+static int arm_smccc_bus_uevent(const struct device *dev,
+		struct kobj_uevent_env *env)
+{
+	const struct arm_smccc_device *smccc_dev = to_arm_smccc_device(dev);
+
+	return add_uevent_var(env, "MODALIAS=" ARM_SMCCC_MODULE_PREFIX "%s",
+			      smccc_dev->name);
+}
+
+static ssize_t modalias_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct arm_smccc_device *smccc_dev = to_arm_smccc_device(dev);
+
+	return sysfs_emit(buf, ARM_SMCCC_MODULE_PREFIX "%s\n", smccc_dev->name);
+}
+static DEVICE_ATTR_RO(modalias);
+
+static struct attribute *arm_smccc_device_attrs[] = {
+	&dev_attr_modalias.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(arm_smccc_device);
+
+const struct bus_type arm_smccc_bus_type = {
+	.name = "arm_smccc",
+	.match = arm_smccc_bus_match,
+	.probe = arm_smccc_bus_probe,
+	.remove = arm_smccc_bus_remove,
+	.uevent = arm_smccc_bus_uevent,
+	.dev_groups = arm_smccc_device_groups,
+};
+EXPORT_SYMBOL_GPL(arm_smccc_bus_type);
+
+int arm_smccc_driver_register(struct arm_smccc_driver *driver,
+		struct module *owner, const char *mod_name)
+{
+	if (!driver->probe)
+		return -EINVAL;
+
+	driver->driver.bus = &arm_smccc_bus_type;
+	driver->driver.name = driver->name;
+	driver->driver.owner = owner;
+	driver->driver.mod_name = mod_name;
+
+	return driver_register(&driver->driver);
+}
+EXPORT_SYMBOL_GPL(arm_smccc_driver_register);
+
+void arm_smccc_driver_unregister(struct arm_smccc_driver *driver)
+{
+	driver_unregister(&driver->driver);
+}
+EXPORT_SYMBOL_GPL(arm_smccc_driver_unregister);
+
+static void arm_smccc_release_device(struct device *dev)
+{
+	struct arm_smccc_device *smccc_dev = to_arm_smccc_device(dev);
+
+	ida_free(&arm_smccc_bus_id, smccc_dev->id);
+	kfree(smccc_dev);
+}
+
+struct arm_smccc_device *arm_smccc_device_register(const char *name)
+{
+	struct arm_smccc_device *smccc_dev;
+	int id, ret;
+
+	id = ida_alloc_min(&arm_smccc_bus_id, 1, GFP_KERNEL);
+	if (id < 0)
+		return ERR_PTR(id);
+
+	smccc_dev = kzalloc_obj(*smccc_dev);
+	if (!smccc_dev) {
+		ida_free(&arm_smccc_bus_id, id);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	smccc_dev->id = id;
+	if (strscpy(smccc_dev->name, name) < 0) {
+		kfree(smccc_dev);
+		ida_free(&arm_smccc_bus_id, id);
+		return ERR_PTR(-EINVAL);
+	}
+	smccc_dev->dev.bus = &arm_smccc_bus_type;
+	smccc_dev->dev.release = arm_smccc_release_device;
+
+	ret = dev_set_name(&smccc_dev->dev, "%s-%d", smccc_dev->name, id);
+	if (ret) {
+		kfree(smccc_dev);
+		ida_free(&arm_smccc_bus_id, id);
+		return ERR_PTR(ret);
+	}
+
+	ret = device_register(&smccc_dev->dev);
+	if (ret) {
+		put_device(&smccc_dev->dev);
+		return ERR_PTR(ret);
+	}
+
+	return smccc_dev;
+}
+EXPORT_SYMBOL_GPL(arm_smccc_device_register);
+
+void arm_smccc_device_unregister(struct arm_smccc_device *smccc_dev)
+{
+	if (!smccc_dev)
+		return;
+
+	device_unregister(&smccc_dev->dev);
+}
+EXPORT_SYMBOL_GPL(arm_smccc_device_unregister);
+
+static int __init arm_smccc_bus_init(void)
+{
+	return bus_register(&arm_smccc_bus_type);
+}
+subsys_initcall(arm_smccc_bus_init);
+
diff --git a/include/linux/arm-smccc-bus.h b/include/linux/arm-smccc-bus.h
new file mode 100644
index 000000000000..188891441e57
--- /dev/null
+++ b/include/linux/arm-smccc-bus.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2026 Arm Limited
+ */
+#ifndef __LINUX_ARM_SMCCC_BUS_H
+#define __LINUX_ARM_SMCCC_BUS_H
+
+#include <linux/device.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+
+struct arm_smccc_device {
+	int id;
+	char name[ARM_SMCCC_NAME_SIZE];
+	struct device dev;
+};
+
+#define to_arm_smccc_device(d) container_of(d, struct arm_smccc_device, dev)
+
+struct arm_smccc_driver {
+	const char *name;
+	int (*probe)(struct arm_smccc_device *sdev);
+	void (*remove)(struct arm_smccc_device *sdev);
+	const struct arm_smccc_device_id *id_table;
+
+	struct device_driver driver;
+};
+
+#define to_arm_smccc_driver(d) \
+	container_of_const(d, struct arm_smccc_driver, driver)
+
+int arm_smccc_driver_register(struct arm_smccc_driver *driver,
+		struct module *owner, const char *mod_name);
+void arm_smccc_driver_unregister(struct arm_smccc_driver *driver);
+struct arm_smccc_device *arm_smccc_device_register(const char *name);
+void arm_smccc_device_unregister(struct arm_smccc_device *smcc_dev);
+
+#define arm_smccc_register(driver) \
+	arm_smccc_driver_register(driver, THIS_MODULE, KBUILD_MODNAME)
+#define arm_smccc_unregister(driver) \
+	arm_smccc_driver_unregister(driver)
+
+#define module_arm_smccc_driver(__arm_smccc_driver) \
+	module_driver(__arm_smccc_driver, arm_smccc_register, \
+		      arm_smccc_unregister)
+
+extern const struct bus_type arm_smccc_bus_type;
+
+#endif /* __LINUX_ARM_SMCCC_BUS_H */
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index 23ff24080dfd..c9cee8c5a0b2 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -876,6 +876,19 @@ struct auxiliary_device_id {
 	kernel_ulong_t driver_data;
 };
 
+#define ARM_SMCCC_NAME_SIZE 40
+#define ARM_SMCCC_MODULE_PREFIX "arm_smccc:"
+
+/**
+ * struct arm_smccc_device_id - Arm SMCCC bus device identifier
+ * @name: SMCCC device name
+ * @driver_data: driver data
+ */
+struct arm_smccc_device_id {
+	char name[ARM_SMCCC_NAME_SIZE];
+	kernel_ulong_t driver_data;
+};
+
 /* Surface System Aggregator Module */
 
 #define SSAM_MATCH_TARGET	0x1
diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c
index b4178c42d08f..a485011ff137 100644
--- a/scripts/mod/devicetable-offsets.c
+++ b/scripts/mod/devicetable-offsets.c
@@ -254,6 +254,9 @@ int main(void)
 	DEVID(auxiliary_device_id);
 	DEVID_FIELD(auxiliary_device_id, name);
 
+	DEVID(arm_smccc_device_id);
+	DEVID_FIELD(arm_smccc_device_id, name);
+
 	DEVID(ssam_device_id);
 	DEVID_FIELD(ssam_device_id, match_flags);
 	DEVID_FIELD(ssam_device_id, domain);
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index 2ad87a74bb03..92d3917f27cc 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -1323,6 +1323,13 @@ static void do_auxiliary_entry(struct module *mod, void *symval)
 	module_alias_printf(mod, false, AUXILIARY_MODULE_PREFIX "%s", *name);
 }
 
+static void do_arm_smccc_entry(struct module *mod, void *symval)
+{
+	DEF_FIELD_ADDR(symval, arm_smccc_device_id, name);
+
+	module_alias_printf(mod, false, ARM_SMCCC_MODULE_PREFIX "%s", *name);
+}
+
 /*
  * Looks like: ssam:dNcNtNiNfN
  *
@@ -1493,6 +1500,7 @@ static const struct devtable devtable[] = {
 	{"mhi", SIZE_mhi_device_id, do_mhi_entry},
 	{"mhi_ep", SIZE_mhi_device_id, do_mhi_ep_entry},
 	{"auxiliary", SIZE_auxiliary_device_id, do_auxiliary_entry},
+	{"arm_smccc", SIZE_arm_smccc_device_id, do_arm_smccc_entry},
 	{"ssam", SIZE_ssam_device_id, do_ssam_entry},
 	{"dfl", SIZE_dfl_device_id, do_dfl_entry},
 	{"ishtp", SIZE_ishtp_device_id, do_ishtp_entry},
-- 
2.43.0


^ permalink raw reply related

* [PATCH v7 0/6] Switch Arm SMCCC firmware services to an SMCCC bus
From: Aneesh Kumar K.V (Arm) @ 2026-06-11 13:04 UTC (permalink / raw)
  To: linux-coco, linux-arm-kernel, linux-kernel
  Cc: Aneesh Kumar K.V (Arm), Catalin Marinas, Greg KH, Jeremy Linton,
	Jonathan Cameron, Lorenzo Pieralisi, Mark Rutland, Sudeep Holla,
	Will Deacon, Steven Price, Suzuki K Poulose, Andre Przywara

As discussed here:
https://lore.kernel.org/all/20250728135216.48084-12-aneesh.kumar@kernel.org

The earlier CCA guest support used an arm-cca-dev platform device as a pure
software anchor for the TSM class device. That platform device did not
correspond to a DT/ACPI described device, MMIO range, interrupt, or other
platform resource; it existed only to make the CCA guest driver bind and to
place the resulting TSM device in the driver model. The same pattern also
exists for smccc_trng. Creating separate platform devices for such
SMCCC-discovered features is misleading, because those features are not
independent platform devices.

This series adds an Arm SMCCC bus for services discovered through the SMCCC
firmware interface. The bus provides SMCCC device and driver registration
helpers, name-based matching, uevent modalias generation, and a sysfs modalias
attribute. SMCCC service drivers can use MODULE_DEVICE_TABLE(arm_smccc, ...)
to emit arm_smccc:<name> aliases, allowing userspace to autoload service
drivers when the SMCCC core registers matching firmware-service devices.

The series then moves SMCCC TRNG and the Arm CCA guest RSI service off the
platform bus. When the SMCCC core discovers the corresponding firmware
service, it registers an arm-smccc device for that service. The hwrng
arm_smccc_trng driver and the Arm CCA guest TSM provider are converted to
SMCCC drivers that bind to those discovered devices.

The old arm-cca-dev platform device has also been used by userspace as a Realm
guest indicator. Removing it without a replacement would leave userspace
depending on an internal driver-binding device. This series therefore adds
/sys/firmware/cca/realm_guest as a stable, architecture-provided ABI for
detecting whether the kernel is running as an Arm CCA Realm guest, and then
removes the dummy arm-cca-dev platform-device registration.

Changes since v6:
* Move SMCCC bus-related code to bus.c.
* Remove CONFIG_ARM64 #ifdefs and switch device creation to use the generic function-ID support framework.
* Move version-specific checks and other conditionals to the device driver probe routines.
* Move RSI definitions to include/linux/arm-smccc-rsi.h.
* Split the file and variable renames into a separate patch.

Changes from v5:
https://lore.kernel.org/all/20260514094030.42495-1-aneesh.kumar@kernel.org
* Replace the arm-smccc platform-device plus auxiliary-child model with a
  dedicated Arm SMCCC bus.
* Add SMCCC module alias support so SMCCC service drivers can use
  MODULE_DEVICE_TABLE(arm_smccc, ...) and autoload through arm_smccc:<name>
  aliases.
* Convert smccc_trng from a platform driver to an SMCCC driver.
* Convert the Arm CCA guest TSM provider from the arm-cca-dev platform device
  to an SMCCC driver bound to the discovered RSI service.
* Add /sys/firmware/cca/realm_guest before removing the old arm-cca-dev dummy
  platform device.

Changes from v4:
https://lore.kernel.org/all/20260427061615.905018-1-aneesh.kumar@kernel.org
* Add /sys/firmware/cca/realm_guest for detecting realm guest
* Convert smccc_trng to auxiliary device from platform device

Changes from v3:
https://lore.kernel.org/all/20260309100507.2303361-1-aneesh.kumar@kernel.org
* Rebased onto the latest kernel
* Drop pr_fmt() from drivers/firmware/smccc/rmm.c

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Jeremy Linton <jeremy.linton@arm.com>
Cc: Jonathan Cameron <jic23@kernel.org>
Cc: Lorenzo Pieralisi <lpieralisi@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Sudeep Holla <sudeep.holla@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Steven Price <steven.price@arm.com>
Cc: Suzuki K Poulose <Suzuki.Poulose@arm.com>
Cc: Andre Przywara <andre.przywara@arm.com>

Aneesh Kumar K.V (Arm) (6):
  firmware: smccc: Add an Arm SMCCC bus
  firmware: hwrng: arm_smccc_trng: Register as an SMCCC device
  firmware: smccc: Move RSI definitions to include/linux
  virt: coco: arm-cca-guest: Rename TSM report source file
  firmware: smccc: arm-cca-guest: Bind the TSM provider to an SMCCC
    device
  coco: guest: arm64: Replace dummy CCA device with sysfs ABI

 Documentation/ABI/testing/sysfs-firmware-cca  |  10 ++
 arch/arm64/include/asm/archrandom.h           |   2 +-
 arch/arm64/include/asm/rsi.h                  |   2 -
 arch/arm64/include/asm/rsi_cmds.h             |  74 +-------
 arch/arm64/kernel/rsi.c                       |  39 +++--
 drivers/char/hw_random/arm_smccc_trng.c       |  32 ++--
 drivers/firmware/smccc/Makefile               |   2 +-
 drivers/firmware/smccc/bus.c                  | 164 ++++++++++++++++++
 drivers/firmware/smccc/smccc.c                |  65 ++++++-
 drivers/virt/coco/arm-cca-guest/Kconfig       |   1 +
 drivers/virt/coco/arm-cca-guest/Makefile      |   2 +
 .../{arm-cca-guest.c => arm-cca.c}            |  62 +++----
 drivers/virt/coco/arm-cca-guest/rsi.h         |  84 +++++++++
 include/linux/arm-smccc-bus.h                 |  49 ++++++
 .../linux/arm-smccc-rsi.h                     |   8 +-
 include/linux/mod_devicetable.h               |  13 ++
 scripts/mod/devicetable-offsets.c             |   3 +
 scripts/mod/file2alias.c                      |   8 +
 18 files changed, 480 insertions(+), 140 deletions(-)
 create mode 100644 Documentation/ABI/testing/sysfs-firmware-cca
 create mode 100644 drivers/firmware/smccc/bus.c
 rename drivers/virt/coco/arm-cca-guest/{arm-cca-guest.c => arm-cca.c} (85%)
 create mode 100644 drivers/virt/coco/arm-cca-guest/rsi.h
 create mode 100644 include/linux/arm-smccc-bus.h
 rename arch/arm64/include/asm/rsi_smc.h => include/linux/arm-smccc-rsi.h (97%)


base-commit: ddd664bbff63e09e7a7f9acae9c43605d4cf185f
-- 
2.43.0


^ permalink raw reply

* Re: [PATCH v6 04/20] dma-pool: track decrypted atomic pools and select them via attrs
From: Petr Tesarik @ 2026-06-11 11:50 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Aneesh Kumar K.V, iommu, linux-arm-kernel, linux-kernel,
	linux-coco, Robin Murphy, Marek Szyprowski, Will Deacon,
	Marc Zyngier, Steven Price, Suzuki K Poulose, Catalin Marinas,
	Jiri Pirko, Mostafa Saleh, Alexey Kardashevskiy, Dan Williams,
	Xu Yilun, linuxppc-dev, linux-s390, Madhavan Srinivasan,
	Michael Ellerman, Nicholas Piggin, Christophe Leroy (CS GROUP),
	Alexander Gordeev, Gerald Schaefer, Heiko Carstens, Vasily Gorbik,
	Christian Borntraeger, Sven Schnelle, x86, Jiri Pirko,
	Michael Kelley
In-Reply-To: <20260611113740.GB1066031@ziepe.ca>

On Thu, 11 Jun 2026 08:37:40 -0300
Jason Gunthorpe <jgg@ziepe.ca> wrote:

> On Thu, Jun 11, 2026 at 10:55:47AM +0530, Aneesh Kumar K.V wrote:
> > Jason Gunthorpe <jgg@ziepe.ca> writes:
> >   
> > > The sashiko note does look legit though:
> > >
> > > 	if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
> > > 	    !gfpflags_allow_blocking(gfp) && !coherent) {
> > > 		page = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &cpu_addr,
> > > 					   gfp, attrs, NULL);
> > > 		if (!page)
> > > 			return NULL;
> > >
> > > I don't see anything doing the force_dma_unencrypted test along this
> > > callchain..
> > >
> > > I guess it should be done one step up in dma_alloc_attrs() instead of
> > > in dma_direct_alloc()?
> > >  
> > 
> > I think we should do something similar to what dma_map_phys() does here,
> > considering that we only support DMA direct with DMA_ATTR_CC_SHARED/DMA_ATTR_ALLOC_CC_SHARED.  
> 
> Yeah, I think that's the right idea for now..
> 
> > +	if (force_dma_unencrypted(dev))
> > +		attrs |= DMA_ATTR_ALLOC_CC_SHARED;
> > +
> > +	is_cc_shared = attrs & DMA_ATTR_CC_SHARED;
> > +
> >  	if (dma_alloc_direct(dev, ops) || arch_dma_alloc_direct(dev)) {
> >  		cpu_addr = dma_direct_alloc(dev, size, dma_handle, flag, attrs);
> > +	} else if (is_cc_shared) {
> > +		trace_dma_alloc(dev, NULL, 0, size, DMA_BIDIRECTIONAL, flag,
> > +				attrs);  
> 
> But it would be clearer to put the test in the iommu_ functions I
> think, since they are the ones that have the issue. We will need to
> fix it someday..
> 
> I think we can ignore the op-> functions, arches cannot support CC and
> still use dma_map_ops..

Hm, sounds reasonable. Should we perhaps enforce this at configure or
build time?

Petr T

^ permalink raw reply

* Re: [RFC PATCH] mm/vmalloc: add vmalloc_decrypted() and vzalloc_decrypted()
From: Jason Gunthorpe @ 2026-06-11 11:49 UTC (permalink / raw)
  To: Catalin Marinas
  Cc: Kameron Carr, akpm, urezki, linux-mm, linux-kernel, rppt,
	mhklinux, linux-coco, Suzuki K Poulose
In-Reply-To: <aibhnnDFQTHWMEFe@arm.com>

On Mon, Jun 08, 2026 at 04:37:02PM +0100, Catalin Marinas wrote:
> > +/**
> > + * vzalloc_decrypted - allocate zeroed virtually contiguous decrypted memory
> > + * @size:    allocation size
> > + *
> > + * Like vmalloc_decrypted(), but the memory is set to zero.
> > + *
> > + * Return: pointer to the allocated memory or %NULL on error
> > + */
> > +void *vzalloc_decrypted_noprof(unsigned long size)
> > +{
> > +	void *addr;
> > +
> > +	addr = __vmalloc_node_range_noprof(size, 1, VMALLOC_START, VMALLOC_END,
> > +					   GFP_KERNEL,
> > +					   pgprot_decrypted(PAGE_KERNEL),
> > +					   VM_DECRYPTED, NUMA_NO_NODE,
> > +					   __builtin_return_address(0));
> > +	if (addr)
> > +		memset(addr, 0, size);
> 
> Talking to Suzuki, the small window between set_memory_decrypted() and
> memset() potentially exposing stale data is safe, at least for Arm CCA
> as the memory would be scrubbed (there are other places in the kernel
> where we do something similar). I assume that's also the case for other
> architectures, although not sure what pKVM does.

It seems like a poor practice though, this should probably be
re-organized to use __GFP_ZERO so things are ordered sensibly.

But what is the purpose of this? I guess some hyperv thing - but
shouldn't we have a more structured way to "DMA map" things for the
hypervisor instead of stuff like this? Why can't you use
dma_alloc_coherent() which actually gives you an address that is
sensible to pass to the hypervisor?

Jason

^ permalink raw reply

* Re: [PATCH v6 04/20] dma-pool: track decrypted atomic pools and select them via attrs
From: Jason Gunthorpe @ 2026-06-11 11:37 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: iommu, linux-arm-kernel, linux-kernel, linux-coco, Robin Murphy,
	Marek Szyprowski, Will Deacon, Marc Zyngier, Steven Price,
	Suzuki K Poulose, Catalin Marinas, Jiri Pirko, Mostafa Saleh,
	Petr Tesarik, Alexey Kardashevskiy, Dan Williams, Xu Yilun,
	linuxppc-dev, linux-s390, Madhavan Srinivasan, Michael Ellerman,
	Nicholas Piggin, Christophe Leroy (CS GROUP), Alexander Gordeev,
	Gerald Schaefer, Heiko Carstens, Vasily Gorbik,
	Christian Borntraeger, Sven Schnelle, x86, Jiri Pirko,
	Michael Kelley
In-Reply-To: <yq5aa4t1myw4.fsf@kernel.org>

On Thu, Jun 11, 2026 at 10:55:47AM +0530, Aneesh Kumar K.V wrote:
> Jason Gunthorpe <jgg@ziepe.ca> writes:
> 
> > The sashiko note does look legit though:
> >
> > 	if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
> > 	    !gfpflags_allow_blocking(gfp) && !coherent) {
> > 		page = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &cpu_addr,
> > 					   gfp, attrs, NULL);
> > 		if (!page)
> > 			return NULL;
> >
> > I don't see anything doing the force_dma_unencrypted test along this
> > callchain..
> >
> > I guess it should be done one step up in dma_alloc_attrs() instead of
> > in dma_direct_alloc()?
> >
> 
> I think we should do something similar to what dma_map_phys() does here,
> considering that we only support DMA direct with DMA_ATTR_CC_SHARED/DMA_ATTR_ALLOC_CC_SHARED.

Yeah, I think that's the right idea for now..

> +	if (force_dma_unencrypted(dev))
> +		attrs |= DMA_ATTR_ALLOC_CC_SHARED;
> +
> +	is_cc_shared = attrs & DMA_ATTR_CC_SHARED;
> +
>  	if (dma_alloc_direct(dev, ops) || arch_dma_alloc_direct(dev)) {
>  		cpu_addr = dma_direct_alloc(dev, size, dma_handle, flag, attrs);
> +	} else if (is_cc_shared) {
> +		trace_dma_alloc(dev, NULL, 0, size, DMA_BIDIRECTIONAL, flag,
> +				attrs);

But it would be clearer to put the test in the iommu_ functions I
think, since they are the ones that have the issue. We will need to
fix it someday..

I think we can ignore the op-> functions, arches cannot support CC and
still use dma_map_ops..

Jason

^ permalink raw reply

* Re: [PATCH v6 04/20] dma-pool: track decrypted atomic pools and select them via attrs
From: Jason Gunthorpe @ 2026-06-11 11:30 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: iommu, linux-arm-kernel, linux-kernel, linux-coco, Robin Murphy,
	Marek Szyprowski, Will Deacon, Marc Zyngier, Steven Price,
	Suzuki K Poulose, Catalin Marinas, Jiri Pirko, Mostafa Saleh,
	Petr Tesarik, Alexey Kardashevskiy, Dan Williams, Xu Yilun,
	linuxppc-dev, linux-s390, Madhavan Srinivasan, Michael Ellerman,
	Nicholas Piggin, Christophe Leroy (CS GROUP), Alexander Gordeev,
	Gerald Schaefer, Heiko Carstens, Vasily Gorbik,
	Christian Borntraeger, Sven Schnelle, x86, Jiri Pirko,
	Michael Kelley
In-Reply-To: <yq5acxxxn0gp.fsf@kernel.org>

On Thu, Jun 11, 2026 at 10:21:50AM +0530, Aneesh Kumar K.V wrote:

> If we are adding DMA_ATTR_ALLOC_SHARED, should we also allow
> dma_alloc_attrs() to take that attribute value?

I don't think we should..

It is hard to see any reason to allocate shared memory through the DMA
API. The way the DMA API works only the device that it is allocated
for can access that memory, so it is effectively private to the
device. Thus what purpose is shared device private memory?

> +DMA_ATTR_CC_SHARED
> +------------------
> +
> +This attribute indicates that a DMA mapping is shared, or decrypted, for
> +confidential computing guests. For normal system memory, the caller must
> +already have marked the memory decrypted with set_memory_decrypted(). CPU
> +PTEs for the mapping must use pgprot_decrypted(), and the same shared
> +semantic may be passed to a vIOMMU when it sets up the IOPTE.
> +
> +This attribute describes an existing mapping. It does not allocate shared
> +backing pages and must not be passed to dma_alloc_attrs(). For MMIO, use
> +this together with DMA_ATTR_MMIO to indicate shared MMIO. Unless
> +DMA_ATTR_MMIO is provided, the mapping requires a struct page.

Yes, though we need to fix a few ATTR_MMIO users to make this
statement true

> +DMA_ATTR_ALLOC_CC_SHARED
> +------------------------
> +
> +This attribute indicates that a dma_alloc_attrs() allocation must use
> +shared, or decrypted, backing pages for confidential computing guests.
> +Allocation paths use this request when they select shared DMA pools,
> +decrypt newly allocated pages or restore encryption on free.
> +
> +DMA_ATTR_ALLOC_CC_SHARED differs from DMA_ATTR_CC_SHARED in that it
> +requests shared backing memory from the allocation path. DMA_ATTR_CC_SHARED
> +describes an already-shared mapping and requires the caller to have
> +prepared normal system memory before mapping it. Callers that need shared
> +memory from dma_alloc_attrs() should request DMA_ATTR_ALLOC_CC_SHARED
> +instead of DMA_ATTR_CC_SHARED.

The semantic is right, but I would make it a private attribute since
no driver should use it.

Jason

^ permalink raw reply

* Re: [RFC PATCH v5 30/45] x86/virt/tdx: Add API to demote a 2MB mapping to 512 4KB mappings
From: Yan Zhao @ 2026-06-11  8:44 UTC (permalink / raw)
  To: Sean Christopherson
  Cc: Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
	Kiryl Shutsemau, Paolo Bonzini, linux-kernel, linux-coco, kvm,
	Kai Huang, Rick Edgecombe, Vishal Annapurve, Ackerley Tng,
	Sagi Shahar, Binbin Wu, Xiaoyao Li, Isaku Yamahata
In-Reply-To: <20260129011517.3545883-31-seanjc@google.com>

> +u64 tdh_mem_page_demote(struct tdx_td *td, u64 gpa, enum pg_level level, u64 pfn,
> +			struct page *new_sp, struct tdx_pamt_cache *pamt_cache,
> +			u64 *ext_err1, u64 *ext_err2)
> +{
> +	bool dpamt = tdx_supports_dynamic_pamt(&tdx_sysinfo) && level == PG_LEVEL_2M;
> +	u64 pamt_pa_array[MAX_NR_DPAMT_ARGS];
> +	struct tdx_module_args args = {
> +		.rcx = gpa | pg_level_to_tdx_sept_level(level),
> +		.rdx = tdx_tdr_pa(td),
> +		.r8 = page_to_phys(new_sp),
> +	};
> +	u64 ret;
> +
> +	if (!tdx_supports_demote_nointerrupt(&tdx_sysinfo))
> +		return TDX_SW_ERROR;
> +
> +	if (dpamt) {
> +		if (alloc_pamt_array(pamt_pa_array, pamt_cache))
> +			return TDX_SW_ERROR;
> +
> +		dpamt_copy_to_regs(&args, r12, pamt_pa_array);
> +	}
> +
> +	/* Flush the new S-EPT page to be added */
> +	tdx_clflush_page(new_sp);
> +
> +	ret = seamcall_saved_ret(TDH_MEM_PAGE_DEMOTE, &args);
Note for the next posting:

When DPAMT is enabled, part of the DEMOTE SEAMCALL performs the same function as
PAMT.ADD for the guest page, with the DPAMT page pair specified at args.r12 and
args.r13. So, DEMOTE has the same contention issue as PAMT.ADD [1].
Consider the following scenario:

      CPU 0                                     CPU 1

(1) DEMOTE adds pfn A1=0x1b090c,         (2) PAMT.ADD adds pfn YY, pfn ZZ as
pfn B1=0x119b4f as DPAMT pages            DPAMT pages for pfn A2=0x1b090d.
for guest page XX=0x511a00

(1) CPU0 needs to acquire a shared lock on page A1's 2MB PAMT entry.
    Since A1 and B1 are added as DPAMT pages, they don't necessarily have DPAMT
    pages installed for their own 2MB ranges.
(2) Assume there are no DPAMT pages installed for A1's 2MB range.
    CPU1 installs DPAMT pages YY, ZZ for page A2, acquiring an exclusive lock
    on page A2's 2MB PAMT entry.

Because pages A1 and A2 reside within the same 2MB range, either DEMOTE or
PAMT.ADD will return TDX_OPERAND_BUSY [2].

Though KVM holds mmu_lock for write before invoking DEMOTE, which prevents
concurrent PAMT.ADD within one TD, the above BUSY error could still occur if a
second TD invokes PAMT.ADD while the first TD is invoking DEMOTE.

So, fix this issue by acquiring the global pamt_lock around DEMOTE. See the new
implementation [3].

Since this contention should occur rarely (e.g., only when a second TD invokes
PAMT.ADD concurrently while the first TD is invoking DEMOTE, and the DPAMT page
pair being added for DEMOTE resides in the same 2MB range that PAMT.ADD
targets), a possible optimization is to avoid holding the global pamt_lock in
the first invocation of tdh_mem_page_demote() (e.g., by indicating try or fast
mode); only acquire the global pamt_lock if the first try returns busy, ensuring
the second invocation must succeed.

[1] https://lore.kernel.org/kvm/aNX6V6OSIwly1hu4@yzhao56-desk.sh.intel.com
[2] The contention is verified with an internal POC.
    Error logs:
    a.1) DEMOTE adds PAMT pages pfn1=0x19c0a0, pfn2=0x1b572f for guest pfn=0x519800
      2) __tdx_pamt_get() adds PAMT pages for pfn=0x19c0a1.
      3) DEMOTE returns error 0x800002000000000c.
    b.1) DEMOTE adds PAMT pages pfn1=0x1b090c, pfn2=0x119b4f for guest pfn=0x511a00
      2) __tdx_pamt_get() adds PAMT pages for pfn=0x1b090d
      3) PAMT.ADD returns error 0x8000020000000001.

[3] New implementation:
/*
 * tdh_mem_page_demote() - issue the TDH_MEM_PAGE_DEMOTE SEAMCALL for a TD.
 * @td:         TD whose TDR physical address is passed in RDX.
 * @gpa:        guest physical address, OR'ed with the S-EPT level into RCX.
 * @level:      page level of the mapping; the Dynamic PAMT handling below is
 *              only done for PG_LEVEL_2M.
 * @pfn:        used to look up the pamt_refcount that is updated after a
 *              successful demotion.
 * @new_sp:     new S-EPT page to be added; its physical address goes in R8.
 * @pamt_cache: cache handed to alloc_pamt_array() to obtain the DPAMT pages.
 * @ext_err1:   receives args.rcx after the SEAMCALL.
 * @ext_err2:   receives args.rdx after the SEAMCALL.
 *
 * Return: TDX_SW_ERROR if demote-nointerrupt is not supported or the DPAMT
 * pages cannot be allocated (in both cases @ext_err1/@ext_err2 are left
 * untouched); otherwise the value returned by the SEAMCALL.
 */
u64 tdh_mem_page_demote(struct tdx_td *td, u64 gpa, enum pg_level level, u64 pfn,
                        struct page *new_sp, struct tdx_pamt_cache *pamt_cache,
                        u64 *ext_err1, u64 *ext_err2)
{
        /* DPAMT pages are only supplied when demoting a 2MB mapping. */
        bool dpamt = tdx_supports_dynamic_pamt(&tdx_sysinfo) && level == PG_LEVEL_2M;
        struct page *pamt_pages[TDX_DPAMT_ENTRY_PAGE_CNT];
        struct tdx_module_args args = {
                .rcx = gpa | pg_level_to_tdx_sept_level(level),
                .rdx = tdx_tdr_pa(td),
                .r8 = page_to_phys(new_sp),
        };
        /* Only assigned and only dereferenced on the dpamt path. */
        atomic_t *pamt_refcount;
        u64 ret;

        if (!tdx_supports_demote_nointerrupt(&tdx_sysinfo))
                return TDX_SW_ERROR;

        /* Flush the new S-EPT page to be added */
        tdx_clflush_page(new_sp);

        if (dpamt) {
                if (alloc_pamt_array(pamt_pages, pamt_cache))
                        return TDX_SW_ERROR;

                /*
                 * The DPAMT page pair is passed in R12/R13.
                 * NOTE(review): only entries 0 and 1 are used, which assumes
                 * TDX_DPAMT_ENTRY_PAGE_CNT is 2 - TODO confirm.
                 */
                args.r12 = page_to_phys(pamt_pages[0]);
                args.r13 = page_to_phys(pamt_pages[1]);

                /*
                 * Before demotion, the 2MB guest memory range is not managed
                 * by DPAMT, so its pamt_refcount should be 0.
                 * Set it to 512 after demotion succeeds, since removing each
                 * 4KB mapping will reduce the refcount by 1.
                 */
                pamt_refcount = tdx_find_pamt_refcount(pfn);

                /*
                 * With DPAMT, DEMOTE installs the page pair like PAMT.ADD
                 * does, so it can contend with a concurrent PAMT.ADD issued
                 * on behalf of another TD and fail with a busy error. Hold
                 * the global pamt_lock across the SEAMCALL to serialize them.
                 */
                spin_lock(&pamt_lock);
        }
	ret = seamcall_saved_ret(TDH_MEM_PAGE_DEMOTE, &args);

        if (dpamt) {
                /*
                 * On success, publish the refcount while still holding
                 * pamt_lock; warn once if it was not the expected 0.
                 */
                if (!ret)
                        WARN_ON_ONCE(atomic_cmpxchg_release(pamt_refcount, 0, PTRS_PER_PMD));

                spin_unlock(&pamt_lock);

                /* On failure, give back the unused DPAMT pages, outside the lock. */
                if (ret)
                        free_pamt_array(pamt_pages);
        }

        /* Report the extended error information left in RCX/RDX. */
        *ext_err1 = args.rcx;
        *ext_err2 = args.rdx;

        return ret;
}


^ permalink raw reply

* Re: [PATCH v6 00/20] dma-mapping: Use DMA_ATTR_CC_SHARED through direct, pool and swiotlb paths
From: Aneesh Kumar K.V @ 2026-06-11  5:52 UTC (permalink / raw)
  To: Catalin Marinas
  Cc: iommu, linux-arm-kernel, linux-kernel, linux-coco, Robin Murphy,
	Marek Szyprowski, Will Deacon, Marc Zyngier, Steven Price,
	Suzuki K Poulose, Jiri Pirko, Jason Gunthorpe, Mostafa Saleh,
	Petr Tesarik, Alexey Kardashevskiy, Dan Williams, Xu Yilun,
	linuxppc-dev, linux-s390, Madhavan Srinivasan, Michael Ellerman,
	Nicholas Piggin, Christophe Leroy (CS GROUP), Alexander Gordeev,
	Gerald Schaefer, Heiko Carstens, Vasily Gorbik,
	Christian Borntraeger, Sven Schnelle, x86
In-Reply-To: <aigYbK12D8uKQvJF@arm.com>

Catalin Marinas <catalin.marinas@arm.com> writes:

> On Thu, Jun 04, 2026 at 02:09:39PM +0530, Aneesh Kumar K.V (Arm) wrote:
>> This series propagates DMA_ATTR_CC_SHARED through the dma-direct,
>> dma-pool, and swiotlb paths so that encrypted and decrypted DMA buffers
>> are handled consistently.
>> 
>> Today, the direct DMA path mostly relies on force_dma_unencrypted() for
>> shared/decrypted buffer handling. This series consolidates the
>> force_dma_unencrypted() checks in the top-level functions and ensures
>> that the remaining DMA interfaces use DMA attributes to make the correct
>> decisions.
>
> Please check Sashiko's reports, it has some good points:
>
> https://sashiko.dev/#/patchset/20260604083959.1265923-1-aneesh.kumar@kernel.org
>
> I think the main one is the swiotlb_tbl_map_single() changes which break
> AMD SME host support. There cc_platform_has(CC_ATTR_MEM_ENCRYPT) is true
> but force_dma_unencrypted() is false. Normally you'd not end up on this
> path but you can have swiotlb=force.
>

I would consider the above similar to a trusted device requiring swiotlb
bouncing. At some point, based on real-world use cases, we may need to
add protected io_tlb_mem pools. We have not done that yet because no
such use case has come up so far.

-aneesh

^ permalink raw reply

* Re: [PATCH v6 04/20] dma-pool: track decrypted atomic pools and select them via attrs
From: Aneesh Kumar K.V @ 2026-06-11  5:25 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: iommu, linux-arm-kernel, linux-kernel, linux-coco, Robin Murphy,
	Marek Szyprowski, Will Deacon, Marc Zyngier, Steven Price,
	Suzuki K Poulose, Catalin Marinas, Jiri Pirko, Mostafa Saleh,
	Petr Tesarik, Alexey Kardashevskiy, Dan Williams, Xu Yilun,
	linuxppc-dev, linux-s390, Madhavan Srinivasan, Michael Ellerman,
	Nicholas Piggin, Christophe Leroy (CS GROUP), Alexander Gordeev,
	Gerald Schaefer, Heiko Carstens, Vasily Gorbik,
	Christian Borntraeger, Sven Schnelle, x86, Jiri Pirko,
	Michael Kelley
In-Reply-To: <20260609143242.GK2764304@ziepe.ca>

Jason Gunthorpe <jgg@ziepe.ca> writes:

> The sashiko note does look legit though:
>
> 	if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
> 	    !gfpflags_allow_blocking(gfp) && !coherent) {
> 		page = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &cpu_addr,
> 					   gfp, attrs, NULL);
> 		if (!page)
> 			return NULL;
>
> I don't see anything doing the force_dma_unencrypted test along this
> callchain..
>
> I guess it should be done one step up in dma_alloc_attrs() instead of
> in dma_direct_alloc()?
>

I think we should do something similar to what dma_map_phys() does here,
considering that we only support DMA direct with DMA_ATTR_CC_SHARED/DMA_ATTR_ALLOC_CC_SHARED.

@@ -637,6 +637,7 @@ void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
 {
 	const struct dma_map_ops *ops = get_dma_ops(dev);
 	void *cpu_addr;
+	bool is_cc_shared;
 
 	WARN_ON_ONCE(!dev->coherent_dma_mask);
 
@@ -657,8 +658,17 @@ void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
 	/* let the implementation decide on the zone to allocate from: */
 	flag &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM);
 
+	if (force_dma_unencrypted(dev))
+		attrs |= DMA_ATTR_ALLOC_CC_SHARED;
+
+	is_cc_shared = attrs & DMA_ATTR_CC_SHARED;
+
 	if (dma_alloc_direct(dev, ops) || arch_dma_alloc_direct(dev)) {
 		cpu_addr = dma_direct_alloc(dev, size, dma_handle, flag, attrs);
+	} else if (is_cc_shared) {
+		trace_dma_alloc(dev, NULL, 0, size, DMA_BIDIRECTIONAL, flag,
+				attrs);
+		return NULL;
 	} else if (use_dma_iommu(dev)) {
 		cpu_addr = iommu_dma_alloc(dev, size, dma_handle, flag, attrs);
 	} else if (ops->alloc) {

-aneesh

^ permalink raw reply

* Re: [PATCH v6 04/20] dma-pool: track decrypted atomic pools and select them via attrs
From: Aneesh Kumar K.V @ 2026-06-11  4:51 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: iommu, linux-arm-kernel, linux-kernel, linux-coco, Robin Murphy,
	Marek Szyprowski, Will Deacon, Marc Zyngier, Steven Price,
	Suzuki K Poulose, Catalin Marinas, Jiri Pirko, Mostafa Saleh,
	Petr Tesarik, Alexey Kardashevskiy, Dan Williams, Xu Yilun,
	linuxppc-dev, linux-s390, Madhavan Srinivasan, Michael Ellerman,
	Nicholas Piggin, Christophe Leroy (CS GROUP), Alexander Gordeev,
	Gerald Schaefer, Heiko Carstens, Vasily Gorbik,
	Christian Borntraeger, Sven Schnelle, x86, Jiri Pirko,
	Michael Kelley
In-Reply-To: <20260610164153.GQ2764304@ziepe.ca>

Jason Gunthorpe <jgg@ziepe.ca> writes:

> On Wed, Jun 10, 2026 at 01:37:26PM +0530, Aneesh Kumar K.V wrote:
>> Jason Gunthorpe <jgg@ziepe.ca> writes:
>> 
>> > On Thu, Jun 04, 2026 at 02:09:43PM +0530, Aneesh Kumar K.V (Arm) wrote:
>> >>  struct page *dma_alloc_from_pool(struct device *dev, size_t size,
>> >> -		void **cpu_addr, gfp_t gfp,
>> >> +		void **cpu_addr, gfp_t gfp, unsigned long attrs,
>> >>  		bool (*phys_addr_ok)(struct device *, phys_addr_t, size_t))
>> >>  {
>> >> -	struct gen_pool *pool = NULL;
>> >> +	struct dma_gen_pool *dma_pool = NULL;
>> >>  	struct page *page;
>> >>  	bool pool_found = false;
>> >>  
>> >> -	while ((pool = dma_guess_pool(pool, gfp))) {
>> >> +	while ((dma_pool = dma_guess_pool(dma_pool, gfp))) {
>> >> +
>> >> +		if (dma_pool->unencrypted != !!(attrs & DMA_ATTR_CC_SHARED))
>> >> +			continue;
>> >
>> > I don't think you should be overloading DMA_ATTR_CC_SHARED like this.
>> >
>> > 	/*
>> > 	 * DMA_ATTR_CC_SHARED is not a caller-visible dma_alloc_*()
>> > 	 * attribute. The direct allocator uses it internally after it has
>> > 	 * decided that the backing pages must be shared/decrypted, so the
>> > 	 * rest of the allocation path can consistently select DMA addresses,
>> > 	 * choose compatible pools and restore encryption on free.
>> > 	 */
>> > 	if (attrs & DMA_ATTR_CC_SHARED)
>> > 		return NULL;
>> >
>> > 	if (force_dma_unencrypted(dev)) {
>> > 		attrs |= DMA_ATTR_CC_SHARED;
>> > 		mark_mem_decrypt = true;
>> > 	}
>> >
>> > It is fine to have a bit inside the attrs that is only used by the
>> > internal logic, but it needs to have a clearer name
>> > __DMA_ATTR_REQUIRE_CC_SHARED perhaps.
>> >
>> 
>> Are you suggesting adding another attribute in addition to
>> DMA_ATTR_CC_SHARED?
>> 
>> Is the idea that __DMA_ATTR_REQUIRE_CC_SHARED would be used in the
>> allocation path to request a CC_SHARED allocation, while
>> DMA_ATTR_CC_SHARED would be used in the mapping path to describe the
>> attribute of the address?
>
> Yeah, it is a thought at least
>
> Maybe a comment is good enough.
>
> I just find it hard to follow when we have this dual usage. Like the
> code above for dma_pool->unencrypted is completely wrong if it is an
> "attribute of an address". Easy to cut & paste that into the wrong
> context.
>
> Especially if you move things up higher.. having the alloc set both
> CC_SHARED and REQUIRE_CC_SHARED or maybe ALLOC_CC_SHARED would make it
> clearer that the alloc code lives under that callchain
>
> Jason
>

If we are adding DMA_ATTR_ALLOC_CC_SHARED, should we also allow
dma_alloc_attrs() to take that attribute value?

Does this look okay? 
(Note: Parts of the documentation text were updated using Codex.)

modified   Documentation/core-api/dma-attributes.rst
@@ -179,3 +179,32 @@ interface when building their uAPIs, when possible.
 
 It must never be used in an in-kernel driver that only works with
 kernel memory.
+
+DMA_ATTR_CC_SHARED
+------------------
+
+This attribute indicates that a DMA mapping is shared, or decrypted, for
+confidential computing guests. For normal system memory, the caller must
+already have marked the memory decrypted with set_memory_decrypted(). CPU
+PTEs for the mapping must use pgprot_decrypted(), and the same shared
+semantic may be passed to a vIOMMU when it sets up the IOPTE.
+
+This attribute describes an existing mapping. It does not allocate shared
+backing pages and must not be passed to dma_alloc_attrs(). For MMIO, use
+this together with DMA_ATTR_MMIO to indicate shared MMIO. Unless
+DMA_ATTR_MMIO is provided, the mapping requires a struct page.
+
+DMA_ATTR_ALLOC_CC_SHARED
+------------------------
+
+This attribute indicates that a dma_alloc_attrs() allocation must use
+shared, or decrypted, backing pages for confidential computing guests.
+Allocation paths use this request when they select shared DMA pools,
+decrypt newly allocated pages or restore encryption on free.
+
+DMA_ATTR_ALLOC_CC_SHARED differs from DMA_ATTR_CC_SHARED in that it
+requests shared backing memory from the allocation path. DMA_ATTR_CC_SHARED
+describes an already-shared mapping and requires the caller to have
+prepared normal system memory before mapping it. Callers that need shared
+memory from dma_alloc_attrs() should request DMA_ATTR_ALLOC_CC_SHARED
+instead of DMA_ATTR_CC_SHARED.
modified   include/linux/dma-mapping.h
@@ -103,6 +103,13 @@
  */
 #define DMA_ATTR_CC_SHARED	(1UL << 13)
 
+/*
+ * DMA_ATTR_ALLOC_CC_SHARED: Allocates DMA memory as shared (decrypted) for
+ * confidential computing guests. Unlike DMA_ATTR_CC_SHARED, this attribute
+ * is used by dma_alloc_attrs() paths that create shared backing pages;
+ * DMA_ATTR_CC_SHARED describes an already-shared mapping.
+ */
+#define DMA_ATTR_ALLOC_CC_SHARED	(1UL << 14)
 /*
  * A dma_addr_t can hold any valid DMA or bus address for the platform.  It can
  * be given to a device to use as a DMA source or target.  It is specific to a

^ permalink raw reply

* Re: [PATCH v7 06/42] KVM: guest_memfd: Update kvm_gmem_populate() to use gmem attributes
From: Sean Christopherson @ 2026-06-10 22:23 UTC (permalink / raw)
  To: Ackerley Tng
  Cc: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
	ira.weiny, jmattson, jthoughton, michael.roth, oupton,
	pankaj.gupta, qperret, rick.p.edgecombe, rientjes, shivankg,
	steven.price, tabba, willy, wyihan, yan.y.zhao, forkloop,
	pratyush, suzuki.poulose, aneesh.kumar, liam, Paolo Bonzini,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
	H. Peter Anvin, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, Shuah Khan,
	Vishal Annapurve, Andrew Morton, Chris Li, Kairui Song,
	Kemeng Shi, Nhat Pham, Baoquan He, Barry Song, Axel Rasmussen,
	Yuanchu Xie, Wei Xu, Youngjun Park, Qi Zheng, Shakeel Butt,
	Kiryl Shutsemau, Jason Gunthorpe, Vlastimil Babka, kvm,
	linux-kernel, linux-trace-kernel, linux-doc, linux-kselftest,
	linux-mm, linux-coco
In-Reply-To: <20260522-gmem-inplace-conversion-v7-6-2f0fae496530@google.com>

On Fri, May 22, 2026, Ackerley Tng wrote:
> Update the guest_memfd populate() flow to pull memory attributes from the
> gmem instance instead of the VM when KVM is not configured to track
> shared/private status in the VM.
> 
> Rename the per-VM API to make it clear that it retrieves per-VM
> attributes, i.e. is not suitable for use outside of flows that are
> specific to generic per-VM attributes.
> 
> Co-developed-by: Sean Christopherson <seanjc@google.com>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> Reviewed-by: Fuad Tabba <tabba@google.com>
> Signed-off-by: Ackerley Tng <ackerleytng@google.com>

We should squash this in with the previous patch, i.e. wire up PRIVATE to gmem
in a single patch (sans the ioctl support).  I had a hell of a time figuring
out how the range-based lookup was supposed to work when revisiting the "wire
up" patch, until I realized populate() was handled in the next patch.

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox