Linux Confidential Computing Development
 help / color / mirror / Atom feed
* Re: [PATCH v4 04/13] dma: swiotlb: track pool encryption state and honor DMA_ATTR_CC_SHARED
From: Aneesh Kumar K.V @ 2026-05-14 14:43 UTC (permalink / raw)
  To: Mostafa Saleh
  Cc: iommu, linux-arm-kernel, linux-kernel, linux-coco, Robin Murphy,
	Marek Szyprowski, Will Deacon, Marc Zyngier, Steven Price,
	Suzuki K Poulose, Catalin Marinas, Jiri Pirko, Jason Gunthorpe,
	Petr Tesarik, Alexey Kardashevskiy, Dan Williams, Xu Yilun,
	linuxppc-dev, linux-s390, Madhavan Srinivasan, Michael Ellerman,
	Nicholas Piggin, Christophe Leroy (CS GROUP), Alexander Gordeev,
	Gerald Schaefer, Heiko Carstens, Vasily Gorbik,
	Christian Borntraeger, Sven Schnelle, x86
In-Reply-To: <agXaby-7L7yS3Vva@google.com>

Mostafa Saleh <smostafa@google.com> writes:

> On Thu, May 14, 2026 at 06:18:05PM +0530, Aneesh Kumar K.V wrote:
>> Mostafa Saleh <smostafa@google.com> writes:
>> 
>> > On Thu, May 14, 2026 at 11:24:42AM +0530, Aneesh Kumar K.V wrote:
>> >> Mostafa Saleh <smostafa@google.com> writes:
>> >> 
>> >> > On Tue, May 12, 2026 at 02:33:59PM +0530, Aneesh Kumar K.V (Arm) wrote:
>> >> >> Teach swiotlb to distinguish between encrypted and decrypted bounce
>> >> >> buffer pools, and make allocation and mapping paths select a pool whose
>> >> >> state matches the requested DMA attributes.
>> >> >> 
>> >> >> Add a decrypted flag to io_tlb_mem, initialize it for the default and
>> >> >> restricted pools, and propagate DMA_ATTR_CC_SHARED into swiotlb pool
>> >> >> allocation. Reject swiotlb alloc/map requests when the selected pool does
>> >> >> not match the required encrypted/decrypted state.
>> >> >> 
>> >> >> Also return DMA addresses with the matching phys_to_dma_{encrypted,
>> >> >> unencrypted} helper so the DMA address encoding stays consistent with the
>> >> >> chosen pool.
>> >> >> 
>> >> >> Signed-off-by: Aneesh Kumar K.V (Arm) <aneesh.kumar@kernel.org>
>> >> >> ---
>> >> >>  include/linux/dma-direct.h |  10 ++++
>> >> >>  include/linux/swiotlb.h    |   8 ++-
>> >> >>  kernel/dma/direct.c        |  14 +++--
>> >> >>  kernel/dma/swiotlb.c       | 108 +++++++++++++++++++++++++++----------
>> >> >>  4 files changed, 107 insertions(+), 33 deletions(-)
>> >> >> 
>> >> >> diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
>> >> >> index c249912456f9..94fad4e7c11e 100644
>> >> >> --- a/include/linux/dma-direct.h
>> >> >> +++ b/include/linux/dma-direct.h
>> >> >> @@ -77,6 +77,10 @@ static inline dma_addr_t dma_range_map_max(const struct bus_dma_region *map)
>> >> >>  #ifndef phys_to_dma_unencrypted
>> >> >>  #define phys_to_dma_unencrypted		phys_to_dma
>> >> >>  #endif
>> >> >> +
>> >> >> +#ifndef phys_to_dma_encrypted
>> >> >> +#define phys_to_dma_encrypted		phys_to_dma
>> >> >> +#endif
>> >> >>  #else
>> >> >>  static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
>> >> >>  {
>> >> >> @@ -90,6 +94,12 @@ static inline dma_addr_t phys_to_dma_unencrypted(struct device *dev,
>> >> >>  {
>> >> >>  	return dma_addr_unencrypted(__phys_to_dma(dev, paddr));
>> >> >>  }
>> >> >> +
>> >> >> +static inline dma_addr_t phys_to_dma_encrypted(struct device *dev,
>> >> >> +		phys_addr_t paddr)
>> >> >> +{
>> >> >> +	return dma_addr_encrypted(__phys_to_dma(dev, paddr));
>> >> >> +}
>> >> >>  /*
>> >> >>   * If memory encryption is supported, phys_to_dma will set the memory encryption
>> >> >>   * bit in the DMA address, and dma_to_phys will clear it.
>> >> >> diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
>> >> >> index 3dae0f592063..b3fa3c6e0169 100644
>> >> >> --- a/include/linux/swiotlb.h
>> >> >> +++ b/include/linux/swiotlb.h
>> >> >> @@ -81,6 +81,7 @@ struct io_tlb_pool {
>> >> >>  	struct list_head node;
>> >> >>  	struct rcu_head rcu;
>> >> >>  	bool transient;
>> >> >> +	bool unencrypted;
>> >> >>  #endif
>> >> >>  };
>> >> >>  
>> >> >> @@ -111,6 +112,7 @@ struct io_tlb_mem {
>> >> >>  	struct dentry *debugfs;
>> >> >>  	bool force_bounce;
>> >> >>  	bool for_alloc;
>> >> >> +	bool unencrypted;
>> >> >>  #ifdef CONFIG_SWIOTLB_DYNAMIC
>> >> >>  	bool can_grow;
>> >> >>  	u64 phys_limit;
>> >> >> @@ -282,7 +284,8 @@ static inline void swiotlb_sync_single_for_cpu(struct device *dev,
>> >> >>  extern void swiotlb_print_info(void);
>> >> >>  
>> >> >>  #ifdef CONFIG_DMA_RESTRICTED_POOL
>> >> >> -struct page *swiotlb_alloc(struct device *dev, size_t size);
>> >> >> +struct page *swiotlb_alloc(struct device *dev, size_t size,
>> >> >> +		unsigned long attrs);
>> >> >>  bool swiotlb_free(struct device *dev, struct page *page, size_t size);
>> >> >>  
>> >> >>  static inline bool is_swiotlb_for_alloc(struct device *dev)
>> >> >> @@ -290,7 +293,8 @@ static inline bool is_swiotlb_for_alloc(struct device *dev)
>> >> >>  	return dev->dma_io_tlb_mem->for_alloc;
>> >> >>  }
>> >> >>  #else
>> >> >> -static inline struct page *swiotlb_alloc(struct device *dev, size_t size)
>> >> >> +static inline struct page *swiotlb_alloc(struct device *dev, size_t size,
>> >> >> +		unsigned long attrs)
>> >> >>  {
>> >> >>  	return NULL;
>> >> >>  }
>> >> >> diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
>> >> >> index dc2907439b3d..97ae4fa10521 100644
>> >> >> --- a/kernel/dma/direct.c
>> >> >> +++ b/kernel/dma/direct.c
>> >> >> @@ -104,9 +104,10 @@ static void __dma_direct_free_pages(struct device *dev, struct page *page,
>> >> >>  	dma_free_contiguous(dev, page, size);
>> >> >>  }
>> >> >>  
>> >> >> -static struct page *dma_direct_alloc_swiotlb(struct device *dev, size_t size)
>> >> >> +static struct page *dma_direct_alloc_swiotlb(struct device *dev, size_t size,
>> >> >> +		unsigned long attrs)
>> >> >>  {
>> >> >> -	struct page *page = swiotlb_alloc(dev, size);
>> >> >> +	struct page *page = swiotlb_alloc(dev, size, attrs);
>> >> >>  
>> >> >>  	if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
>> >> >>  		swiotlb_free(dev, page, size);
>> >> >> @@ -266,8 +267,12 @@ void *dma_direct_alloc(struct device *dev, size_t size,
>> >> >>  						  gfp, attrs);
>> >> >>  
>> >> >>  	if (is_swiotlb_for_alloc(dev)) {
>> >> >> -		page = dma_direct_alloc_swiotlb(dev, size);
>> >> >> +		page = dma_direct_alloc_swiotlb(dev, size, attrs);
>> >> >>  		if (page) {
>> >> >> +			/*
>> >> >> +			 * swiotlb allocations comes from pool already marked
>> >> >> +			 * decrypted
>> >> >> +			 */
>> >> >>  			mark_mem_decrypt = false;
>> >> >>  			goto setup_page;
>> >> >>  		}
>> >> >> @@ -374,6 +379,7 @@ void dma_direct_free(struct device *dev, size_t size,
>> >> >>  		return;
>> >> >>  
>> >> >>  	if (swiotlb_find_pool(dev, dma_to_phys(dev, dma_addr)))
>> >> >> +		/* Swiotlb doesn't need a page attribute update on free */
>> >> >>  		mark_mem_encrypted = false;
>> >> >>  
>> >> >>  	if (is_vmalloc_addr(cpu_addr)) {
>> >> >> @@ -403,7 +409,7 @@ struct page *dma_direct_alloc_pages(struct device *dev, size_t size,
>> >> >>  						  gfp, attrs);
>> >> >>  
>> >> >>  	if (is_swiotlb_for_alloc(dev)) {
>> >> >> -		page = dma_direct_alloc_swiotlb(dev, size);
>> >> >> +		page = dma_direct_alloc_swiotlb(dev, size, attrs);
>> >> >>  		if (!page)
>> >> >>  			return NULL;
>> >> >>  
>> >> >> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
>> >> >> index ab4eccbaa076..065663be282c 100644
>> >> >> --- a/kernel/dma/swiotlb.c
>> >> >> +++ b/kernel/dma/swiotlb.c
>> >> >> @@ -259,10 +259,21 @@ void __init swiotlb_update_mem_attributes(void)
>> >> >>  	struct io_tlb_pool *mem = &io_tlb_default_mem.defpool;
>> >> >>  	unsigned long bytes;
>> >> >>  
>> >> >> +	/*
>> >> >> +	 * if platform support memory encryption, swiotlb buffers are
>> >> >> +	 * decrypted by default.
>> >> >> +	 */
>> >> >> +	if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
>> >> >> +		io_tlb_default_mem.unencrypted = true;
>> >> >> +	else
>> >> >> +		io_tlb_default_mem.unencrypted = false;
>> >> >> +
>> >> >>  	if (!mem->nslabs || mem->late_alloc)
>> >> >>  		return;
>> >> >>  	bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT);
>> >> >> -	set_memory_decrypted((unsigned long)mem->vaddr, bytes >> PAGE_SHIFT);
>> >> >> +
>> >> >> +	if (io_tlb_default_mem.unencrypted)
>> >> >> +		set_memory_decrypted((unsigned long)mem->vaddr, bytes >> PAGE_SHIFT);
>> >> >>  }
>> >> >>  
>> >> >>  static void swiotlb_init_io_tlb_pool(struct io_tlb_pool *mem, phys_addr_t start,
>> >> >> @@ -505,8 +516,10 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
>> >> >>  	if (!mem->slots)
>> >> >>  		goto error_slots;
>> >> >>  
>> >> >> -	set_memory_decrypted((unsigned long)vstart,
>> >> >> -			     (nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT);
>> >> >> +	if (io_tlb_default_mem.unencrypted)
>> >> >> +		set_memory_decrypted((unsigned long)vstart,
>> >> >> +				     (nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT);
>> >> >> +
>> >> >>  	swiotlb_init_io_tlb_pool(mem, virt_to_phys(vstart), nslabs, true,
>> >> >>  				 nareas);
>> >> >>  	add_mem_pool(&io_tlb_default_mem, mem);
>> >> >> @@ -539,7 +552,9 @@ void __init swiotlb_exit(void)
>> >> >>  	tbl_size = PAGE_ALIGN(mem->end - mem->start);
>> >> >>  	slots_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), mem->nslabs));
>> >> >>  
>> >> >> -	set_memory_encrypted(tbl_vaddr, tbl_size >> PAGE_SHIFT);
>> >> >> +	if (io_tlb_default_mem.unencrypted)
>> >> >> +		set_memory_encrypted(tbl_vaddr, tbl_size >> PAGE_SHIFT);
>> >> >> +
>> >> >>  	if (mem->late_alloc) {
>> >> >>  		area_order = get_order(array_size(sizeof(*mem->areas),
>> >> >>  			mem->nareas));
>> >> >> @@ -563,6 +578,7 @@ void __init swiotlb_exit(void)
>> >> >>   * @gfp:	GFP flags for the allocation.
>> >> >>   * @bytes:	Size of the buffer.
>> >> >>   * @phys_limit:	Maximum allowed physical address of the buffer.
>> >> >> + * @unencrypted: true to allocate unencrypted memory, false for encrypted memory
>> >> >>   *
>> >> >>   * Allocate pages from the buddy allocator. If successful, make the allocated
>> >> >>   * pages decrypted that they can be used for DMA.
>> >> >> @@ -570,7 +586,8 @@ void __init swiotlb_exit(void)
>> >> >>   * Return: Decrypted pages, %NULL on allocation failure, or ERR_PTR(-EAGAIN)
>> >> >>   * if the allocated physical address was above @phys_limit.
>> >> >>   */
>> >> >> -static struct page *alloc_dma_pages(gfp_t gfp, size_t bytes, u64 phys_limit)
>> >> >> +static struct page *alloc_dma_pages(gfp_t gfp, size_t bytes,
>> >> >> +		u64 phys_limit, bool unencrypted)
>> >> >>  {
>> >> >>  	unsigned int order = get_order(bytes);
>> >> >>  	struct page *page;
>> >> >> @@ -588,13 +605,13 @@ static struct page *alloc_dma_pages(gfp_t gfp, size_t bytes, u64 phys_limit)
>> >> >>  	}
>> >> >>  
>> >> >>  	vaddr = phys_to_virt(paddr);
>> >> >> -	if (set_memory_decrypted((unsigned long)vaddr, PFN_UP(bytes)))
>> >> >> +	if (unencrypted && set_memory_decrypted((unsigned long)vaddr, PFN_UP(bytes)))
>> >> >>  		goto error;
>> >> >>  	return page;
>> >> >>  
>> >> >>  error:
>> >> >>  	/* Intentional leak if pages cannot be encrypted again. */
>> >> >> -	if (!set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes)))
>> >> >> +	if (unencrypted && !set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes)))
>> >> >>  		__free_pages(page, order);
>> >> >>  	return NULL;
>> >> >>  }
>> >> >> @@ -604,30 +621,26 @@ static struct page *alloc_dma_pages(gfp_t gfp, size_t bytes, u64 phys_limit)
>> >> >>   * @dev:	Device for which a memory pool is allocated.
>> >> >>   * @bytes:	Size of the buffer.
>> >> >>   * @phys_limit:	Maximum allowed physical address of the buffer.
>> >> >> + * @attrs:	DMA attributes for the allocation.
>> >> >>   * @gfp:	GFP flags for the allocation.
>> >> >>   *
>> >> >>   * Return: Allocated pages, or %NULL on allocation failure.
>> >> >>   */
>> >> >>  static struct page *swiotlb_alloc_tlb(struct device *dev, size_t bytes,
>> >> >> -		u64 phys_limit, gfp_t gfp)
>> >> >> +		u64 phys_limit, unsigned long attrs, gfp_t gfp)
>> >> >>  {
>> >> >>  	struct page *page;
>> >> >> -	unsigned long attrs = 0;
>> >> >>  
>> >> >>  	/*
>> >> >>  	 * Allocate from the atomic pools if memory is encrypted and
>> >> >>  	 * the allocation is atomic, because decrypting may block.
>> >> >>  	 */
>> >> >> -	if (!gfpflags_allow_blocking(gfp) && dev && force_dma_unencrypted(dev)) {
>> >> >> +	if (!gfpflags_allow_blocking(gfp) && (attrs & DMA_ATTR_CC_SHARED)) {
>> >> >>  		void *vaddr;
>> >> >>  
>> >> >>  		if (!IS_ENABLED(CONFIG_DMA_COHERENT_POOL))
>> >> >>  			return NULL;
>> >> >>  
>> >> >> -		/* swiotlb considered decrypted by default */
>> >> >> -		if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
>> >> >> -			attrs = DMA_ATTR_CC_SHARED;
>> >> >> -
>> >> >>  		return dma_alloc_from_pool(dev, bytes, &vaddr, gfp,
>> >> >>  					   attrs, dma_coherent_ok);
>> >> >>  	}
>> >> >> @@ -638,7 +651,8 @@ static struct page *swiotlb_alloc_tlb(struct device *dev, size_t bytes,
>> >> >>  	else if (phys_limit <= DMA_BIT_MASK(32))
>> >> >>  		gfp |= __GFP_DMA32;
>> >> >>  
>> >> >> -	while (IS_ERR(page = alloc_dma_pages(gfp, bytes, phys_limit))) {
>> >> >> +	while (IS_ERR(page = alloc_dma_pages(gfp, bytes, phys_limit,
>> >> >> +					     !!(attrs & DMA_ATTR_CC_SHARED)))) {
>> >> >>  		if (IS_ENABLED(CONFIG_ZONE_DMA32) &&
>> >> >>  		    phys_limit < DMA_BIT_MASK(64) &&
>> >> >>  		    !(gfp & (__GFP_DMA32 | __GFP_DMA)))
>> >> >> @@ -657,15 +671,18 @@ static struct page *swiotlb_alloc_tlb(struct device *dev, size_t bytes,
>> >> >>   * swiotlb_free_tlb() - free a dynamically allocated IO TLB buffer
>> >> >>   * @vaddr:	Virtual address of the buffer.
>> >> >>   * @bytes:	Size of the buffer.
>> >> >> + * @unencrypted: true if @vaddr was allocated decrypted and must be
>> >> >> + *	re-encrypted before being freed
>> >> >>   */
>> >> >> -static void swiotlb_free_tlb(void *vaddr, size_t bytes)
>> >> >> +static void swiotlb_free_tlb(void *vaddr, size_t bytes, bool unencrypted)
>> >> >>  {
>> >> >>  	if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) &&
>> >> >>  	    dma_free_from_pool(NULL, vaddr, bytes))
>> >> >>  		return;
>> >> >>  
>> >> >>  	/* Intentional leak if pages cannot be encrypted again. */
>> >> >> -	if (!set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes)))
>> >> >> +	if (!unencrypted ||
>> >> >> +	    !set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes)))
>> >> >>  		__free_pages(virt_to_page(vaddr), get_order(bytes));
>> >> >>  }
>> >> >>  
>> >> >> @@ -676,6 +693,7 @@ static void swiotlb_free_tlb(void *vaddr, size_t bytes)
>> >> >>   * @nslabs:	Desired (maximum) number of slabs.
>> >> >>   * @nareas:	Number of areas.
>> >> >>   * @phys_limit:	Maximum DMA buffer physical address.
>> >> >> + * @attrs:	DMA attributes for the allocation.
>> >> >>   * @gfp:	GFP flags for the allocations.
>> >> >>   *
>> >> >>   * Allocate and initialize a new IO TLB memory pool. The actual number of
>> >> >> @@ -686,7 +704,8 @@ static void swiotlb_free_tlb(void *vaddr, size_t bytes)
>> >> >>   */
>> >> >>  static struct io_tlb_pool *swiotlb_alloc_pool(struct device *dev,
>> >> >>  		unsigned long minslabs, unsigned long nslabs,
>> >> >> -		unsigned int nareas, u64 phys_limit, gfp_t gfp)
>> >> >> +		unsigned int nareas, u64 phys_limit, unsigned long attrs,
>> >> >> +		gfp_t gfp)
>> >> >>  {
>> >> >>  	struct io_tlb_pool *pool;
>> >> >>  	unsigned int slot_order;
>> >> >> @@ -704,9 +723,10 @@ static struct io_tlb_pool *swiotlb_alloc_pool(struct device *dev,
>> >> >>  	if (!pool)
>> >> >>  		goto error;
>> >> >>  	pool->areas = (void *)pool + sizeof(*pool);
>> >> >> +	pool->unencrypted = !!(attrs & DMA_ATTR_CC_SHARED);
>> >> >>  
>> >> >>  	tlb_size = nslabs << IO_TLB_SHIFT;
>> >> >> -	while (!(tlb = swiotlb_alloc_tlb(dev, tlb_size, phys_limit, gfp))) {
>> >> >> +	while (!(tlb = swiotlb_alloc_tlb(dev, tlb_size, phys_limit, attrs, gfp))) {
>> >> >>  		if (nslabs <= minslabs)
>> >> >>  			goto error_tlb;
>> >> >>  		nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE);
>> >> >> @@ -724,7 +744,8 @@ static struct io_tlb_pool *swiotlb_alloc_pool(struct device *dev,
>> >> >>  	return pool;
>> >> >>  
>> >> >>  error_slots:
>> >> >> -	swiotlb_free_tlb(page_address(tlb), tlb_size);
>> >> >> +	swiotlb_free_tlb(page_address(tlb), tlb_size,
>> >> >> +			 !!(attrs & DMA_ATTR_CC_SHARED));
>> >> >>  error_tlb:
>> >> >>  	kfree(pool);
>> >> >>  error:
>> >> >> @@ -742,7 +763,9 @@ static void swiotlb_dyn_alloc(struct work_struct *work)
>> >> >>  	struct io_tlb_pool *pool;
>> >> >>  
>> >> >>  	pool = swiotlb_alloc_pool(NULL, IO_TLB_MIN_SLABS, default_nslabs,
>> >> >> -				  default_nareas, mem->phys_limit, GFP_KERNEL);
>> >> >> +				  default_nareas, mem->phys_limit,
>> >> >> +				  mem->unencrypted ? DMA_ATTR_CC_SHARED : 0,
>> >> >> +				  GFP_KERNEL);
>> >> >>  	if (!pool) {
>> >> >>  		pr_warn_ratelimited("Failed to allocate new pool");
>> >> >>  		return;
>> >> >> @@ -762,7 +785,7 @@ static void swiotlb_dyn_free(struct rcu_head *rcu)
>> >> >>  	size_t tlb_size = pool->end - pool->start;
>> >> >>  
>> >> >>  	free_pages((unsigned long)pool->slots, get_order(slots_size));
>> >> >> -	swiotlb_free_tlb(pool->vaddr, tlb_size);
>> >> >> +	swiotlb_free_tlb(pool->vaddr, tlb_size, pool->unencrypted);
>> >> >>  	kfree(pool);
>> >> >>  }
>> >> >>  
>> >> >> @@ -1232,6 +1255,7 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
>> >> >>  	nslabs = nr_slots(alloc_size);
>> >> >>  	phys_limit = min_not_zero(*dev->dma_mask, dev->bus_dma_limit);
>> >> >>  	pool = swiotlb_alloc_pool(dev, nslabs, nslabs, 1, phys_limit,
>> >> >> +				  mem->unencrypted ? DMA_ATTR_CC_SHARED : 0,
>> >> >>  				  GFP_NOWAIT);
>> >> >>  	if (!pool)
>> >> >>  		return -1;
>> >> >> @@ -1394,6 +1418,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
>> >> >>  		enum dma_data_direction dir, unsigned long attrs)
>> >> >>  {
>> >> >>  	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
>> >> >> +	bool require_decrypted = false;
>> >> >>  	unsigned int offset;
>> >> >>  	struct io_tlb_pool *pool;
>> >> >>  	unsigned int i;
>> >> >> @@ -1411,6 +1436,16 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
>> >> >>  	if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
>> >> >>  		pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
>> >> >>  
>> >> >> +	/*
>> >> >> +	 * if we are trying to swiotlb map a decrypted paddr or the paddr is encrypted
>> >> >> +	 * but the device is forcing decryption, use decrypted io_tlb_mem
>> >> >> +	 */
>> >> >> +	if ((attrs & DMA_ATTR_CC_SHARED) || force_dma_unencrypted(dev))
>> >> >> +		require_decrypted = true;
>> >> >> +
>> >> >> +	if (require_decrypted != mem->unencrypted)
>> >> >> +		return (phys_addr_t)DMA_MAPPING_ERROR;
>> >> >> +
>> >> >>  	/*
>> >> >>  	 * The default swiotlb memory pool is allocated with PAGE_SIZE
>> >> >>  	 * alignment. If a mapping is requested with larger alignment,
>> >> >> @@ -1608,8 +1643,14 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
>> >> >>  	if (swiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR)
>> >> >>  		return DMA_MAPPING_ERROR;
>> >> >>  
>> >> >> -	/* Ensure that the address returned is DMA'ble */
>> >> >> -	dma_addr = phys_to_dma_unencrypted(dev, swiotlb_addr);
>> >> >> +	/*
>> >> >> +	 * Use the allocated io_tlb_mem encryption type to determine dma addr.
>> >> >> +	 */
>> >> >> +	if (dev->dma_io_tlb_mem->unencrypted)
>> >> >> +		dma_addr = phys_to_dma_unencrypted(dev, swiotlb_addr);
>> >> >> +	else
>> >> >> +		dma_addr = phys_to_dma_encrypted(dev, swiotlb_addr);
>> >> >> +
>> >> >>  	if (unlikely(!dma_capable(dev, dma_addr, size, true))) {
>> >> >>  		__swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, dir,
>> >> >>  			attrs | DMA_ATTR_SKIP_CPU_SYNC,
>> >> >> @@ -1773,7 +1814,8 @@ static inline void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
>> >> >>  
>> >> >>  #ifdef CONFIG_DMA_RESTRICTED_POOL
>> >> >>  
>> >> >> -struct page *swiotlb_alloc(struct device *dev, size_t size)
>> >> >> +struct page *swiotlb_alloc(struct device *dev, size_t size,
>> >> >> +		unsigned long attrs)
>> >> >>  {
>> >> >>  	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
>> >> >>  	struct io_tlb_pool *pool;
>> >> >> @@ -1784,6 +1826,9 @@ struct page *swiotlb_alloc(struct device *dev, size_t size)
>> >> >>  	if (!mem)
>> >> >>  		return NULL;
>> >> >>  
>> >> >> +	if (mem->unencrypted != !!(attrs & DMA_ATTR_CC_SHARED))
>> >> >> +		return NULL;
>> >> >> +
>> >> >>  	align = (1 << (get_order(size) + PAGE_SHIFT)) - 1;
>> >> >>  	index = swiotlb_find_slots(dev, 0, size, align, &pool);
>> >> >>  	if (index == -1)
>> >> >> @@ -1853,9 +1898,18 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
>> >> >>  			kfree(mem);
>> >> >>  			return -ENOMEM;
>> >> >>  		}
>> >> >> +		/*
>> >> >> +		 * if platform supports memory encryption,
>> >> >> +		 * restricted mem pool is decrypted by default
>> >> >> +		 */
>> >> >> +		if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) {
>> >> >> +			mem->unencrypted = true;
>> >> >> +			set_memory_decrypted((unsigned long)phys_to_virt(rmem->base),
>> >> >> +					     rmem->size >> PAGE_SHIFT);
>> >> >> +		} else {
>> >> >> +			mem->unencrypted = false;
>> >> >> +		}
>> >> >
>> >> > This breaks pKVM as it doesn’t set CC_ATTR_MEM_ENCRYPT, so all virtio
>> >> > traffic now fails.
>> >> >
>> >> > Also, by design, some drivers are clueless about bouncing, so
>> >> > I believe that the pool should have a way to control its property
>> >> > (encrypted or decrypted) and that takes priority over whatever
>> >> > attributes come from allocation.
>> >> > And that brings us to the same point whether it’s better to return
>> >> > the memory along with its state or we pass the requested state.
>> >> > I think for other cases it’s fine for the device/DMA-API to dictate
>> >> > the attrs, but not in the restricted-dma case, the firmware just knows better.
>> >> >
>> >> 
>> >> Is it that the pKVM guest kernel does not have awareness of
>> >> encrypted/decrypted DMA allocations? Instead, the firmware attaches
>> >> hypervisor-shared pages to the device via restricted-dma-pool? The
>> >> kernel then has swiotlb->for_alloc = true, and hence all DMA allocations
>> >> go through the restricted-dma-pool?
>> >
>> > Yes.
>> >
>> >> 
>> >> Given that pKVM supports pkvm_set_memory_encrypted() and
>> >> pkvm_set_memory_decrypted(), can we consider adding CC_ATTR_MEM_ENCRYPT
>> >> support to pKVM? It would also be good to investigate whether we can set
>> >> force_dma_unencrypted(dev) to true where needed.
>> >
>> > I was looking into that, but it didn't work because
>> > force_dma_unencrypted() is broken with restricted-dma due to the
>> > double decryption issue, that's when I sent my first series [1]
>> >
>> > Maybe we should land some basic fixes for that path so we can
>> > convert pKVM, then we do the full rework.
>> >
>> > I will revive my old work and see if I can send an RFC.
>> >
>> > [1] https://lore.kernel.org/all/20260305170335.963568-1-smostafa@google.com/
>> >
>> 
>> With this series, can you check whether the only change needed is
>> something like the following?
>> 
>> modified   kernel/dma/swiotlb.c
>> @@ -1905,7 +1905,8 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
>>  		 * if platform supports memory encryption,
>>  		 * restricted mem pool is decrypted by default
>>  		 */
>> -		if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) {
>> +		//if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) {
>> +		if (true) {
>>  			mem->unencrypted = true;
>>  			set_memory_decrypted((unsigned long)phys_to_virt(rmem->base),
>>  					     rmem->size >> PAGE_SHIFT);
>
> Yes, that boots, but I will need to do more tests.
>
>> 
>> >
>> >> 
>> >> I agree that this patch, as it stands, can break pKVM because we are now
>> >> missing the set_memory_decrypted() call required for pKVM to work.
>> >> 
>> >> We now mark the swiotlb io_tlb_mem as unencrypted/encrypted in the guest
>> >> using struct io_tlb_mem->unencrypted. I am not clear what we can use for
>> >> pKVM to conditionalize this so that it works for both protected and
>> >> unprotected guests.
>> >
>> > There is no problem with non-protected guests as they don't use memory
>> > encryption, my initial thought was that the encrypted/decrypted state is
>> > a per-pool property which is decided by FW (device-tree).
>> >
>> 
>> What I meant was that we need a generic way to identify a pKVM guest, so
>> that we can use it in the conditional above.
>
> I have this patch, with that I can boot with your series unmodified,
> but I will need to do more testing.
>

Thanks, I can add this to the series once you complete the required testing.

>
> From d795b4c4ee2437587616b2b342e9996afe6d6680 Mon Sep 17 00:00:00 2001
> From: Mostafa Saleh <smostafa@google.com>
> Date: Thu, 14 May 2026 13:46:15 +0000
> Subject: [PATCH] arm64/coco: Add pKVM as a CC platform
>
> pKVM does support memory encryption, expose that to the rest of
> the kernel through cc_platform_has()
>
> At the moment, all devices inside the guest are emulated which
> requires its memory to be shared back to the host (decrypted), so
> set force_dma_unencrypted() to always return true.
>
> Signed-off-by: Mostafa Saleh <smostafa@google.com>
> ---
>  arch/arm64/include/asm/hypervisor.h           |  6 ++++++
>  arch/arm64/include/asm/mem_encrypt.h          |  3 ++-
>  arch/arm64/kernel/rsi.c                       | 12 ------------
>  arch/arm64/mm/init.c                          | 13 +++++++++++++
>  drivers/virt/coco/pkvm-guest/arm-pkvm-guest.c |  5 +++++
>  5 files changed, 26 insertions(+), 13 deletions(-)
>
> diff --git a/arch/arm64/include/asm/hypervisor.h b/arch/arm64/include/asm/hypervisor.h
> index a12fd897c877..1b0e15f290be 100644
> --- a/arch/arm64/include/asm/hypervisor.h
> +++ b/arch/arm64/include/asm/hypervisor.h
> @@ -10,8 +10,14 @@ void kvm_arm_target_impl_cpu_init(void);
>
>  #ifdef CONFIG_ARM_PKVM_GUEST
>  void pkvm_init_hyp_services(void);
> +bool is_protected_kvm_guest(void);
>  #else
>  static inline void pkvm_init_hyp_services(void) { };
> +
> +static inline bool is_protected_kvm_guest(void)
> +{
> +	return false;
> +}
>  #endif
>
>  static inline void kvm_arch_init_hyp_services(void)
> diff --git a/arch/arm64/include/asm/mem_encrypt.h b/arch/arm64/include/asm/mem_encrypt.h
> index 314b2b52025f..636f45b4d8af 100644
> --- a/arch/arm64/include/asm/mem_encrypt.h
> +++ b/arch/arm64/include/asm/mem_encrypt.h
> @@ -2,6 +2,7 @@
>  #ifndef __ASM_MEM_ENCRYPT_H
>  #define __ASM_MEM_ENCRYPT_H
>
> +#include <asm/hypervisor.h>
>  #include <asm/rsi.h>
>
>  struct device;
> @@ -20,7 +21,7 @@ int realm_register_memory_enc_ops(void);
>
>  static inline bool force_dma_unencrypted(struct device *dev)
>  {
> -	return is_realm_world();
> +	return is_realm_world() || is_protected_kvm_guest();
>  }
>
>  /*
> diff --git a/arch/arm64/kernel/rsi.c b/arch/arm64/kernel/rsi.c
> index 92160f2e57ff..25ca75ce1a4d 100644
> --- a/arch/arm64/kernel/rsi.c
> +++ b/arch/arm64/kernel/rsi.c
> @@ -7,7 +7,6 @@
>  #include <linux/memblock.h>
>  #include <linux/psci.h>
>  #include <linux/swiotlb.h>
> -#include <linux/cc_platform.h>
>  #include <linux/platform_device.h>
>
>  #include <asm/io.h>
> @@ -23,17 +22,6 @@ EXPORT_SYMBOL(prot_ns_shared);
>  DEFINE_STATIC_KEY_FALSE_RO(rsi_present);
>  EXPORT_SYMBOL(rsi_present);
>
> -bool cc_platform_has(enum cc_attr attr)
> -{
> -	switch (attr) {
> -	case CC_ATTR_MEM_ENCRYPT:
> -		return is_realm_world();
> -	default:
> -		return false;
> -	}
> -}
> -EXPORT_SYMBOL_GPL(cc_platform_has);
> -
>  static bool rsi_version_matches(void)
>  {
>  	unsigned long ver_lower, ver_higher;
> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> index acf67c7064db..a087ac5b15f7 100644
> --- a/arch/arm64/mm/init.c
> +++ b/arch/arm64/mm/init.c
> @@ -12,6 +12,7 @@
>  #include <linux/swap.h>
>  #include <linux/init.h>
>  #include <linux/cache.h>
> +#include <linux/cc_platform.h>
>  #include <linux/mman.h>
>  #include <linux/nodemask.h>
>  #include <linux/initrd.h>
> @@ -36,6 +37,7 @@
>
>  #include <asm/boot.h>
>  #include <asm/fixmap.h>
> +#include <asm/hypervisor.h>
>  #include <asm/kasan.h>
>  #include <asm/kernel-pgtable.h>
>  #include <asm/kvm_host.h>
> @@ -414,6 +416,17 @@ void dump_mem_limit(void)
>  	}
>  }
>
> +bool cc_platform_has(enum cc_attr attr)
> +{
> +	switch (attr) {
> +	case CC_ATTR_MEM_ENCRYPT:
> +		return is_realm_world() || is_protected_kvm_guest();
> +	default:
> +		return false;
> +	}
> +}
> +EXPORT_SYMBOL_GPL(cc_platform_has);
> +
>  #ifdef CONFIG_EXECMEM
>  static u64 module_direct_base __ro_after_init = 0;
>  static u64 module_plt_base __ro_after_init = 0;
> diff --git a/drivers/virt/coco/pkvm-guest/arm-pkvm-guest.c b/drivers/virt/coco/pkvm-guest/arm-pkvm-guest.c
> index 4230b817a80b..297e6d6019b8 100644
> --- a/drivers/virt/coco/pkvm-guest/arm-pkvm-guest.c
> +++ b/drivers/virt/coco/pkvm-guest/arm-pkvm-guest.c
> @@ -95,6 +95,11 @@ static int mmio_guard_ioremap_hook(phys_addr_t phys, size_t size,
>  	return 0;
>  }
>
> +bool is_protected_kvm_guest(void)
> +{
> +	return !!pkvm_granule;
> +}
> +
>  void pkvm_init_hyp_services(void)
>  {
>  	int i;


-aneesh

^ permalink raw reply

* Re: [PATCH v5 1/3] firmware: smccc: coco: Manage arm-smccc platform device and CCA auxiliary drivers
From: Aneesh Kumar K.V @ 2026-05-14 14:38 UTC (permalink / raw)
  To: Catalin Marinas, Greg KH
  Cc: Suzuki K Poulose, linux-coco, linux-arm-kernel, linux-kernel,
	Jeremy Linton, Jonathan Cameron, Lorenzo Pieralisi, Mark Rutland,
	Sudeep Holla, Will Deacon, Steven Price
In-Reply-To: <agXL12bNh4gGyK1K@arm.com>

Catalin Marinas <catalin.marinas@arm.com> writes:

> On Thu, May 14, 2026 at 02:55:48PM +0200, Greg Kroah-Hartman wrote:
>> On Thu, May 14, 2026 at 12:04:13PM +0100, Suzuki K Poulose wrote:
>> > On 14/05/2026 10:40, Aneesh Kumar K.V (Arm) wrote:
>> > > Make the SMCCC driver responsible for registering the arm-smccc platform
>> > > device and after confirming the relevant SMCCC function IDs, create
>> > > the arm_cca_guest auxiliary device.
>> > > 
>> > 
>> > There are a few changes squashed in to this patch. Please could we
>> > split the patch in the following order ?
>> > 
>> > 1. Add platform device for arm-smccc
>> 
>> Do not make any more "fake" platform devices please.
>> 
>> > 2. Move TRNG to Auxilliary Device - (Even though it is a later patch, move
>> > it before the RSI changes)
>> 
>> No, move it to the faux api please.
>
> So should we end up with:
>
>   /sys/devices/faux/arm-smccc/
>     smccc_trng/
>     arm-rsi-dev/
>       tsm/tsm0
>
>   /sys/class/tsm/tsm0
>     -> ../../devices/faux/arm-smccc/arm-rsi-dev/tsm/tsm0
>
>   /sys/firmware/cca/
>     realm_guest

But we need the ability to autoload different TSM backend drivers based
on the support/availability of these SMCCC function-id ranges. Faux
devices don't support that.

-aneesh

^ permalink raw reply

* Re: [PATCH v4 04/13] dma: swiotlb: track pool encryption state and honor DMA_ATTR_CC_SHARED
From: Jason Gunthorpe @ 2026-05-14 14:37 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: Mostafa Saleh, iommu, linux-arm-kernel, linux-kernel, linux-coco,
	Robin Murphy, Marek Szyprowski, Will Deacon, Marc Zyngier,
	Steven Price, Suzuki K Poulose, Catalin Marinas, Jiri Pirko,
	Petr Tesarik, Alexey Kardashevskiy, Dan Williams, Xu Yilun,
	linuxppc-dev, linux-s390, Madhavan Srinivasan, Michael Ellerman,
	Nicholas Piggin, Christophe Leroy (CS GROUP), Alexander Gordeev,
	Gerald Schaefer, Heiko Carstens, Vasily Gorbik,
	Christian Borntraeger, Sven Schnelle, x86
In-Reply-To: <yq5apl2y5f96.fsf@kernel.org>

On Thu, May 14, 2026 at 06:18:05PM +0530, Aneesh Kumar K.V wrote:
> > There is no problem with non-protected guests as they don't use memory
> > encryption, my initial thought was that the encrypted/decrypted state is
> > a per-pool property which is decided by FW (device-tree).
> 
> What I meant was that we need a generic way to identify a pKVM guest, so
> that we can use it in the conditional above.

If I understood Mostafa's remarks correctly, I think different devices in
the guest need shared/decrypted and some don't? I.e. a virtio hypervisor
device needs shared while a real PCI device doesn't? Is that right?

In CC terms that would be a mixture of T=0 and T=1 devices hardwired
and signaled by firmware.

Ideally we'd have a flow where if the arch precreates a swiotlb pool
with special parameters this overrides all other decision making. Then
this series is about making CC NOT use that flow... ??

Jason

^ permalink raw reply

* Re: [PATCH v5 1/3] firmware: smccc: coco: Manage arm-smccc platform device and CCA auxiliary drivers
From: Aneesh Kumar K.V @ 2026-05-14 14:37 UTC (permalink / raw)
  To: Greg KH, Suzuki K Poulose
  Cc: linux-coco, linux-arm-kernel, linux-kernel, Catalin Marinas,
	Jeremy Linton, Jonathan Cameron, Lorenzo Pieralisi, Mark Rutland,
	Sudeep Holla, Will Deacon, Steven Price
In-Reply-To: <2026051420-amusement-drove-73e6@gregkh>

Greg KH <gregkh@linuxfoundation.org> writes:

> On Thu, May 14, 2026 at 12:04:13PM +0100, Suzuki K Poulose wrote:
>> Hi Aneesh
>> 
>> On 14/05/2026 10:40, Aneesh Kumar K.V (Arm) wrote:
>> > Make the SMCCC driver responsible for registering the arm-smccc platform
>> > device and after confirming the relevant SMCCC function IDs, create
>> > the arm_cca_guest auxiliary device.
>> > 
>> 
>> There are a few changes squashed in to this patch. Please could we
>> split the patch in the following order ?
>> 
>> 1. Add platform device for arm-smccc
>
> Do not make any more "fake" platform devices please.
>
>> 2. Move TRNG to Auxiliary Device - (Even though it is a later patch, move
>> it before the RSI changes)
>
> No, move it to the faux api please.
>


Maybe I was not complete in my previous reply. I did not want to repeat
the entire thread, so I quoted the lore link for more details.

1. We have platform firmware-provided SMCCC interfaces. Based on the
support/availability of these function IDs, we want to load multiple
drivers.
2. This patch series adds a platform device to represent the
firmware-provided SMCCC resource.
3. Different SMCCC ranges are now represented as auxiliary devices.
4. Different subsystems, such as TSM, can autoload their backend drivers
based on the availability of these SMCCC ranges, which are now
represented as auxiliary devices.

You had agreed to all of this in the previous discussion here:
https://lore.kernel.org/all/2025101516-handbook-hyphen-62ec@gregkh

-aneesh

^ permalink raw reply

* Re: [PATCH v5 1/3] firmware: smccc: coco: Manage arm-smccc platform device and CCA auxiliary drivers
From: Greg KH @ 2026-05-14 14:23 UTC (permalink / raw)
  To: Catalin Marinas
  Cc: Suzuki K Poulose, Aneesh Kumar K.V (Arm), linux-coco,
	linux-arm-kernel, linux-kernel, Jeremy Linton, Jonathan Cameron,
	Lorenzo Pieralisi, Mark Rutland, Sudeep Holla, Will Deacon,
	Steven Price
In-Reply-To: <agXSI6PPf4uR7VBL@arm.com>

On Thu, May 14, 2026 at 02:46:11PM +0100, Catalin Marinas wrote:
> On Thu, May 14, 2026 at 03:25:34PM +0200, Greg Kroah-Hartman wrote:
> > On Thu, May 14, 2026 at 02:19:19PM +0100, Catalin Marinas wrote:
> > > On Thu, May 14, 2026 at 02:55:48PM +0200, Greg Kroah-Hartman wrote:
> > > > On Thu, May 14, 2026 at 12:04:13PM +0100, Suzuki K Poulose wrote:
> > > > > On 14/05/2026 10:40, Aneesh Kumar K.V (Arm) wrote:
> > > > > > Make the SMCCC driver responsible for registering the arm-smccc platform
> > > > > > device and after confirming the relevant SMCCC function IDs, create
> > > > > > the arm_cca_guest auxiliary device.
> > > > > > 
> > > > > 
> > > > > There are a few changes squashed in to this patch. Please could we
> > > > > split the patch in the following order ?
> > > > > 
> > > > > 1. Add platform device for arm-smccc
> > > > 
> > > > Do not make any more "fake" platform devices please.
> > > > 
> > > > > 2. Move TRNG to Auxilliary Device - (Even though it is a later patch, move
> > > > > it before the RSI changes)
> > > > 
> > > > No, move it to the faux api please.
> > > 
> > > So should we end up with:
> > > 
> > >   /sys/devices/faux/arm-smccc/
> > >     smccc_trng/
> > >     arm-rsi-dev/
> > 
> > What types are these child devices?  Also faux ones?
> 
> They'd also be faux devices with this structure (in practice they are
> firmware interfaces that may be backed by some hardware like in the TRNG
> case, though not directly accessible to Linux).

Great, seems sane to me!

^ permalink raw reply

* Re: [PATCH v4 04/13] dma: swiotlb: track pool encryption state and honor DMA_ATTR_CC_SHARED
From: Mostafa Saleh @ 2026-05-14 14:21 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: iommu, linux-arm-kernel, linux-kernel, linux-coco, Robin Murphy,
	Marek Szyprowski, Will Deacon, Marc Zyngier, Steven Price,
	Suzuki K Poulose, Catalin Marinas, Jiri Pirko, Jason Gunthorpe,
	Petr Tesarik, Alexey Kardashevskiy, Dan Williams, Xu Yilun,
	linuxppc-dev, linux-s390, Madhavan Srinivasan, Michael Ellerman,
	Nicholas Piggin, Christophe Leroy (CS GROUP), Alexander Gordeev,
	Gerald Schaefer, Heiko Carstens, Vasily Gorbik,
	Christian Borntraeger, Sven Schnelle, x86
In-Reply-To: <yq5apl2y5f96.fsf@kernel.org>

On Thu, May 14, 2026 at 06:18:05PM +0530, Aneesh Kumar K.V wrote:
> Mostafa Saleh <smostafa@google.com> writes:
> 
> > On Thu, May 14, 2026 at 11:24:42AM +0530, Aneesh Kumar K.V wrote:
> >> Mostafa Saleh <smostafa@google.com> writes:
> >> 
> >> > On Tue, May 12, 2026 at 02:33:59PM +0530, Aneesh Kumar K.V (Arm) wrote:
> >> >> Teach swiotlb to distinguish between encrypted and decrypted bounce
> >> >> buffer pools, and make allocation and mapping paths select a pool whose
> >> >> state matches the requested DMA attributes.
> >> >> 
> >> >> Add a decrypted flag to io_tlb_mem, initialize it for the default and
> >> >> restricted pools, and propagate DMA_ATTR_CC_SHARED into swiotlb pool
> >> >> allocation. Reject swiotlb alloc/map requests when the selected pool does
> >> >> not match the required encrypted/decrypted state.
> >> >> 
> >> >> Also return DMA addresses with the matching phys_to_dma_{encrypted,
> >> >> unencrypted} helper so the DMA address encoding stays consistent with the
> >> >> chosen pool.
> >> >> 
> >> >> Signed-off-by: Aneesh Kumar K.V (Arm) <aneesh.kumar@kernel.org>
> >> >> ---
> >> >>  include/linux/dma-direct.h |  10 ++++
> >> >>  include/linux/swiotlb.h    |   8 ++-
> >> >>  kernel/dma/direct.c        |  14 +++--
> >> >>  kernel/dma/swiotlb.c       | 108 +++++++++++++++++++++++++++----------
> >> >>  4 files changed, 107 insertions(+), 33 deletions(-)
> >> >> 
> >> >> diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
> >> >> index c249912456f9..94fad4e7c11e 100644
> >> >> --- a/include/linux/dma-direct.h
> >> >> +++ b/include/linux/dma-direct.h
> >> >> @@ -77,6 +77,10 @@ static inline dma_addr_t dma_range_map_max(const struct bus_dma_region *map)
> >> >>  #ifndef phys_to_dma_unencrypted
> >> >>  #define phys_to_dma_unencrypted		phys_to_dma
> >> >>  #endif
> >> >> +
> >> >> +#ifndef phys_to_dma_encrypted
> >> >> +#define phys_to_dma_encrypted		phys_to_dma
> >> >> +#endif
> >> >>  #else
> >> >>  static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
> >> >>  {
> >> >> @@ -90,6 +94,12 @@ static inline dma_addr_t phys_to_dma_unencrypted(struct device *dev,
> >> >>  {
> >> >>  	return dma_addr_unencrypted(__phys_to_dma(dev, paddr));
> >> >>  }
> >> >> +
> >> >> +static inline dma_addr_t phys_to_dma_encrypted(struct device *dev,
> >> >> +		phys_addr_t paddr)
> >> >> +{
> >> >> +	return dma_addr_encrypted(__phys_to_dma(dev, paddr));
> >> >> +}
> >> >>  /*
> >> >>   * If memory encryption is supported, phys_to_dma will set the memory encryption
> >> >>   * bit in the DMA address, and dma_to_phys will clear it.
> >> >> diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
> >> >> index 3dae0f592063..b3fa3c6e0169 100644
> >> >> --- a/include/linux/swiotlb.h
> >> >> +++ b/include/linux/swiotlb.h
> >> >> @@ -81,6 +81,7 @@ struct io_tlb_pool {
> >> >>  	struct list_head node;
> >> >>  	struct rcu_head rcu;
> >> >>  	bool transient;
> >> >> +	bool unencrypted;
> >> >>  #endif
> >> >>  };
> >> >>  
> >> >> @@ -111,6 +112,7 @@ struct io_tlb_mem {
> >> >>  	struct dentry *debugfs;
> >> >>  	bool force_bounce;
> >> >>  	bool for_alloc;
> >> >> +	bool unencrypted;
> >> >>  #ifdef CONFIG_SWIOTLB_DYNAMIC
> >> >>  	bool can_grow;
> >> >>  	u64 phys_limit;
> >> >> @@ -282,7 +284,8 @@ static inline void swiotlb_sync_single_for_cpu(struct device *dev,
> >> >>  extern void swiotlb_print_info(void);
> >> >>  
> >> >>  #ifdef CONFIG_DMA_RESTRICTED_POOL
> >> >> -struct page *swiotlb_alloc(struct device *dev, size_t size);
> >> >> +struct page *swiotlb_alloc(struct device *dev, size_t size,
> >> >> +		unsigned long attrs);
> >> >>  bool swiotlb_free(struct device *dev, struct page *page, size_t size);
> >> >>  
> >> >>  static inline bool is_swiotlb_for_alloc(struct device *dev)
> >> >> @@ -290,7 +293,8 @@ static inline bool is_swiotlb_for_alloc(struct device *dev)
> >> >>  	return dev->dma_io_tlb_mem->for_alloc;
> >> >>  }
> >> >>  #else
> >> >> -static inline struct page *swiotlb_alloc(struct device *dev, size_t size)
> >> >> +static inline struct page *swiotlb_alloc(struct device *dev, size_t size,
> >> >> +		unsigned long attrs)
> >> >>  {
> >> >>  	return NULL;
> >> >>  }
> >> >> diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
> >> >> index dc2907439b3d..97ae4fa10521 100644
> >> >> --- a/kernel/dma/direct.c
> >> >> +++ b/kernel/dma/direct.c
> >> >> @@ -104,9 +104,10 @@ static void __dma_direct_free_pages(struct device *dev, struct page *page,
> >> >>  	dma_free_contiguous(dev, page, size);
> >> >>  }
> >> >>  
> >> >> -static struct page *dma_direct_alloc_swiotlb(struct device *dev, size_t size)
> >> >> +static struct page *dma_direct_alloc_swiotlb(struct device *dev, size_t size,
> >> >> +		unsigned long attrs)
> >> >>  {
> >> >> -	struct page *page = swiotlb_alloc(dev, size);
> >> >> +	struct page *page = swiotlb_alloc(dev, size, attrs);
> >> >>  
> >> >>  	if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
> >> >>  		swiotlb_free(dev, page, size);
> >> >> @@ -266,8 +267,12 @@ void *dma_direct_alloc(struct device *dev, size_t size,
> >> >>  						  gfp, attrs);
> >> >>  
> >> >>  	if (is_swiotlb_for_alloc(dev)) {
> >> >> -		page = dma_direct_alloc_swiotlb(dev, size);
> >> >> +		page = dma_direct_alloc_swiotlb(dev, size, attrs);
> >> >>  		if (page) {
> >> >> +			/*
> >> >> +			 * swiotlb allocations comes from pool already marked
> >> >> +			 * decrypted
> >> >> +			 */
> >> >>  			mark_mem_decrypt = false;
> >> >>  			goto setup_page;
> >> >>  		}
> >> >> @@ -374,6 +379,7 @@ void dma_direct_free(struct device *dev, size_t size,
> >> >>  		return;
> >> >>  
> >> >>  	if (swiotlb_find_pool(dev, dma_to_phys(dev, dma_addr)))
> >> >> +		/* Swiotlb doesn't need a page attribute update on free */
> >> >>  		mark_mem_encrypted = false;
> >> >>  
> >> >>  	if (is_vmalloc_addr(cpu_addr)) {
> >> >> @@ -403,7 +409,7 @@ struct page *dma_direct_alloc_pages(struct device *dev, size_t size,
> >> >>  						  gfp, attrs);
> >> >>  
> >> >>  	if (is_swiotlb_for_alloc(dev)) {
> >> >> -		page = dma_direct_alloc_swiotlb(dev, size);
> >> >> +		page = dma_direct_alloc_swiotlb(dev, size, attrs);
> >> >>  		if (!page)
> >> >>  			return NULL;
> >> >>  
> >> >> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> >> >> index ab4eccbaa076..065663be282c 100644
> >> >> --- a/kernel/dma/swiotlb.c
> >> >> +++ b/kernel/dma/swiotlb.c
> >> >> @@ -259,10 +259,21 @@ void __init swiotlb_update_mem_attributes(void)
> >> >>  	struct io_tlb_pool *mem = &io_tlb_default_mem.defpool;
> >> >>  	unsigned long bytes;
> >> >>  
> >> >> +	/*
> >> >> +	 * if platform support memory encryption, swiotlb buffers are
> >> >> +	 * decrypted by default.
> >> >> +	 */
> >> >> +	if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
> >> >> +		io_tlb_default_mem.unencrypted = true;
> >> >> +	else
> >> >> +		io_tlb_default_mem.unencrypted = false;
> >> >> +
> >> >>  	if (!mem->nslabs || mem->late_alloc)
> >> >>  		return;
> >> >>  	bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT);
> >> >> -	set_memory_decrypted((unsigned long)mem->vaddr, bytes >> PAGE_SHIFT);
> >> >> +
> >> >> +	if (io_tlb_default_mem.unencrypted)
> >> >> +		set_memory_decrypted((unsigned long)mem->vaddr, bytes >> PAGE_SHIFT);
> >> >>  }
> >> >>  
> >> >>  static void swiotlb_init_io_tlb_pool(struct io_tlb_pool *mem, phys_addr_t start,
> >> >> @@ -505,8 +516,10 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
> >> >>  	if (!mem->slots)
> >> >>  		goto error_slots;
> >> >>  
> >> >> -	set_memory_decrypted((unsigned long)vstart,
> >> >> -			     (nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT);
> >> >> +	if (io_tlb_default_mem.unencrypted)
> >> >> +		set_memory_decrypted((unsigned long)vstart,
> >> >> +				     (nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT);
> >> >> +
> >> >>  	swiotlb_init_io_tlb_pool(mem, virt_to_phys(vstart), nslabs, true,
> >> >>  				 nareas);
> >> >>  	add_mem_pool(&io_tlb_default_mem, mem);
> >> >> @@ -539,7 +552,9 @@ void __init swiotlb_exit(void)
> >> >>  	tbl_size = PAGE_ALIGN(mem->end - mem->start);
> >> >>  	slots_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), mem->nslabs));
> >> >>  
> >> >> -	set_memory_encrypted(tbl_vaddr, tbl_size >> PAGE_SHIFT);
> >> >> +	if (io_tlb_default_mem.unencrypted)
> >> >> +		set_memory_encrypted(tbl_vaddr, tbl_size >> PAGE_SHIFT);
> >> >> +
> >> >>  	if (mem->late_alloc) {
> >> >>  		area_order = get_order(array_size(sizeof(*mem->areas),
> >> >>  			mem->nareas));
> >> >> @@ -563,6 +578,7 @@ void __init swiotlb_exit(void)
> >> >>   * @gfp:	GFP flags for the allocation.
> >> >>   * @bytes:	Size of the buffer.
> >> >>   * @phys_limit:	Maximum allowed physical address of the buffer.
> >> >> + * @unencrypted: true to allocate unencrypted memory, false for encrypted memory
> >> >>   *
> >> >>   * Allocate pages from the buddy allocator. If successful, make the allocated
> >> >>   * pages decrypted that they can be used for DMA.
> >> >> @@ -570,7 +586,8 @@ void __init swiotlb_exit(void)
> >> >>   * Return: Decrypted pages, %NULL on allocation failure, or ERR_PTR(-EAGAIN)
> >> >>   * if the allocated physical address was above @phys_limit.
> >> >>   */
> >> >> -static struct page *alloc_dma_pages(gfp_t gfp, size_t bytes, u64 phys_limit)
> >> >> +static struct page *alloc_dma_pages(gfp_t gfp, size_t bytes,
> >> >> +		u64 phys_limit, bool unencrypted)
> >> >>  {
> >> >>  	unsigned int order = get_order(bytes);
> >> >>  	struct page *page;
> >> >> @@ -588,13 +605,13 @@ static struct page *alloc_dma_pages(gfp_t gfp, size_t bytes, u64 phys_limit)
> >> >>  	}
> >> >>  
> >> >>  	vaddr = phys_to_virt(paddr);
> >> >> -	if (set_memory_decrypted((unsigned long)vaddr, PFN_UP(bytes)))
> >> >> +	if (unencrypted && set_memory_decrypted((unsigned long)vaddr, PFN_UP(bytes)))
> >> >>  		goto error;
> >> >>  	return page;
> >> >>  
> >> >>  error:
> >> >>  	/* Intentional leak if pages cannot be encrypted again. */
> >> >> -	if (!set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes)))
> >> >> +	if (unencrypted && !set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes)))
> >> >>  		__free_pages(page, order);
> >> >>  	return NULL;
> >> >>  }
> >> >> @@ -604,30 +621,26 @@ static struct page *alloc_dma_pages(gfp_t gfp, size_t bytes, u64 phys_limit)
> >> >>   * @dev:	Device for which a memory pool is allocated.
> >> >>   * @bytes:	Size of the buffer.
> >> >>   * @phys_limit:	Maximum allowed physical address of the buffer.
> >> >> + * @attrs:	DMA attributes for the allocation.
> >> >>   * @gfp:	GFP flags for the allocation.
> >> >>   *
> >> >>   * Return: Allocated pages, or %NULL on allocation failure.
> >> >>   */
> >> >>  static struct page *swiotlb_alloc_tlb(struct device *dev, size_t bytes,
> >> >> -		u64 phys_limit, gfp_t gfp)
> >> >> +		u64 phys_limit, unsigned long attrs, gfp_t gfp)
> >> >>  {
> >> >>  	struct page *page;
> >> >> -	unsigned long attrs = 0;
> >> >>  
> >> >>  	/*
> >> >>  	 * Allocate from the atomic pools if memory is encrypted and
> >> >>  	 * the allocation is atomic, because decrypting may block.
> >> >>  	 */
> >> >> -	if (!gfpflags_allow_blocking(gfp) && dev && force_dma_unencrypted(dev)) {
> >> >> +	if (!gfpflags_allow_blocking(gfp) && (attrs & DMA_ATTR_CC_SHARED)) {
> >> >>  		void *vaddr;
> >> >>  
> >> >>  		if (!IS_ENABLED(CONFIG_DMA_COHERENT_POOL))
> >> >>  			return NULL;
> >> >>  
> >> >> -		/* swiotlb considered decrypted by default */
> >> >> -		if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
> >> >> -			attrs = DMA_ATTR_CC_SHARED;
> >> >> -
> >> >>  		return dma_alloc_from_pool(dev, bytes, &vaddr, gfp,
> >> >>  					   attrs, dma_coherent_ok);
> >> >>  	}
> >> >> @@ -638,7 +651,8 @@ static struct page *swiotlb_alloc_tlb(struct device *dev, size_t bytes,
> >> >>  	else if (phys_limit <= DMA_BIT_MASK(32))
> >> >>  		gfp |= __GFP_DMA32;
> >> >>  
> >> >> -	while (IS_ERR(page = alloc_dma_pages(gfp, bytes, phys_limit))) {
> >> >> +	while (IS_ERR(page = alloc_dma_pages(gfp, bytes, phys_limit,
> >> >> +					     !!(attrs & DMA_ATTR_CC_SHARED)))) {
> >> >>  		if (IS_ENABLED(CONFIG_ZONE_DMA32) &&
> >> >>  		    phys_limit < DMA_BIT_MASK(64) &&
> >> >>  		    !(gfp & (__GFP_DMA32 | __GFP_DMA)))
> >> >> @@ -657,15 +671,18 @@ static struct page *swiotlb_alloc_tlb(struct device *dev, size_t bytes,
> >> >>   * swiotlb_free_tlb() - free a dynamically allocated IO TLB buffer
> >> >>   * @vaddr:	Virtual address of the buffer.
> >> >>   * @bytes:	Size of the buffer.
> >> >> + * @unencrypted: true if @vaddr was allocated decrypted and must be
> >> >> + *	re-encrypted before being freed
> >> >>   */
> >> >> -static void swiotlb_free_tlb(void *vaddr, size_t bytes)
> >> >> +static void swiotlb_free_tlb(void *vaddr, size_t bytes, bool unencrypted)
> >> >>  {
> >> >>  	if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) &&
> >> >>  	    dma_free_from_pool(NULL, vaddr, bytes))
> >> >>  		return;
> >> >>  
> >> >>  	/* Intentional leak if pages cannot be encrypted again. */
> >> >> -	if (!set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes)))
> >> >> +	if (!unencrypted ||
> >> >> +	    !set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes)))
> >> >>  		__free_pages(virt_to_page(vaddr), get_order(bytes));
> >> >>  }
> >> >>  
> >> >> @@ -676,6 +693,7 @@ static void swiotlb_free_tlb(void *vaddr, size_t bytes)
> >> >>   * @nslabs:	Desired (maximum) number of slabs.
> >> >>   * @nareas:	Number of areas.
> >> >>   * @phys_limit:	Maximum DMA buffer physical address.
> >> >> + * @attrs:	DMA attributes for the allocation.
> >> >>   * @gfp:	GFP flags for the allocations.
> >> >>   *
> >> >>   * Allocate and initialize a new IO TLB memory pool. The actual number of
> >> >> @@ -686,7 +704,8 @@ static void swiotlb_free_tlb(void *vaddr, size_t bytes)
> >> >>   */
> >> >>  static struct io_tlb_pool *swiotlb_alloc_pool(struct device *dev,
> >> >>  		unsigned long minslabs, unsigned long nslabs,
> >> >> -		unsigned int nareas, u64 phys_limit, gfp_t gfp)
> >> >> +		unsigned int nareas, u64 phys_limit, unsigned long attrs,
> >> >> +		gfp_t gfp)
> >> >>  {
> >> >>  	struct io_tlb_pool *pool;
> >> >>  	unsigned int slot_order;
> >> >> @@ -704,9 +723,10 @@ static struct io_tlb_pool *swiotlb_alloc_pool(struct device *dev,
> >> >>  	if (!pool)
> >> >>  		goto error;
> >> >>  	pool->areas = (void *)pool + sizeof(*pool);
> >> >> +	pool->unencrypted = !!(attrs & DMA_ATTR_CC_SHARED);
> >> >>  
> >> >>  	tlb_size = nslabs << IO_TLB_SHIFT;
> >> >> -	while (!(tlb = swiotlb_alloc_tlb(dev, tlb_size, phys_limit, gfp))) {
> >> >> +	while (!(tlb = swiotlb_alloc_tlb(dev, tlb_size, phys_limit, attrs, gfp))) {
> >> >>  		if (nslabs <= minslabs)
> >> >>  			goto error_tlb;
> >> >>  		nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE);
> >> >> @@ -724,7 +744,8 @@ static struct io_tlb_pool *swiotlb_alloc_pool(struct device *dev,
> >> >>  	return pool;
> >> >>  
> >> >>  error_slots:
> >> >> -	swiotlb_free_tlb(page_address(tlb), tlb_size);
> >> >> +	swiotlb_free_tlb(page_address(tlb), tlb_size,
> >> >> +			 !!(attrs & DMA_ATTR_CC_SHARED));
> >> >>  error_tlb:
> >> >>  	kfree(pool);
> >> >>  error:
> >> >> @@ -742,7 +763,9 @@ static void swiotlb_dyn_alloc(struct work_struct *work)
> >> >>  	struct io_tlb_pool *pool;
> >> >>  
> >> >>  	pool = swiotlb_alloc_pool(NULL, IO_TLB_MIN_SLABS, default_nslabs,
> >> >> -				  default_nareas, mem->phys_limit, GFP_KERNEL);
> >> >> +				  default_nareas, mem->phys_limit,
> >> >> +				  mem->unencrypted ? DMA_ATTR_CC_SHARED : 0,
> >> >> +				  GFP_KERNEL);
> >> >>  	if (!pool) {
> >> >>  		pr_warn_ratelimited("Failed to allocate new pool");
> >> >>  		return;
> >> >> @@ -762,7 +785,7 @@ static void swiotlb_dyn_free(struct rcu_head *rcu)
> >> >>  	size_t tlb_size = pool->end - pool->start;
> >> >>  
> >> >>  	free_pages((unsigned long)pool->slots, get_order(slots_size));
> >> >> -	swiotlb_free_tlb(pool->vaddr, tlb_size);
> >> >> +	swiotlb_free_tlb(pool->vaddr, tlb_size, pool->unencrypted);
> >> >>  	kfree(pool);
> >> >>  }
> >> >>  
> >> >> @@ -1232,6 +1255,7 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
> >> >>  	nslabs = nr_slots(alloc_size);
> >> >>  	phys_limit = min_not_zero(*dev->dma_mask, dev->bus_dma_limit);
> >> >>  	pool = swiotlb_alloc_pool(dev, nslabs, nslabs, 1, phys_limit,
> >> >> +				  mem->unencrypted ? DMA_ATTR_CC_SHARED : 0,
> >> >>  				  GFP_NOWAIT);
> >> >>  	if (!pool)
> >> >>  		return -1;
> >> >> @@ -1394,6 +1418,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
> >> >>  		enum dma_data_direction dir, unsigned long attrs)
> >> >>  {
> >> >>  	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
> >> >> +	bool require_decrypted = false;
> >> >>  	unsigned int offset;
> >> >>  	struct io_tlb_pool *pool;
> >> >>  	unsigned int i;
> >> >> @@ -1411,6 +1436,16 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
> >> >>  	if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
> >> >>  		pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
> >> >>  
> >> >> +	/*
> >> >> +	 * if we are trying to swiotlb map a decrypted paddr or the paddr is encrypted
> >> >> +	 * but the device is forcing decryption, use decrypted io_tlb_mem
> >> >> +	 */
> >> >> +	if ((attrs & DMA_ATTR_CC_SHARED) || force_dma_unencrypted(dev))
> >> >> +		require_decrypted = true;
> >> >> +
> >> >> +	if (require_decrypted != mem->unencrypted)
> >> >> +		return (phys_addr_t)DMA_MAPPING_ERROR;
> >> >> +
> >> >>  	/*
> >> >>  	 * The default swiotlb memory pool is allocated with PAGE_SIZE
> >> >>  	 * alignment. If a mapping is requested with larger alignment,
> >> >> @@ -1608,8 +1643,14 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
> >> >>  	if (swiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR)
> >> >>  		return DMA_MAPPING_ERROR;
> >> >>  
> >> >> -	/* Ensure that the address returned is DMA'ble */
> >> >> -	dma_addr = phys_to_dma_unencrypted(dev, swiotlb_addr);
> >> >> +	/*
> >> >> +	 * Use the allocated io_tlb_mem encryption type to determine dma addr.
> >> >> +	 */
> >> >> +	if (dev->dma_io_tlb_mem->unencrypted)
> >> >> +		dma_addr = phys_to_dma_unencrypted(dev, swiotlb_addr);
> >> >> +	else
> >> >> +		dma_addr = phys_to_dma_encrypted(dev, swiotlb_addr);
> >> >> +
> >> >>  	if (unlikely(!dma_capable(dev, dma_addr, size, true))) {
> >> >>  		__swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, dir,
> >> >>  			attrs | DMA_ATTR_SKIP_CPU_SYNC,
> >> >> @@ -1773,7 +1814,8 @@ static inline void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
> >> >>  
> >> >>  #ifdef CONFIG_DMA_RESTRICTED_POOL
> >> >>  
> >> >> -struct page *swiotlb_alloc(struct device *dev, size_t size)
> >> >> +struct page *swiotlb_alloc(struct device *dev, size_t size,
> >> >> +		unsigned long attrs)
> >> >>  {
> >> >>  	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
> >> >>  	struct io_tlb_pool *pool;
> >> >> @@ -1784,6 +1826,9 @@ struct page *swiotlb_alloc(struct device *dev, size_t size)
> >> >>  	if (!mem)
> >> >>  		return NULL;
> >> >>  
> >> >> +	if (mem->unencrypted != !!(attrs & DMA_ATTR_CC_SHARED))
> >> >> +		return NULL;
> >> >> +
> >> >>  	align = (1 << (get_order(size) + PAGE_SHIFT)) - 1;
> >> >>  	index = swiotlb_find_slots(dev, 0, size, align, &pool);
> >> >>  	if (index == -1)
> >> >> @@ -1853,9 +1898,18 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
> >> >>  			kfree(mem);
> >> >>  			return -ENOMEM;
> >> >>  		}
> >> >> +		/*
> >> >> +		 * if platform supports memory encryption,
> >> >> +		 * restricted mem pool is decrypted by default
> >> >> +		 */
> >> >> +		if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) {
> >> >> +			mem->unencrypted = true;
> >> >> +			set_memory_decrypted((unsigned long)phys_to_virt(rmem->base),
> >> >> +					     rmem->size >> PAGE_SHIFT);
> >> >> +		} else {
> >> >> +			mem->unencrypted = false;
> >> >> +		}
> >> >
> >> > This breaks pKVM as it doesn’t set CC_ATTR_MEM_ENCRYPT, so all virtio
> >> > traffic now fails.
> >> >
> >> > Also, by design, some drivers are clueless about bouncing, so
> >> > I believe that the pool should have a way to control its property
> >> > (encrypted or decrypted) and that takes priority over whatever
> >> > attributes come from allocation.
> >> > And that brings us to the same point whether it’s better to return
> >> > the memory along with its state or we pass the requested state.
> >> > I think for other cases it’s fine for the device/DMA-API to dictate
> >> > the attrs, but not in the restricted-dma case, where the firmware just knows better.
> >> >
> >> 
> >> Is it that the pKVM guest kernel does not have awareness of
> >> encrypted/decrypted DMA allocations? Instead, the firmware attaches
> >> hypervisor-shared pages to the device via restricted-dma-pool? The
> >> kernel then has swiotlb->for_alloc = true, and hence all DMA allocations
> >> go through the restricted-dma-pool?
> >
> > Yes.
> >
> >> 
> >> Given that pKVM supports pkvm_set_memory_encrypted() and
> >> pkvm_set_memory_decrypted(), can we consider adding CC_ATTR_MEM_ENCRYPT
> >> support to pKVM? It would also be good to investigate whether we can set
> >> force_dma_unencrypted(dev) to true where needed.
> >
> > I was looking into that, but it didn't work because
> > force_dma_unencrypted() is broken with restricted-dma due to the
> > double decryption issue; that's when I sent my first series [1]
> >
> > Maybe we should land some basic fixes for that path so we can
> > convert pKVM, then we do the full rework.
> >
> > I will revive my old work and see if I can send an RFC.
> >
> > [1] https://lore.kernel.org/all/20260305170335.963568-1-smostafa@google.com/
> >
> 
> With this series, can you check whether the only change needed is
> something like the following?
> 
> modified   kernel/dma/swiotlb.c
> @@ -1905,7 +1905,8 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
>  		 * if platform supports memory encryption,
>  		 * restricted mem pool is decrypted by default
>  		 */
> -		if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) {
> +		//if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) {
> +		if (true) {
>  			mem->unencrypted = true;
>  			set_memory_decrypted((unsigned long)phys_to_virt(rmem->base),
>  					     rmem->size >> PAGE_SHIFT);

Yes, that boots, but I will need to do more tests.

> 
> >
> >> 
> >> I agree that this patch, as it stands, can break pKVM because we are now
> >> missing the set_memory_decrypted() call required for pKVM to work.
> >> 
> >> We now mark the swiotlb io_tlb_mem as unencrypted/encrypted in the guest
> >> using struct io_tlb_mem->unencrypted. I am not clear what we can use for
> >> pKVM to conditionalize this so that it works for both protected and
> >> unprotected guests.
> >
> > There is no problem with non-protected guests as they don't use memory
> > encryption; my initial thought was that the encrypted/decrypted state is
> > a per-pool property which is decided by FW (device-tree).
> >
> 
> What I meant was that we need a generic way to identify a pKVM guest, so
> that we can use it in the conditional above.

I have this patch; with it I can boot with your series unmodified,
but I will need to do more testing.

From d795b4c4ee2437587616b2b342e9996afe6d6680 Mon Sep 17 00:00:00 2001
From: Mostafa Saleh <smostafa@google.com>
Date: Thu, 14 May 2026 13:46:15 +0000
Subject: [PATCH] arm64/coco: Add pKVM as a CC platform

pKVM does support memory encryption, expose that to the rest of
the kernel through cc_platform_has()

At the moment, all devices inside the guest are emulated which
requires its memory to be shared back to the host (decrypted), so
set force_dma_unencrypted() to always return true.

Signed-off-by: Mostafa Saleh <smostafa@google.com>
---
 arch/arm64/include/asm/hypervisor.h           |  6 ++++++
 arch/arm64/include/asm/mem_encrypt.h          |  3 ++-
 arch/arm64/kernel/rsi.c                       | 12 ------------
 arch/arm64/mm/init.c                          | 13 +++++++++++++
 drivers/virt/coco/pkvm-guest/arm-pkvm-guest.c |  5 +++++
 5 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/include/asm/hypervisor.h b/arch/arm64/include/asm/hypervisor.h
index a12fd897c877..1b0e15f290be 100644
--- a/arch/arm64/include/asm/hypervisor.h
+++ b/arch/arm64/include/asm/hypervisor.h
@@ -10,8 +10,14 @@ void kvm_arm_target_impl_cpu_init(void);

 #ifdef CONFIG_ARM_PKVM_GUEST
 void pkvm_init_hyp_services(void);
+bool is_protected_kvm_guest(void);
 #else
 static inline void pkvm_init_hyp_services(void) { };
+
+static inline bool is_protected_kvm_guest(void)
+{
+	return false;
+}
 #endif

 static inline void kvm_arch_init_hyp_services(void)
diff --git a/arch/arm64/include/asm/mem_encrypt.h b/arch/arm64/include/asm/mem_encrypt.h
index 314b2b52025f..636f45b4d8af 100644
--- a/arch/arm64/include/asm/mem_encrypt.h
+++ b/arch/arm64/include/asm/mem_encrypt.h
@@ -2,6 +2,7 @@
 #ifndef __ASM_MEM_ENCRYPT_H
 #define __ASM_MEM_ENCRYPT_H

+#include <asm/hypervisor.h>
 #include <asm/rsi.h>

 struct device;
@@ -20,7 +21,7 @@ int realm_register_memory_enc_ops(void);

 static inline bool force_dma_unencrypted(struct device *dev)
 {
-	return is_realm_world();
+	return is_realm_world() || is_protected_kvm_guest();
 }

 /*
diff --git a/arch/arm64/kernel/rsi.c b/arch/arm64/kernel/rsi.c
index 92160f2e57ff..25ca75ce1a4d 100644
--- a/arch/arm64/kernel/rsi.c
+++ b/arch/arm64/kernel/rsi.c
@@ -7,7 +7,6 @@
 #include <linux/memblock.h>
 #include <linux/psci.h>
 #include <linux/swiotlb.h>
-#include <linux/cc_platform.h>
 #include <linux/platform_device.h>

 #include <asm/io.h>
@@ -23,17 +22,6 @@ EXPORT_SYMBOL(prot_ns_shared);
 DEFINE_STATIC_KEY_FALSE_RO(rsi_present);
 EXPORT_SYMBOL(rsi_present);

-bool cc_platform_has(enum cc_attr attr)
-{
-	switch (attr) {
-	case CC_ATTR_MEM_ENCRYPT:
-		return is_realm_world();
-	default:
-		return false;
-	}
-}
-EXPORT_SYMBOL_GPL(cc_platform_has);
-
 static bool rsi_version_matches(void)
 {
 	unsigned long ver_lower, ver_higher;
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index acf67c7064db..a087ac5b15f7 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -12,6 +12,7 @@
 #include <linux/swap.h>
 #include <linux/init.h>
 #include <linux/cache.h>
+#include <linux/cc_platform.h>
 #include <linux/mman.h>
 #include <linux/nodemask.h>
 #include <linux/initrd.h>
@@ -36,6 +37,7 @@

 #include <asm/boot.h>
 #include <asm/fixmap.h>
+#include <asm/hypervisor.h>
 #include <asm/kasan.h>
 #include <asm/kernel-pgtable.h>
 #include <asm/kvm_host.h>
@@ -414,6 +416,17 @@ void dump_mem_limit(void)
 	}
 }

+bool cc_platform_has(enum cc_attr attr)
+{
+	switch (attr) {
+	case CC_ATTR_MEM_ENCRYPT:
+		return is_realm_world() || is_protected_kvm_guest();
+	default:
+		return false;
+	}
+}
+EXPORT_SYMBOL_GPL(cc_platform_has);
+
 #ifdef CONFIG_EXECMEM
 static u64 module_direct_base __ro_after_init = 0;
 static u64 module_plt_base __ro_after_init = 0;
diff --git a/drivers/virt/coco/pkvm-guest/arm-pkvm-guest.c b/drivers/virt/coco/pkvm-guest/arm-pkvm-guest.c
index 4230b817a80b..297e6d6019b8 100644
--- a/drivers/virt/coco/pkvm-guest/arm-pkvm-guest.c
+++ b/drivers/virt/coco/pkvm-guest/arm-pkvm-guest.c
@@ -95,6 +95,11 @@ static int mmio_guard_ioremap_hook(phys_addr_t phys, size_t size,
 	return 0;
 }

+bool is_protected_kvm_guest(void)
+{
+	return !!pkvm_granule;
+}
+
 void pkvm_init_hyp_services(void)
 {
 	int i;
--
2.54.0.563.g4f69b47b94-goog


Thanks,
Mostafa
> 
> -aneesh

^ permalink raw reply related

* Re: [PATCH v5 1/3] firmware: smccc: coco: Manage arm-smccc platform device and CCA auxiliary drivers
From: Catalin Marinas @ 2026-05-14 13:46 UTC (permalink / raw)
  To: Greg KH
  Cc: Suzuki K Poulose, Aneesh Kumar K.V (Arm), linux-coco,
	linux-arm-kernel, linux-kernel, Jeremy Linton, Jonathan Cameron,
	Lorenzo Pieralisi, Mark Rutland, Sudeep Holla, Will Deacon,
	Steven Price
In-Reply-To: <2026051445-magician-coffee-962f@gregkh>

On Thu, May 14, 2026 at 03:25:34PM +0200, Greg Kroah-Hartman wrote:
> On Thu, May 14, 2026 at 02:19:19PM +0100, Catalin Marinas wrote:
> > On Thu, May 14, 2026 at 02:55:48PM +0200, Greg Kroah-Hartman wrote:
> > > On Thu, May 14, 2026 at 12:04:13PM +0100, Suzuki K Poulose wrote:
> > > > On 14/05/2026 10:40, Aneesh Kumar K.V (Arm) wrote:
> > > > > Make the SMCCC driver responsible for registering the arm-smccc platform
> > > > > device and after confirming the relevant SMCCC function IDs, create
> > > > > the arm_cca_guest auxiliary device.
> > > > > 
> > > > 
> > > > There are a few changes squashed in to this patch. Please could we
> > > > split the patch in the following order ?
> > > > 
> > > > 1. Add platform device for arm-smccc
> > > 
> > > Do not make any more "fake" platform devices please.
> > > 
> > > > 2. Move TRNG to Auxiliary Device - (Even though it is a later patch, move
> > > > it before the RSI changes)
> > > 
> > > No, move it to the faux api please.
> > 
> > So should we end up with:
> > 
> >   /sys/devices/faux/arm-smccc/
> >     smccc_trng/
> >     arm-rsi-dev/
> 
> What types are these child devices?  Also faux ones?

They'd also be faux devices with this structure (in practice they are
firmware interfaces that may be backed by some hardware like in the TRNG
case, though not directly accessible to Linux).

-- 
Catalin

^ permalink raw reply

* Re: [PATCH v5 1/3] firmware: smccc: coco: Manage arm-smccc platform device and CCA auxiliary drivers
From: Greg KH @ 2026-05-14 13:25 UTC (permalink / raw)
  To: Catalin Marinas
  Cc: Suzuki K Poulose, Aneesh Kumar K.V (Arm), linux-coco,
	linux-arm-kernel, linux-kernel, Jeremy Linton, Jonathan Cameron,
	Lorenzo Pieralisi, Mark Rutland, Sudeep Holla, Will Deacon,
	Steven Price
In-Reply-To: <agXL12bNh4gGyK1K@arm.com>

On Thu, May 14, 2026 at 02:19:19PM +0100, Catalin Marinas wrote:
> On Thu, May 14, 2026 at 02:55:48PM +0200, Greg Kroah-Hartman wrote:
> > On Thu, May 14, 2026 at 12:04:13PM +0100, Suzuki K Poulose wrote:
> > > On 14/05/2026 10:40, Aneesh Kumar K.V (Arm) wrote:
> > > > Make the SMCCC driver responsible for registering the arm-smccc platform
> > > > device and after confirming the relevant SMCCC function IDs, create
> > > > the arm_cca_guest auxiliary device.
> > > > 
> > > 
> > > There are a few changes squashed in to this patch. Please could we
> > > split the patch in the following order ?
> > > 
> > > 1. Add platform device for arm-smccc
> > 
> > Do not make any more "fake" platform devices please.
> > 
> > > 2. Move TRNG to Auxiliary Device - (Even though it is a later patch, move
> > > it before the RSI changes)
> > 
> > No, move it to the faux api please.
> 
> So should we end up with:
> 
>   /sys/devices/faux/arm-smccc/
>     smccc_trng/
>     arm-rsi-dev/

What types are these child devices?  Also faux ones?

If so, great.

thanks,

greg k-h

^ permalink raw reply

* Re: [PATCH v5 1/3] firmware: smccc: coco: Manage arm-smccc platform device and CCA auxiliary drivers
From: Catalin Marinas @ 2026-05-14 13:23 UTC (permalink / raw)
  To: Aneesh Kumar K.V (Arm)
  Cc: linux-coco, linux-arm-kernel, linux-kernel, Greg KH,
	Jeremy Linton, Jonathan Cameron, Lorenzo Pieralisi, Mark Rutland,
	Sudeep Holla, Will Deacon, Steven Price, Suzuki K Poulose
In-Reply-To: <20260514094030.42495-2-aneesh.kumar@kernel.org>

On Thu, May 14, 2026 at 03:10:28PM +0530, Aneesh Kumar K.V (Arm) wrote:
> diff --git a/drivers/firmware/smccc/rmm.h b/drivers/firmware/smccc/rmm.h
> new file mode 100644
> index 000000000000..a47a650d4f51
> --- /dev/null
> +++ b/drivers/firmware/smccc/rmm.h
> @@ -0,0 +1,17 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _SMCCC_RMM_H
> +#define _SMCCC_RMM_H
> +
> +#include <linux/platform_device.h>
> +
> +#ifdef CONFIG_ARM64
> +#include <asm/rsi_cmds.h>
> +void __init register_rsi_device(struct platform_device *pdev);
> +#else
> +
> +static void __init register_rsi_device(struct platform_device *pdev)

Nit: static inline here (I think Sashiko mentioned it on a previous
version).

> +{
> +
> +}

And unnecessary empty line between curly braces.

Just these nitpicks for now. Suzuki and Sudeep already covered the
splitting of this patch and we need to agree on the sysfs hierarchy.

-- 
Catalin

^ permalink raw reply

* Re: [PATCH v5 1/3] firmware: smccc: coco: Manage arm-smccc platform device and CCA auxiliary drivers
From: Catalin Marinas @ 2026-05-14 13:19 UTC (permalink / raw)
  To: Greg KH
  Cc: Suzuki K Poulose, Aneesh Kumar K.V (Arm), linux-coco,
	linux-arm-kernel, linux-kernel, Jeremy Linton, Jonathan Cameron,
	Lorenzo Pieralisi, Mark Rutland, Sudeep Holla, Will Deacon,
	Steven Price
In-Reply-To: <2026051420-amusement-drove-73e6@gregkh>

On Thu, May 14, 2026 at 02:55:48PM +0200, Greg Kroah-Hartman wrote:
> On Thu, May 14, 2026 at 12:04:13PM +0100, Suzuki K Poulose wrote:
> > On 14/05/2026 10:40, Aneesh Kumar K.V (Arm) wrote:
> > > Make the SMCCC driver responsible for registering the arm-smccc platform
> > > device and after confirming the relevant SMCCC function IDs, create
> > > the arm_cca_guest auxiliary device.
> > > 
> > 
> > There are a few changes squashed in to this patch. Please could we
> > split the patch in the following order ?
> > 
> > 1. Add platform device for arm-smccc
> 
> Do not make any more "fake" platform devices please.
> 
> > 2. Move TRNG to Auxiliary Device - (Even though it is a later patch, move
> > it before the RSI changes)
> 
> No, move it to the faux api please.

So should we end up with:

  /sys/devices/faux/arm-smccc/
    smccc_trng/
    arm-rsi-dev/
      tsm/tsm0

  /sys/class/tsm/tsm0
    -> ../../devices/faux/arm-smccc/arm-rsi-dev/tsm/tsm0

  /sys/firmware/cca/
    realm_guest

-- 
Catalin

^ permalink raw reply

* Re: [PATCH v5 1/3] firmware: smccc: coco: Manage arm-smccc platform device and CCA auxiliary drivers
From: Greg KH @ 2026-05-14 12:55 UTC (permalink / raw)
  To: Suzuki K Poulose
  Cc: Aneesh Kumar K.V (Arm), linux-coco, linux-arm-kernel,
	linux-kernel, Catalin Marinas, Jeremy Linton, Jonathan Cameron,
	Lorenzo Pieralisi, Mark Rutland, Sudeep Holla, Will Deacon,
	Steven Price
In-Reply-To: <0c88bcee-65b5-4328-87e6-e1c714c3d1ca@arm.com>

On Thu, May 14, 2026 at 12:04:13PM +0100, Suzuki K Poulose wrote:
> Hi Aneesh
> 
> On 14/05/2026 10:40, Aneesh Kumar K.V (Arm) wrote:
> > Make the SMCCC driver responsible for registering the arm-smccc platform
> > device and after confirming the relevant SMCCC function IDs, create
> > the arm_cca_guest auxiliary device.
> > 
> 
> There are a few changes squashed in to this patch. Please could we
> split the patch in the following order ?
> 
> 1. Add platform device for arm-smccc

Do not make any more "fake" platform devices please.

> 2. Move TRNG to Auxiliary Device - (Even though it is a later patch, move
> it before the RSI changes)

No, move it to the faux api please.

thanks,

greg k-h

^ permalink raw reply

* Re: [PATCH v5 1/3] firmware: smccc: coco: Manage arm-smccc platform device and CCA auxiliary drivers
From: Sudeep Holla @ 2026-05-14 12:50 UTC (permalink / raw)
  To: Suzuki K Poulose
  Cc: Aneesh Kumar K.V (Arm), linux-coco, linux-arm-kernel,
	Sudeep Holla, linux-kernel, Catalin Marinas, Greg KH,
	Jeremy Linton, Jonathan Cameron, Lorenzo Pieralisi, Mark Rutland,
	Will Deacon, Steven Price
In-Reply-To: <0c88bcee-65b5-4328-87e6-e1c714c3d1ca@arm.com>

On Thu, May 14, 2026 at 12:04:13PM +0100, Suzuki K Poulose wrote:
> Hi Aneesh
> 
> On 14/05/2026 10:40, Aneesh Kumar K.V (Arm) wrote:
> > Make the SMCCC driver responsible for registering the arm-smccc platform
> > device and after confirming the relevant SMCCC function IDs, create
> > the arm_cca_guest auxiliary device.
> > 
> 
> There are a few changes squashed in to this patch.

I had similar thoughts but I didn't get into forming a reply, now I can keep
it small, thanks for that 😉.

> Please could we split the patch in the following order ?
> 
> 1. Add platform device for arm-smccc
> 2. Move TRNG to Auxiliary Device - (Even though it is a later patch, move
> it before the RSI changes)
> 3. Move RSI dev as Auxiliary
> 4. Add the firmware sysfs ABI.
> 

I agree with the logical split of functionality above.

> That way, first two could be merged while we figure out (3) and (4)
> 

I disagree with this though. We don't want to merge this unless (3) is
agreed. There is no point in doing that unless we agree the approach for
RSI as well.

-- 
Regards,
Sudeep

^ permalink raw reply

* Re: [PATCH v4 04/13] dma: swiotlb: track pool encryption state and honor DMA_ATTR_CC_SHARED
From: Aneesh Kumar K.V @ 2026-05-14 12:48 UTC (permalink / raw)
  To: Mostafa Saleh
  Cc: iommu, linux-arm-kernel, linux-kernel, linux-coco, Robin Murphy,
	Marek Szyprowski, Will Deacon, Marc Zyngier, Steven Price,
	Suzuki K Poulose, Catalin Marinas, Jiri Pirko, Jason Gunthorpe,
	Petr Tesarik, Alexey Kardashevskiy, Dan Williams, Xu Yilun,
	linuxppc-dev, linux-s390, Madhavan Srinivasan, Michael Ellerman,
	Nicholas Piggin, Christophe Leroy (CS GROUP), Alexander Gordeev,
	Gerald Schaefer, Heiko Carstens, Vasily Gorbik,
	Christian Borntraeger, Sven Schnelle, x86
In-Reply-To: <agW5rhE9n2gDQ0w5@google.com>

Mostafa Saleh <smostafa@google.com> writes:

> On Thu, May 14, 2026 at 11:24:42AM +0530, Aneesh Kumar K.V wrote:
>> Mostafa Saleh <smostafa@google.com> writes:
>> 
>> > On Tue, May 12, 2026 at 02:33:59PM +0530, Aneesh Kumar K.V (Arm) wrote:
>> >> Teach swiotlb to distinguish between encrypted and decrypted bounce
>> >> buffer pools, and make allocation and mapping paths select a pool whose
>> >> state matches the requested DMA attributes.
>> >> 
>> >> Add a decrypted flag to io_tlb_mem, initialize it for the default and
>> >> restricted pools, and propagate DMA_ATTR_CC_SHARED into swiotlb pool
>> >> allocation. Reject swiotlb alloc/map requests when the selected pool does
>> >> not match the required encrypted/decrypted state.
>> >> 
>> >> Also return DMA addresses with the matching phys_to_dma_{encrypted,
>> >> unencrypted} helper so the DMA address encoding stays consistent with the
>> >> chosen pool.
>> >> 
>> >> Signed-off-by: Aneesh Kumar K.V (Arm) <aneesh.kumar@kernel.org>
>> >> ---
>> >>  include/linux/dma-direct.h |  10 ++++
>> >>  include/linux/swiotlb.h    |   8 ++-
>> >>  kernel/dma/direct.c        |  14 +++--
>> >>  kernel/dma/swiotlb.c       | 108 +++++++++++++++++++++++++++----------
>> >>  4 files changed, 107 insertions(+), 33 deletions(-)
>> >> 
>> >> diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
>> >> index c249912456f9..94fad4e7c11e 100644
>> >> --- a/include/linux/dma-direct.h
>> >> +++ b/include/linux/dma-direct.h
>> >> @@ -77,6 +77,10 @@ static inline dma_addr_t dma_range_map_max(const struct bus_dma_region *map)
>> >>  #ifndef phys_to_dma_unencrypted
>> >>  #define phys_to_dma_unencrypted		phys_to_dma
>> >>  #endif
>> >> +
>> >> +#ifndef phys_to_dma_encrypted
>> >> +#define phys_to_dma_encrypted		phys_to_dma
>> >> +#endif
>> >>  #else
>> >>  static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
>> >>  {
>> >> @@ -90,6 +94,12 @@ static inline dma_addr_t phys_to_dma_unencrypted(struct device *dev,
>> >>  {
>> >>  	return dma_addr_unencrypted(__phys_to_dma(dev, paddr));
>> >>  }
>> >> +
>> >> +static inline dma_addr_t phys_to_dma_encrypted(struct device *dev,
>> >> +		phys_addr_t paddr)
>> >> +{
>> >> +	return dma_addr_encrypted(__phys_to_dma(dev, paddr));
>> >> +}
>> >>  /*
>> >>   * If memory encryption is supported, phys_to_dma will set the memory encryption
>> >>   * bit in the DMA address, and dma_to_phys will clear it.
>> >> diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
>> >> index 3dae0f592063..b3fa3c6e0169 100644
>> >> --- a/include/linux/swiotlb.h
>> >> +++ b/include/linux/swiotlb.h
>> >> @@ -81,6 +81,7 @@ struct io_tlb_pool {
>> >>  	struct list_head node;
>> >>  	struct rcu_head rcu;
>> >>  	bool transient;
>> >> +	bool unencrypted;
>> >>  #endif
>> >>  };
>> >>  
>> >> @@ -111,6 +112,7 @@ struct io_tlb_mem {
>> >>  	struct dentry *debugfs;
>> >>  	bool force_bounce;
>> >>  	bool for_alloc;
>> >> +	bool unencrypted;
>> >>  #ifdef CONFIG_SWIOTLB_DYNAMIC
>> >>  	bool can_grow;
>> >>  	u64 phys_limit;
>> >> @@ -282,7 +284,8 @@ static inline void swiotlb_sync_single_for_cpu(struct device *dev,
>> >>  extern void swiotlb_print_info(void);
>> >>  
>> >>  #ifdef CONFIG_DMA_RESTRICTED_POOL
>> >> -struct page *swiotlb_alloc(struct device *dev, size_t size);
>> >> +struct page *swiotlb_alloc(struct device *dev, size_t size,
>> >> +		unsigned long attrs);
>> >>  bool swiotlb_free(struct device *dev, struct page *page, size_t size);
>> >>  
>> >>  static inline bool is_swiotlb_for_alloc(struct device *dev)
>> >> @@ -290,7 +293,8 @@ static inline bool is_swiotlb_for_alloc(struct device *dev)
>> >>  	return dev->dma_io_tlb_mem->for_alloc;
>> >>  }
>> >>  #else
>> >> -static inline struct page *swiotlb_alloc(struct device *dev, size_t size)
>> >> +static inline struct page *swiotlb_alloc(struct device *dev, size_t size,
>> >> +		unsigned long attrs)
>> >>  {
>> >>  	return NULL;
>> >>  }
>> >> diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
>> >> index dc2907439b3d..97ae4fa10521 100644
>> >> --- a/kernel/dma/direct.c
>> >> +++ b/kernel/dma/direct.c
>> >> @@ -104,9 +104,10 @@ static void __dma_direct_free_pages(struct device *dev, struct page *page,
>> >>  	dma_free_contiguous(dev, page, size);
>> >>  }
>> >>  
>> >> -static struct page *dma_direct_alloc_swiotlb(struct device *dev, size_t size)
>> >> +static struct page *dma_direct_alloc_swiotlb(struct device *dev, size_t size,
>> >> +		unsigned long attrs)
>> >>  {
>> >> -	struct page *page = swiotlb_alloc(dev, size);
>> >> +	struct page *page = swiotlb_alloc(dev, size, attrs);
>> >>  
>> >>  	if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
>> >>  		swiotlb_free(dev, page, size);
>> >> @@ -266,8 +267,12 @@ void *dma_direct_alloc(struct device *dev, size_t size,
>> >>  						  gfp, attrs);
>> >>  
>> >>  	if (is_swiotlb_for_alloc(dev)) {
>> >> -		page = dma_direct_alloc_swiotlb(dev, size);
>> >> +		page = dma_direct_alloc_swiotlb(dev, size, attrs);
>> >>  		if (page) {
>> >> +			/*
>> >> +			 * swiotlb allocations comes from pool already marked
>> >> +			 * decrypted
>> >> +			 */
>> >>  			mark_mem_decrypt = false;
>> >>  			goto setup_page;
>> >>  		}
>> >> @@ -374,6 +379,7 @@ void dma_direct_free(struct device *dev, size_t size,
>> >>  		return;
>> >>  
>> >>  	if (swiotlb_find_pool(dev, dma_to_phys(dev, dma_addr)))
>> >> +		/* Swiotlb doesn't need a page attribute update on free */
>> >>  		mark_mem_encrypted = false;
>> >>  
>> >>  	if (is_vmalloc_addr(cpu_addr)) {
>> >> @@ -403,7 +409,7 @@ struct page *dma_direct_alloc_pages(struct device *dev, size_t size,
>> >>  						  gfp, attrs);
>> >>  
>> >>  	if (is_swiotlb_for_alloc(dev)) {
>> >> -		page = dma_direct_alloc_swiotlb(dev, size);
>> >> +		page = dma_direct_alloc_swiotlb(dev, size, attrs);
>> >>  		if (!page)
>> >>  			return NULL;
>> >>  
>> >> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
>> >> index ab4eccbaa076..065663be282c 100644
>> >> --- a/kernel/dma/swiotlb.c
>> >> +++ b/kernel/dma/swiotlb.c
>> >> @@ -259,10 +259,21 @@ void __init swiotlb_update_mem_attributes(void)
>> >>  	struct io_tlb_pool *mem = &io_tlb_default_mem.defpool;
>> >>  	unsigned long bytes;
>> >>  
>> >> +	/*
>> >> +	 * if platform support memory encryption, swiotlb buffers are
>> >> +	 * decrypted by default.
>> >> +	 */
>> >> +	if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
>> >> +		io_tlb_default_mem.unencrypted = true;
>> >> +	else
>> >> +		io_tlb_default_mem.unencrypted = false;
>> >> +
>> >>  	if (!mem->nslabs || mem->late_alloc)
>> >>  		return;
>> >>  	bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT);
>> >> -	set_memory_decrypted((unsigned long)mem->vaddr, bytes >> PAGE_SHIFT);
>> >> +
>> >> +	if (io_tlb_default_mem.unencrypted)
>> >> +		set_memory_decrypted((unsigned long)mem->vaddr, bytes >> PAGE_SHIFT);
>> >>  }
>> >>  
>> >>  static void swiotlb_init_io_tlb_pool(struct io_tlb_pool *mem, phys_addr_t start,
>> >> @@ -505,8 +516,10 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
>> >>  	if (!mem->slots)
>> >>  		goto error_slots;
>> >>  
>> >> -	set_memory_decrypted((unsigned long)vstart,
>> >> -			     (nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT);
>> >> +	if (io_tlb_default_mem.unencrypted)
>> >> +		set_memory_decrypted((unsigned long)vstart,
>> >> +				     (nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT);
>> >> +
>> >>  	swiotlb_init_io_tlb_pool(mem, virt_to_phys(vstart), nslabs, true,
>> >>  				 nareas);
>> >>  	add_mem_pool(&io_tlb_default_mem, mem);
>> >> @@ -539,7 +552,9 @@ void __init swiotlb_exit(void)
>> >>  	tbl_size = PAGE_ALIGN(mem->end - mem->start);
>> >>  	slots_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), mem->nslabs));
>> >>  
>> >> -	set_memory_encrypted(tbl_vaddr, tbl_size >> PAGE_SHIFT);
>> >> +	if (io_tlb_default_mem.unencrypted)
>> >> +		set_memory_encrypted(tbl_vaddr, tbl_size >> PAGE_SHIFT);
>> >> +
>> >>  	if (mem->late_alloc) {
>> >>  		area_order = get_order(array_size(sizeof(*mem->areas),
>> >>  			mem->nareas));
>> >> @@ -563,6 +578,7 @@ void __init swiotlb_exit(void)
>> >>   * @gfp:	GFP flags for the allocation.
>> >>   * @bytes:	Size of the buffer.
>> >>   * @phys_limit:	Maximum allowed physical address of the buffer.
>> >> + * @unencrypted: true to allocate unencrypted memory, false for encrypted memory
>> >>   *
>> >>   * Allocate pages from the buddy allocator. If successful, make the allocated
>> >>   * pages decrypted that they can be used for DMA.
>> >> @@ -570,7 +586,8 @@ void __init swiotlb_exit(void)
>> >>   * Return: Decrypted pages, %NULL on allocation failure, or ERR_PTR(-EAGAIN)
>> >>   * if the allocated physical address was above @phys_limit.
>> >>   */
>> >> -static struct page *alloc_dma_pages(gfp_t gfp, size_t bytes, u64 phys_limit)
>> >> +static struct page *alloc_dma_pages(gfp_t gfp, size_t bytes,
>> >> +		u64 phys_limit, bool unencrypted)
>> >>  {
>> >>  	unsigned int order = get_order(bytes);
>> >>  	struct page *page;
>> >> @@ -588,13 +605,13 @@ static struct page *alloc_dma_pages(gfp_t gfp, size_t bytes, u64 phys_limit)
>> >>  	}
>> >>  
>> >>  	vaddr = phys_to_virt(paddr);
>> >> -	if (set_memory_decrypted((unsigned long)vaddr, PFN_UP(bytes)))
>> >> +	if (unencrypted && set_memory_decrypted((unsigned long)vaddr, PFN_UP(bytes)))
>> >>  		goto error;
>> >>  	return page;
>> >>  
>> >>  error:
>> >>  	/* Intentional leak if pages cannot be encrypted again. */
>> >> -	if (!set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes)))
>> >> +	if (unencrypted && !set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes)))
>> >>  		__free_pages(page, order);
>> >>  	return NULL;
>> >>  }
>> >> @@ -604,30 +621,26 @@ static struct page *alloc_dma_pages(gfp_t gfp, size_t bytes, u64 phys_limit)
>> >>   * @dev:	Device for which a memory pool is allocated.
>> >>   * @bytes:	Size of the buffer.
>> >>   * @phys_limit:	Maximum allowed physical address of the buffer.
>> >> + * @attrs:	DMA attributes for the allocation.
>> >>   * @gfp:	GFP flags for the allocation.
>> >>   *
>> >>   * Return: Allocated pages, or %NULL on allocation failure.
>> >>   */
>> >>  static struct page *swiotlb_alloc_tlb(struct device *dev, size_t bytes,
>> >> -		u64 phys_limit, gfp_t gfp)
>> >> +		u64 phys_limit, unsigned long attrs, gfp_t gfp)
>> >>  {
>> >>  	struct page *page;
>> >> -	unsigned long attrs = 0;
>> >>  
>> >>  	/*
>> >>  	 * Allocate from the atomic pools if memory is encrypted and
>> >>  	 * the allocation is atomic, because decrypting may block.
>> >>  	 */
>> >> -	if (!gfpflags_allow_blocking(gfp) && dev && force_dma_unencrypted(dev)) {
>> >> +	if (!gfpflags_allow_blocking(gfp) && (attrs & DMA_ATTR_CC_SHARED)) {
>> >>  		void *vaddr;
>> >>  
>> >>  		if (!IS_ENABLED(CONFIG_DMA_COHERENT_POOL))
>> >>  			return NULL;
>> >>  
>> >> -		/* swiotlb considered decrypted by default */
>> >> -		if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
>> >> -			attrs = DMA_ATTR_CC_SHARED;
>> >> -
>> >>  		return dma_alloc_from_pool(dev, bytes, &vaddr, gfp,
>> >>  					   attrs, dma_coherent_ok);
>> >>  	}
>> >> @@ -638,7 +651,8 @@ static struct page *swiotlb_alloc_tlb(struct device *dev, size_t bytes,
>> >>  	else if (phys_limit <= DMA_BIT_MASK(32))
>> >>  		gfp |= __GFP_DMA32;
>> >>  
>> >> -	while (IS_ERR(page = alloc_dma_pages(gfp, bytes, phys_limit))) {
>> >> +	while (IS_ERR(page = alloc_dma_pages(gfp, bytes, phys_limit,
>> >> +					     !!(attrs & DMA_ATTR_CC_SHARED)))) {
>> >>  		if (IS_ENABLED(CONFIG_ZONE_DMA32) &&
>> >>  		    phys_limit < DMA_BIT_MASK(64) &&
>> >>  		    !(gfp & (__GFP_DMA32 | __GFP_DMA)))
>> >> @@ -657,15 +671,18 @@ static struct page *swiotlb_alloc_tlb(struct device *dev, size_t bytes,
>> >>   * swiotlb_free_tlb() - free a dynamically allocated IO TLB buffer
>> >>   * @vaddr:	Virtual address of the buffer.
>> >>   * @bytes:	Size of the buffer.
>> >> + * @unencrypted: true if @vaddr was allocated decrypted and must be
>> >> + *	re-encrypted before being freed
>> >>   */
>> >> -static void swiotlb_free_tlb(void *vaddr, size_t bytes)
>> >> +static void swiotlb_free_tlb(void *vaddr, size_t bytes, bool unencrypted)
>> >>  {
>> >>  	if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) &&
>> >>  	    dma_free_from_pool(NULL, vaddr, bytes))
>> >>  		return;
>> >>  
>> >>  	/* Intentional leak if pages cannot be encrypted again. */
>> >> -	if (!set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes)))
>> >> +	if (!unencrypted ||
>> >> +	    !set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes)))
>> >>  		__free_pages(virt_to_page(vaddr), get_order(bytes));
>> >>  }
>> >>  
>> >> @@ -676,6 +693,7 @@ static void swiotlb_free_tlb(void *vaddr, size_t bytes)
>> >>   * @nslabs:	Desired (maximum) number of slabs.
>> >>   * @nareas:	Number of areas.
>> >>   * @phys_limit:	Maximum DMA buffer physical address.
>> >> + * @attrs:	DMA attributes for the allocation.
>> >>   * @gfp:	GFP flags for the allocations.
>> >>   *
>> >>   * Allocate and initialize a new IO TLB memory pool. The actual number of
>> >> @@ -686,7 +704,8 @@ static void swiotlb_free_tlb(void *vaddr, size_t bytes)
>> >>   */
>> >>  static struct io_tlb_pool *swiotlb_alloc_pool(struct device *dev,
>> >>  		unsigned long minslabs, unsigned long nslabs,
>> >> -		unsigned int nareas, u64 phys_limit, gfp_t gfp)
>> >> +		unsigned int nareas, u64 phys_limit, unsigned long attrs,
>> >> +		gfp_t gfp)
>> >>  {
>> >>  	struct io_tlb_pool *pool;
>> >>  	unsigned int slot_order;
>> >> @@ -704,9 +723,10 @@ static struct io_tlb_pool *swiotlb_alloc_pool(struct device *dev,
>> >>  	if (!pool)
>> >>  		goto error;
>> >>  	pool->areas = (void *)pool + sizeof(*pool);
>> >> +	pool->unencrypted = !!(attrs & DMA_ATTR_CC_SHARED);
>> >>  
>> >>  	tlb_size = nslabs << IO_TLB_SHIFT;
>> >> -	while (!(tlb = swiotlb_alloc_tlb(dev, tlb_size, phys_limit, gfp))) {
>> >> +	while (!(tlb = swiotlb_alloc_tlb(dev, tlb_size, phys_limit, attrs, gfp))) {
>> >>  		if (nslabs <= minslabs)
>> >>  			goto error_tlb;
>> >>  		nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE);
>> >> @@ -724,7 +744,8 @@ static struct io_tlb_pool *swiotlb_alloc_pool(struct device *dev,
>> >>  	return pool;
>> >>  
>> >>  error_slots:
>> >> -	swiotlb_free_tlb(page_address(tlb), tlb_size);
>> >> +	swiotlb_free_tlb(page_address(tlb), tlb_size,
>> >> +			 !!(attrs & DMA_ATTR_CC_SHARED));
>> >>  error_tlb:
>> >>  	kfree(pool);
>> >>  error:
>> >> @@ -742,7 +763,9 @@ static void swiotlb_dyn_alloc(struct work_struct *work)
>> >>  	struct io_tlb_pool *pool;
>> >>  
>> >>  	pool = swiotlb_alloc_pool(NULL, IO_TLB_MIN_SLABS, default_nslabs,
>> >> -				  default_nareas, mem->phys_limit, GFP_KERNEL);
>> >> +				  default_nareas, mem->phys_limit,
>> >> +				  mem->unencrypted ? DMA_ATTR_CC_SHARED : 0,
>> >> +				  GFP_KERNEL);
>> >>  	if (!pool) {
>> >>  		pr_warn_ratelimited("Failed to allocate new pool");
>> >>  		return;
>> >> @@ -762,7 +785,7 @@ static void swiotlb_dyn_free(struct rcu_head *rcu)
>> >>  	size_t tlb_size = pool->end - pool->start;
>> >>  
>> >>  	free_pages((unsigned long)pool->slots, get_order(slots_size));
>> >> -	swiotlb_free_tlb(pool->vaddr, tlb_size);
>> >> +	swiotlb_free_tlb(pool->vaddr, tlb_size, pool->unencrypted);
>> >>  	kfree(pool);
>> >>  }
>> >>  
>> >> @@ -1232,6 +1255,7 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
>> >>  	nslabs = nr_slots(alloc_size);
>> >>  	phys_limit = min_not_zero(*dev->dma_mask, dev->bus_dma_limit);
>> >>  	pool = swiotlb_alloc_pool(dev, nslabs, nslabs, 1, phys_limit,
>> >> +				  mem->unencrypted ? DMA_ATTR_CC_SHARED : 0,
>> >>  				  GFP_NOWAIT);
>> >>  	if (!pool)
>> >>  		return -1;
>> >> @@ -1394,6 +1418,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
>> >>  		enum dma_data_direction dir, unsigned long attrs)
>> >>  {
>> >>  	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
>> >> +	bool require_decrypted = false;
>> >>  	unsigned int offset;
>> >>  	struct io_tlb_pool *pool;
>> >>  	unsigned int i;
>> >> @@ -1411,6 +1436,16 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
>> >>  	if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
>> >>  		pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
>> >>  
>> >> +	/*
>> >> +	 * if we are trying to swiotlb map a decrypted paddr or the paddr is encrypted
>> >> +	 * but the device is forcing decryption, use decrypted io_tlb_mem
>> >> +	 */
>> >> +	if ((attrs & DMA_ATTR_CC_SHARED) || force_dma_unencrypted(dev))
>> >> +		require_decrypted = true;
>> >> +
>> >> +	if (require_decrypted != mem->unencrypted)
>> >> +		return (phys_addr_t)DMA_MAPPING_ERROR;
>> >> +
>> >>  	/*
>> >>  	 * The default swiotlb memory pool is allocated with PAGE_SIZE
>> >>  	 * alignment. If a mapping is requested with larger alignment,
>> >> @@ -1608,8 +1643,14 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
>> >>  	if (swiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR)
>> >>  		return DMA_MAPPING_ERROR;
>> >>  
>> >> -	/* Ensure that the address returned is DMA'ble */
>> >> -	dma_addr = phys_to_dma_unencrypted(dev, swiotlb_addr);
>> >> +	/*
>> >> +	 * Use the allocated io_tlb_mem encryption type to determine dma addr.
>> >> +	 */
>> >> +	if (dev->dma_io_tlb_mem->unencrypted)
>> >> +		dma_addr = phys_to_dma_unencrypted(dev, swiotlb_addr);
>> >> +	else
>> >> +		dma_addr = phys_to_dma_encrypted(dev, swiotlb_addr);
>> >> +
>> >>  	if (unlikely(!dma_capable(dev, dma_addr, size, true))) {
>> >>  		__swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, dir,
>> >>  			attrs | DMA_ATTR_SKIP_CPU_SYNC,
>> >> @@ -1773,7 +1814,8 @@ static inline void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
>> >>  
>> >>  #ifdef CONFIG_DMA_RESTRICTED_POOL
>> >>  
>> >> -struct page *swiotlb_alloc(struct device *dev, size_t size)
>> >> +struct page *swiotlb_alloc(struct device *dev, size_t size,
>> >> +		unsigned long attrs)
>> >>  {
>> >>  	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
>> >>  	struct io_tlb_pool *pool;
>> >> @@ -1784,6 +1826,9 @@ struct page *swiotlb_alloc(struct device *dev, size_t size)
>> >>  	if (!mem)
>> >>  		return NULL;
>> >>  
>> >> +	if (mem->unencrypted != !!(attrs & DMA_ATTR_CC_SHARED))
>> >> +		return NULL;
>> >> +
>> >>  	align = (1 << (get_order(size) + PAGE_SHIFT)) - 1;
>> >>  	index = swiotlb_find_slots(dev, 0, size, align, &pool);
>> >>  	if (index == -1)
>> >> @@ -1853,9 +1898,18 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
>> >>  			kfree(mem);
>> >>  			return -ENOMEM;
>> >>  		}
>> >> +		/*
>> >> +		 * if platform supports memory encryption,
>> >> +		 * restricted mem pool is decrypted by default
>> >> +		 */
>> >> +		if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) {
>> >> +			mem->unencrypted = true;
>> >> +			set_memory_decrypted((unsigned long)phys_to_virt(rmem->base),
>> >> +					     rmem->size >> PAGE_SHIFT);
>> >> +		} else {
>> >> +			mem->unencrypted = false;
>> >> +		}
>> >
>> > This breaks pKVM as it doesn’t set CC_ATTR_MEM_ENCRYPT, so all virtio
>> > traffic now fails.
>> >
>> > Also, by design, some drivers are clueless about bouncing, so
>> > I believe that the pool should have a way to control its property
>> > (encrypted or decrypted) and that takes priority over whatever
>> > attributes comes from allocation.
>> > And that brings us to the same point whether it’s better to return
>> > the memory along with its state or we pass the requested state.
>> > I think for other cases it’s fine for the device/DMA-API to dictate
>> > the attrs, but not in restricted-dma case, the firmware just knows better.
>> >
>> 
>> Is it that the pKVM guest kernel does not have awareness of
>> encrypted/decrypted DMA allocations? Instead, the firmware attaches
>> hypervisor-shared pages to the device via restricted-dma-pool? The
>> kernel then has swiotlb->for_alloc = true, and hence all DMA allocations
>> go through the restricted-dma-pool?
>
> Yes.
>
>> 
>> Given that pKVM supports pkvm_set_memory_encrypted() and
>> pkvm_set_memory_decrypted(), can we consider adding CC_ATTR_MEM_ENCRYPT
>> support to pKVM? It would also be good to investigate whether we can set
>> force_dma_unencrypted(dev) to true where needed.
>
> I was looking into that, but it didn't work because
> force_dma_unencrypted() is broken with restricted-dma due to the
> double decryption issue, that's when I sent my first series [1]
>
> Maybe we should land some basic fixes for that path so we can
> convert pKVM, then we do the full rework.
>
> I will revive my old work and see if I can send a RFC.
>
> [1] https://lore.kernel.org/all/20260305170335.963568-1-smostafa@google.com/
>

With this series, can you check whether the only change needed is
something like the following?

modified   kernel/dma/swiotlb.c
@@ -1905,7 +1905,8 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
 		 * if platform supports memory encryption,
 		 * restricted mem pool is decrypted by default
 		 */
-		if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) {
+		//if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) {
+		if (true) {
 			mem->unencrypted = true;
 			set_memory_decrypted((unsigned long)phys_to_virt(rmem->base),
 					     rmem->size >> PAGE_SHIFT);

>
>> 
>> I agree that this patch, as it stands, can break pKVM because we are now
>> missing the set_memory_decrypted() call required for pKVM to work.
>> 
>> We now mark the swiotlb io_tlb_mem as unencrypted/encrypted in the guest
>> using struct io_tlb_mem->unencrypted. I am not clear what we can use for
>> pKVM to conditionalize this so that it works for both protected and
>> unprotected guests.
>
> There is no problem with non-protected guests as they don't use memory
> encryption; my initial thought was that the encrypted/decrypted state is a
> per-pool property which is decided by FW (device-tree).
>

What I meant was that we need a generic way to identify a pKVM guest, so
that we can use it in the conditional above.

-aneesh

^ permalink raw reply

* Re: [PATCH v5 0/3] Switch Arm CCA to use an auxiliary device instead of a platform device
From: Greg KH @ 2026-05-14 12:45 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: linux-coco, linux-arm-kernel, linux-kernel, Catalin Marinas,
	Jeremy Linton, Jonathan Cameron, Lorenzo Pieralisi, Mark Rutland,
	Sudeep Holla, Will Deacon, Steven Price, Suzuki K Poulose
In-Reply-To: <yq5ase7u5kmz.fsf@kernel.org>

On Thu, May 14, 2026 at 04:21:48PM +0530, Aneesh Kumar K.V wrote:
> Greg KH <gregkh@linuxfoundation.org> writes:
> 
> > On Thu, May 14, 2026 at 03:10:27PM +0530, Aneesh Kumar K.V (Arm) wrote:
> >> As discussed here:
> >> https://lore.kernel.org/all/20250728135216.48084-12-aneesh.kumar@kernel.org
> >> 
> >> The general feedback was that a platform device should not be used when
> >> there is no underlying platform resource to represent. The existing CCA
> >> support uses a platform device solely to anchor the TSM interface in the
> >> device hierarchy, which is not an appropriate use of a platform device.
> >> Use an auxiliary device instead to track CCA support.
> >
> > Why an aux device?  If this has no platform resources, please use the
> > faux bus support instead, that is what it is there for.  aux devices are
> > used when you are sharing a real resource among different "child"
> > drivers, and need some way to coordinate that sharing.  If you have no
> > resources, there's nothing to share, so no need for the complexity that
> > aux gives you, just use faux instead.
> >
> 
> We did discuss the choice between faux and auxiliary devices earlier here:
> https://lore.kernel.org/all/20251010135922.GC3833649@ziepe.ca
> 
> To summarize, an auxiliary device was chosen so that we can do module
> autoloading.

That's not a valid reason to use the aux driver, sorry.  If you have
hardware that triggers an auto-module-load, then this is really a
hardware driver.  If it is a "virtual" driver like this, then you need
to explicitly load it on your own.  Don't abuse apis for reasons that
they are not designed for.

thanks,

greg k-h

^ permalink raw reply

* Re: [PATCH v4 04/13] dma: swiotlb: track pool encryption state and honor DMA_ATTR_CC_SHARED
From: Jason Gunthorpe @ 2026-05-14 12:35 UTC (permalink / raw)
  To: Mostafa Saleh
  Cc: Aneesh Kumar K.V (Arm), iommu, linux-arm-kernel, linux-kernel,
	linux-coco, Robin Murphy, Marek Szyprowski, Will Deacon,
	Marc Zyngier, Steven Price, Suzuki K Poulose, Catalin Marinas,
	Jiri Pirko, Petr Tesarik, Alexey Kardashevskiy, Dan Williams,
	Xu Yilun, linuxppc-dev, linux-s390, Madhavan Srinivasan,
	Michael Ellerman, Nicholas Piggin, Christophe Leroy (CS GROUP),
	Alexander Gordeev, Gerald Schaefer, Heiko Carstens, Vasily Gorbik,
	Christian Borntraeger, Sven Schnelle, x86
In-Reply-To: <agW2lzJI-20DyJVe@google.com>

> > How will pKVM signal what kind of memory the DMA needs then?
> > 
> > Does it use set_memory_decrypted()? How can it use
> > set_memory_decrypted() without offering CC_ATTR_MEM_ENCRYPT ?
> 
> pKVM (hypervisor) doesn’t signal anything.
> The VMM when running protected guests will use restricted dma-pools
> for emulated virtio devices in the guest, which gets decrypted by
> the guest kernel and hence shared with the host kernel, and then
> traffic is bounced via the pool.

That really does sound like CC and set_memory_decrypted() to me..

> It’s also worth noting that bouncing here isn't just about visibility.
> Because memory sharing operates at page granularity, bouncing sub-page
> allocations through the restricted pool prevents adjacent, sensitive
> guest data from being exposed to the untrusted host.

That's a somewhat different problem, we have the dev->trusted stuff
that is supposed to deal with this kind of security. We need it for
IOMMU based systems too, eg hot plug thunderbolt should have it.

The CC issue is more that the DMA API can't decrypt random passed-in
memory because doing so often requires changing the PTEs pointing at
the page so it would break everything if done transparently.

> > > I believe that the pool should have a way to control it’s property
> > > (encrypted or decrypted) and that takes priority over whatever
> > > attributes comes from allocation.
> > 
> > We should get here because dma_capable() fails, and then swiotlb needs
> > to return something that makes dma_capable() succeed. Yes, it should
> > return details about the thing it decided, but it shouldn't have been
> > pre-created with some idea how to make dma_capable() work.
> 
> That sounds neat, but at the end we have force_dma_unencrypted() in
> dma_capable() which is just hardcoded to true/false by the platform.

For now, the next step is it becomes per-device and dynamic during the
device lifecycle.

> How is that different from having the state static by the pool?

statically attached pools to the device are not so flexible when
devices have dynamically changing capabilities..

> > If dma_capable() can fail, then swiotlb should know exactly what to do
> > to fix it.
> 
> dma_capable() returns a bool, I don’t think it can know what exactly
> went wrong (based on address, size, attrs, dev...)

Yes, but I think the design is swiotlb is supposed to re-inspect what
is going on against the limits dma_capable checks and then select the
correct remedy..

> While we can debate the aesthetics of the setup , this is
> the existing behaviour for Linux, which existed for years
> and pKVM relies on and is used extensively.
> And, this patch alters that long-standing logic and introduces
> a functional regression.

Yeah, Aneesh needs to do something here, I'm pointing out it is
an entirely separate thing from the CC path we are working on, which is
decoupling CC from relying on force swiotlb.

> We can address this by either adjusting this patch or by changing
> pKVM guests to be more aligned with other CCA guests which is
> something I have been wondering about if it would help reduce
> bouncing.

Every time I look at pkvm I think it is just ARM CCA with a different
design and no access to the unique HW features..

> > If we can make that work then maybe the flows are designed correctly.
> 
> Mmm, I am not sure I understand this one, shouldn’t the device also be
> notified about the switch in memory state, if it expects to read/write
> decrypted memory, how would that work if the kernel changes it to an
> encrypted one?

Nothing on the device changes. In a CC world we put the device in a
T=0 or T=1 state before the driver loads and the expectation from the
DMA API is that the device will only use that T=x DMA type during
operation.

A T=1 state device can access all of memory, private or shared. Any
information the platform may need is encoded in the dma_addr_t or in
the S1 IOPTEs.

So we never need to tell the device driver what kind of memory the DMA
is targeting, and we NEVER expect a device in T=1 mode to have to
issue a T=0 DMA to use the DMA API.

In a pkvm world it should be the same, the S2 table for the SMMU will
control what the device can access, and if the SMMU points to a
"private" or "shared" page is not something the device needs to know
or care about.

Jason

^ permalink raw reply

* Re: [PATCH v4 04/13] dma: swiotlb: track pool encryption state and honor DMA_ATTR_CC_SHARED
From: Mostafa Saleh @ 2026-05-14 12:02 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: iommu, linux-arm-kernel, linux-kernel, linux-coco, Robin Murphy,
	Marek Szyprowski, Will Deacon, Marc Zyngier, Steven Price,
	Suzuki K Poulose, Catalin Marinas, Jiri Pirko, Jason Gunthorpe,
	Petr Tesarik, Alexey Kardashevskiy, Dan Williams, Xu Yilun,
	linuxppc-dev, linux-s390, Madhavan Srinivasan, Michael Ellerman,
	Nicholas Piggin, Christophe Leroy (CS GROUP), Alexander Gordeev,
	Gerald Schaefer, Heiko Carstens, Vasily Gorbik,
	Christian Borntraeger, Sven Schnelle, x86
In-Reply-To: <yq5ah5oaa63h.fsf@kernel.org>

On Thu, May 14, 2026 at 11:24:42AM +0530, Aneesh Kumar K.V wrote:
> Mostafa Saleh <smostafa@google.com> writes:
> 
> > On Tue, May 12, 2026 at 02:33:59PM +0530, Aneesh Kumar K.V (Arm) wrote:
> >> Teach swiotlb to distinguish between encrypted and decrypted bounce
> >> buffer pools, and make allocation and mapping paths select a pool whose
> >> state matches the requested DMA attributes.
> >> 
> >> Add a decrypted flag to io_tlb_mem, initialize it for the default and
> >> restricted pools, and propagate DMA_ATTR_CC_SHARED into swiotlb pool
> >> allocation. Reject swiotlb alloc/map requests when the selected pool does
> >> not match the required encrypted/decrypted state.
> >> 
> >> Also return DMA addresses with the matching phys_to_dma_{encrypted,
> >> unencrypted} helper so the DMA address encoding stays consistent with the
> >> chosen pool.
> >> 
> >> Signed-off-by: Aneesh Kumar K.V (Arm) <aneesh.kumar@kernel.org>
> >> ---
> >>  include/linux/dma-direct.h |  10 ++++
> >>  include/linux/swiotlb.h    |   8 ++-
> >>  kernel/dma/direct.c        |  14 +++--
> >>  kernel/dma/swiotlb.c       | 108 +++++++++++++++++++++++++++----------
> >>  4 files changed, 107 insertions(+), 33 deletions(-)
> >> 
> >> diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
> >> index c249912456f9..94fad4e7c11e 100644
> >> --- a/include/linux/dma-direct.h
> >> +++ b/include/linux/dma-direct.h
> >> @@ -77,6 +77,10 @@ static inline dma_addr_t dma_range_map_max(const struct bus_dma_region *map)
> >>  #ifndef phys_to_dma_unencrypted
> >>  #define phys_to_dma_unencrypted		phys_to_dma
> >>  #endif
> >> +
> >> +#ifndef phys_to_dma_encrypted
> >> +#define phys_to_dma_encrypted		phys_to_dma
> >> +#endif
> >>  #else
> >>  static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
> >>  {
> >> @@ -90,6 +94,12 @@ static inline dma_addr_t phys_to_dma_unencrypted(struct device *dev,
> >>  {
> >>  	return dma_addr_unencrypted(__phys_to_dma(dev, paddr));
> >>  }
> >> +
> >> +static inline dma_addr_t phys_to_dma_encrypted(struct device *dev,
> >> +		phys_addr_t paddr)
> >> +{
> >> +	return dma_addr_encrypted(__phys_to_dma(dev, paddr));
> >> +}
> >>  /*
> >>   * If memory encryption is supported, phys_to_dma will set the memory encryption
> >>   * bit in the DMA address, and dma_to_phys will clear it.
> >> diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
> >> index 3dae0f592063..b3fa3c6e0169 100644
> >> --- a/include/linux/swiotlb.h
> >> +++ b/include/linux/swiotlb.h
> >> @@ -81,6 +81,7 @@ struct io_tlb_pool {
> >>  	struct list_head node;
> >>  	struct rcu_head rcu;
> >>  	bool transient;
> >> +	bool unencrypted;
> >>  #endif
> >>  };
> >>  
> >> @@ -111,6 +112,7 @@ struct io_tlb_mem {
> >>  	struct dentry *debugfs;
> >>  	bool force_bounce;
> >>  	bool for_alloc;
> >> +	bool unencrypted;
> >>  #ifdef CONFIG_SWIOTLB_DYNAMIC
> >>  	bool can_grow;
> >>  	u64 phys_limit;
> >> @@ -282,7 +284,8 @@ static inline void swiotlb_sync_single_for_cpu(struct device *dev,
> >>  extern void swiotlb_print_info(void);
> >>  
> >>  #ifdef CONFIG_DMA_RESTRICTED_POOL
> >> -struct page *swiotlb_alloc(struct device *dev, size_t size);
> >> +struct page *swiotlb_alloc(struct device *dev, size_t size,
> >> +		unsigned long attrs);
> >>  bool swiotlb_free(struct device *dev, struct page *page, size_t size);
> >>  
> >>  static inline bool is_swiotlb_for_alloc(struct device *dev)
> >> @@ -290,7 +293,8 @@ static inline bool is_swiotlb_for_alloc(struct device *dev)
> >>  	return dev->dma_io_tlb_mem->for_alloc;
> >>  }
> >>  #else
> >> -static inline struct page *swiotlb_alloc(struct device *dev, size_t size)
> >> +static inline struct page *swiotlb_alloc(struct device *dev, size_t size,
> >> +		unsigned long attrs)
> >>  {
> >>  	return NULL;
> >>  }
> >> diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
> >> index dc2907439b3d..97ae4fa10521 100644
> >> --- a/kernel/dma/direct.c
> >> +++ b/kernel/dma/direct.c
> >> @@ -104,9 +104,10 @@ static void __dma_direct_free_pages(struct device *dev, struct page *page,
> >>  	dma_free_contiguous(dev, page, size);
> >>  }
> >>  
> >> -static struct page *dma_direct_alloc_swiotlb(struct device *dev, size_t size)
> >> +static struct page *dma_direct_alloc_swiotlb(struct device *dev, size_t size,
> >> +		unsigned long attrs)
> >>  {
> >> -	struct page *page = swiotlb_alloc(dev, size);
> >> +	struct page *page = swiotlb_alloc(dev, size, attrs);
> >>  
> >>  	if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
> >>  		swiotlb_free(dev, page, size);
> >> @@ -266,8 +267,12 @@ void *dma_direct_alloc(struct device *dev, size_t size,
> >>  						  gfp, attrs);
> >>  
> >>  	if (is_swiotlb_for_alloc(dev)) {
> >> -		page = dma_direct_alloc_swiotlb(dev, size);
> >> +		page = dma_direct_alloc_swiotlb(dev, size, attrs);
> >>  		if (page) {
> >> +			/*
> >> +			 * swiotlb allocations comes from pool already marked
> >> +			 * decrypted
> >> +			 */
> >>  			mark_mem_decrypt = false;
> >>  			goto setup_page;
> >>  		}
> >> @@ -374,6 +379,7 @@ void dma_direct_free(struct device *dev, size_t size,
> >>  		return;
> >>  
> >>  	if (swiotlb_find_pool(dev, dma_to_phys(dev, dma_addr)))
> >> +		/* Swiotlb doesn't need a page attribute update on free */
> >>  		mark_mem_encrypted = false;
> >>  
> >>  	if (is_vmalloc_addr(cpu_addr)) {
> >> @@ -403,7 +409,7 @@ struct page *dma_direct_alloc_pages(struct device *dev, size_t size,
> >>  						  gfp, attrs);
> >>  
> >>  	if (is_swiotlb_for_alloc(dev)) {
> >> -		page = dma_direct_alloc_swiotlb(dev, size);
> >> +		page = dma_direct_alloc_swiotlb(dev, size, attrs);
> >>  		if (!page)
> >>  			return NULL;
> >>  
> >> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> >> index ab4eccbaa076..065663be282c 100644
> >> --- a/kernel/dma/swiotlb.c
> >> +++ b/kernel/dma/swiotlb.c
> >> @@ -259,10 +259,21 @@ void __init swiotlb_update_mem_attributes(void)
> >>  	struct io_tlb_pool *mem = &io_tlb_default_mem.defpool;
> >>  	unsigned long bytes;
> >>  
> >> +	/*
> >> +	 * if platform support memory encryption, swiotlb buffers are
> >> +	 * decrypted by default.
> >> +	 */
> >> +	if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
> >> +		io_tlb_default_mem.unencrypted = true;
> >> +	else
> >> +		io_tlb_default_mem.unencrypted = false;
> >> +
> >>  	if (!mem->nslabs || mem->late_alloc)
> >>  		return;
> >>  	bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT);
> >> -	set_memory_decrypted((unsigned long)mem->vaddr, bytes >> PAGE_SHIFT);
> >> +
> >> +	if (io_tlb_default_mem.unencrypted)
> >> +		set_memory_decrypted((unsigned long)mem->vaddr, bytes >> PAGE_SHIFT);
> >>  }
> >>  
> >>  static void swiotlb_init_io_tlb_pool(struct io_tlb_pool *mem, phys_addr_t start,
> >> @@ -505,8 +516,10 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
> >>  	if (!mem->slots)
> >>  		goto error_slots;
> >>  
> >> -	set_memory_decrypted((unsigned long)vstart,
> >> -			     (nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT);
> >> +	if (io_tlb_default_mem.unencrypted)
> >> +		set_memory_decrypted((unsigned long)vstart,
> >> +				     (nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT);
> >> +
> >>  	swiotlb_init_io_tlb_pool(mem, virt_to_phys(vstart), nslabs, true,
> >>  				 nareas);
> >>  	add_mem_pool(&io_tlb_default_mem, mem);
> >> @@ -539,7 +552,9 @@ void __init swiotlb_exit(void)
> >>  	tbl_size = PAGE_ALIGN(mem->end - mem->start);
> >>  	slots_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), mem->nslabs));
> >>  
> >> -	set_memory_encrypted(tbl_vaddr, tbl_size >> PAGE_SHIFT);
> >> +	if (io_tlb_default_mem.unencrypted)
> >> +		set_memory_encrypted(tbl_vaddr, tbl_size >> PAGE_SHIFT);
> >> +
> >>  	if (mem->late_alloc) {
> >>  		area_order = get_order(array_size(sizeof(*mem->areas),
> >>  			mem->nareas));
> >> @@ -563,6 +578,7 @@ void __init swiotlb_exit(void)
> >>   * @gfp:	GFP flags for the allocation.
> >>   * @bytes:	Size of the buffer.
> >>   * @phys_limit:	Maximum allowed physical address of the buffer.
> >> + * @unencrypted: true to allocate unencrypted memory, false for encrypted memory
> >>   *
> >>   * Allocate pages from the buddy allocator. If successful, make the allocated
> >>   * pages decrypted that they can be used for DMA.
> >> @@ -570,7 +586,8 @@ void __init swiotlb_exit(void)
> >>   * Return: Decrypted pages, %NULL on allocation failure, or ERR_PTR(-EAGAIN)
> >>   * if the allocated physical address was above @phys_limit.
> >>   */
> >> -static struct page *alloc_dma_pages(gfp_t gfp, size_t bytes, u64 phys_limit)
> >> +static struct page *alloc_dma_pages(gfp_t gfp, size_t bytes,
> >> +		u64 phys_limit, bool unencrypted)
> >>  {
> >>  	unsigned int order = get_order(bytes);
> >>  	struct page *page;
> >> @@ -588,13 +605,13 @@ static struct page *alloc_dma_pages(gfp_t gfp, size_t bytes, u64 phys_limit)
> >>  	}
> >>  
> >>  	vaddr = phys_to_virt(paddr);
> >> -	if (set_memory_decrypted((unsigned long)vaddr, PFN_UP(bytes)))
> >> +	if (unencrypted && set_memory_decrypted((unsigned long)vaddr, PFN_UP(bytes)))
> >>  		goto error;
> >>  	return page;
> >>  
> >>  error:
> >>  	/* Intentional leak if pages cannot be encrypted again. */
> >> -	if (!set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes)))
> >> +	if (unencrypted && !set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes)))
> >>  		__free_pages(page, order);
> >>  	return NULL;
> >>  }
> >> @@ -604,30 +621,26 @@ static struct page *alloc_dma_pages(gfp_t gfp, size_t bytes, u64 phys_limit)
> >>   * @dev:	Device for which a memory pool is allocated.
> >>   * @bytes:	Size of the buffer.
> >>   * @phys_limit:	Maximum allowed physical address of the buffer.
> >> + * @attrs:	DMA attributes for the allocation.
> >>   * @gfp:	GFP flags for the allocation.
> >>   *
> >>   * Return: Allocated pages, or %NULL on allocation failure.
> >>   */
> >>  static struct page *swiotlb_alloc_tlb(struct device *dev, size_t bytes,
> >> -		u64 phys_limit, gfp_t gfp)
> >> +		u64 phys_limit, unsigned long attrs, gfp_t gfp)
> >>  {
> >>  	struct page *page;
> >> -	unsigned long attrs = 0;
> >>  
> >>  	/*
> >>  	 * Allocate from the atomic pools if memory is encrypted and
> >>  	 * the allocation is atomic, because decrypting may block.
> >>  	 */
> >> -	if (!gfpflags_allow_blocking(gfp) && dev && force_dma_unencrypted(dev)) {
> >> +	if (!gfpflags_allow_blocking(gfp) && (attrs & DMA_ATTR_CC_SHARED)) {
> >>  		void *vaddr;
> >>  
> >>  		if (!IS_ENABLED(CONFIG_DMA_COHERENT_POOL))
> >>  			return NULL;
> >>  
> >> -		/* swiotlb considered decrypted by default */
> >> -		if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
> >> -			attrs = DMA_ATTR_CC_SHARED;
> >> -
> >>  		return dma_alloc_from_pool(dev, bytes, &vaddr, gfp,
> >>  					   attrs, dma_coherent_ok);
> >>  	}
> >> @@ -638,7 +651,8 @@ static struct page *swiotlb_alloc_tlb(struct device *dev, size_t bytes,
> >>  	else if (phys_limit <= DMA_BIT_MASK(32))
> >>  		gfp |= __GFP_DMA32;
> >>  
> >> -	while (IS_ERR(page = alloc_dma_pages(gfp, bytes, phys_limit))) {
> >> +	while (IS_ERR(page = alloc_dma_pages(gfp, bytes, phys_limit,
> >> +					     !!(attrs & DMA_ATTR_CC_SHARED)))) {
> >>  		if (IS_ENABLED(CONFIG_ZONE_DMA32) &&
> >>  		    phys_limit < DMA_BIT_MASK(64) &&
> >>  		    !(gfp & (__GFP_DMA32 | __GFP_DMA)))
> >> @@ -657,15 +671,18 @@ static struct page *swiotlb_alloc_tlb(struct device *dev, size_t bytes,
> >>   * swiotlb_free_tlb() - free a dynamically allocated IO TLB buffer
> >>   * @vaddr:	Virtual address of the buffer.
> >>   * @bytes:	Size of the buffer.
> >> + * @unencrypted: true if @vaddr was allocated decrypted and must be
> >> + *	re-encrypted before being freed
> >>   */
> >> -static void swiotlb_free_tlb(void *vaddr, size_t bytes)
> >> +static void swiotlb_free_tlb(void *vaddr, size_t bytes, bool unencrypted)
> >>  {
> >>  	if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) &&
> >>  	    dma_free_from_pool(NULL, vaddr, bytes))
> >>  		return;
> >>  
> >>  	/* Intentional leak if pages cannot be encrypted again. */
> >> -	if (!set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes)))
> >> +	if (!unencrypted ||
> >> +	    !set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes)))
> >>  		__free_pages(virt_to_page(vaddr), get_order(bytes));
> >>  }
> >>  
> >> @@ -676,6 +693,7 @@ static void swiotlb_free_tlb(void *vaddr, size_t bytes)
> >>   * @nslabs:	Desired (maximum) number of slabs.
> >>   * @nareas:	Number of areas.
> >>   * @phys_limit:	Maximum DMA buffer physical address.
> >> + * @attrs:	DMA attributes for the allocation.
> >>   * @gfp:	GFP flags for the allocations.
> >>   *
> >>   * Allocate and initialize a new IO TLB memory pool. The actual number of
> >> @@ -686,7 +704,8 @@ static void swiotlb_free_tlb(void *vaddr, size_t bytes)
> >>   */
> >>  static struct io_tlb_pool *swiotlb_alloc_pool(struct device *dev,
> >>  		unsigned long minslabs, unsigned long nslabs,
> >> -		unsigned int nareas, u64 phys_limit, gfp_t gfp)
> >> +		unsigned int nareas, u64 phys_limit, unsigned long attrs,
> >> +		gfp_t gfp)
> >>  {
> >>  	struct io_tlb_pool *pool;
> >>  	unsigned int slot_order;
> >> @@ -704,9 +723,10 @@ static struct io_tlb_pool *swiotlb_alloc_pool(struct device *dev,
> >>  	if (!pool)
> >>  		goto error;
> >>  	pool->areas = (void *)pool + sizeof(*pool);
> >> +	pool->unencrypted = !!(attrs & DMA_ATTR_CC_SHARED);
> >>  
> >>  	tlb_size = nslabs << IO_TLB_SHIFT;
> >> -	while (!(tlb = swiotlb_alloc_tlb(dev, tlb_size, phys_limit, gfp))) {
> >> +	while (!(tlb = swiotlb_alloc_tlb(dev, tlb_size, phys_limit, attrs, gfp))) {
> >>  		if (nslabs <= minslabs)
> >>  			goto error_tlb;
> >>  		nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE);
> >> @@ -724,7 +744,8 @@ static struct io_tlb_pool *swiotlb_alloc_pool(struct device *dev,
> >>  	return pool;
> >>  
> >>  error_slots:
> >> -	swiotlb_free_tlb(page_address(tlb), tlb_size);
> >> +	swiotlb_free_tlb(page_address(tlb), tlb_size,
> >> +			 !!(attrs & DMA_ATTR_CC_SHARED));
> >>  error_tlb:
> >>  	kfree(pool);
> >>  error:
> >> @@ -742,7 +763,9 @@ static void swiotlb_dyn_alloc(struct work_struct *work)
> >>  	struct io_tlb_pool *pool;
> >>  
> >>  	pool = swiotlb_alloc_pool(NULL, IO_TLB_MIN_SLABS, default_nslabs,
> >> -				  default_nareas, mem->phys_limit, GFP_KERNEL);
> >> +				  default_nareas, mem->phys_limit,
> >> +				  mem->unencrypted ? DMA_ATTR_CC_SHARED : 0,
> >> +				  GFP_KERNEL);
> >>  	if (!pool) {
> >>  		pr_warn_ratelimited("Failed to allocate new pool");
> >>  		return;
> >> @@ -762,7 +785,7 @@ static void swiotlb_dyn_free(struct rcu_head *rcu)
> >>  	size_t tlb_size = pool->end - pool->start;
> >>  
> >>  	free_pages((unsigned long)pool->slots, get_order(slots_size));
> >> -	swiotlb_free_tlb(pool->vaddr, tlb_size);
> >> +	swiotlb_free_tlb(pool->vaddr, tlb_size, pool->unencrypted);
> >>  	kfree(pool);
> >>  }
> >>  
> >> @@ -1232,6 +1255,7 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
> >>  	nslabs = nr_slots(alloc_size);
> >>  	phys_limit = min_not_zero(*dev->dma_mask, dev->bus_dma_limit);
> >>  	pool = swiotlb_alloc_pool(dev, nslabs, nslabs, 1, phys_limit,
> >> +				  mem->unencrypted ? DMA_ATTR_CC_SHARED : 0,
> >>  				  GFP_NOWAIT);
> >>  	if (!pool)
> >>  		return -1;
> >> @@ -1394,6 +1418,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
> >>  		enum dma_data_direction dir, unsigned long attrs)
> >>  {
> >>  	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
> >> +	bool require_decrypted = false;
> >>  	unsigned int offset;
> >>  	struct io_tlb_pool *pool;
> >>  	unsigned int i;
> >> @@ -1411,6 +1436,16 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
> >>  	if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
> >>  		pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
> >>  
> >> +	/*
> >> +	 * if we are trying to swiotlb map a decrypted paddr or the paddr is encrypted
> >> +	 * but the device is forcing decryption, use decrypted io_tlb_mem
> >> +	 */
> >> +	if ((attrs & DMA_ATTR_CC_SHARED) || force_dma_unencrypted(dev))
> >> +		require_decrypted = true;
> >> +
> >> +	if (require_decrypted != mem->unencrypted)
> >> +		return (phys_addr_t)DMA_MAPPING_ERROR;
> >> +
> >>  	/*
> >>  	 * The default swiotlb memory pool is allocated with PAGE_SIZE
> >>  	 * alignment. If a mapping is requested with larger alignment,
> >> @@ -1608,8 +1643,14 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
> >>  	if (swiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR)
> >>  		return DMA_MAPPING_ERROR;
> >>  
> >> -	/* Ensure that the address returned is DMA'ble */
> >> -	dma_addr = phys_to_dma_unencrypted(dev, swiotlb_addr);
> >> +	/*
> >> +	 * Use the allocated io_tlb_mem encryption type to determine dma addr.
> >> +	 */
> >> +	if (dev->dma_io_tlb_mem->unencrypted)
> >> +		dma_addr = phys_to_dma_unencrypted(dev, swiotlb_addr);
> >> +	else
> >> +		dma_addr = phys_to_dma_encrypted(dev, swiotlb_addr);
> >> +
> >>  	if (unlikely(!dma_capable(dev, dma_addr, size, true))) {
> >>  		__swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, dir,
> >>  			attrs | DMA_ATTR_SKIP_CPU_SYNC,
> >> @@ -1773,7 +1814,8 @@ static inline void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
> >>  
> >>  #ifdef CONFIG_DMA_RESTRICTED_POOL
> >>  
> >> -struct page *swiotlb_alloc(struct device *dev, size_t size)
> >> +struct page *swiotlb_alloc(struct device *dev, size_t size,
> >> +		unsigned long attrs)
> >>  {
> >>  	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
> >>  	struct io_tlb_pool *pool;
> >> @@ -1784,6 +1826,9 @@ struct page *swiotlb_alloc(struct device *dev, size_t size)
> >>  	if (!mem)
> >>  		return NULL;
> >>  
> >> +	if (mem->unencrypted != !!(attrs & DMA_ATTR_CC_SHARED))
> >> +		return NULL;
> >> +
> >>  	align = (1 << (get_order(size) + PAGE_SHIFT)) - 1;
> >>  	index = swiotlb_find_slots(dev, 0, size, align, &pool);
> >>  	if (index == -1)
> >> @@ -1853,9 +1898,18 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
> >>  			kfree(mem);
> >>  			return -ENOMEM;
> >>  		}
> >> +		/*
> >> +		 * if platform supports memory encryption,
> >> +		 * restricted mem pool is decrypted by default
> >> +		 */
> >> +		if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) {
> >> +			mem->unencrypted = true;
> >> +			set_memory_decrypted((unsigned long)phys_to_virt(rmem->base),
> >> +					     rmem->size >> PAGE_SHIFT);
> >> +		} else {
> >> +			mem->unencrypted = false;
> >> +		}
> >
> > This breaks pKVM as it doesn’t set CC_ATTR_MEM_ENCRYPT, so all virtio
> > traffic now fails.
> >
> > Also, by design, some drivers are clueless about bouncing, so
> > I believe that the pool should have a way to control its property
> > (encrypted or decrypted) and that takes priority over whatever
> > attributes come from allocation.
> > And that brings us to the same point whether it’s better to return
> > the memory along with its state or we pass the requested state.
> > I think for other cases it’s fine for the device/DMA-API to dictate
> > the attrs, but not in restricted-dma case, the firmware just knows better.
> >
> 
> Is it that the pKVM guest kernel does not have awareness of
> encrypted/decrypted DMA allocations? Instead, the firmware attaches
> hypervisor-shared pages to the device via restricted-dma-pool? The
> kernel then has swiotlb->for_alloc = true, and hence all DMA allocations
> go through the restricted-dma-pool?

Yes.

> 
> Given that pKVM supports pkvm_set_memory_encrypted() and
> pkvm_set_memory_decrypted(), can we consider adding CC_ATTR_MEM_ENCRYPT
> support to pKVM? It would also be good to investigate whether we can set
> force_dma_unencrypted(dev) to true where needed.

I was looking into that, but it didn't work because
force_dma_unencrypted() is broken with restricted-dma due to the
double decryption issue, that's when I sent my first series [1]

Maybe we should land some basic fixes for that path so we can
convert pKVM, and then do the full rework.

I will revive my old work and see if I can send an RFC.

[1] https://lore.kernel.org/all/20260305170335.963568-1-smostafa@google.com/

> 
> I agree that this patch, as it stands, can break pKVM because we are now
> missing the set_memory_decrypted() call required for pKVM to work.
> 
> We now mark the swiotlb io_tlb_mem as unencrypted/encrypted in the guest
> using struct io_tlb_mem->unencrypted. I am not clear what we can use for
> pKVM to conditionalize this so that it works for both protected and
> unprotected guests.

There is no problem with non-protected guests as they don't use memory
encryption. My initial thought was that the encrypted/decrypted state is
a per-pool property which is decided by FW (device-tree).

Thanks,
Mostafa

> 
> -aneesh
> 

^ permalink raw reply

* Re: [PATCH v4 04/13] dma: swiotlb: track pool encryption state and honor DMA_ATTR_CC_SHARED
From: Mostafa Saleh @ 2026-05-14 11:48 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Aneesh Kumar K.V (Arm), iommu, linux-arm-kernel, linux-kernel,
	linux-coco, Robin Murphy, Marek Szyprowski, Will Deacon,
	Marc Zyngier, Steven Price, Suzuki K Poulose, Catalin Marinas,
	Jiri Pirko, Petr Tesarik, Alexey Kardashevskiy, Dan Williams,
	Xu Yilun, linuxppc-dev, linux-s390, Madhavan Srinivasan,
	Michael Ellerman, Nicholas Piggin, Christophe Leroy (CS GROUP),
	Alexander Gordeev, Gerald Schaefer, Heiko Carstens, Vasily Gorbik,
	Christian Borntraeger, Sven Schnelle, x86
In-Reply-To: <20260513172450.GR7702@ziepe.ca>

On Wed, May 13, 2026 at 02:24:50PM -0300, Jason Gunthorpe wrote:
> On Wed, May 13, 2026 at 02:27:14PM +0000, Mostafa Saleh wrote:
> 
> > > +		/*
> > > +		 * if platform supports memory encryption,
> > > +		 * restricted mem pool is decrypted by default
> > > +		 */
> > > +		if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) {
> > > +			mem->unencrypted = true;
> > > +			set_memory_decrypted((unsigned long)phys_to_virt(rmem->base),
> > > +					     rmem->size >> PAGE_SHIFT);
> > > +		} else {
> > > +			mem->unencrypted = false;
> > > +		}
> >
> > This breaks pKVM as it doesn’t set CC_ATTR_MEM_ENCRYPT, so all virtio
> > traffic now fails.
> 
> How will pKVM signal what kind of memory the DMA needs then?
> 
> Does it use set_memory_decrypted()? How can it use
> set_memory_decrypted() without offering CC_ATTR_MEM_ENCRYPT ?

pKVM (hypervisor) doesn’t signal anything.
The VMM, when running protected guests, will use restricted dma-pools
for emulated virtio devices in the guest, which get decrypted by
the guest kernel and hence shared with the host kernel, and then
traffic is bounced via the pool.

It’s also worth noting that bouncing here isn't just about visibility.
Because memory sharing operates at page granularity, bouncing sub-page
allocations through the restricted pool prevents adjacent, sensitive
guest data from being exposed to the untrusted host.

> 
> > Also, by design, some drivers are clueless about bouncing, so
> 
> Oh? What does this mean? We take quite a dim view of drivers mis-using
> the DMA API..

Maybe clueless is not the right word, I mean when virtio drivers use
the DMA API they don’t know whether it’s going to bounce or not as
that is decided by dma-direct (and in other cases by dma-iommu,
but not for pKVM).

> 
> > I believe that the pool should have a way to control its property
> > (encrypted or decrypted) and that takes priority over whatever
> > attributes come from allocation.
> 
> We should get here because dma_capable() fails, and then swiotlb needs
> to return something that makes dma_capable() succeed. Yes, it should
> return details about the thing it decided, but it shouldn't have been
> pre-created with some idea how to make dma_capable() work.

That sounds neat, but at the end we have force_dma_unencrypted() in
dma_capable() which is just hardcoded to true/false by the platform.
How is that different from having the state static by the pool?

> 
> If dma_capable() can fail, then swiotlb should know exactly what to do
> to fix it.

dma_capable() returns a bool, I don’t think it can know what exactly
went wrong (based on address, size, attrs, dev...)

> 
> If pkvm wants to use the hacky scheme where you force a swiotlb pool
> configuration during arch init with force swiotlb that's a somewhat
> different flow and, sure the forced pool should force do whatever it
> is forced to.
> 
> But let's try to keep them separated in the discussion..

While we can debate the aesthetics of the setup, this is
the existing behaviour for Linux, which has existed for years,
which pKVM relies on, and which is used extensively.
This patch alters that long-standing logic and introduces
a functional regression.

We can address this by either adjusting this patch or by changing
pKVM guests to be more aligned with other CCA guests which is
something I have been wondering about if it would help reduce
bouncing.

> 
> > And that brings us to the same point whether it’s better to return
> > the memory along with its state or we pass the requested state.
> > I think for other cases it’s fine for the device/DMA-API to dictate
> > the attrs, but not in the restricted-dma case, the firmware just knows better.
> 
> The memory type must be returned back at some level so downstream
> things can do the right transformation of the phys_addr_t.

Agreed, I believe that will be needed at least for
SWIOTLB/restricted-dma -> dma-API interactions.

> 
> One of the aspirational CC things that should work is a T=1 device
> tries to DMA from a decrypted page, finds the address is above the dma
> limit of the device, so it bounces it with SWIOTLB to an encrypted low
> address page and then the DMA API internal flow switches from working
> with decrypted to encrypted phys_addr_t.
> 
> If we can make that work then maybe the flows are designed correctly.

Mmm, I am not sure I understand this one, shouldn’t the device also be
notified about the switch in memory state, if it expects to read/write
decrypted memory, how would that work if the kernel changes it to an
encrypted one?

Thanks,
Mostafa
> 
> Jason

^ permalink raw reply

* Re: [PATCH v5 1/3] firmware: smccc: coco: Manage arm-smccc platform device and CCA auxiliary drivers
From: Suzuki K Poulose @ 2026-05-14 11:04 UTC (permalink / raw)
  To: Aneesh Kumar K.V (Arm), linux-coco, linux-arm-kernel,
	linux-kernel
  Cc: Catalin Marinas, Greg KH, Jeremy Linton, Jonathan Cameron,
	Lorenzo Pieralisi, Mark Rutland, Sudeep Holla, Will Deacon,
	Steven Price
In-Reply-To: <20260514094030.42495-2-aneesh.kumar@kernel.org>

Hi Aneesh

On 14/05/2026 10:40, Aneesh Kumar K.V (Arm) wrote:
> Make the SMCCC driver responsible for registering the arm-smccc platform
> device and after confirming the relevant SMCCC function IDs, create
> the arm_cca_guest auxiliary device.
> 

There are a few changes squashed into this patch. Could we please
split the patch in the following order?

1. Add platform device for arm-smccc
2. Move TRNG to Auxiliary Device - (Even though it is a later patch,
move it before the RSI changes)
3. Move RSI dev to Auxiliary Device
4. Add the firmware sysfs ABI.

That way, first two could be merged while we figure out (3) and (4)


> Also update the arm-cca-guest driver to use the auxiliary device
> interface instead of the platform device (arm-cca-dev). The removal of
> the platform device registration will follow in a subsequent patch,
> allowing this change to be applied without immediately breaking existing
> userspace dependencies [1].
> 
> [1] https://lore.kernel.org/all/4a7d84b2-2ec4-4773-a2d5-7b63d5c683cf@arm.com
> 
> Signed-off-by: Aneesh Kumar K.V (Arm) <aneesh.kumar@kernel.org>
> ---
>   arch/arm64/include/asm/rsi.h                  |  2 +-
>   arch/arm64/kernel/rsi.c                       |  2 +-
>   drivers/firmware/smccc/Kconfig                |  1 +
>   drivers/firmware/smccc/Makefile               |  1 +
>   drivers/firmware/smccc/rmm.c                  | 24 ++++++++
>   drivers/firmware/smccc/rmm.h                  | 17 ++++++
>   drivers/firmware/smccc/smccc.c                | 17 ++++++
>   drivers/virt/coco/arm-cca-guest/Kconfig       |  1 +
>   drivers/virt/coco/arm-cca-guest/Makefile      |  2 +
>   .../{arm-cca-guest.c => arm-cca.c}            | 59 +++++++++----------
>   10 files changed, 94 insertions(+), 32 deletions(-)
>   create mode 100644 drivers/firmware/smccc/rmm.c
>   create mode 100644 drivers/firmware/smccc/rmm.h
>   rename drivers/virt/coco/arm-cca-guest/{arm-cca-guest.c => arm-cca.c} (84%)
> 
> diff --git a/arch/arm64/include/asm/rsi.h b/arch/arm64/include/asm/rsi.h
> index 88b50d660e85..2d2d363aaaee 100644
> --- a/arch/arm64/include/asm/rsi.h
> +++ b/arch/arm64/include/asm/rsi.h
> @@ -10,7 +10,7 @@
>   #include <linux/jump_label.h>
>   #include <asm/rsi_cmds.h>
>   
> -#define RSI_PDEV_NAME "arm-cca-dev"
> +#define RSI_DEV_NAME "arm-rsi-dev"
>   
>   DECLARE_STATIC_KEY_FALSE(rsi_present);
>   
> diff --git a/arch/arm64/kernel/rsi.c b/arch/arm64/kernel/rsi.c
> index 9e846ce4ef9c..8380e5ba88d2 100644
> --- a/arch/arm64/kernel/rsi.c
> +++ b/arch/arm64/kernel/rsi.c
> @@ -161,7 +161,7 @@ void __init arm64_rsi_init(void)
>   }
>   
>   static struct platform_device rsi_dev = {
> -	.name = RSI_PDEV_NAME,
> +	.name = "arm-cca-dev",
>   	.id = PLATFORM_DEVID_NONE
>   };
>   
> diff --git a/drivers/firmware/smccc/Kconfig b/drivers/firmware/smccc/Kconfig
> index 15e7466179a6..2b6984757241 100644
> --- a/drivers/firmware/smccc/Kconfig
> +++ b/drivers/firmware/smccc/Kconfig
> @@ -8,6 +8,7 @@ config HAVE_ARM_SMCCC
>   config HAVE_ARM_SMCCC_DISCOVERY
>   	bool
>   	depends on ARM_PSCI_FW
> +	select AUXILIARY_BUS
>   	default y
>   	help
>   	 SMCCC v1.0 lacked discoverability and hence PSCI v1.0 was updated
> diff --git a/drivers/firmware/smccc/Makefile b/drivers/firmware/smccc/Makefile
> index 40d19144a860..146dc3c03c20 100644
> --- a/drivers/firmware/smccc/Makefile
> +++ b/drivers/firmware/smccc/Makefile
> @@ -2,3 +2,4 @@
>   #
>   obj-$(CONFIG_HAVE_ARM_SMCCC_DISCOVERY)	+= smccc.o kvm_guest.o
>   obj-$(CONFIG_ARM_SMCCC_SOC_ID)	+= soc_id.o
> +obj-$(CONFIG_ARM64) += rmm.o
> diff --git a/drivers/firmware/smccc/rmm.c b/drivers/firmware/smccc/rmm.c
> new file mode 100644
> index 000000000000..728338cb5a22
> --- /dev/null
> +++ b/drivers/firmware/smccc/rmm.c
> @@ -0,0 +1,24 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (C) 2026 Arm Limited
> + */
> +
> +#include <linux/auxiliary_bus.h>
> +
> +#include "rmm.h"
> +
> +void __init register_rsi_device(struct platform_device *pdev)
> +{
> +	unsigned long ret;
> +	unsigned long ver_lower, ver_higher;
> +
> +	if (arm_smccc_1_1_get_conduit() != SMCCC_CONDUIT_SMC)
> +		return;
> +
> +	ret = rsi_request_version(RSI_ABI_VERSION, &ver_lower, &ver_higher);
> +	if (ret != RSI_SUCCESS)
> +		return;
> +
> +	__devm_auxiliary_device_create(&pdev->dev,
> +				       "arm_cca_guest", RSI_DEV_NAME, NULL, 0);
> +}
> diff --git a/drivers/firmware/smccc/rmm.h b/drivers/firmware/smccc/rmm.h
> new file mode 100644
> index 000000000000..a47a650d4f51
> --- /dev/null
> +++ b/drivers/firmware/smccc/rmm.h
> @@ -0,0 +1,17 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _SMCCC_RMM_H
> +#define _SMCCC_RMM_H
> +
> +#include <linux/platform_device.h>
> +
> +#ifdef CONFIG_ARM64
> +#include <asm/rsi_cmds.h>
> +void __init register_rsi_device(struct platform_device *pdev);
> +#else
> +
> +static void __init register_rsi_device(struct platform_device *pdev)
> +{
> +
> +}
> +#endif
> +#endif
> diff --git a/drivers/firmware/smccc/smccc.c b/drivers/firmware/smccc/smccc.c
> index bdee057db2fd..eb077b9aa6da 100644
> --- a/drivers/firmware/smccc/smccc.c
> +++ b/drivers/firmware/smccc/smccc.c
> @@ -12,6 +12,8 @@
>   #include <linux/platform_device.h>
>   #include <asm/archrandom.h>
>   
> +#include "rmm.h"
> +
>   static u32 smccc_version = ARM_SMCCC_VERSION_1_0;
>   static enum arm_smccc_conduit smccc_conduit = SMCCC_CONDUIT_NONE;
>   
> @@ -85,6 +87,21 @@ static int __init smccc_devices_init(void)
>   {
>   	struct platform_device *pdev;
>   
> +	if (smccc_conduit == SMCCC_CONDUIT_NONE)
> +		return 0;
> +
> +	pdev = platform_device_register_simple("arm-smccc",
> +					PLATFORM_DEVID_NONE, NULL, 0);
> +	if (IS_ERR(pdev)) {
> +		pr_err("arm-smccc: could not register device: %ld\n", PTR_ERR(pdev));
> +	} else {
> +		/*
> +		 * Register the RMI and RSI devices only when firmware exposes
> +		 * the required SMCCC function IDs at a supported revision.
> +		 */
> +		register_rsi_device(pdev);
> +	}
> +
>   	if (smccc_trng_available) {
>   		pdev = platform_device_register_simple("smccc_trng", -1,
>   						       NULL, 0);
> diff --git a/drivers/virt/coco/arm-cca-guest/Kconfig b/drivers/virt/coco/arm-cca-guest/Kconfig
> index 3f0f013f03f1..a42359a90558 100644
> --- a/drivers/virt/coco/arm-cca-guest/Kconfig
> +++ b/drivers/virt/coco/arm-cca-guest/Kconfig
> @@ -2,6 +2,7 @@ config ARM_CCA_GUEST
>   	tristate "Arm CCA Guest driver"
>   	depends on ARM64
>   	select TSM_REPORTS
> +	select AUXILIARY_BUS
>   	help
>   	  The driver provides userspace interface to request and
>   	  attestation report from the Realm Management Monitor(RMM).
> diff --git a/drivers/virt/coco/arm-cca-guest/Makefile b/drivers/virt/coco/arm-cca-guest/Makefile
> index 69eeba08e98a..75a120e24fda 100644
> --- a/drivers/virt/coco/arm-cca-guest/Makefile
> +++ b/drivers/virt/coco/arm-cca-guest/Makefile
> @@ -1,2 +1,4 @@
>   # SPDX-License-Identifier: GPL-2.0-only
>   obj-$(CONFIG_ARM_CCA_GUEST) += arm-cca-guest.o
> +
> +arm-cca-guest-y +=  arm-cca.o
> diff --git a/drivers/virt/coco/arm-cca-guest/arm-cca-guest.c b/drivers/virt/coco/arm-cca-guest/arm-cca.c
> similarity index 84%
> rename from drivers/virt/coco/arm-cca-guest/arm-cca-guest.c
> rename to drivers/virt/coco/arm-cca-guest/arm-cca.c
> index 0c9ea24a200c..7daada072cc0 100644
> --- a/drivers/virt/coco/arm-cca-guest/arm-cca-guest.c
> +++ b/drivers/virt/coco/arm-cca-guest/arm-cca.c
> @@ -3,6 +3,7 @@
>    * Copyright (C) 2023 ARM Ltd.
>    */
>   
> +#include <linux/auxiliary_bus.h>
>   #include <linux/arm-smccc.h>
>   #include <linux/cc_platform.h>
>   #include <linux/kernel.h>
> @@ -181,52 +182,50 @@ static int arm_cca_report_new(struct tsm_report *report, void *data)
>   	return ret;
>   }
>   
> -static const struct tsm_report_ops arm_cca_tsm_ops = {
> +static const struct tsm_report_ops arm_cca_tsm_report_ops = {
>   	.name = KBUILD_MODNAME,
>   	.report_new = arm_cca_report_new,
>   };
>   
> -/**
> - * arm_cca_guest_init - Register with the Trusted Security Module (TSM)
> - * interface.
> - *
> - * Return:
> - * * %0        - Registered successfully with the TSM interface.
> - * * %-ENODEV  - The execution context is not an Arm Realm.
> - * * %-EBUSY   - Already registered.
> - */
> -static int __init arm_cca_guest_init(void)
> +static void unregister_cca_tsm_report(void *data)
> +{
> +	tsm_report_unregister(&arm_cca_tsm_report_ops);
> +}
> +
> +static int cca_devsec_tsm_probe(struct auxiliary_device *adev,

super minor nit: While I understand you plan to use this for DEV SEC TSM
in the future, could we retain the generic TSM name usage ?

> +		const struct auxiliary_device_id *id)
>   {
>   	int ret;
>   
>   	if (!is_realm_world())
>   		return -ENODEV;
>   
> -	ret = tsm_report_register(&arm_cca_tsm_ops, NULL);
> -	if (ret < 0)
> -		pr_err("Error %d registering with TSM\n", ret);
> +	ret = tsm_report_register(&arm_cca_tsm_report_ops, NULL);
> +	if (ret < 0) {
> +		dev_err_probe(&adev->dev, ret, "Error registering with TSM\n");
> +		return ret;
> +	}
>   
> -	return ret;
> -}
> -module_init(arm_cca_guest_init);
> +	ret = devm_add_action_or_reset(&adev->dev, unregister_cca_tsm_report, NULL);
> +	if (ret < 0) {
> +		dev_err_probe(&adev->dev, ret, "Error registering devm action\n");
> +		return ret;
> +	}
>   
> -/**
> - * arm_cca_guest_exit - unregister with the Trusted Security Module (TSM)
> - * interface.
> - */
> -static void __exit arm_cca_guest_exit(void)
> -{
> -	tsm_report_unregister(&arm_cca_tsm_ops);
> +	return 0;
>   }
> -module_exit(arm_cca_guest_exit);
>   
> -/* modalias, so userspace can autoload this module when RSI is available */
> -static const struct platform_device_id arm_cca_match[] __maybe_unused = {
> -	{ RSI_PDEV_NAME, 0},
> -	{ }
> +static const struct auxiliary_device_id cca_devsec_tsm_id_table[] = {

same as above, s/devsec_// ?

Suzuki


> +	{ .name =  KBUILD_MODNAME "." RSI_DEV_NAME },
> +	{}
>   };
> +MODULE_DEVICE_TABLE(auxiliary, cca_devsec_tsm_id_table);
>   
> -MODULE_DEVICE_TABLE(platform, arm_cca_match);
> +static struct auxiliary_driver cca_devsec_tsm_driver = {
> +	.probe = cca_devsec_tsm_probe,
> +	.id_table = cca_devsec_tsm_id_table,
> +};
> +module_auxiliary_driver(cca_devsec_tsm_driver);
>   MODULE_AUTHOR("Sami Mujawar <sami.mujawar@arm.com>");
>   MODULE_DESCRIPTION("Arm CCA Guest TSM Driver");
>   MODULE_LICENSE("GPL");


^ permalink raw reply

* Re: [PATCH v5 0/3] Switch Arm CCA to use an auxiliary device instead of a platform device
From: Aneesh Kumar K.V @ 2026-05-14 10:51 UTC (permalink / raw)
  To: Greg KH
  Cc: linux-coco, linux-arm-kernel, linux-kernel, Catalin Marinas,
	Jeremy Linton, Jonathan Cameron, Lorenzo Pieralisi, Mark Rutland,
	Sudeep Holla, Will Deacon, Steven Price, Suzuki K Poulose
In-Reply-To: <2026051453-batting-delighted-0a57@gregkh>

Greg KH <gregkh@linuxfoundation.org> writes:

> On Thu, May 14, 2026 at 03:10:27PM +0530, Aneesh Kumar K.V (Arm) wrote:
>> As discussed here:
>> https://lore.kernel.org/all/20250728135216.48084-12-aneesh.kumar@kernel.org
>> 
>> The general feedback was that a platform device should not be used when
>> there is no underlying platform resource to represent. The existing CCA
>> support uses a platform device solely to anchor the TSM interface in the
>> device hierarchy, which is not an appropriate use of a platform device.
>> Use an auxiliary device instead to track CCA support.
>
> Why an aux device?  If this has no platform resources, please use the
> faux bus support instead, that is what it is there for.  aux devices are
> used when you are sharing a real resource among different "child"
> drivers, and need some way to coordinate that sharing.  If you have no
> resources, there's nothing to share, so no need for the complexity that
> aux gives you, just use faux instead.
>

We did discuss the choice between faux and auxiliary devices earlier here:
https://lore.kernel.org/all/20251010135922.GC3833649@ziepe.ca

To summarize, an auxiliary device was chosen so that we can do module
autoloading.

-aneesh

^ permalink raw reply

* Re: [PATCH v5 0/3] Switch Arm CCA to use an auxiliary device instead of a platform device
From: Greg KH @ 2026-05-14 10:19 UTC (permalink / raw)
  To: Aneesh Kumar K.V (Arm)
  Cc: linux-coco, linux-arm-kernel, linux-kernel, Catalin Marinas,
	Jeremy Linton, Jonathan Cameron, Lorenzo Pieralisi, Mark Rutland,
	Sudeep Holla, Will Deacon, Steven Price, Suzuki K Poulose
In-Reply-To: <20260514094030.42495-1-aneesh.kumar@kernel.org>

On Thu, May 14, 2026 at 03:10:27PM +0530, Aneesh Kumar K.V (Arm) wrote:
> As discussed here:
> https://lore.kernel.org/all/20250728135216.48084-12-aneesh.kumar@kernel.org
> 
> The general feedback was that a platform device should not be used when
> there is no underlying platform resource to represent. The existing CCA
> support uses a platform device solely to anchor the TSM interface in the
> device hierarchy, which is not an appropriate use of a platform device.
> Use an auxiliary device instead to track CCA support.

Why an aux device?  If this has no platform resources, please use the
faux bus support instead, that is what it is there for.  aux devices are
used when you are sharing a real resource among different "child"
drivers, and need some way to coordinate that sharing.  If you have no
resources, there's nothing to share, so no need for the complexity that
aux gives you, just use faux instead.

thanks,

greg k-h

^ permalink raw reply

* [PATCH v5 3/3] coco: guest: arm64: Replace dummy CCA device with sysfs ABI
From: Aneesh Kumar K.V (Arm) @ 2026-05-14  9:40 UTC (permalink / raw)
  To: linux-coco, linux-arm-kernel, linux-kernel
  Cc: Aneesh Kumar K.V (Arm), Catalin Marinas, Greg KH, Jeremy Linton,
	Jonathan Cameron, Lorenzo Pieralisi, Mark Rutland, Sudeep Holla,
	Will Deacon, Steven Price, Suzuki K Poulose
In-Reply-To: <20260514094030.42495-1-aneesh.kumar@kernel.org>

The SMCCC firmware driver now creates the arm-smccc platform device and
instantiates the CCA auxiliary devices once the RSI ABI is discovered. The
arm64-specific arm-cca-dev platform device stub is therefore no longer
needed.

However, userspace has used the arm-cca-dev platform device to detect Arm
CCA Realm guests [1]. Removing it without a replacement would break that
detection and would also leave userspace depending on kernel device-model
details.

Add /sys/firmware/cca/realm_guest as a stable, architecture-provided ABI
for detecting whether the kernel is running as an Arm CCA Realm guest. The
file returns 1 in Realm world and 0 otherwise, similar to the existing s390
/sys/firmware/uv/prot_virt_guest interface for protected virtualization
guests.

Remove the dummy arm-cca-dev registration now that userspace has a
dedicated CCA Realm guest indicator, and document the new ABI in
Documentation/ABI/testing/sysfs-firmware-cca.

[1] https://lore.kernel.org/all/4a7d84b2-2ec4-4773-a2d5-7b63d5c683cf@arm.com

Signed-off-by: Aneesh Kumar K.V (Arm) <aneesh.kumar@kernel.org>
---
 Documentation/ABI/testing/sysfs-firmware-cca | 10 +++++
 arch/arm64/kernel/rsi.c                      | 39 +++++++++++++++-----
 2 files changed, 39 insertions(+), 10 deletions(-)
 create mode 100644 Documentation/ABI/testing/sysfs-firmware-cca

diff --git a/Documentation/ABI/testing/sysfs-firmware-cca b/Documentation/ABI/testing/sysfs-firmware-cca
new file mode 100644
index 000000000000..bf177d636b92
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-firmware-cca
@@ -0,0 +1,10 @@
+What:		/sys/firmware/cca/realm_guest
+Date:		May 2026
+Contact:	Linux ARM Kernel Mailing list <linux-arm-kernel@lists.infradead.org>
+Description:	Read-only. Indicates whether the kernel is running as an
+		Arm Confidential Compute Architecture (CCA) Realm guest.
+
+		The value is one of:
+
+		0: the kernel is not running as a Realm guest
+		1: the kernel is running as a Realm guest
diff --git a/arch/arm64/kernel/rsi.c b/arch/arm64/kernel/rsi.c
index 8380e5ba88d2..a3e9b3bb5679 100644
--- a/arch/arm64/kernel/rsi.c
+++ b/arch/arm64/kernel/rsi.c
@@ -9,6 +9,8 @@
 #include <linux/swiotlb.h>
 #include <linux/cc_platform.h>
 #include <linux/platform_device.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
 
 #include <asm/io.h>
 #include <asm/mem_encrypt.h>
@@ -16,6 +18,7 @@
 #include <asm/rsi.h>
 
 static struct realm_config config;
+static struct kobject *cca_kobj;
 
 unsigned long prot_ns_shared;
 EXPORT_SYMBOL(prot_ns_shared);
@@ -160,17 +163,33 @@ void __init arm64_rsi_init(void)
 	static_branch_enable(&rsi_present);
 }
 
-static struct platform_device rsi_dev = {
-	.name = "arm-cca-dev",
-	.id = PLATFORM_DEVID_NONE
+static ssize_t cca_is_realm_guest(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%d\n", is_realm_world());
+}
+
+static struct kobj_attribute cca_realm_guest =
+	__ATTR(realm_guest, 0444, cca_is_realm_guest, NULL);
+
+static const struct attribute *cca_realm_attrs[] = {
+	&cca_realm_guest.attr,
+	NULL,
 };
 
-static int __init arm64_create_dummy_rsi_dev(void)
+static int __init realm_sysfs_init(void)
 {
-	if (is_realm_world() &&
-	    platform_device_register(&rsi_dev))
-		pr_err("failed to register rsi platform device\n");
-	return 0;
-}
+	int ret;
+
+	cca_kobj = kobject_create_and_add("cca", firmware_kobj);
+	if (!cca_kobj)
+		return -ENOMEM;
 
-arch_initcall(arm64_create_dummy_rsi_dev)
+	ret = sysfs_create_files(cca_kobj, cca_realm_attrs);
+	if (!ret)
+		return 0;
+
+	kobject_put(cca_kobj);
+	return ret;
+}
+device_initcall(realm_sysfs_init);
-- 
2.43.0


^ permalink raw reply related

* [PATCH v5 2/3] hwrng: arm_smccc_trng: Register as an auxiliary device
From: Aneesh Kumar K.V (Arm) @ 2026-05-14  9:40 UTC (permalink / raw)
  To: linux-coco, linux-arm-kernel, linux-kernel
  Cc: Aneesh Kumar K.V (Arm), Catalin Marinas, Greg KH, Jeremy Linton,
	Jonathan Cameron, Lorenzo Pieralisi, Mark Rutland, Sudeep Holla,
	Will Deacon, Steven Price, Suzuki K Poulose
In-Reply-To: <20260514094030.42495-1-aneesh.kumar@kernel.org>

The SMCCC TRNG interface is a firmware-provided function rather than a
standalone platform device. Register it as an auxiliary device under the
arm-smccc platform device and convert the hwrng driver to an auxiliary
driver.

This keeps the TRNG device tied to the SMCCC core device while preserving
module autoloading through the auxiliary device ID table.

The conversion changes the device path from the old platform device path,
but no userspace dependency on that path was found. This was confirmed with
a Debian Code Search lookup for the existing platform device name/path.

Signed-off-by: Aneesh Kumar K.V (Arm) <aneesh.kumar@kernel.org>
---
 drivers/char/hw_random/arm_smccc_trng.c | 25 ++++++++++++++-----------
 drivers/firmware/smccc/smccc.c          | 24 +++++++++++++-----------
 2 files changed, 27 insertions(+), 22 deletions(-)

diff --git a/drivers/char/hw_random/arm_smccc_trng.c b/drivers/char/hw_random/arm_smccc_trng.c
index dcb8e7f37f25..5d56fcbcefa0 100644
--- a/drivers/char/hw_random/arm_smccc_trng.c
+++ b/drivers/char/hw_random/arm_smccc_trng.c
@@ -16,7 +16,7 @@
 #include <linux/device.h>
 #include <linux/hw_random.h>
 #include <linux/module.h>
-#include <linux/platform_device.h>
+#include <linux/auxiliary_bus.h>
 #include <linux/arm-smccc.h>
 
 #ifdef CONFIG_ARM64
@@ -94,29 +94,32 @@ static int smccc_trng_read(struct hwrng *rng, void *data, size_t max, bool wait)
 	return copied;
 }
 
-static int smccc_trng_probe(struct platform_device *pdev)
+static int smccc_trng_probe(struct auxiliary_device *adev,
+		const struct auxiliary_device_id *id)
 {
 	struct hwrng *trng;
 
-	trng = devm_kzalloc(&pdev->dev, sizeof(*trng), GFP_KERNEL);
+	trng = devm_kzalloc(&adev->dev, sizeof(*trng), GFP_KERNEL);
 	if (!trng)
 		return -ENOMEM;
 
 	trng->name = "smccc_trng";
 	trng->read = smccc_trng_read;
 
-	return devm_hwrng_register(&pdev->dev, trng);
+	return devm_hwrng_register(&adev->dev, trng);
 }
 
-static struct platform_driver smccc_trng_driver = {
-	.driver = {
-		.name		= "smccc_trng",
-	},
-	.probe		= smccc_trng_probe,
+static const struct auxiliary_device_id smccc_trng_id_table[] = {
+	{ .name =  KBUILD_MODNAME ".smccc_trng" },
+	{}
 };
-module_platform_driver(smccc_trng_driver);
+MODULE_DEVICE_TABLE(auxiliary, smccc_trng_id_table);
 
-MODULE_ALIAS("platform:smccc_trng");
+static struct auxiliary_driver smccc_trng_driver = {
+	.probe	  = smccc_trng_probe,
+	.id_table = smccc_trng_id_table,
+};
+module_auxiliary_driver(smccc_trng_driver);
 MODULE_AUTHOR("Andre Przywara");
 MODULE_DESCRIPTION("Arm SMCCC TRNG firmware interface support");
 MODULE_LICENSE("GPL");
diff --git a/drivers/firmware/smccc/smccc.c b/drivers/firmware/smccc/smccc.c
index eb077b9aa6da..49ac8172def4 100644
--- a/drivers/firmware/smccc/smccc.c
+++ b/drivers/firmware/smccc/smccc.c
@@ -10,6 +10,7 @@
 #include <linux/arm-smccc.h>
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
+#include <linux/auxiliary_bus.h>
 #include <asm/archrandom.h>
 
 #include "rmm.h"
@@ -94,20 +95,21 @@ static int __init smccc_devices_init(void)
 					PLATFORM_DEVID_NONE, NULL, 0);
 	if (IS_ERR(pdev)) {
 		pr_err("arm-smccc: could not register device: %ld\n", PTR_ERR(pdev));
-	} else {
-		/*
-		 * Register the RMI and RSI devices only when firmware exposes
-		 * the required SMCCC function IDs at a supported revision.
-		 */
-		register_rsi_device(pdev);
+		return 0;
 	}
+	/*
+	 * Register the RMI and RSI devices only when firmware exposes
+	 * the required SMCCC function IDs at a supported revision.
+	 */
+	register_rsi_device(pdev);
 
 	if (smccc_trng_available) {
-		pdev = platform_device_register_simple("smccc_trng", -1,
-						       NULL, 0);
-		if (IS_ERR(pdev))
-			pr_err("smccc_trng: could not register device: %ld\n",
-			       PTR_ERR(pdev));
+		struct auxiliary_device *adev;
+
+		adev = __devm_auxiliary_device_create(&pdev->dev,
+					"arm_smccc_trng", "smccc_trng", NULL, 0);
+		if (!adev)
+			pr_err("smccc_trng: could not register device\n");
 	}
 
 	return 0;
-- 
2.43.0


^ permalink raw reply related

* [PATCH v5 1/3] firmware: smccc: coco: Manage arm-smccc platform device and CCA auxiliary drivers
From: Aneesh Kumar K.V (Arm) @ 2026-05-14  9:40 UTC (permalink / raw)
  To: linux-coco, linux-arm-kernel, linux-kernel
  Cc: Aneesh Kumar K.V (Arm), Catalin Marinas, Greg KH, Jeremy Linton,
	Jonathan Cameron, Lorenzo Pieralisi, Mark Rutland, Sudeep Holla,
	Will Deacon, Steven Price, Suzuki K Poulose
In-Reply-To: <20260514094030.42495-1-aneesh.kumar@kernel.org>

Make the SMCCC driver responsible for registering the arm-smccc platform
device and, after confirming the relevant SMCCC function IDs, for creating
the arm_cca_guest auxiliary device.

Also update the arm-cca-guest driver to use the auxiliary device
interface instead of the platform device (arm-cca-dev). The removal of
the platform device registration will follow in a subsequent patch,
allowing this change to be applied without immediately breaking existing
userspace dependencies [1].

[1] https://lore.kernel.org/all/4a7d84b2-2ec4-4773-a2d5-7b63d5c683cf@arm.com

Signed-off-by: Aneesh Kumar K.V (Arm) <aneesh.kumar@kernel.org>
---
 arch/arm64/include/asm/rsi.h                  |  2 +-
 arch/arm64/kernel/rsi.c                       |  2 +-
 drivers/firmware/smccc/Kconfig                |  1 +
 drivers/firmware/smccc/Makefile               |  1 +
 drivers/firmware/smccc/rmm.c                  | 24 ++++++++
 drivers/firmware/smccc/rmm.h                  | 17 ++++++
 drivers/firmware/smccc/smccc.c                | 17 ++++++
 drivers/virt/coco/arm-cca-guest/Kconfig       |  1 +
 drivers/virt/coco/arm-cca-guest/Makefile      |  2 +
 .../{arm-cca-guest.c => arm-cca.c}            | 59 +++++++++----------
 10 files changed, 94 insertions(+), 32 deletions(-)
 create mode 100644 drivers/firmware/smccc/rmm.c
 create mode 100644 drivers/firmware/smccc/rmm.h
 rename drivers/virt/coco/arm-cca-guest/{arm-cca-guest.c => arm-cca.c} (84%)

diff --git a/arch/arm64/include/asm/rsi.h b/arch/arm64/include/asm/rsi.h
index 88b50d660e85..2d2d363aaaee 100644
--- a/arch/arm64/include/asm/rsi.h
+++ b/arch/arm64/include/asm/rsi.h
@@ -10,7 +10,7 @@
 #include <linux/jump_label.h>
 #include <asm/rsi_cmds.h>
 
-#define RSI_PDEV_NAME "arm-cca-dev"
+#define RSI_DEV_NAME "arm-rsi-dev"
 
 DECLARE_STATIC_KEY_FALSE(rsi_present);
 
diff --git a/arch/arm64/kernel/rsi.c b/arch/arm64/kernel/rsi.c
index 9e846ce4ef9c..8380e5ba88d2 100644
--- a/arch/arm64/kernel/rsi.c
+++ b/arch/arm64/kernel/rsi.c
@@ -161,7 +161,7 @@ void __init arm64_rsi_init(void)
 }
 
 static struct platform_device rsi_dev = {
-	.name = RSI_PDEV_NAME,
+	.name = "arm-cca-dev",
 	.id = PLATFORM_DEVID_NONE
 };
 
diff --git a/drivers/firmware/smccc/Kconfig b/drivers/firmware/smccc/Kconfig
index 15e7466179a6..2b6984757241 100644
--- a/drivers/firmware/smccc/Kconfig
+++ b/drivers/firmware/smccc/Kconfig
@@ -8,6 +8,7 @@ config HAVE_ARM_SMCCC
 config HAVE_ARM_SMCCC_DISCOVERY
 	bool
 	depends on ARM_PSCI_FW
+	select AUXILIARY_BUS
 	default y
 	help
 	 SMCCC v1.0 lacked discoverability and hence PSCI v1.0 was updated
diff --git a/drivers/firmware/smccc/Makefile b/drivers/firmware/smccc/Makefile
index 40d19144a860..146dc3c03c20 100644
--- a/drivers/firmware/smccc/Makefile
+++ b/drivers/firmware/smccc/Makefile
@@ -2,3 +2,4 @@
 #
 obj-$(CONFIG_HAVE_ARM_SMCCC_DISCOVERY)	+= smccc.o kvm_guest.o
 obj-$(CONFIG_ARM_SMCCC_SOC_ID)	+= soc_id.o
+obj-$(CONFIG_ARM64) += rmm.o
diff --git a/drivers/firmware/smccc/rmm.c b/drivers/firmware/smccc/rmm.c
new file mode 100644
index 000000000000..728338cb5a22
--- /dev/null
+++ b/drivers/firmware/smccc/rmm.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2026 Arm Limited
+ */
+
+#include <linux/auxiliary_bus.h>
+
+#include "rmm.h"
+
+void __init register_rsi_device(struct platform_device *pdev)
+{
+	unsigned long ret;
+	unsigned long ver_lower, ver_higher;
+
+	if (arm_smccc_1_1_get_conduit() != SMCCC_CONDUIT_SMC)
+		return;
+
+	ret = rsi_request_version(RSI_ABI_VERSION, &ver_lower, &ver_higher);
+	if (ret != RSI_SUCCESS)
+		return;
+
+	__devm_auxiliary_device_create(&pdev->dev,
+				       "arm_cca_guest", RSI_DEV_NAME, NULL, 0);
+}
diff --git a/drivers/firmware/smccc/rmm.h b/drivers/firmware/smccc/rmm.h
new file mode 100644
index 000000000000..a47a650d4f51
--- /dev/null
+++ b/drivers/firmware/smccc/rmm.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SMCCC_RMM_H
+#define _SMCCC_RMM_H
+
+#include <linux/platform_device.h>
+
+#ifdef CONFIG_ARM64
+#include <asm/rsi_cmds.h>
+void __init register_rsi_device(struct platform_device *pdev);
+#else
+
+static void __init register_rsi_device(struct platform_device *pdev)
+{
+
+}
+#endif
+#endif
diff --git a/drivers/firmware/smccc/smccc.c b/drivers/firmware/smccc/smccc.c
index bdee057db2fd..eb077b9aa6da 100644
--- a/drivers/firmware/smccc/smccc.c
+++ b/drivers/firmware/smccc/smccc.c
@@ -12,6 +12,8 @@
 #include <linux/platform_device.h>
 #include <asm/archrandom.h>
 
+#include "rmm.h"
+
 static u32 smccc_version = ARM_SMCCC_VERSION_1_0;
 static enum arm_smccc_conduit smccc_conduit = SMCCC_CONDUIT_NONE;
 
@@ -85,6 +87,21 @@ static int __init smccc_devices_init(void)
 {
 	struct platform_device *pdev;
 
+	if (smccc_conduit == SMCCC_CONDUIT_NONE)
+		return 0;
+
+	pdev = platform_device_register_simple("arm-smccc",
+					PLATFORM_DEVID_NONE, NULL, 0);
+	if (IS_ERR(pdev)) {
+		pr_err("arm-smccc: could not register device: %ld\n", PTR_ERR(pdev));
+	} else {
+		/*
+		 * Register the RMI and RSI devices only when firmware exposes
+		 * the required SMCCC function IDs at a supported revision.
+		 */
+		register_rsi_device(pdev);
+	}
+
 	if (smccc_trng_available) {
 		pdev = platform_device_register_simple("smccc_trng", -1,
 						       NULL, 0);
diff --git a/drivers/virt/coco/arm-cca-guest/Kconfig b/drivers/virt/coco/arm-cca-guest/Kconfig
index 3f0f013f03f1..a42359a90558 100644
--- a/drivers/virt/coco/arm-cca-guest/Kconfig
+++ b/drivers/virt/coco/arm-cca-guest/Kconfig
@@ -2,6 +2,7 @@ config ARM_CCA_GUEST
 	tristate "Arm CCA Guest driver"
 	depends on ARM64
 	select TSM_REPORTS
+	select AUXILIARY_BUS
 	help
 	  The driver provides userspace interface to request and
 	  attestation report from the Realm Management Monitor(RMM).
diff --git a/drivers/virt/coco/arm-cca-guest/Makefile b/drivers/virt/coco/arm-cca-guest/Makefile
index 69eeba08e98a..75a120e24fda 100644
--- a/drivers/virt/coco/arm-cca-guest/Makefile
+++ b/drivers/virt/coco/arm-cca-guest/Makefile
@@ -1,2 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_ARM_CCA_GUEST) += arm-cca-guest.o
+
+arm-cca-guest-y +=  arm-cca.o
diff --git a/drivers/virt/coco/arm-cca-guest/arm-cca-guest.c b/drivers/virt/coco/arm-cca-guest/arm-cca.c
similarity index 84%
rename from drivers/virt/coco/arm-cca-guest/arm-cca-guest.c
rename to drivers/virt/coco/arm-cca-guest/arm-cca.c
index 0c9ea24a200c..7daada072cc0 100644
--- a/drivers/virt/coco/arm-cca-guest/arm-cca-guest.c
+++ b/drivers/virt/coco/arm-cca-guest/arm-cca.c
@@ -3,6 +3,7 @@
  * Copyright (C) 2023 ARM Ltd.
  */
 
+#include <linux/auxiliary_bus.h>
 #include <linux/arm-smccc.h>
 #include <linux/cc_platform.h>
 #include <linux/kernel.h>
@@ -181,52 +182,50 @@ static int arm_cca_report_new(struct tsm_report *report, void *data)
 	return ret;
 }
 
-static const struct tsm_report_ops arm_cca_tsm_ops = {
+static const struct tsm_report_ops arm_cca_tsm_report_ops = {
 	.name = KBUILD_MODNAME,
 	.report_new = arm_cca_report_new,
 };
 
-/**
- * arm_cca_guest_init - Register with the Trusted Security Module (TSM)
- * interface.
- *
- * Return:
- * * %0        - Registered successfully with the TSM interface.
- * * %-ENODEV  - The execution context is not an Arm Realm.
- * * %-EBUSY   - Already registered.
- */
-static int __init arm_cca_guest_init(void)
+static void unregister_cca_tsm_report(void *data)
+{
+	tsm_report_unregister(&arm_cca_tsm_report_ops);
+}
+
+static int cca_devsec_tsm_probe(struct auxiliary_device *adev,
+		const struct auxiliary_device_id *id)
 {
 	int ret;
 
 	if (!is_realm_world())
 		return -ENODEV;
 
-	ret = tsm_report_register(&arm_cca_tsm_ops, NULL);
-	if (ret < 0)
-		pr_err("Error %d registering with TSM\n", ret);
+	ret = tsm_report_register(&arm_cca_tsm_report_ops, NULL);
+	if (ret < 0) {
+		dev_err_probe(&adev->dev, ret, "Error registering with TSM\n");
+		return ret;
+	}
 
-	return ret;
-}
-module_init(arm_cca_guest_init);
+	ret = devm_add_action_or_reset(&adev->dev, unregister_cca_tsm_report, NULL);
+	if (ret < 0) {
+		dev_err_probe(&adev->dev, ret, "Error registering devm action\n");
+		return ret;
+	}
 
-/**
- * arm_cca_guest_exit - unregister with the Trusted Security Module (TSM)
- * interface.
- */
-static void __exit arm_cca_guest_exit(void)
-{
-	tsm_report_unregister(&arm_cca_tsm_ops);
+	return 0;
 }
-module_exit(arm_cca_guest_exit);
 
-/* modalias, so userspace can autoload this module when RSI is available */
-static const struct platform_device_id arm_cca_match[] __maybe_unused = {
-	{ RSI_PDEV_NAME, 0},
-	{ }
+static const struct auxiliary_device_id cca_devsec_tsm_id_table[] = {
+	{ .name =  KBUILD_MODNAME "." RSI_DEV_NAME },
+	{}
 };
+MODULE_DEVICE_TABLE(auxiliary, cca_devsec_tsm_id_table);
 
-MODULE_DEVICE_TABLE(platform, arm_cca_match);
+static struct auxiliary_driver cca_devsec_tsm_driver = {
+	.probe = cca_devsec_tsm_probe,
+	.id_table = cca_devsec_tsm_id_table,
+};
+module_auxiliary_driver(cca_devsec_tsm_driver);
 MODULE_AUTHOR("Sami Mujawar <sami.mujawar@arm.com>");
 MODULE_DESCRIPTION("Arm CCA Guest TSM Driver");
 MODULE_LICENSE("GPL");
-- 
2.43.0


^ permalink raw reply related

* [PATCH v5 0/3] Switch Arm CCA to use an auxiliary device instead of a platform device
From: Aneesh Kumar K.V (Arm) @ 2026-05-14  9:40 UTC (permalink / raw)
  To: linux-coco, linux-arm-kernel, linux-kernel
  Cc: Aneesh Kumar K.V (Arm), Catalin Marinas, Greg KH, Jeremy Linton,
	Jonathan Cameron, Lorenzo Pieralisi, Mark Rutland, Sudeep Holla,
	Will Deacon, Steven Price, Suzuki K Poulose

As discussed here:
https://lore.kernel.org/all/20250728135216.48084-12-aneesh.kumar@kernel.org

The general feedback was that a platform device should not be used when
there is no underlying platform resource to represent. The existing CCA
support uses a platform device solely to anchor the TSM interface in the
device hierarchy, which is not an appropriate use of a platform device.
Use an auxiliary device instead to track CCA support.

The TSM framework uses the device abstraction to provide cross-architecture
TSM and TEE I/O functionality, including enumerating available platform TEE
I/O capabilities and provisioning connections between the platform TSM and
device DSMs.

For the CCA platform, the resulting device hierarchy appears as follows.
Note that the auxiliary device is still parented by the arm-smccc platform
device, so the sysfs path remains under /devices/platform/arm-smccc/:

$ cd /sys/class/tsm/
$ ls -al
total 0
drwxr-xr-x    2 root     root             0 Jan  1 00:02 .
drwxr-xr-x   23 root     root             0 Jan  1 00:00 ..
lrwxrwxrwx    1 root     root             0 Jan  1 00:03 tsm0 -> ../../devices/platform/arm-smccc/arm_cca_guest.arm-rsi-dev.0/tsm/tsm0
$

Changes from v4:
https://lore.kernel.org/all/20260427061615.905018-1-aneesh.kumar@kernel.org
* Add /sys/firmware/cca/realm_guest for detecting a realm guest
* Convert smccc-trng from a platform device to an auxiliary device

Changes from v3:
https://lore.kernel.org/all/20260309100507.2303361-1-aneesh.kumar@kernel.org
* Rebased onto the latest kernel
* Drop pr_fmt() from drivers/firmware/smccc/rmm.c

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Jeremy Linton <jeremy.linton@arm.com>
Cc: Jonathan Cameron <jic23@kernel.org>
Cc: Lorenzo Pieralisi <lpieralisi@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Sudeep Holla <sudeep.holla@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Steven Price <steven.price@arm.com>
Cc: Suzuki K Poulose <Suzuki.Poulose@arm.com>

Aneesh Kumar K.V (Arm) (3):
  firmware: smccc: coco: Manage arm-smccc platform device and CCA
    auxiliary drivers
  hwrng: arm_smccc_trng: Register as an auxiliary device
  coco: guest: arm64: Replace dummy CCA device with sysfs ABI

 Documentation/ABI/testing/sysfs-firmware-cca  | 10 ++++
 arch/arm64/include/asm/rsi.h                  |  2 +-
 arch/arm64/kernel/rsi.c                       | 39 ++++++++----
 drivers/char/hw_random/arm_smccc_trng.c       | 25 ++++----
 drivers/firmware/smccc/Kconfig                |  1 +
 drivers/firmware/smccc/Makefile               |  1 +
 drivers/firmware/smccc/rmm.c                  | 24 ++++++++
 drivers/firmware/smccc/rmm.h                  | 17 ++++++
 drivers/firmware/smccc/smccc.c                | 29 +++++++--
 drivers/virt/coco/arm-cca-guest/Kconfig       |  1 +
 drivers/virt/coco/arm-cca-guest/Makefile      |  2 +
 .../{arm-cca-guest.c => arm-cca.c}            | 59 +++++++++----------
 12 files changed, 153 insertions(+), 57 deletions(-)
 create mode 100644 Documentation/ABI/testing/sysfs-firmware-cca
 create mode 100644 drivers/firmware/smccc/rmm.c
 create mode 100644 drivers/firmware/smccc/rmm.h
 rename drivers/virt/coco/arm-cca-guest/{arm-cca-guest.c => arm-cca.c} (84%)

-- 
2.43.0


^ permalink raw reply

* Re: [PATCH v14 10/44] arm64: RMI: Add support for SRO
From: Steven Price @ 2026-05-14  9:33 UTC (permalink / raw)
  To: Aneesh Kumar K.V, kvm, kvmarm
  Cc: Catalin Marinas, Marc Zyngier, Will Deacon, James Morse,
	Oliver Upton, Suzuki K Poulose, Zenghui Yu, linux-arm-kernel,
	linux-kernel, Joey Gouly, Alexandru Elisei, Christoffer Dall,
	Fuad Tabba, linux-coco, Ganapatrao Kulkarni, Gavin Shan,
	Shanker Donthineni, Alper Gun, Emi Kisanuki, Vishal Annapurve,
	WeiLin.Chang, Lorenzo.Pieralisi2
In-Reply-To: <yq5a8q9ma08r.fsf@kernel.org>

On 14/05/2026 09:01, Aneesh Kumar K.V wrote:
> Steven Price <steven.price@arm.com> writes:
> 
>> +unsigned long rmi_sro_execute(struct rmi_sro_state *sro, gfp_t gfp)
>> +{
>> +	unsigned long sro_handle;
>> +	struct arm_smccc_1_2_regs regs;
>> +	struct arm_smccc_1_2_regs *regs_in = &sro->regs;
>> +
>> +	rmi_smccc_invoke(regs_in, &regs);
>> +
>> +	sro_handle = regs.a1;
>> +
>> +	while (RMI_RETURN_STATUS(regs.a0) == RMI_INCOMPLETE) {
>> +		bool can_cancel = RMI_RETURN_CAN_CANCEL(regs.a0);
>> +		int ret;
>> +
>> +		switch (RMI_RETURN_MEMREQ(regs.a0)) {
>> +		case RMI_OP_MEM_REQ_NONE:
>> +			regs = (struct arm_smccc_1_2_regs){
>> +				SMC_RMI_OP_CONTINUE, sro_handle, 0
>> +			};
>> +			rmi_smccc_invoke(&regs, &regs);
>> +			break;
>> +		case RMI_OP_MEM_REQ_DONATE:
>> +			ret = rmi_sro_donate(sro, sro_handle, regs.a2, &regs,
>> +					     gfp);
>> +			break;
>> +		case RMI_OP_MEM_REQ_RECLAIM:
>> +			ret = rmi_sro_reclaim(sro, sro_handle, &regs);
>> +			break;
>> +		default:
>> +			ret = WARN_ON(1);
>> +			break;
>> +		}
>> +
>> +		if (ret) {
>> +			if (can_cancel) {
>> +				/*
>> +				 * FIXME: Handle cancelling properly!
>> +				 *
>> +				 * If the operation has failed due to memory
>> +				 * allocation failure then the information on
>> +				 * the memory allocation should be saved, so
>> +				 * that the allocation can be repeated outside
>> +				 * of any context which prevented the
>> +				 * allocation.
>> +				 */
>> +			}
>> +			if (WARN_ON(ret))
>> +				return ret;
>> +		}
>> +	}
>> +
>> +	return regs.a0;
>> +}
> 
> Can you also add support to return x1,x2 etc

Indeed, that's going to be needed. Looking at this function again, I don't
think we actually need the on-stack 'regs' any more. So the diff below (very
lightly tested) uses the regs from sro, which also means they will be there
for the caller if it needs them.

Thanks,
Steve

---8<---
diff --git a/arch/arm64/kernel/rmi.c b/arch/arm64/kernel/rmi.c
index a8107ca9bb6d..58a0216be409 100644
--- a/arch/arm64/kernel/rmi.c
+++ b/arch/arm64/kernel/rmi.c
@@ -356,30 +356,29 @@ void rmi_sro_free(struct rmi_sro_state *sro)
 unsigned long rmi_sro_execute(struct rmi_sro_state *sro, gfp_t gfp)
 {
 	unsigned long sro_handle;
-	struct arm_smccc_1_2_regs regs;
-	struct arm_smccc_1_2_regs *regs_in = &sro->regs;
+	struct arm_smccc_1_2_regs *regs = &sro->regs;
 
-	rmi_smccc_invoke(regs_in, &regs);
+	rmi_smccc_invoke(regs, regs);
 
-	sro_handle = regs.a1;
+	sro_handle = regs->a1;
 
-	while (RMI_RETURN_STATUS(regs.a0) == RMI_INCOMPLETE) {
-		bool can_cancel = RMI_RETURN_CAN_CANCEL(regs.a0);
+	while (RMI_RETURN_STATUS(regs->a0) == RMI_INCOMPLETE) {
+		bool can_cancel = RMI_RETURN_CAN_CANCEL(regs->a0);
 		int ret;
 
-		switch (RMI_RETURN_MEMREQ(regs.a0)) {
+		switch (RMI_RETURN_MEMREQ(regs->a0)) {
 		case RMI_OP_MEM_REQ_NONE:
-			regs = (struct arm_smccc_1_2_regs){
+			*regs = (struct arm_smccc_1_2_regs){
 				SMC_RMI_OP_CONTINUE, sro_handle, 0
 			};
-			rmi_smccc_invoke(&regs, &regs);
+			rmi_smccc_invoke(regs, regs);
 			break;
 		case RMI_OP_MEM_REQ_DONATE:
-			ret = rmi_sro_donate(sro, sro_handle, regs.a2, &regs,
+			ret = rmi_sro_donate(sro, sro_handle, regs->a2, regs,
 					     gfp);
 			break;
 		case RMI_OP_MEM_REQ_RECLAIM:
-			ret = rmi_sro_reclaim(sro, sro_handle, &regs);
+			ret = rmi_sro_reclaim(sro, sro_handle, regs);
 			break;
 		default:
 			ret = WARN_ON(1);
@@ -404,7 +403,7 @@ unsigned long rmi_sro_execute(struct rmi_sro_state *sro, gfp_t gfp)
 		}
 	}
 
-	return regs.a0;
+	return regs->a0;
 }
 
 static int rmi_check_version(void)


^ permalink raw reply related


This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.