LinuxPPC-Dev Archive on lore.kernel.org

LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH v5 16/20] iommu/dma: Check atomic pool allocation result directly
From: Aneesh Kumar K.V (Arm) @ 2026-05-22  4:28 UTC (permalink / raw)
  To: iommu, linux-arm-kernel, linux-kernel, linux-coco
  Cc: Aneesh Kumar K.V (Arm), Robin Murphy, Marek Szyprowski,
	Will Deacon, Marc Zyngier, Steven Price, Suzuki K Poulose,
	Catalin Marinas, Jiri Pirko, Jason Gunthorpe, Mostafa Saleh,
	Petr Tesarik, Alexey Kardashevskiy, Dan Williams, Xu Yilun,
	linuxppc-dev, linux-s390, Madhavan Srinivasan, Michael Ellerman,
	Nicholas Piggin, Christophe Leroy (CS GROUP), Alexander Gordeev,
	Gerald Schaefer, Heiko Carstens, Vasily Gorbik,
	Christian Borntraeger, Sven Schnelle, x86
In-Reply-To: <20260522042815.370873-1-aneesh.kumar@kernel.org>

The non-blocking, non-coherent allocation path uses dma_alloc_from_pool(),
which returns the allocated page and fills cpu_addr only on success.

Do not rely on cpu_addr to detect allocation failure in this path. Check
the returned page directly before using it for the IOMMU mapping.

Fixes: 9420139f516d ("dma-pool: fix coherent pool allocations for IOMMU mappings")
Signed-off-by: Aneesh Kumar K.V (Arm) <aneesh.kumar@kernel.org>
---
 drivers/iommu/dma-iommu.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 725c7adb0a8d..52c599f4472c 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -1671,13 +1671,16 @@ void *iommu_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
 	}
 
 	if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
-	    !gfpflags_allow_blocking(gfp) && !coherent)
+	    !gfpflags_allow_blocking(gfp) && !coherent) {
 		page = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &cpu_addr,
 					   gfp, attrs, NULL);
-	else
+		if (!page)
+			return NULL;
+	} else {
 		cpu_addr = iommu_dma_alloc_pages(dev, size, &page, gfp, attrs);
-	if (!cpu_addr)
-		return NULL;
+		if (!cpu_addr)
+			return NULL;
+	}
 
 	*handle = __iommu_dma_map(dev, page_to_phys(page), size, ioprot,
 			dev->coherent_dma_mask);
-- 
2.43.0



^ permalink raw reply related

* [PATCH v5 17/20] dma: swiotlb: free dynamic pools from process context
From: Aneesh Kumar K.V (Arm) @ 2026-05-22  4:28 UTC (permalink / raw)
  To: iommu, linux-arm-kernel, linux-kernel, linux-coco
  Cc: Aneesh Kumar K.V (Arm), Robin Murphy, Marek Szyprowski,
	Will Deacon, Marc Zyngier, Steven Price, Suzuki K Poulose,
	Catalin Marinas, Jiri Pirko, Jason Gunthorpe, Mostafa Saleh,
	Petr Tesarik, Alexey Kardashevskiy, Dan Williams, Xu Yilun,
	linuxppc-dev, linux-s390, Madhavan Srinivasan, Michael Ellerman,
	Nicholas Piggin, Christophe Leroy (CS GROUP), Alexander Gordeev,
	Gerald Schaefer, Heiko Carstens, Vasily Gorbik,
	Christian Borntraeger, Sven Schnelle, x86
In-Reply-To: <20260522042815.370873-1-aneesh.kumar@kernel.org>

swiotlb_dyn_free() is used after removing a dynamic swiotlb pool from
RCU-protected lists. It can call swiotlb_free_tlb(), which may need to
restore the encryption state of an unencrypted pool with
set_memory_encrypted() before freeing the pages.

RCU callbacks run in atomic context, but set_memory_encrypted() is not
guaranteed to be atomic-safe on all architectures. For example, page
attribute updates may allocate page tables or take sleeping locks.

Use queue_rcu_work() for dynamic pool freeing instead. This keeps the RCU
grace period before freeing a published pool, while running the actual pool
teardown from workqueue context. Use the same helper for the transient-pool
error path, since that path may also be reached from atomic DMA mapping
context.

Signed-off-by: Aneesh Kumar K.V (Arm) <aneesh.kumar@kernel.org>
---
 include/linux/swiotlb.h |  4 ++--
 kernel/dma/swiotlb.c    | 19 +++++++++++--------
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 4dcbf3931be1..526f82e9da45 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -64,7 +64,7 @@ extern void __init swiotlb_update_mem_attributes(void);
  * @areas:	Array of memory area descriptors.
  * @slots:	Array of slot descriptors.
  * @node:	Member of the IO TLB memory pool list.
- * @rcu:	RCU head for swiotlb_dyn_free().
+ * @dyn_free:	RCU work item used to free the pool from process context.
  * @transient:  %true if transient memory pool.
  */
 struct io_tlb_pool {
@@ -79,7 +79,7 @@ struct io_tlb_pool {
 	struct io_tlb_slot *slots;
 #ifdef CONFIG_SWIOTLB_DYNAMIC
 	struct list_head node;
-	struct rcu_head rcu;
+	struct rcu_work dyn_free;
 	bool transient;
 	bool unencrypted;
 #endif
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index f4e8b241a1c4..4c56f64602ea 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -774,13 +774,10 @@ static void swiotlb_dyn_alloc(struct work_struct *work)
 	add_mem_pool(mem, pool);
 }
 
-/**
- * swiotlb_dyn_free() - RCU callback to free a memory pool
- * @rcu:	RCU head in the corresponding struct io_tlb_pool.
- */
-static void swiotlb_dyn_free(struct rcu_head *rcu)
+static void swiotlb_dyn_free_work(struct work_struct *work)
 {
-	struct io_tlb_pool *pool = container_of(rcu, struct io_tlb_pool, rcu);
+	struct io_tlb_pool *pool =
+		container_of(to_rcu_work(work), struct io_tlb_pool, dyn_free);
 	size_t slots_size = array_size(sizeof(*pool->slots), pool->nslabs);
 	size_t tlb_size = pool->end - pool->start;
 
@@ -789,6 +786,12 @@ static void swiotlb_dyn_free(struct rcu_head *rcu)
 	kfree(pool);
 }
 
+static void swiotlb_schedule_dyn_free(struct io_tlb_pool *pool)
+{
+	INIT_RCU_WORK(&pool->dyn_free, swiotlb_dyn_free_work);
+	queue_rcu_work(system_wq, &pool->dyn_free);
+}
+
 /**
  * __swiotlb_find_pool() - find the IO TLB pool for a physical address
  * @dev:        Device which has mapped the DMA buffer.
@@ -835,7 +838,7 @@ static void swiotlb_del_pool(struct device *dev, struct io_tlb_pool *pool)
 	list_del_rcu(&pool->node);
 	spin_unlock_irqrestore(&dev->dma_io_tlb_lock, flags);
 
-	call_rcu(&pool->rcu, swiotlb_dyn_free);
+	swiotlb_schedule_dyn_free(pool);
 }
 
 #endif	/* CONFIG_SWIOTLB_DYNAMIC */
@@ -1276,7 +1279,7 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
 	index = swiotlb_search_pool_area(dev, pool, 0, orig_addr, tbl_dma_addr,
 					 alloc_size, alloc_align_mask);
 	if (index < 0) {
-		swiotlb_dyn_free(&pool->rcu);
+		swiotlb_schedule_dyn_free(pool);
 		return -1;
 	}
 
-- 
2.43.0



^ permalink raw reply related

* [PATCH v5 18/20] dma: swiotlb: handle set_memory_decrypted() failures
From: Aneesh Kumar K.V (Arm) @ 2026-05-22  4:28 UTC (permalink / raw)
  To: iommu, linux-arm-kernel, linux-kernel, linux-coco
  Cc: Aneesh Kumar K.V (Arm), Robin Murphy, Marek Szyprowski,
	Will Deacon, Marc Zyngier, Steven Price, Suzuki K Poulose,
	Catalin Marinas, Jiri Pirko, Jason Gunthorpe, Mostafa Saleh,
	Petr Tesarik, Alexey Kardashevskiy, Dan Williams, Xu Yilun,
	linuxppc-dev, linux-s390, Madhavan Srinivasan, Michael Ellerman,
	Nicholas Piggin, Christophe Leroy (CS GROUP), Alexander Gordeev,
	Gerald Schaefer, Heiko Carstens, Vasily Gorbik,
	Christian Borntraeger, Sven Schnelle, x86
In-Reply-To: <20260522042815.370873-1-aneesh.kumar@kernel.org>

Check the return value when converting swiotlb pools between encrypted and
decrypted mappings. If the default pool cannot be decrypted after early
initialization, mark the pool fully used so it cannot satisfy future bounce
allocations.

For late initialization, return the `set_memory_decrypted()` failure. For
restricted DMA pools, fail device initialization if the reserved pool
cannot be decrypted.

This prevents swiotlb from using pools whose encryption attributes do not
match their metadata, and avoids returning pages with uncertain encryption
state back to the allocator.

Signed-off-by: Aneesh Kumar K.V (Arm) <aneesh.kumar@kernel.org>
---
 kernel/dma/swiotlb.c | 80 +++++++++++++++++++++++++++++++++++---------
 1 file changed, 65 insertions(+), 15 deletions(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 4c56f64602ea..14d834ca298b 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -248,6 +248,23 @@ static inline unsigned long nr_slots(u64 val)
 	return DIV_ROUND_UP(val, IO_TLB_SIZE);
 }
 
+static void swiotlb_mark_pool_used(struct io_tlb_pool *pool)
+{
+	unsigned long i;
+
+	for (i = 0; i < pool->nareas; i++) {
+		pool->areas[i].index = 0;
+		pool->areas[i].used = pool->area_nslabs;
+	}
+
+	for (i = 0; i < pool->nslabs; i++) {
+		pool->slots[i].list = 0;
+		pool->slots[i].orig_addr = INVALID_PHYS_ADDR;
+		pool->slots[i].alloc_size = 0;
+		pool->slots[i].pad_slots = 0;
+	}
+}
+
 /*
  * Early SWIOTLB allocation may be too early to allow an architecture to
  * perform the desired operations.  This function allows the architecture to
@@ -272,8 +289,16 @@ void __init swiotlb_update_mem_attributes(void)
 		return;
 	bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT);
 
-	if (io_tlb_default_mem.unencrypted)
-		set_memory_decrypted((unsigned long)mem->vaddr, bytes >> PAGE_SHIFT);
+	if (io_tlb_default_mem.unencrypted) {
+		int ret;
+
+		ret = set_memory_decrypted((unsigned long)mem->vaddr,
+					   bytes >> PAGE_SHIFT);
+		if (ret) {
+			pr_warn("Failed to decrypt default memory pool, disabling it\n");
+			swiotlb_mark_pool_used(mem);
+		}
+	}
 }
 
 static void swiotlb_init_io_tlb_pool(struct io_tlb_pool *mem, phys_addr_t start,
@@ -442,9 +467,10 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
 {
 	struct io_tlb_pool *mem = &io_tlb_default_mem.defpool;
 	unsigned long nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE);
+	unsigned int order, area_order, slot_order;
+	bool leak_pages = false;
 	unsigned int nareas;
 	unsigned char *vstart = NULL;
-	unsigned int order, area_order;
 	bool retried = false;
 	int rc = 0;
 
@@ -504,6 +530,7 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
 			(PAGE_SIZE << order) >> 20);
 	}
 
+	rc = -ENOMEM;
 	nareas = limit_nareas(default_nareas, nslabs);
 	area_order = get_order(array_size(sizeof(*mem->areas), nareas));
 	mem->areas = (struct io_tlb_area *)
@@ -511,14 +538,20 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
 	if (!mem->areas)
 		goto error_area;
 
+	slot_order = get_order(array_size(sizeof(*mem->slots), nslabs));
 	mem->slots = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
-		get_order(array_size(sizeof(*mem->slots), nslabs)));
+					      slot_order);
 	if (!mem->slots)
 		goto error_slots;
 
-	if (io_tlb_default_mem.unencrypted)
-		set_memory_decrypted((unsigned long)vstart,
-				     (nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT);
+	if (io_tlb_default_mem.unencrypted) {
+		rc = set_memory_decrypted((unsigned long)vstart,
+					  (nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT);
+		if (rc) {
+			leak_pages = true;
+			goto error_decrypt;
+		}
+	}
 
 	swiotlb_init_io_tlb_pool(mem, virt_to_phys(vstart), nslabs, true,
 				 nareas);
@@ -527,16 +560,20 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
 	swiotlb_print_info();
 	return 0;
 
+error_decrypt:
+	free_pages((unsigned long)mem->slots, slot_order);
 error_slots:
 	free_pages((unsigned long)mem->areas, area_order);
 error_area:
-	free_pages((unsigned long)vstart, order);
-	return -ENOMEM;
+	if (!leak_pages)
+		free_pages((unsigned long)vstart, order);
+	return rc;
 }
 
 void __init swiotlb_exit(void)
 {
 	struct io_tlb_pool *mem = &io_tlb_default_mem.defpool;
+	bool leak_pages = false;
 	unsigned long tbl_vaddr;
 	size_t tbl_size, slots_size;
 	unsigned int area_order;
@@ -552,19 +589,23 @@ void __init swiotlb_exit(void)
 	tbl_size = PAGE_ALIGN(mem->end - mem->start);
 	slots_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), mem->nslabs));
 
-	if (io_tlb_default_mem.unencrypted)
-		set_memory_encrypted(tbl_vaddr, tbl_size >> PAGE_SHIFT);
+	if (io_tlb_default_mem.unencrypted) {
+		if (set_memory_encrypted(tbl_vaddr, tbl_size >> PAGE_SHIFT))
+			leak_pages = true;
+	}
 
 	if (mem->late_alloc) {
 		area_order = get_order(array_size(sizeof(*mem->areas),
 			mem->nareas));
 		free_pages((unsigned long)mem->areas, area_order);
-		free_pages(tbl_vaddr, get_order(tbl_size));
+		if (!leak_pages)
+			free_pages(tbl_vaddr, get_order(tbl_size));
 		free_pages((unsigned long)mem->slots, get_order(slots_size));
 	} else {
 		memblock_free(mem->areas,
 			array_size(sizeof(*mem->areas), mem->nareas));
-		memblock_phys_free(mem->start, tbl_size);
+		if (!leak_pages)
+			memblock_phys_free(mem->start, tbl_size);
 		memblock_free(mem->slots, slots_size);
 	}
 
@@ -1938,9 +1979,18 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
 		 * restricted mem pool is decrypted by default
 		 */
 		if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) {
+			int ret;
+
 			mem->unencrypted = true;
-			set_memory_decrypted((unsigned long)phys_to_virt(rmem->base),
-					     rmem->size >> PAGE_SHIFT);
+			ret = set_memory_decrypted((unsigned long)phys_to_virt(rmem->base),
+						   rmem->size >> PAGE_SHIFT);
+			if (ret) {
+				dev_err(dev, "Failed to decrypt restricted DMA pool\n");
+				kfree(pool->areas);
+				kfree(pool->slots);
+				kfree(mem);
+				return ret;
+			}
 		} else {
 			mem->unencrypted = false;
 		}
-- 
2.43.0



^ permalink raw reply related

* [PATCH v5 19/20] dma: free atomic pool pages by physical address
From: Aneesh Kumar K.V (Arm) @ 2026-05-22  4:28 UTC (permalink / raw)
  To: iommu, linux-arm-kernel, linux-kernel, linux-coco
  Cc: Aneesh Kumar K.V (Arm), Robin Murphy, Marek Szyprowski,
	Will Deacon, Marc Zyngier, Steven Price, Suzuki K Poulose,
	Catalin Marinas, Jiri Pirko, Jason Gunthorpe, Mostafa Saleh,
	Petr Tesarik, Alexey Kardashevskiy, Dan Williams, Xu Yilun,
	linuxppc-dev, linux-s390, Madhavan Srinivasan, Michael Ellerman,
	Nicholas Piggin, Christophe Leroy (CS GROUP), Alexander Gordeev,
	Gerald Schaefer, Heiko Carstens, Vasily Gorbik,
	Christian Borntraeger, Sven Schnelle, x86
In-Reply-To: <20260522042815.370873-1-aneesh.kumar@kernel.org>

dma_direct_alloc_pages() may satisfy atomic allocations from the coherent
atomic pools. The pool allocation is keyed by the virtual address stored in
the gen_pool, but the pages API returns only the backing struct page.

On architectures with CONFIG_DMA_DIRECT_REMAP, atomic pool chunks are added
to the gen_pool using their remapped virtual address.
dma_direct_free_pages() reconstructs a linear-map address with
page_address(page) and passes that to dma_free_from_pool(). That address
does not match the gen_pool virtual range, so the pool lookup can fail and
the code can fall through to freeing a pool-owned page through the normal
page allocator path.

Add a page-based pool free helper that looks up the owning pool chunk by
physical address, translates it back to the gen_pool virtual address, and
frees that address to the pool. Use it from dma_direct_free_pages() while
keeping the existing virtual-address helper for coherent allocation frees.

Signed-off-by: Aneesh Kumar K.V (Arm) <aneesh.kumar@kernel.org>
---
 include/linux/dma-map-ops.h |  1 +
 kernel/dma/direct.c         |  4 +--
 kernel/dma/pool.c           | 54 +++++++++++++++++++++++++++++++++++++
 3 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h
index 696b2c3a2305..8be059e69935 100644
--- a/include/linux/dma-map-ops.h
+++ b/include/linux/dma-map-ops.h
@@ -215,6 +215,7 @@ struct page *dma_alloc_from_pool(struct device *dev, size_t size,
 		void **cpu_addr, gfp_t flags, unsigned long attrs,
 		bool (*phys_addr_ok)(struct device *, phys_addr_t, size_t));
 bool dma_free_from_pool(struct device *dev, void *start, size_t size);
+bool dma_free_from_pool_page(struct device *dev, struct page *page, size_t size);
 
 int dma_direct_set_offset(struct device *dev, phys_addr_t cpu_start,
 		dma_addr_t dma_start, u64 size);
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 907c6084c616..488d53ed21f3 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -488,9 +488,9 @@ void dma_direct_free_pages(struct device *dev, size_t size,
 	 */
 	bool mark_mem_encrypted = force_dma_unencrypted(dev);
 
-	/* If cpu_addr is not from an atomic pool, dma_free_from_pool() fails */
+	/* If page is not from an atomic pool, dma_free_from_pool_page() fails */
 	if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) &&
-	    dma_free_from_pool(dev, vaddr, size))
+	    dma_free_from_pool_page(dev, page, size))
 		return;
 
 	phys = page_to_phys(page);
diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c
index e7df8d279e75..43b8101d860f 100644
--- a/kernel/dma/pool.c
+++ b/kernel/dma/pool.c
@@ -356,3 +356,57 @@ bool dma_free_from_pool(struct device *dev, void *start, size_t size)
 
 	return false;
 }
+
+struct dma_pool_phys_match {
+	phys_addr_t phys;
+	size_t size;
+	unsigned long addr;
+	bool found;
+};
+
+static void dma_pool_find_phys(struct gen_pool *pool, struct gen_pool_chunk *chunk,
+			       void *data)
+{
+	struct dma_pool_phys_match *match = data;
+	phys_addr_t end = match->phys + match->size - 1;
+	phys_addr_t chunk_end;
+
+	if (match->found)
+		return;
+
+	chunk_end = chunk->phys_addr + (chunk->end_addr - chunk->start_addr);
+	if (match->phys < chunk->phys_addr || end > chunk_end)
+		return;
+
+	match->addr = chunk->start_addr + (match->phys - chunk->phys_addr);
+	match->found = true;
+}
+
+static bool dma_free_from_pool_phys(struct dma_gen_pool *dma_pool, phys_addr_t phys,
+				    size_t size)
+{
+	struct dma_pool_phys_match match = {
+		.phys = phys,
+		.size = size,
+	};
+
+	gen_pool_for_each_chunk(dma_pool->pool, dma_pool_find_phys, &match);
+	if (!match.found)
+		return false;
+
+	gen_pool_free(dma_pool->pool, match.addr, size);
+	return true;
+}
+
+bool dma_free_from_pool_page(struct device *dev, struct page *page, size_t size)
+{
+	struct dma_gen_pool *dma_pool = NULL;
+	phys_addr_t phys = page_to_phys(page);
+
+	while ((dma_pool = dma_guess_pool(dma_pool, 0))) {
+		if (dma_free_from_pool_phys(dma_pool, phys, size))
+			return true;
+	}
+
+	return false;
+}
-- 
2.43.0



^ permalink raw reply related

* [PATCH v5 20/20] swiotlb: Preserve allocation virtual address for dynamic pools
From: Aneesh Kumar K.V (Arm) @ 2026-05-22  4:28 UTC (permalink / raw)
  To: iommu, linux-arm-kernel, linux-kernel, linux-coco
  Cc: Aneesh Kumar K.V (Arm), Robin Murphy, Marek Szyprowski,
	Will Deacon, Marc Zyngier, Steven Price, Suzuki K Poulose,
	Catalin Marinas, Jiri Pirko, Jason Gunthorpe, Mostafa Saleh,
	Petr Tesarik, Alexey Kardashevskiy, Dan Williams, Xu Yilun,
	linuxppc-dev, linux-s390, Madhavan Srinivasan, Michael Ellerman,
	Nicholas Piggin, Christophe Leroy (CS GROUP), Alexander Gordeev,
	Gerald Schaefer, Heiko Carstens, Vasily Gorbik,
	Christian Borntraeger, Sven Schnelle, x86
In-Reply-To: <20260522042815.370873-1-aneesh.kumar@kernel.org>

swiotlb_alloc_tlb() can allocate from the DMA atomic pool when a decrypted
pool is needed from atomic context. With CONFIG_DMA_DIRECT_REMAP, the
atomic pool is backed by remapped virtual addresses, which are not the same
as the direct-map addresses returned by phys_to_virt().

swiotlb_init_io_tlb_pool() currently reconstructs the pool virtual address
from the physical start address. For atomic-pool backed allocations this
stores the wrong address in pool->vaddr. Later, swiotlb_free_tlb() passes
that address to dma_free_from_pool(), which will fail to recognize the
chunk.

Pass the virtual address returned by the allocation path into
swiotlb_init_io_tlb_pool(), and store that address in pool->vaddr. This
keeps the pool free path using the same virtual address as the allocator.

Signed-off-by: Aneesh Kumar K.V (Arm) <aneesh.kumar@kernel.org>
---
 kernel/dma/swiotlb.c | 32 +++++++++++++++++++-------------
 1 file changed, 19 insertions(+), 13 deletions(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 14d834ca298b..e4bd8c9eaeda 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -302,9 +302,9 @@ void __init swiotlb_update_mem_attributes(void)
 }
 
 static void swiotlb_init_io_tlb_pool(struct io_tlb_pool *mem, phys_addr_t start,
-		unsigned long nslabs, bool late_alloc, unsigned int nareas)
+		void *vaddr, unsigned long nslabs, bool late_alloc,
+		unsigned int nareas)
 {
-	void *vaddr = phys_to_virt(start);
 	unsigned long bytes = nslabs << IO_TLB_SHIFT, i;
 
 	mem->nslabs = nslabs;
@@ -445,7 +445,7 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
 		return;
 	}
 
-	swiotlb_init_io_tlb_pool(mem, __pa(tlb), nslabs, false, nareas);
+	swiotlb_init_io_tlb_pool(mem, __pa(tlb), tlb, nslabs, false, nareas);
 	add_mem_pool(&io_tlb_default_mem, mem);
 
 	if (flags & SWIOTLB_VERBOSE)
@@ -553,7 +553,7 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
 		}
 	}
 
-	swiotlb_init_io_tlb_pool(mem, virt_to_phys(vstart), nslabs, true,
+	swiotlb_init_io_tlb_pool(mem, virt_to_phys(vstart), vstart, nslabs, true,
 				 nareas);
 	add_mem_pool(&io_tlb_default_mem, mem);
 
@@ -664,25 +664,26 @@ static struct page *alloc_dma_pages(gfp_t gfp, size_t bytes,
  * @phys_limit:	Maximum allowed physical address of the buffer.
  * @attrs:	DMA attributes for the allocation.
  * @gfp:	GFP flags for the allocation.
+ * @vaddr:	Receives the virtual address for the allocated buffer.
  *
  * Return: Allocated pages, or %NULL on allocation failure.
  */
 static struct page *swiotlb_alloc_tlb(struct device *dev, size_t bytes,
-		u64 phys_limit, unsigned long attrs, gfp_t gfp)
+		u64 phys_limit, unsigned long attrs, gfp_t gfp, void **vaddr)
 {
 	struct page *page;
 
+	*vaddr = NULL;
+
 	/*
 	 * Allocate from the atomic pools if memory is encrypted and
 	 * the allocation is atomic, because decrypting may block.
 	 */
 	if (!gfpflags_allow_blocking(gfp) && (attrs & DMA_ATTR_CC_SHARED)) {
-		void *vaddr;
-
 		if (!IS_ENABLED(CONFIG_DMA_COHERENT_POOL))
 			return NULL;
 
-		return dma_alloc_from_pool(dev, bytes, &vaddr, gfp,
+		return dma_alloc_from_pool(dev, bytes, vaddr, gfp,
 					   attrs, dma_coherent_ok);
 	}
 
@@ -705,6 +706,8 @@ static struct page *swiotlb_alloc_tlb(struct device *dev, size_t bytes,
 			return NULL;
 	}
 
+	if (page)
+		*vaddr = phys_to_virt(page_to_phys(page));
 	return page;
 }
 
@@ -750,6 +753,7 @@ static struct io_tlb_pool *swiotlb_alloc_pool(struct device *dev,
 {
 	struct io_tlb_pool *pool;
 	unsigned int slot_order;
+	void *tlb_vaddr;
 	struct page *tlb;
 	size_t pool_size;
 	size_t tlb_size;
@@ -767,7 +771,8 @@ static struct io_tlb_pool *swiotlb_alloc_pool(struct device *dev,
 	pool->unencrypted = !!(attrs & DMA_ATTR_CC_SHARED);
 
 	tlb_size = nslabs << IO_TLB_SHIFT;
-	while (!(tlb = swiotlb_alloc_tlb(dev, tlb_size, phys_limit, attrs, gfp))) {
+	while (!(tlb = swiotlb_alloc_tlb(dev, tlb_size, phys_limit, attrs, gfp,
+					 &tlb_vaddr))) {
 		if (nslabs <= minslabs)
 			goto error_tlb;
 		nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE);
@@ -781,12 +786,12 @@ static struct io_tlb_pool *swiotlb_alloc_pool(struct device *dev,
 	if (!pool->slots)
 		goto error_slots;
 
-	swiotlb_init_io_tlb_pool(pool, page_to_phys(tlb), nslabs, true, nareas);
+	swiotlb_init_io_tlb_pool(pool, page_to_phys(tlb), tlb_vaddr, nslabs,
+				 true, nareas);
 	return pool;
 
 error_slots:
-	swiotlb_free_tlb(page_address(tlb), tlb_size,
-			 !!(attrs & DMA_ATTR_CC_SHARED));
+	swiotlb_free_tlb(tlb_vaddr, tlb_size, !!(attrs & DMA_ATTR_CC_SHARED));
 error_tlb:
 	kfree(pool);
 error:
@@ -1995,7 +2000,8 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
 			mem->unencrypted = false;
 		}
 
-		swiotlb_init_io_tlb_pool(pool, rmem->base, nslabs,
+		swiotlb_init_io_tlb_pool(pool, rmem->base, phys_to_virt(rmem->base),
+					 nslabs,
 					 false, nareas);
 		mem->force_bounce = true;
 		mem->for_alloc = true;
-- 
2.43.0



^ permalink raw reply related

* Re: [PATCH net-next] net: ucc_geth: Batch RX packets before stack handoff
From: Eric Dumazet @ 2026-05-22  4:44 UTC (permalink / raw)
  To: Rosen Penev
  Cc: Jakub Kicinski, netdev, Andrew Lunn, David S. Miller, Paolo Abeni,
	open list:FREESCALE QUICC ENGINE UCC ETHERNET DRIVER, open list
In-Reply-To: <CAKxU2N_63D7SFPkr9H9UYWif2DRW_qnV0SzZoRHWcRZtC_HTWg@mail.gmail.com>

On Thu, May 21, 2026 at 4:29 PM Rosen Penev <rosenp@gmail.com> wrote:
>
> On Thu, May 21, 2026 at 6:41 AM Eric Dumazet <edumazet@google.com> wrote:
> >
> > On Wed, May 20, 2026 at 5:39 PM Rosen Penev <rosenp@gmail.com> wrote:
> > >
> > > On Wed, May 20, 2026 at 4:57 PM Jakub Kicinski <kuba@kernel.org> wrote:
> > > >
> > > > On Sun, 17 May 2026 12:28:56 -0700 Rosen Penev wrote:
> > > > > Collect received skbs on a local list during RX polling and pass the
> > > > > completed batch to netif_receive_skb_list(). This lets the networking
> > > > > stack process packets from a poll cycle in bulk instead of handing each
> > > > > skb up individually.
> > > >
> > > > GRO should be even better.
> > > GRO will result in slower routing performance because there is no
> > > hardware checksum.
> >
> > Then provide a knob or something, instead of trying to avoid GRO.
> >
> > For end hosts (forwarding not enabled), checksum will need to be
> > computed anyway.
> > GRO should be faster for them.
> >
> > Note that GRO also uses netif_receive_skb_list_internal()
> so you recommend switching to napi_gro_receive even though there's no
> RX hardware checksum?

Certainly.

There is a reason we added support for sw checksum in GRO years ago.

Most linux hosts on this planet do not forward packets.
And if they do, there is a big chance the egress device supports TSO
or tx checksum offload.


^ permalink raw reply

* Re: [PATCH resend] powerpc/code-patching: Avoid r/w mapping of the zero page
From: Michael Ellerman @ 2026-05-22  6:51 UTC (permalink / raw)
  To: Ard Biesheuvel, Christophe Leroy (CS GROUP), linux-kernel
  Cc: linuxppc-dev, Madhavan Srinivasan, Nicholas Piggin
In-Reply-To: <bc13fc3e-96eb-4aa9-a8d8-f7cf0c4cd505@app.fastmail.com>

On 20/5/2026 20:16, Ard Biesheuvel wrote:
> 
> On Wed, 20 May 2026, at 11:59, Christophe Leroy (CS GROUP) wrote:
>> Le 20/05/2026 à 11:40, Ard Biesheuvel a écrit :
>>>
>>> On Wed, 20 May 2026, at 11:36, Christophe Leroy (CS GROUP) wrote:
>>>> Le 20/05/2026 à 10:54, Ard Biesheuvel a écrit :
...
>>>>> diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
>>>>> index f84e0337cc02..13a8acf851f1 100644
>>>>> --- a/arch/powerpc/lib/code-patching.c
>>>>> +++ b/arch/powerpc/lib/code-patching.c
>>>>> @@ -60,7 +60,7 @@ struct patch_context {
>>>>>     
>>>>>     static DEFINE_PER_CPU(struct patch_context, cpu_patching_context);
>>>>>     
>>>>> -static int map_patch_area(void *addr, unsigned long text_poke_addr);
>>>>> +static int map_patch_area(unsigned long text_poke_addr);
>>>>>     static void unmap_patch_area(unsigned long addr);
>>>>>     
>>>>>     static bool mm_patch_enabled(void)
>>>>> @@ -117,7 +117,7 @@ static int text_area_cpu_up(unsigned int cpu)
>>>>>     
>>>>>     	// Map/unmap the area to ensure all page tables are pre-allocated
>>>>>     	addr = (unsigned long)area->addr;
>>>>> -	err = map_patch_area(empty_zero_page, addr);
>>>>> +	err = map_patch_area(addr);
>>>>
>>>> I would get rid of map_patch_area() completely and just do:
>>>>
>>>> 	err = map_kernel_page(addr, __pa_symbol(empty_zero_page), PAGE_KERNEL_RO);
>>>>
>>>
>>> I think retaining the symmetry of map_patch_area() and unmap_patch_area()
>>> makes sense too.
>>
>> Could also drop unmap_patch_area() and use unmap_kernel_page() instead.
>>
> 
> Good point. That way, we'll end up with
> 
>   arch/powerpc/lib/code-patching.c | 52 ++--------------------------------------
>   1 file changed, 2 insertions(+), 50 deletions(-)
> 
> I'll spin a v2 with those changes once everyone on cc has had the opportunity
> to chime in.

That diffstat is definitely attractive.

I do like that unmap_patch_area() is more defensive with the page table 
walk, but it's probably overly paranoid. If page table levels have 
vanished since we just mapped them then the system is probably toast anyway.

So OK by me.

cheers


^ permalink raw reply

* Re: [PATCH v2 04/69] mm/hugetlb: Initialize gigantic bootmem hugepage struct pages earlier
From: Mike Rapoport @ 2026-05-22  8:13 UTC (permalink / raw)
  To: Muchun Song
  Cc: Andrew Morton, David Hildenbrand, Muchun Song, Oscar Salvador,
	Michael Ellerman, Madhavan Srinivasan, Lorenzo Stoakes,
	Liam R . Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Nicholas Piggin,
	Christophe Leroy, Ackerley Tng, Frank van der Linden,
	aneesh.kumar, joao.m.martins, linux-mm, linuxppc-dev,
	linux-kernel
In-Reply-To: <20260513130542.35604-5-songmuchun@bytedance.com>

On Wed, 13 May 2026 21:04:32 +0800, Muchun Song <songmuchun@bytedance.com> wrote:
> Gigantic bootmem HugeTLB pages are currently initialized from hugetlb_init(),
> but page_alloc_init_late() runs earlier and walks pageblocks to determine
> zone contiguity.
> 
> If a bootmem HugeTLB region is marked noinit, set_zone_contiguous() can
> observe still-uninitialized struct pages through __pageblock_pfn_to_page().
> This may not trigger an immediate failure, but it can make
> set_zone_contiguous() compute the wrong zone contiguity state. If extra
> poisoned-page checks are added in this path, such as PF_POISONED_CHECK()
> in page_zone_id(), it can also trigger an early boot panic.
> 
> [...]

Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>

-- 
Sincerely yours,
Mike.



^ permalink raw reply

* Re: [PATCH v2 09/69] mm/mm_init: Defer hugetlb reservation until after zone initialization
From: Mike Rapoport @ 2026-05-22  8:13 UTC (permalink / raw)
  To: Muchun Song
  Cc: Andrew Morton, David Hildenbrand, Muchun Song, Oscar Salvador,
	Michael Ellerman, Madhavan Srinivasan, Lorenzo Stoakes,
	Liam R . Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Nicholas Piggin,
	Christophe Leroy, Ackerley Tng, Frank van der Linden,
	aneesh.kumar, joao.m.martins, linux-mm, linuxppc-dev,
	linux-kernel
In-Reply-To: <20260513130542.35604-10-songmuchun@bytedance.com>

On Wed, 13 May 2026 21:04:37 +0800, Muchun Song <songmuchun@bytedance.com> wrote:
> hugetlb_cma_reserve() and hugetlb_bootmem_alloc() currently run before
> free_area_init(), so HugeTLB reservation happens before zone state is
> initialized.
> 
> Move the reservation step after free_area_init() so the relevant zone
> information is available before HugeTLB reserves memory. This is needed
> for later hugetlb changes that validate boot-time HugeTLB reservations
> against zone boundaries.
> 
> [...]

Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>

-- 
Sincerely yours,
Mike.



^ permalink raw reply

* Re: [PATCH v2 13/69] mm/hugetlb: Refactor early boot gigantic hugepage allocation
From: Mike Rapoport @ 2026-05-22  8:56 UTC (permalink / raw)
  To: Muchun Song
  Cc: Andrew Morton, David Hildenbrand, Muchun Song, Oscar Salvador,
	Michael Ellerman, Madhavan Srinivasan, Lorenzo Stoakes,
	Liam R . Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Nicholas Piggin,
	Christophe Leroy, Ackerley Tng, Frank van der Linden,
	aneesh.kumar, joao.m.martins, linux-mm, linuxppc-dev,
	linux-kernel
In-Reply-To: <20260513130542.35604-14-songmuchun@bytedance.com>

On Wed, 13 May 2026 21:04:41 +0800, Muchun Song <songmuchun@bytedance.com> wrote:
> diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
> index 558fafb82b8a..ff8c5ec831bb 100644
> --- a/arch/powerpc/mm/hugetlbpage.c
> +++ b/arch/powerpc/mm/hugetlbpage.c
> @@ -104,17 +104,14 @@ void __init pseries_add_gpage(u64 addr, u64 page_size, unsigned long number_of_p
>  	}
>  }
>  
> -static int __init pseries_alloc_bootmem_huge_page(struct hstate *hstate)
> +static __init void *pseries_alloc_bootmem_huge_page(struct hstate *hstate)
>  {
>  	struct huge_bootmem_page *m;

This can be void *m; the powerpc code then does not need to know about
struct huge_bootmem_page at all, and its declaration can be moved out of
include/linux.

Other than that

Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>

-- 
Sincerely yours,
Mike.



^ permalink raw reply

* Re: [PATCH v2 19/69] mm/mm_init: Make __init_page_from_nid() static
From: Mike Rapoport @ 2026-05-22  8:56 UTC (permalink / raw)
  To: Muchun Song
  Cc: Andrew Morton, David Hildenbrand, Muchun Song, Oscar Salvador,
	Michael Ellerman, Madhavan Srinivasan, Lorenzo Stoakes,
	Liam R . Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Nicholas Piggin,
	Christophe Leroy, Ackerley Tng, Frank van der Linden,
	aneesh.kumar, joao.m.martins, linux-mm, linuxppc-dev,
	linux-kernel
In-Reply-To: <20260513130542.35604-20-songmuchun@bytedance.com>

On Wed, 13 May 2026 21:04:47 +0800, Muchun Song <songmuchun@bytedance.com> wrote:
> __init_page_from_nid() no longer has external users and is only used
> locally in mm/mm_init.c under CONFIG_DEFERRED_STRUCT_PAGE_INIT.
> 
> Make it static and keep it inside that block.

Can we just fold it into its sole caller init_deferred_page() please?

-- 
Sincerely yours,
Mike.



^ permalink raw reply

* Re: [PATCH v2 13/69] mm/hugetlb: Refactor early boot gigantic hugepage allocation
From: Muchun Song @ 2026-05-22  9:05 UTC (permalink / raw)
  To: Mike Rapoport
  Cc: Muchun Song, Andrew Morton, David Hildenbrand, Oscar Salvador,
	Michael Ellerman, Madhavan Srinivasan, Lorenzo Stoakes,
	Liam R . Howlett, Vlastimil Babka, Suren Baghdasaryan,
	Michal Hocko, Nicholas Piggin, Christophe Leroy, Ackerley Tng,
	Frank van der Linden, aneesh.kumar, joao.m.martins, linux-mm,
	linuxppc-dev, linux-kernel
In-Reply-To: <177944019668.3663073.6529634450029926963.b4-review@b4>



> On May 22, 2026, at 16:56, Mike Rapoport <rppt@kernel.org> wrote:
> 
> On Wed, 13 May 2026 21:04:41 +0800, Muchun Song <songmuchun@bytedance.com> wrote:
>> diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
>> index 558fafb82b8a..ff8c5ec831bb 100644
>> --- a/arch/powerpc/mm/hugetlbpage.c
>> +++ b/arch/powerpc/mm/hugetlbpage.c
>> @@ -104,17 +104,14 @@ void __init pseries_add_gpage(u64 addr, u64 page_size, unsigned long number_of_p
>> }
>> }
>> 
>> -static int __init pseries_alloc_bootmem_huge_page(struct hstate *hstate)
>> +static __init void *pseries_alloc_bootmem_huge_page(struct hstate *hstate)
>> {
>> struct huge_bootmem_page *m;
> 
> This can be void *m and powerpc code does not need to know about struct
> huge_bootmem_page at all and its declaration can be moved out of
> include/linux

Makes sense.

> 
> Other than that
> 
> Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>

Thanks.

Muchun.

> 
> -- 
> Sincerely yours,
> Mike.
> 



^ permalink raw reply

* Re: [PATCH v2 19/69] mm/mm_init: Make __init_page_from_nid() static
From: Muchun Song @ 2026-05-22  9:06 UTC (permalink / raw)
  To: Mike Rapoport
  Cc: Muchun Song, Andrew Morton, David Hildenbrand, Oscar Salvador,
	Michael Ellerman, Madhavan Srinivasan, Lorenzo Stoakes,
	Liam R . Howlett, Vlastimil Babka, Suren Baghdasaryan,
	Michal Hocko, Nicholas Piggin, Christophe Leroy, Ackerley Tng,
	Frank van der Linden, aneesh.kumar, joao.m.martins, linux-mm,
	linuxppc-dev, linux-kernel
In-Reply-To: <177944019670.3663073.4082076266566320906.b4-review@b4>



> On May 22, 2026, at 16:56, Mike Rapoport <rppt@kernel.org> wrote:
> 
> On Wed, 13 May 2026 21:04:47 +0800, Muchun Song <songmuchun@bytedance.com> wrote:
>> __init_page_from_nid() no longer has external users and is only used
>> locally in mm/mm_init.c under CONFIG_DEFERRED_STRUCT_PAGE_INIT.
>> 
>> Make it static and keep it inside that block.
> 
> Can we just fold it into its sole caller init_deferred_page() please?

Yes. I can do that in the next version.

Thanks
Muchun

> 
> -- 
> Sincerely yours,
> Mike.
> 



^ permalink raw reply

* [PATCH v1 0/8] powerpc/signal: Convert to scoped user access
From: Christophe Leroy (CS GROUP) @ 2026-05-22  9:56 UTC (permalink / raw)
  To: Michael Ellerman, Nicholas Piggin, Madhavan Srinivasan
  Cc: Christophe Leroy (CS GROUP), linux-kernel, linuxppc-dev

This series converts powerpc architecture signal handling to scoped
user access and enlarges some of the block accesses to minimise the
number of times user access has to be opened and closed.

As mentioned in individual patches, some bring real performance
improvement.

This series is built from a previous series [1], which predates the
implementation of scoped user access.

[1] https://lore.kernel.org/all/1718f38859d5366f82d5bef531f255cedf537b5d.1631861883.git.christophe.leroy@csgroup.eu/T/#t

Christophe Leroy (CS GROUP) (8):
  powerpc/signal32: Convert to scoped user access
  powerpc/signal64: Untangle setup_tm_sigcontexts() and
    user_access_begin()
  powerpc/signal64: Convert to scoped user access
  powerpc/signal64: Access function descriptor with scoped user access
  powerpc/signal: Include the new stack frame inside the user access
    block
  signal: Add unsafe_copy_siginfo_to_user()
  powerpc/uaccess: Add unsafe_clear_user()
  powerpc/signal: Use unsafe_copy_siginfo_to_user()

 arch/powerpc/include/asm/uaccess.h |  20 ++
 arch/powerpc/kernel/signal_32.c    | 498 ++++++++++++++---------------
 arch/powerpc/kernel/signal_64.c    | 120 +++----
 include/linux/signal.h             |  15 +
 include/linux/uaccess.h            |   1 +
 kernel/signal.c                    |   5 -
 6 files changed, 323 insertions(+), 336 deletions(-)

-- 
2.54.0



^ permalink raw reply

* [PATCH v1 2/8] powerpc/signal64: Untangle setup_tm_sigcontexts() and user_access_begin()
From: Christophe Leroy (CS GROUP) @ 2026-05-22  9:56 UTC (permalink / raw)
  To: Michael Ellerman, Nicholas Piggin, Madhavan Srinivasan
  Cc: Christophe Leroy (CS GROUP), linux-kernel, linuxppc-dev
In-Reply-To: <cover.1779441786.git.chleroy@kernel.org>

Call setup_tm_sigcontexts() before opening user access to avoid
having to close and open again.

Signed-off-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
---
 arch/powerpc/kernel/signal_64.c | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 86bb5bb4c143..3849af21e1d8 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -873,6 +873,15 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 	if (!MSR_TM_ACTIVE(msr))
 		prepare_setup_sigcontext(tsk);
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	if (MSR_TM_ACTIVE(msr))
+		err |= setup_tm_sigcontexts(&frame->uc.uc_mcontext,
+					    &frame->uc_transact.uc_mcontext,
+					    tsk, ksig->sig, NULL,
+					    (unsigned long)ksig->ka.sa.sa_handler,
+					    msr);
+
+#endif
 	if (!user_write_access_begin(frame, sizeof(*frame)))
 		goto badframe;
 
@@ -889,19 +898,6 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 		 * ucontext_t (for transactional state) with its uc_link ptr.
 		 */
 		unsafe_put_user(&frame->uc_transact, &frame->uc.uc_link, badframe_block);
-
-		user_write_access_end();
-
-		err |= setup_tm_sigcontexts(&frame->uc.uc_mcontext,
-					    &frame->uc_transact.uc_mcontext,
-					    tsk, ksig->sig, NULL,
-					    (unsigned long)ksig->ka.sa.sa_handler,
-					    msr);
-
-		if (!user_write_access_begin(&frame->uc.uc_sigmask,
-					     sizeof(frame->uc.uc_sigmask)))
-			goto badframe;
-
 #endif
 	} else {
 		unsafe_put_user(0, &frame->uc.uc_link, badframe_block);
-- 
2.54.0



^ permalink raw reply related

* [PATCH v1 1/8] powerpc/signal32: Convert to scoped user access
From: Christophe Leroy (CS GROUP) @ 2026-05-22  9:56 UTC (permalink / raw)
  To: Michael Ellerman, Nicholas Piggin, Madhavan Srinivasan
  Cc: Christophe Leroy (CS GROUP), linux-kernel, linuxppc-dev
In-Reply-To: <cover.1779441786.git.chleroy@kernel.org>

Commit 861574d51bbd ("powerpc/uaccess: Implement masked user access")
provides optimised user access by avoiding the cost of access_ok().

Convert signal32 functions to scoped user access.

Scoped user access also makes the code simpler.

Signed-off-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
---
 arch/powerpc/kernel/signal_32.c | 456 +++++++++++++++-----------------
 1 file changed, 217 insertions(+), 239 deletions(-)

diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 7a718ed32b27..f5d5139a1426 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -468,98 +468,98 @@ static long restore_user_regs(struct pt_regs *regs,
 {
 	unsigned int save_r2 = 0;
 	unsigned long msr;
-#ifdef CONFIG_VSX
-	int i;
-#endif
 
-	if (!user_read_access_begin(sr, sizeof(*sr)))
-		return 1;
-	/*
-	 * restore general registers but not including MSR or SOFTE. Also
-	 * take care of keeping r2 (TLS) intact if not a signal
-	 */
-	if (!sig)
-		save_r2 = (unsigned int)regs->gpr[2];
-	unsafe_restore_general_regs(regs, sr, failed);
-	set_trap_norestart(regs);
-	unsafe_get_user(msr, &sr->mc_gregs[PT_MSR], failed);
-	if (!sig)
-		regs->gpr[2] = (unsigned long) save_r2;
-
-	/* if doing signal return, restore the previous little-endian mode */
-	if (sig)
-		regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE));
+	scoped_user_read_access(sr, failed) {
+		/*
+		 * restore general registers but not including MSR or SOFTE. Also
+		 * take care of keeping r2 (TLS) intact if not a signal
+		 */
+		if (!sig)
+			save_r2 = (unsigned int)regs->gpr[2];
+		unsafe_restore_general_regs(regs, sr, failed);
+		set_trap_norestart(regs);
+		unsafe_get_user(msr, &sr->mc_gregs[PT_MSR], failed);
+		if (!sig)
+			regs->gpr[2] = (unsigned long)save_r2;
+
+		/* if doing signal return, restore the previous little-endian mode */
+		if (sig)
+			regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE));
 
 #ifdef CONFIG_ALTIVEC
-	/*
-	 * Force the process to reload the altivec registers from
-	 * current->thread when it next does altivec instructions
-	 */
-	regs_set_return_msr(regs, regs->msr & ~MSR_VEC);
-	if (msr & MSR_VEC) {
-		/* restore altivec registers from the stack */
-		unsafe_copy_from_user(&current->thread.vr_state, &sr->mc_vregs,
-				      sizeof(sr->mc_vregs), failed);
-		current->thread.used_vr = true;
-	} else if (current->thread.used_vr)
-		memset(&current->thread.vr_state, 0,
-		       ELF_NVRREG * sizeof(vector128));
-
-	/* Always get VRSAVE back */
-	unsafe_get_user(current->thread.vrsave, (u32 __user *)&sr->mc_vregs[32], failed);
-	if (cpu_has_feature(CPU_FTR_ALTIVEC))
-		mtspr(SPRN_VRSAVE, current->thread.vrsave);
+		/*
+		 * Force the process to reload the altivec registers from
+		 * current->thread when it next does altivec instructions
+		 */
+		regs_set_return_msr(regs, regs->msr & ~MSR_VEC);
+		if (msr & MSR_VEC) {
+			/* restore altivec registers from the stack */
+			unsafe_copy_from_user(&current->thread.vr_state, &sr->mc_vregs,
+					      sizeof(sr->mc_vregs), failed);
+			current->thread.used_vr = true;
+		} else if (current->thread.used_vr) {
+			memset(&current->thread.vr_state, 0,
+			       ELF_NVRREG * sizeof(vector128));
+		}
+
+		/* Always get VRSAVE back */
+		unsafe_get_user(current->thread.vrsave, (u32 __user *)&sr->mc_vregs[32], failed);
+		if (cpu_has_feature(CPU_FTR_ALTIVEC))
+			mtspr(SPRN_VRSAVE, current->thread.vrsave);
 #endif /* CONFIG_ALTIVEC */
-	unsafe_copy_fpr_from_user(current, &sr->mc_fregs, failed);
+		unsafe_copy_fpr_from_user(current, &sr->mc_fregs, failed);
 
 #ifdef CONFIG_VSX
-	/*
-	 * Force the process to reload the VSX registers from
-	 * current->thread when it next does VSX instruction.
-	 */
-	regs_set_return_msr(regs, regs->msr & ~MSR_VSX);
-	if (msr & MSR_VSX) {
 		/*
-		 * Restore altivec registers from the stack to a local
-		 * buffer, then write this out to the thread_struct
+		 * Force the process to reload the VSX registers from
+		 * current->thread when it next does VSX instruction.
 		 */
-		unsafe_copy_vsx_from_user(current, &sr->mc_vsregs, failed);
-		current->thread.used_vsr = true;
-	} else if (current->thread.used_vsr)
-		for (i = 0; i < 32 ; i++)
-			current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
+		regs_set_return_msr(regs, regs->msr & ~MSR_VSX);
+		if (msr & MSR_VSX) {
+			/*
+			 * Restore altivec registers from the stack to a local
+			 * buffer, then write this out to the thread_struct
+			 */
+			unsafe_copy_vsx_from_user(current, &sr->mc_vsregs, failed);
+			current->thread.used_vsr = true;
+		} else if (current->thread.used_vsr) {
+			int i;
+
+			for (i = 0; i < 32 ; i++)
+				current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
+		}
 #endif /* CONFIG_VSX */
-	/*
-	 * force the process to reload the FP registers from
-	 * current->thread when it next does FP instructions
-	 */
-	regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1));
+		/*
+		 * force the process to reload the FP registers from
+		 * current->thread when it next does FP instructions
+		 */
+		regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1));
 
 #ifdef CONFIG_SPE
-	/*
-	 * Force the process to reload the spe registers from
-	 * current->thread when it next does spe instructions.
-	 * Since this is user ABI, we must enforce the sizing.
-	 */
-	BUILD_BUG_ON(sizeof(current->thread.spe) != ELF_NEVRREG * sizeof(u32));
-	regs_set_return_msr(regs, regs->msr & ~MSR_SPE);
-	if (msr & MSR_SPE) {
-		/* restore spe registers from the stack */
-		unsafe_copy_from_user(&current->thread.spe, &sr->mc_vregs,
-				      sizeof(current->thread.spe), failed);
-		current->thread.used_spe = true;
-	} else if (current->thread.used_spe)
-		memset(&current->thread.spe, 0, sizeof(current->thread.spe));
-
-	/* Always get SPEFSCR back */
-	unsafe_get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs + ELF_NEVRREG, failed);
-#endif /* CONFIG_SPE */
+		/*
+		 * Force the process to reload the spe registers from
+		 * current->thread when it next does spe instructions.
+		 * Since this is user ABI, we must enforce the sizing.
+		 */
+		BUILD_BUG_ON(sizeof(current->thread.spe) != ELF_NEVRREG * sizeof(u32));
+		regs_set_return_msr(regs, regs->msr & ~MSR_SPE);
+		if (msr & MSR_SPE) {
+			/* restore spe registers from the stack */
+			unsafe_copy_from_user(&current->thread.spe, &sr->mc_vregs,
+					      sizeof(current->thread.spe), failed);
+			current->thread.used_spe = true;
+		} else if (current->thread.used_spe) {
+			memset(&current->thread.spe, 0, sizeof(current->thread.spe));
+		}
 
-	user_read_access_end();
+		/* Always get SPEFSCR back */
+		unsafe_get_user(current->thread.spefscr,
+				(u32 __user *)&sr->mc_vregs + ELF_NEVRREG, failed);
+#endif /* CONFIG_SPE */
+	}
 	return 0;
 
 failed:
-	user_read_access_end();
 	return 1;
 }
 
@@ -574,7 +574,6 @@ static long restore_tm_user_regs(struct pt_regs *regs,
 				 struct mcontext __user *tm_sr)
 {
 	unsigned long msr, msr_hi;
-	int i;
 
 	if (tm_suspend_disabled)
 		return 1;
@@ -585,86 +584,81 @@ static long restore_tm_user_regs(struct pt_regs *regs,
 	 * TFHAR is restored from the checkpointed NIP; TEXASR and TFIAR
 	 * were set by the signal delivery.
 	 */
-	if (!user_read_access_begin(sr, sizeof(*sr)))
-		return 1;
-
-	unsafe_restore_general_regs(&current->thread.ckpt_regs, sr, failed);
-	unsafe_get_user(current->thread.tm_tfhar, &sr->mc_gregs[PT_NIP], failed);
-	unsafe_get_user(msr, &sr->mc_gregs[PT_MSR], failed);
+	scoped_user_read_access(sr, failed) {
+		unsafe_restore_general_regs(&current->thread.ckpt_regs, sr, failed);
+		unsafe_get_user(current->thread.tm_tfhar, &sr->mc_gregs[PT_NIP], failed);
+		unsafe_get_user(msr, &sr->mc_gregs[PT_MSR], failed);
 
-	/* Restore the previous little-endian mode */
-	regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE));
+		/* Restore the previous little-endian mode */
+		regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE));
 
-	regs_set_return_msr(regs, regs->msr & ~MSR_VEC);
-	if (msr & MSR_VEC) {
-		/* restore altivec registers from the stack */
-		unsafe_copy_from_user(&current->thread.ckvr_state, &sr->mc_vregs,
-				      sizeof(sr->mc_vregs), failed);
-		current->thread.used_vr = true;
-	} else if (current->thread.used_vr) {
-		memset(&current->thread.vr_state, 0,
-		       ELF_NVRREG * sizeof(vector128));
-		memset(&current->thread.ckvr_state, 0,
-		       ELF_NVRREG * sizeof(vector128));
-	}
+		regs_set_return_msr(regs, regs->msr & ~MSR_VEC);
+		if (msr & MSR_VEC) {
+			/* restore altivec registers from the stack */
+			unsafe_copy_from_user(&current->thread.ckvr_state, &sr->mc_vregs,
+					      sizeof(sr->mc_vregs), failed);
+			current->thread.used_vr = true;
+		} else if (current->thread.used_vr) {
+			memset(&current->thread.vr_state, 0, ELF_NVRREG * sizeof(vector128));
+			memset(&current->thread.ckvr_state, 0, ELF_NVRREG * sizeof(vector128));
+		}
 
-	/* Always get VRSAVE back */
-	unsafe_get_user(current->thread.ckvrsave,
-			(u32 __user *)&sr->mc_vregs[32], failed);
-	if (cpu_has_feature(CPU_FTR_ALTIVEC))
-		mtspr(SPRN_VRSAVE, current->thread.ckvrsave);
+		/* Always get VRSAVE back */
+		unsafe_get_user(current->thread.ckvrsave,
+				(u32 __user *)&sr->mc_vregs[32], failed);
+		if (cpu_has_feature(CPU_FTR_ALTIVEC))
+			mtspr(SPRN_VRSAVE, current->thread.ckvrsave);
 
-	regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1));
+		regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1));
 
-	unsafe_copy_fpr_from_user(current, &sr->mc_fregs, failed);
+		unsafe_copy_fpr_from_user(current, &sr->mc_fregs, failed);
 
-	regs_set_return_msr(regs, regs->msr & ~MSR_VSX);
-	if (msr & MSR_VSX) {
-		/*
-		 * Restore altivec registers from the stack to a local
-		 * buffer, then write this out to the thread_struct
-		 */
-		unsafe_copy_ckvsx_from_user(current, &sr->mc_vsregs, failed);
-		current->thread.used_vsr = true;
-	} else if (current->thread.used_vsr)
-		for (i = 0; i < 32 ; i++) {
-			current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
-			current->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
+		regs_set_return_msr(regs, regs->msr & ~MSR_VSX);
+		if (msr & MSR_VSX) {
+			/*
+			 * Restore altivec registers from the stack to a local
+			 * buffer, then write this out to the thread_struct
+			 */
+			unsafe_copy_ckvsx_from_user(current, &sr->mc_vsregs, failed);
+			current->thread.used_vsr = true;
+		} else if (current->thread.used_vsr) {
+			int i;
+
+			for (i = 0; i < 32 ; i++) {
+				current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
+				current->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
+			}
 		}
+	}
 
-	user_read_access_end();
-
-	if (!user_read_access_begin(tm_sr, sizeof(*tm_sr)))
-		return 1;
+	scoped_user_read_access(tm_sr, failed) {
+		unsafe_restore_general_regs(regs, tm_sr, failed);
 
-	unsafe_restore_general_regs(regs, tm_sr, failed);
+		/* restore altivec registers from the stack */
+		if (msr & MSR_VEC)
+			unsafe_copy_from_user(&current->thread.vr_state, &tm_sr->mc_vregs,
+					      sizeof(sr->mc_vregs), failed);
 
-	/* restore altivec registers from the stack */
-	if (msr & MSR_VEC)
-		unsafe_copy_from_user(&current->thread.vr_state, &tm_sr->mc_vregs,
-				      sizeof(sr->mc_vregs), failed);
+		/* Always get VRSAVE back */
+		unsafe_get_user(current->thread.vrsave,
+				(u32 __user *)&tm_sr->mc_vregs[32], failed);
 
-	/* Always get VRSAVE back */
-	unsafe_get_user(current->thread.vrsave,
-			(u32 __user *)&tm_sr->mc_vregs[32], failed);
+		unsafe_copy_ckfpr_from_user(current, &tm_sr->mc_fregs, failed);
 
-	unsafe_copy_ckfpr_from_user(current, &tm_sr->mc_fregs, failed);
+		if (msr & MSR_VSX) {
+			/*
+			 * Restore altivec registers from the stack to a local
+			 * buffer, then write this out to the thread_struct
+			 */
+			unsafe_copy_vsx_from_user(current, &tm_sr->mc_vsregs, failed);
+			current->thread.used_vsr = true;
+		}
 
-	if (msr & MSR_VSX) {
-		/*
-		 * Restore altivec registers from the stack to a local
-		 * buffer, then write this out to the thread_struct
-		 */
-		unsafe_copy_vsx_from_user(current, &tm_sr->mc_vsregs, failed);
-		current->thread.used_vsr = true;
+		/* Get the top half of the MSR from the user context */
+		unsafe_get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR], failed);
+		msr_hi <<= 32;
 	}
 
-	/* Get the top half of the MSR from the user context */
-	unsafe_get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR], failed);
-	msr_hi <<= 32;
-
-	user_read_access_end();
-
 	/* If TM bits are set to the reserved value, it's an invalid context */
 	if (MSR_TM_RESV(msr_hi))
 		return 1;
@@ -712,7 +706,6 @@ static long restore_tm_user_regs(struct pt_regs *regs,
 	return 0;
 
 failed:
-	user_read_access_end();
 	return 1;
 }
 #else
@@ -737,8 +730,6 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
 		       struct task_struct *tsk)
 {
 	struct rt_sigframe __user *frame;
-	struct mcontext __user *mctx;
-	struct mcontext __user *tm_mctx = NULL;
 	unsigned long newsp = 0;
 	unsigned long tramp;
 	struct pt_regs *regs = tsk->thread.regs;
@@ -747,52 +738,53 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
 
 	/* Set up Signal Frame */
 	frame = get_sigframe(ksig, tsk, sizeof(*frame), 1);
-	mctx = &frame->uc.uc_mcontext;
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-	tm_mctx = &frame->uc_transact.uc_mcontext;
-#endif
 	if (MSR_TM_ACTIVE(msr))
 		prepare_save_tm_user_regs();
 	else
 		prepare_save_user_regs(1);
 
-	if (!user_access_begin(frame, sizeof(*frame)))
-		goto badframe;
+	scoped_user_rw_access(frame, badframe) {
+		struct mcontext __user *mctx;
+		struct mcontext __user *tm_mctx = NULL;
 
-	/* Put the siginfo & fill in most of the ucontext */
-	unsafe_put_user(0, &frame->uc.uc_flags, failed);
+		mctx = &frame->uc.uc_mcontext;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+		tm_mctx = &frame->uc_transact.uc_mcontext;
+#endif
+		/* Put the siginfo & fill in most of the ucontext */
+		unsafe_put_user(0, &frame->uc.uc_flags, badframe);
 #ifdef CONFIG_PPC64
-	unsafe_compat_save_altstack(&frame->uc.uc_stack, regs->gpr[1], failed);
+		unsafe_compat_save_altstack(&frame->uc.uc_stack, regs->gpr[1], badframe);
 #else
-	unsafe_save_altstack(&frame->uc.uc_stack, regs->gpr[1], failed);
+		unsafe_save_altstack(&frame->uc.uc_stack, regs->gpr[1], badframe);
 #endif
-	unsafe_put_user(to_user_ptr(&frame->uc.uc_mcontext), &frame->uc.uc_regs, failed);
+		unsafe_put_user(to_user_ptr(&frame->uc.uc_mcontext), &frame->uc.uc_regs, badframe);
 
-	if (MSR_TM_ACTIVE(msr)) {
+		if (MSR_TM_ACTIVE(msr)) {
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-		unsafe_put_user((unsigned long)&frame->uc_transact,
-				&frame->uc.uc_link, failed);
-		unsafe_put_user((unsigned long)tm_mctx,
-				&frame->uc_transact.uc_regs, failed);
+			unsafe_put_user((unsigned long)&frame->uc_transact,
+					&frame->uc.uc_link, badframe);
+			unsafe_put_user((unsigned long)tm_mctx,
+					&frame->uc_transact.uc_regs, badframe);
 #endif
-		unsafe_save_tm_user_regs(regs, mctx, tm_mctx, msr, failed);
-	} else {
-		unsafe_put_user(0, &frame->uc.uc_link, failed);
-		unsafe_save_user_regs(regs, mctx, tm_mctx, 1, failed);
-	}
+			unsafe_save_tm_user_regs(regs, mctx, tm_mctx, msr, badframe);
+		} else {
+			unsafe_put_user(0, &frame->uc.uc_link, badframe);
+			unsafe_save_user_regs(regs, mctx, tm_mctx, 1, badframe);
+		}
 
-	/* Save user registers on the stack */
-	if (tsk->mm->context.vdso) {
-		tramp = VDSO32_SYMBOL(tsk->mm->context.vdso, sigtramp_rt32);
-	} else {
-		tramp = (unsigned long)mctx->mc_pad;
-		unsafe_put_user(PPC_RAW_LI(_R0, __NR_rt_sigreturn), &mctx->mc_pad[0], failed);
-		unsafe_put_user(PPC_RAW_SC(), &mctx->mc_pad[1], failed);
-		asm("dcbst %y0; sync; icbi %y0; sync" :: "Z" (mctx->mc_pad[0]));
+		/* Save user registers on the stack */
+		if (tsk->mm->context.vdso) {
+			tramp = VDSO32_SYMBOL(tsk->mm->context.vdso, sigtramp_rt32);
+		} else {
+			tramp = (unsigned long)mctx->mc_pad;
+			unsafe_put_user(PPC_RAW_LI(_R0, __NR_rt_sigreturn), &mctx->mc_pad[0],
+					badframe);
+			unsafe_put_user(PPC_RAW_SC(), &mctx->mc_pad[1], badframe);
+			asm("dcbst %y0; sync; icbi %y0; sync" :: "Z" (mctx->mc_pad[0]));
+		}
+		unsafe_put_sigset_t(&frame->uc.uc_sigmask, oldset, badframe);
 	}
-	unsafe_put_sigset_t(&frame->uc.uc_sigmask, oldset, failed);
-
-	user_access_end();
 
 	if (copy_siginfo_to_user(&frame->info, &ksig->info))
 		goto badframe;
@@ -820,9 +812,6 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
 
 	return 0;
 
-failed:
-	user_access_end();
-
 badframe:
 	signal_fault(tsk, regs, "handle_rt_signal32", frame);
 
@@ -837,8 +826,6 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
 {
 	struct sigcontext __user *sc;
 	struct sigframe __user *frame;
-	struct mcontext __user *mctx;
-	struct mcontext __user *tm_mctx = NULL;
 	unsigned long newsp = 0;
 	unsigned long tramp;
 	struct pt_regs *regs = tsk->thread.regs;
@@ -847,46 +834,49 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
 
 	/* Set up Signal Frame */
 	frame = get_sigframe(ksig, tsk, sizeof(*frame), 1);
-	mctx = &frame->mctx;
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-	tm_mctx = &frame->mctx_transact;
-#endif
 	if (MSR_TM_ACTIVE(msr))
 		prepare_save_tm_user_regs();
 	else
 		prepare_save_user_regs(1);
 
-	if (!user_access_begin(frame, sizeof(*frame)))
-		goto badframe;
-	sc = (struct sigcontext __user *) &frame->sctx;
+	scoped_user_rw_access(frame, badframe) {
+		struct mcontext __user *mctx;
+		struct mcontext __user *tm_mctx = NULL;
+
+		mctx = &frame->mctx;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+		tm_mctx = &frame->mctx_transact;
+#endif
+		sc = (struct sigcontext __user *)&frame->sctx;
 
 #if _NSIG != 64
 #error "Please adjust handle_signal()"
 #endif
-	unsafe_put_user(to_user_ptr(ksig->ka.sa.sa_handler), &sc->handler, failed);
-	unsafe_put_user(oldset->sig[0], &sc->oldmask, failed);
+		unsafe_put_user(to_user_ptr(ksig->ka.sa.sa_handler), &sc->handler, badframe);
+		unsafe_put_user(oldset->sig[0], &sc->oldmask, badframe);
 #ifdef CONFIG_PPC64
-	unsafe_put_user((oldset->sig[0] >> 32), &sc->_unused[3], failed);
+		unsafe_put_user((oldset->sig[0] >> 32), &sc->_unused[3], badframe);
 #else
-	unsafe_put_user(oldset->sig[1], &sc->_unused[3], failed);
+		unsafe_put_user(oldset->sig[1], &sc->_unused[3], badframe);
 #endif
-	unsafe_put_user(to_user_ptr(mctx), &sc->regs, failed);
-	unsafe_put_user(ksig->sig, &sc->signal, failed);
+		unsafe_put_user(to_user_ptr(mctx), &sc->regs, badframe);
+		unsafe_put_user(ksig->sig, &sc->signal, badframe);
 
-	if (MSR_TM_ACTIVE(msr))
-		unsafe_save_tm_user_regs(regs, mctx, tm_mctx, msr, failed);
-	else
-		unsafe_save_user_regs(regs, mctx, tm_mctx, 1, failed);
-
-	if (tsk->mm->context.vdso) {
-		tramp = VDSO32_SYMBOL(tsk->mm->context.vdso, sigtramp32);
-	} else {
-		tramp = (unsigned long)mctx->mc_pad;
-		unsafe_put_user(PPC_RAW_LI(_R0, __NR_sigreturn), &mctx->mc_pad[0], failed);
-		unsafe_put_user(PPC_RAW_SC(), &mctx->mc_pad[1], failed);
-		asm("dcbst %y0; sync; icbi %y0; sync" :: "Z" (mctx->mc_pad[0]));
+		if (MSR_TM_ACTIVE(msr))
+			unsafe_save_tm_user_regs(regs, mctx, tm_mctx, msr, badframe);
+		else
+			unsafe_save_user_regs(regs, mctx, tm_mctx, 1, badframe);
+
+		if (tsk->mm->context.vdso) {
+			tramp = VDSO32_SYMBOL(tsk->mm->context.vdso, sigtramp32);
+		} else {
+			tramp = (unsigned long)mctx->mc_pad;
+			unsafe_put_user(PPC_RAW_LI(_R0, __NR_sigreturn), &mctx->mc_pad[0],
+					badframe);
+			unsafe_put_user(PPC_RAW_SC(), &mctx->mc_pad[1], badframe);
+			asm("dcbst %y0; sync; icbi %y0; sync" :: "Z" (mctx->mc_pad[0]));
+		}
 	}
-	user_access_end();
 
 	regs->link = tramp;
 
@@ -908,9 +898,6 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
 
 	return 0;
 
-failed:
-	user_access_end();
-
 badframe:
 	signal_fault(tsk, regs, "handle_signal32", frame);
 
@@ -922,21 +909,19 @@ static int do_setcontext(struct ucontext __user *ucp, struct pt_regs *regs, int
 	sigset_t set;
 	struct mcontext __user *mcp;
 
-	if (!user_read_access_begin(ucp, sizeof(*ucp)))
-		return -EFAULT;
-
-	unsafe_get_sigset_t(&set, &ucp->uc_sigmask, failed);
+	scoped_user_read_access(ucp, failed) {
+		unsafe_get_sigset_t(&set, &ucp->uc_sigmask, failed);
 #ifdef CONFIG_PPC64
-	{
-		u32 cmcp;
+		{
+			u32 cmcp;
 
-		unsafe_get_user(cmcp, &ucp->uc_regs, failed);
-		mcp = (struct mcontext __user *)(u64)cmcp;
-	}
+			unsafe_get_user(cmcp, &ucp->uc_regs, failed);
+			mcp = (struct mcontext __user *)(u64)cmcp;
+		}
 #else
-	unsafe_get_user(mcp, &ucp->uc_regs, failed);
+		unsafe_get_user(mcp, &ucp->uc_regs, failed);
 #endif
-	user_read_access_end();
+	}
 
 	set_current_blocked(&set);
 	if (restore_user_regs(regs, mcp, sig))
@@ -945,7 +930,6 @@ static int do_setcontext(struct ucontext __user *ucp, struct pt_regs *regs, int
 	return 0;
 
 failed:
-	user_read_access_end();
 	return -EFAULT;
 }
 
@@ -960,13 +944,10 @@ static int do_setcontext_tm(struct ucontext __user *ucp,
 	u32 cmcp;
 	u32 tm_cmcp;
 
-	if (!user_read_access_begin(ucp, sizeof(*ucp)))
-		return -EFAULT;
-
-	unsafe_get_sigset_t(&set, &ucp->uc_sigmask, failed);
-	unsafe_get_user(cmcp, &ucp->uc_regs, failed);
-
-	user_read_access_end();
+	scoped_user_read_access(ucp, failed) {
+		unsafe_get_sigset_t(&set, &ucp->uc_sigmask, failed);
+		unsafe_get_user(cmcp, &ucp->uc_regs, failed);
+	}
 
 	if (__get_user(tm_cmcp, &tm_ucp->uc_regs))
 		return -EFAULT;
@@ -981,7 +962,6 @@ static int do_setcontext_tm(struct ucontext __user *ucp,
 	return 0;
 
 failed:
-	user_read_access_end();
 	return -EFAULT;
 }
 #endif
@@ -1051,12 +1031,11 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 		mctx = (struct mcontext __user *)
 			((unsigned long) &old_ctx->uc_mcontext & ~0xfUL);
 		prepare_save_user_regs(ctx_has_vsx_region);
-		if (!user_write_access_begin(old_ctx, ctx_size))
-			return -EFAULT;
-		unsafe_save_user_regs(regs, mctx, NULL, ctx_has_vsx_region, failed);
-		unsafe_put_sigset_t(&old_ctx->uc_sigmask, &current->blocked, failed);
-		unsafe_put_user(to_user_ptr(mctx), &old_ctx->uc_regs, failed);
-		user_write_access_end();
+		scoped_user_write_access_size(old_ctx, ctx_size, failed) {
+			unsafe_save_user_regs(regs, mctx, NULL, ctx_has_vsx_region, failed);
+			unsafe_put_sigset_t(&old_ctx->uc_sigmask, &current->blocked, failed);
+			unsafe_put_user(to_user_ptr(mctx), &old_ctx->uc_regs, failed);
+		}
 	}
 	if (new_ctx == NULL)
 		return 0;
@@ -1084,7 +1063,6 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 	return 0;
 
 failed:
-	user_write_access_end();
 	return -EFAULT;
 }
 
-- 
2.54.0



^ permalink raw reply related

* [PATCH v1 3/8] powerpc/signal64: Convert to scoped user access
From: Christophe Leroy (CS GROUP) @ 2026-05-22  9:56 UTC (permalink / raw)
  To: Michael Ellerman, Nicholas Piggin, Madhavan Srinivasan
  Cc: Christophe Leroy (CS GROUP), linux-kernel, linuxppc-dev
In-Reply-To: <cover.1779441786.git.chleroy@kernel.org>

Commit 861574d51bbd ("powerpc/uaccess: Implement masked user access")
provides optimised user access by avoiding the cost of access_ok().

Convert signal64 functions to scoped user access.

Scoped user access also makes the code simpler.

Signed-off-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
---
 arch/powerpc/kernel/signal_64.c | 81 +++++++++++++--------------------
 1 file changed, 32 insertions(+), 49 deletions(-)

diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 3849af21e1d8..ee8166fd83dc 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -683,15 +683,12 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 
 	if (old_ctx != NULL) {
 		prepare_setup_sigcontext(current);
-		if (!user_write_access_begin(old_ctx, ctx_size))
-			return -EFAULT;
-
-		unsafe_setup_sigcontext(&old_ctx->uc_mcontext, current, 0, NULL,
-					0, ctx_has_vsx_region, efault_out);
-		unsafe_copy_to_user(&old_ctx->uc_sigmask, &current->blocked,
-				    sizeof(sigset_t), efault_out);
-
-		user_write_access_end();
+		scoped_user_write_access_size(old_ctx, ctx_size, efault_out) {
+			unsafe_setup_sigcontext(&old_ctx->uc_mcontext, current, 0, NULL,
+						0, ctx_has_vsx_region, efault_out);
+			unsafe_copy_to_user(&old_ctx->uc_sigmask, &current->blocked,
+					    sizeof(sigset_t), efault_out);
+		}
 	}
 	if (new_ctx == NULL)
 		return 0;
@@ -717,14 +714,12 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 	}
 	set_current_blocked(&set);
 
-	if (!user_read_access_begin(new_ctx, ctx_size))
-		return -EFAULT;
-	if (__unsafe_restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext)) {
-		user_read_access_end();
-		force_exit_sig(SIGSEGV);
-		return -EFAULT;
+	scoped_user_read_access_size(new_ctx, ctx_size, efault_out) {
+		if (__unsafe_restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext)) {
+			force_exit_sig(SIGSEGV);
+			return -EFAULT;
+		}
 	}
-	user_read_access_end();
 
 	/* This returns like rt_sigreturn */
 	set_thread_flag(TIF_RESTOREALL);
@@ -732,7 +727,6 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 	return 0;
 
 efault_out:
-	user_write_access_end();
 	return -EFAULT;
 }
 
@@ -815,6 +809,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
 					   &uc_transact->uc_mcontext))
 			goto badframe;
 	} else {
+		struct sigcontext __user *uc_mcontext = &uc->uc_mcontext;
 		/*
 		 * Fall through, for non-TM restore
 		 *
@@ -829,13 +824,8 @@ SYSCALL_DEFINE0(rt_sigreturn)
 		 */
 		regs_set_return_msr(current->thread.regs,
 				current->thread.regs->msr & ~MSR_TS_MASK);
-		if (!user_read_access_begin(&uc->uc_mcontext, sizeof(uc->uc_mcontext)))
-			goto badframe;
-
-		unsafe_restore_sigcontext(current, NULL, 1, &uc->uc_mcontext,
-					  badframe_block);
-
-		user_read_access_end();
+		scoped_user_read_access(uc_mcontext, badframe)
+			unsafe_restore_sigcontext(current, NULL, 1, uc_mcontext, badframe);
 	}
 
 	if (restore_altstack(&uc->uc_stack))
@@ -845,8 +835,6 @@ SYSCALL_DEFINE0(rt_sigreturn)
 
 	return 0;
 
-badframe_block:
-	user_read_access_end();
 badframe:
 	signal_fault(current, regs, "rt_sigreturn", uc);
 
@@ -882,32 +870,29 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 					    msr);
 
 #endif
-	if (!user_write_access_begin(frame, sizeof(*frame)))
-		goto badframe;
+	scoped_user_write_access(frame, badframe) {
+		unsafe_put_user(&frame->info, &frame->pinfo, badframe);
+		unsafe_put_user(&frame->uc, &frame->puc, badframe);
 
-	unsafe_put_user(&frame->info, &frame->pinfo, badframe_block);
-	unsafe_put_user(&frame->uc, &frame->puc, badframe_block);
+		/* Create the ucontext.  */
+		unsafe_put_user(0, &frame->uc.uc_flags, badframe);
+		unsafe_save_altstack(&frame->uc.uc_stack, regs->gpr[1], badframe);
 
-	/* Create the ucontext.  */
-	unsafe_put_user(0, &frame->uc.uc_flags, badframe_block);
-	unsafe_save_altstack(&frame->uc.uc_stack, regs->gpr[1], badframe_block);
-
-	if (MSR_TM_ACTIVE(msr)) {
+		if (MSR_TM_ACTIVE(msr)) {
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-		/* The ucontext_t passed to userland points to the second
-		 * ucontext_t (for transactional state) with its uc_link ptr.
-		 */
-		unsafe_put_user(&frame->uc_transact, &frame->uc.uc_link, badframe_block);
+			/* The ucontext_t passed to userland points to the second
+			 * ucontext_t (for transactional state) with its uc_link ptr.
+			 */
+			unsafe_put_user(&frame->uc_transact, &frame->uc.uc_link, badframe);
 #endif
-	} else {
-		unsafe_put_user(0, &frame->uc.uc_link, badframe_block);
-		unsafe_setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig,
-					NULL, (unsigned long)ksig->ka.sa.sa_handler,
-					1, badframe_block);
-	}
+		} else {
+			unsafe_put_user(0, &frame->uc.uc_link, badframe);
+			unsafe_setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig, NULL,
+						(unsigned long)ksig->ka.sa.sa_handler, 1, badframe);
+		}
 
-	unsafe_copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set), badframe_block);
-	user_write_access_end();
+		unsafe_copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set), badframe);
+	}
 
 	/* Save the siginfo outside of the unsafe block. */
 	if (copy_siginfo_to_user(&frame->info, &ksig->info))
@@ -964,8 +949,6 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 
 	return 0;
 
-badframe_block:
-	user_write_access_end();
 badframe:
 	signal_fault(current, regs, "handle_rt_signal64", frame);
 
-- 
2.54.0



^ permalink raw reply related

* [PATCH v1 4/8] powerpc/signal64: Access function descriptor with scoped user access
From: Christophe Leroy (CS GROUP) @ 2026-05-22  9:56 UTC (permalink / raw)
  To: Michael Ellerman, Nicholas Piggin, Madhavan Srinivasan
  Cc: Christophe Leroy (CS GROUP), linux-kernel, linuxppc-dev
In-Reply-To: <cover.1779441786.git.chleroy@kernel.org>

Access the function descriptor of the handler within a scoped
user access block.

Signed-off-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
---
 arch/powerpc/kernel/signal_64.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index ee8166fd83dc..bf7fc579d572 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -928,8 +928,10 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 		struct func_desc __user *ptr =
 			(struct func_desc __user *)ksig->ka.sa.sa_handler;
 
-		err |= get_user(regs->ctr, &ptr->addr);
-		err |= get_user(regs->gpr[2], &ptr->toc);
+		scoped_user_read_access(ptr, badfunc) {
+			unsafe_get_user(regs->ctr, &ptr->addr, badfunc);
+			unsafe_get_user(regs->gpr[2], &ptr->toc, badfunc);
+		}
 	}
 
 	/* enter the signal handler in native-endian mode */
@@ -952,5 +954,10 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 badframe:
 	signal_fault(current, regs, "handle_rt_signal64", frame);
 
+	return 1;
+
+badfunc:
+	signal_fault(current, regs, __func__, (void __user *)ksig->ka.sa.sa_handler);
+
 	return 1;
 }
-- 
2.54.0



^ permalink raw reply related

* [PATCH v1 5/8] powerpc/signal: Include the new stack frame inside the user access block
From: Christophe Leroy (CS GROUP) @ 2026-05-22  9:56 UTC (permalink / raw)
  To: Michael Ellerman, Nicholas Piggin, Madhavan Srinivasan
  Cc: Christophe Leroy (CS GROUP), linux-kernel, linuxppc-dev
In-Reply-To: <cover.1779441786.git.chleroy@kernel.org>

Include the new stack frame inside the user access block and set it up
using unsafe_put_user().

On an mpc 8321 (book3s/32) the improvement is about 4% on a process
sending a signal to itself.

Signed-off-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
---
 arch/powerpc/kernel/signal_32.c | 28 ++++++++++++----------------
 arch/powerpc/kernel/signal_64.c | 13 ++++++-------
 2 files changed, 18 insertions(+), 23 deletions(-)

diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index f5d5139a1426..6b1fbd95b07d 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -730,7 +730,7 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
 		       struct task_struct *tsk)
 {
 	struct rt_sigframe __user *frame;
-	unsigned long newsp = 0;
+	unsigned long __user *newsp;
 	unsigned long tramp;
 	struct pt_regs *regs = tsk->thread.regs;
 	/* Save the thread's msr before get_tm_stackpointer() changes it */
@@ -738,12 +738,13 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
 
 	/* Set up Signal Frame */
 	frame = get_sigframe(ksig, tsk, sizeof(*frame), 1);
+	newsp = (unsigned long __user *)((unsigned long)frame - (__SIGNAL_FRAMESIZE + 16));
 	if (MSR_TM_ACTIVE(msr))
 		prepare_save_tm_user_regs();
 	else
 		prepare_save_user_regs(1);
 
-	scoped_user_rw_access(frame, badframe) {
+	scoped_user_rw_access_size(newsp, __SIGNAL_FRAMESIZE + 16 + sizeof(*frame), badframe) {
 		struct mcontext __user *mctx;
 		struct mcontext __user *tm_mctx = NULL;
 
@@ -784,6 +785,9 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
 			asm("dcbst %y0; sync; icbi %y0; sync" :: "Z" (mctx->mc_pad[0]));
 		}
 		unsafe_put_sigset_t(&frame->uc.uc_sigmask, oldset, badframe);
+
+		/* create a stack frame for the caller of the handler */
+		unsafe_put_user(regs->gpr[1], newsp, badframe);
 	}
 
 	if (copy_siginfo_to_user(&frame->info, &ksig->info))
@@ -795,13 +799,8 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
 	tsk->thread.fp_state.fpscr = 0;	/* turn off all fp exceptions */
 #endif
 
-	/* create a stack frame for the caller of the handler */
-	newsp = ((unsigned long)frame) - (__SIGNAL_FRAMESIZE + 16);
-	if (put_user(regs->gpr[1], (u32 __user *)newsp))
-		goto badframe;
-
 	/* Fill registers for signal handler */
-	regs->gpr[1] = newsp;
+	regs->gpr[1] = (unsigned long)newsp;
 	regs->gpr[3] = ksig->sig;
 	regs->gpr[4] = (unsigned long)&frame->info;
 	regs->gpr[5] = (unsigned long)&frame->uc;
@@ -826,7 +825,7 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
 {
 	struct sigcontext __user *sc;
 	struct sigframe __user *frame;
-	unsigned long newsp = 0;
+	unsigned long __user *newsp;
 	unsigned long tramp;
 	struct pt_regs *regs = tsk->thread.regs;
 	/* Save the thread's msr before get_tm_stackpointer() changes it */
@@ -834,12 +833,13 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
 
 	/* Set up Signal Frame */
 	frame = get_sigframe(ksig, tsk, sizeof(*frame), 1);
+	newsp = (unsigned long __user *)((unsigned long)frame - __SIGNAL_FRAMESIZE);
 	if (MSR_TM_ACTIVE(msr))
 		prepare_save_tm_user_regs();
 	else
 		prepare_save_user_regs(1);
 
-	scoped_user_rw_access(frame, badframe) {
+	scoped_user_rw_access_size(newsp, __SIGNAL_FRAMESIZE + sizeof(*frame), badframe) {
 		struct mcontext __user *mctx;
 		struct mcontext __user *tm_mctx = NULL;
 
@@ -876,6 +876,7 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
 			unsafe_put_user(PPC_RAW_SC(), &mctx->mc_pad[1], badframe);
 			asm("dcbst %y0; sync; icbi %y0; sync" :: "Z" (mctx->mc_pad[0]));
 		}
+		unsafe_put_user(regs->gpr[1], newsp, badframe);
 	}
 
 	regs->link = tramp;
@@ -884,12 +885,7 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
 	tsk->thread.fp_state.fpscr = 0;	/* turn off all fp exceptions */
 #endif
 
-	/* create a stack frame for the caller of the handler */
-	newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;
-	if (put_user(regs->gpr[1], (u32 __user *)newsp))
-		goto badframe;
-
-	regs->gpr[1] = newsp;
+	regs->gpr[1] = (unsigned long)newsp;
 	regs->gpr[3] = ksig->sig;
 	regs->gpr[4] = (unsigned long) sc;
 	regs_set_return_ip(regs, (unsigned long) ksig->ka.sa.sa_handler);
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index bf7fc579d572..67de29cf581a 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -846,13 +846,14 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 		struct task_struct *tsk)
 {
 	struct rt_sigframe __user *frame;
-	unsigned long newsp = 0;
+	unsigned long __user *newsp;
 	long err = 0;
 	struct pt_regs *regs = tsk->thread.regs;
 	/* Save the thread's msr before get_tm_stackpointer() changes it */
 	unsigned long msr = regs->msr;
 
 	frame = get_sigframe(ksig, tsk, sizeof(*frame), 0);
+	newsp = (unsigned long __user *)((unsigned long)frame - __SIGNAL_FRAMESIZE);
 
 	/*
 	 * This only applies when calling unsafe_setup_sigcontext() and must be
@@ -870,7 +871,7 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 					    msr);
 
 #endif
-	scoped_user_write_access(frame, badframe) {
+	scoped_user_write_access_size(newsp, __SIGNAL_FRAMESIZE + sizeof(*frame), badframe) {
 		unsafe_put_user(&frame->info, &frame->pinfo, badframe);
 		unsafe_put_user(&frame->uc, &frame->puc, badframe);
 
@@ -892,6 +893,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 		}
 
 		unsafe_copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set), badframe);
+		/* Allocate a dummy caller frame for the signal handler. */
+		unsafe_put_user(regs->gpr[1], newsp, badframe);
 	}
 
 	/* Save the siginfo outside of the unsafe block. */
@@ -911,10 +914,6 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 		regs_set_return_ip(regs, (unsigned long) &frame->tramp[0]);
 	}
 
-	/* Allocate a dummy caller frame for the signal handler. */
-	newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;
-	err |= put_user(regs->gpr[1], (unsigned long __user *)newsp);
-
 	/* Set up "regs" so we "return" to the signal handler. */
 	if (is_elf2_task()) {
 		regs->ctr = (unsigned long) ksig->ka.sa.sa_handler;
@@ -936,7 +935,7 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 
 	/* enter the signal handler in native-endian mode */
 	regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (MSR_KERNEL & MSR_LE));
-	regs->gpr[1] = newsp;
+	regs->gpr[1] = (unsigned long)newsp;
 	regs->gpr[3] = ksig->sig;
 	regs->result = 0;
 	if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
-- 
2.54.0



^ permalink raw reply related

* [PATCH v1 6/8] signal: Add unsafe_copy_siginfo_to_user()
From: Christophe Leroy (CS GROUP) @ 2026-05-22  9:56 UTC (permalink / raw)
  To: Michael Ellerman, Nicholas Piggin, Madhavan Srinivasan
  Cc: Christophe Leroy (CS GROUP), linux-kernel, linuxppc-dev
In-Reply-To: <cover.1779441786.git.chleroy@kernel.org>

In the same spirit as commit fb05121fd6a2 ("signal: Add
unsafe_get_compat_sigset()"), implement an 'unsafe' version of
copy_siginfo_to_user() in order to use it within user access blocks.

For that, also add an 'unsafe' version of clear_user().

This commit adds the generic fallback for unsafe_clear_user().
Architectures wanting to use unsafe_copy_siginfo_to_user() within a
user_access_begin() section have to make sure they have their
own unsafe_clear_user().

Signed-off-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
---
 include/linux/signal.h  | 15 +++++++++++++++
 include/linux/uaccess.h |  1 +
 kernel/signal.c         |  5 -----
 3 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/include/linux/signal.h b/include/linux/signal.h
index f19816832f05..3ee6c9463f8b 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -36,6 +36,21 @@ static inline void copy_siginfo_to_external(siginfo_t *to,
 int copy_siginfo_to_user(siginfo_t __user *to, const kernel_siginfo_t *from);
 int copy_siginfo_from_user(kernel_siginfo_t *to, const siginfo_t __user *from);
 
+static __always_inline char __user *si_expansion(const siginfo_t __user *info)
+{
+	return ((char __user *)info) + sizeof(struct kernel_siginfo);
+}
+
+#define unsafe_copy_siginfo_to_user(to, from, label) do {		\
+	siginfo_t __user *__ucs_to = to;				\
+	const kernel_siginfo_t *__ucs_from = from;			\
+	char __user *__ucs_expansion = si_expansion(__ucs_to);		\
+									\
+	unsafe_copy_to_user(__ucs_to, __ucs_from,			\
+			    sizeof(struct kernel_siginfo), label);	\
+	unsafe_clear_user(__ucs_expansion, SI_EXPANSION_SIZE, label);	\
+} while (0)
+
 enum siginfo_layout {
 	SIL_KILL,
 	SIL_TIMER,
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 56328601218c..43e573b172a2 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -636,6 +636,7 @@ do {							\
 #define unsafe_put_user(x,p,e) unsafe_op_wrap(__put_user(x,p),e)
 #define unsafe_copy_to_user(d,s,l,e) unsafe_op_wrap(__copy_to_user(d,s,l),e)
 #define unsafe_copy_from_user(d,s,l,e) unsafe_op_wrap(__copy_from_user(d,s,l),e)
+#define unsafe_clear_user(d, l, e) unsafe_op_wrap(__clear_user(d, l), e)
 static inline unsigned long user_access_save(void) { return 0UL; }
 static inline void user_access_restore(unsigned long flags) { }
 #endif /* !user_access_begin */
diff --git a/kernel/signal.c b/kernel/signal.c
index 2d102e025883..2c5eb741fe8c 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3493,11 +3493,6 @@ enum siginfo_layout siginfo_layout(unsigned sig, int si_code)
 	return layout;
 }
 
-static inline char __user *si_expansion(const siginfo_t __user *info)
-{
-	return ((char __user *)info) + sizeof(struct kernel_siginfo);
-}
-
 int copy_siginfo_to_user(siginfo_t __user *to, const kernel_siginfo_t *from)
 {
 	char __user *expansion = si_expansion(to);
-- 
2.54.0



^ permalink raw reply related

* [PATCH v1 7/8] powerpc/uaccess: Add unsafe_clear_user()
From: Christophe Leroy (CS GROUP) @ 2026-05-22  9:56 UTC (permalink / raw)
  To: Michael Ellerman, Nicholas Piggin, Madhavan Srinivasan
  Cc: Christophe Leroy (CS GROUP), linux-kernel, linuxppc-dev
In-Reply-To: <cover.1779441786.git.chleroy@kernel.org>

Implement unsafe_clear_user() for powerpc.
It's a copy/paste of unsafe_copy_to_user() with value 0 as source.

It may be improved in a later patch by using 'dcbz' instruction
to zeroize full cache lines at once.

Signed-off-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
---
 arch/powerpc/include/asm/uaccess.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index e98c628e3899..ef6711d1278b 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -588,6 +588,26 @@ do {									\
 		unsafe_put_user(*(u8*)(_src + _i), (u8 __user *)(_dst + _i), e); \
 } while (0)
 
+#define unsafe_clear_user(d, l, e)					\
+do {									\
+	u8 __user *_dst = (u8 __user *)(d);				\
+	size_t _len = (l);						\
+	int _i;								\
+									\
+	for (_i = 0; _i < (_len & ~(sizeof(u64) - 1)); _i += sizeof(u64)) \
+		unsafe_put_user(0, (u64 __user *)(_dst + _i), e);	\
+	if (_len & 4) {							\
+		unsafe_put_user(0, (u32 __user *)(_dst + _i), e);	\
+		_i += 4;						\
+	}								\
+	if (_len & 2) {							\
+		unsafe_put_user(0, (u16 __user *)(_dst + _i), e);	\
+		_i += 2;						\
+	}								\
+	if (_len & 1)							\
+		unsafe_put_user(0, (u8 __user *)(_dst + _i), e);	\
+} while (0)
+
 #define arch_get_kernel_nofault(dst, src, type, err_label)		\
 	__get_user_size_goto(*((type *)(dst)),				\
 		(__force type __user *)(src), sizeof(type), err_label)
-- 
2.54.0



^ permalink raw reply related

* [PATCH v1 8/8] powerpc/signal: Use unsafe_copy_siginfo_to_user()
From: Christophe Leroy (CS GROUP) @ 2026-05-22  9:56 UTC (permalink / raw)
  To: Michael Ellerman, Nicholas Piggin, Madhavan Srinivasan
  Cc: Christophe Leroy (CS GROUP), linux-kernel, linuxppc-dev
In-Reply-To: <cover.1779441786.git.chleroy@kernel.org>

Use unsafe_copy_siginfo_to_user() in order to do the copy
within the user access block.

On an mpc 8321 (book3s/32) the improvement is about 5% on a process
sending a signal to itself.

Signed-off-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
---
 arch/powerpc/kernel/signal_32.c | 18 +++++++++---------
 arch/powerpc/kernel/signal_64.c |  5 +----
 2 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 6b1fbd95b07d..99a3efa874eb 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -716,12 +716,6 @@ static long restore_tm_user_regs(struct pt_regs *regs, struct mcontext __user *s
 }
 #endif
 
-#ifdef CONFIG_PPC64
-
-#define copy_siginfo_to_user	copy_siginfo_to_user32
-
-#endif /* CONFIG_PPC64 */
-
 /*
  * Set up a signal frame for a "real-time" signal handler
  * (one which gets siginfo).
@@ -735,6 +729,7 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
 	struct pt_regs *regs = tsk->thread.regs;
 	/* Save the thread's msr before get_tm_stackpointer() changes it */
 	unsigned long msr = regs->msr;
+	compat_siginfo_t uinfo;
 
 	/* Set up Signal Frame */
 	frame = get_sigframe(ksig, tsk, sizeof(*frame), 1);
@@ -744,6 +739,9 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
 	else
 		prepare_save_user_regs(1);
 
+	if (IS_ENABLED(CONFIG_COMPAT))
+		copy_siginfo_to_external32(&uinfo, &ksig->info);
+
 	scoped_user_rw_access_size(newsp, __SIGNAL_FRAMESIZE + 16 + sizeof(*frame), badframe) {
 		struct mcontext __user *mctx;
 		struct mcontext __user *tm_mctx = NULL;
@@ -785,14 +783,16 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
 			asm("dcbst %y0; sync; icbi %y0; sync" :: "Z" (mctx->mc_pad[0]));
 		}
 		unsafe_put_sigset_t(&frame->uc.uc_sigmask, oldset, badframe);
+		if (IS_ENABLED(CONFIG_COMPAT))
+			unsafe_copy_to_user(&frame->info, &uinfo, sizeof(frame->info), badframe);
+		else
+			unsafe_copy_siginfo_to_user((void __user *)&frame->info, &ksig->info,
+						    badframe);
 
 		/* create a stack frame for the caller of the handler */
 		unsafe_put_user(regs->gpr[1], newsp, badframe);
 	}
 
-	if (copy_siginfo_to_user(&frame->info, &ksig->info))
-		goto badframe;
-
 	regs->link = tramp;
 
 #ifdef CONFIG_PPC_FPU_REGS
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 67de29cf581a..fa18ac43eb2a 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -893,14 +893,11 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 		}
 
 		unsafe_copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set), badframe);
+		unsafe_copy_siginfo_to_user(&frame->info, &ksig->info, badframe);
 		/* Allocate a dummy caller frame for the signal handler. */
 		unsafe_put_user(regs->gpr[1], newsp, badframe);
 	}
 
-	/* Save the siginfo outside of the unsafe block. */
-	if (copy_siginfo_to_user(&frame->info, &ksig->info))
-		goto badframe;
-
 	/* Make sure signal handler doesn't get spurious FP exceptions */
 	tsk->thread.fp_state.fpscr = 0;
 
-- 
2.54.0



^ permalink raw reply related

* Re: [PATCH] ASoC: fsl_sai: Eliminate possible interrupt storm during probe
From: Mark Brown @ 2026-05-19  9:54 UTC (permalink / raw)
  To: shengjiu.wang, Xiubo.Lee, festevam, nicoleotsuka, lgirdwood,
	perex, tiwai, linux-sound, linuxppc-dev, linux-kernel,
	Shengjiu Wang
In-Reply-To: <20260512065252.75859-1-shengjiu.wang@nxp.com>

On Tue, 12 May 2026 14:52:52 +0800, Shengjiu Wang wrote:
> ASoC: fsl_sai: Eliminate possible interrupt storm during probe

Applied to

   https://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git for-7.1

Thanks!

[1/1] ASoC: fsl_sai: Eliminate possible interrupt storm during probe
      https://git.kernel.org/broonie/sound/c/fd3b95866d86

All being well this means that it will be integrated into the linux-next
tree (usually sometime in the next 24 hours) and sent to Linus during
the next merge window (or sooner if it is a bug fix), however if
problems are discovered then the patch may be dropped or reverted.

You may get further e-mails resulting from automated or manual testing
and review of the tree, please engage with people reporting problems and
send followup patches addressing any issues that are reported if needed.

If any updates are required or you are submitting further changes they
should be sent as incremental updates against current git, existing
patches will not be replaced.

Please add any relevant lists and maintainers to the CCs when replying
to this mail.

Thanks,
Mark

^ permalink raw reply

* Re: [PATCH v5] char/nvram: Remove redundant nvram_mutex
From: Christophe Leroy (CS GROUP) @ 2026-05-22 10:28 UTC (permalink / raw)
  To: Venkat, Greg Kroah-Hartman
  Cc: linux-kernel, linux-kbuild, linuxppc-dev, Arnd Bergmann,
	Christophe Leroy, Ritesh Harjani, Madhavan Srinivasan,
	Tellakula Yeswanth Krishna
In-Reply-To: <FC8BDC39-4627-4532-B6FB-C4B88F4DF80E@linux.ibm.com>



Le 14/05/2026 à 05:57, Venkat a écrit :
> 
> Hi,
> 
> Gentle ping on this patch.
> 
> This removes the unused global nvram_mutex and relies on the
> existing per-architecture synchronization, as suggested earlier.
> 
> I’ve re-tested the change, and everything continues to work as expected.
> No issues observed in validation.
> 
> Please let me know if any further changes are needed.
> 
> Thanks,
> Venkat
> 
> 
>> On 28 Apr 2026, at 11:45 AM, Venkat Rao Bagalkote <venkat88@linux.ibm.com> wrote:
>>
>> The global nvram_mutex in drivers/char/nvram.c is redundant and unused,

Redundant with what ?

It is _used_, at least in nvram_misc_ioctl()


>> and this triggers compiler warnings on some configurations.
>>
>> All platform-specific nvram operations already provide their own internal
>> synchronization, meaning the wrapper-level mutex does not provide any
>> additional safety.

Indeed, this is what it is redundant with; I would say that first thing
in the message.

I think it would also be worth providing the history from Arnd from 
here: 
https://patchwork.ozlabs.org/project/linuxppc-dev/patch/20260323072422.25730-1-venkat88@linux.ibm.com/#3667538


>>
>> Remove the nvram_mutex definition along with all remaining lock/unlock
>> users across PPC32, x86, and m68k code paths, and rely entirely on the
>> per-architecture nvram implementations for locking.
>>
>> Reviewed-by: Arnd Bergmann <arnd@arndb.de>
>> Suggested-by: Arnd Bergmann <arnd@arndb.de>
>> Tested-by: Tellakula Yeswanth Krishna <yeswanth@linux.ibm.com>
>> Signed-off-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>

With the above changes, Reviewed-by: Christophe Leroy (CS GROUP) 
<chleroy@kernel.org>

>> ---
>> Changes since v4:
>> - No code changes
>> - Resent after v7.1-rc1 as suggested by Arnd Bergmann
>>
>> drivers/char/nvram.c | 16 +++-------------
>> 1 file changed, 3 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/char/nvram.c b/drivers/char/nvram.c
>> index 9eff426a9286..e89cc1f1c89e 100644
>> --- a/drivers/char/nvram.c
>> +++ b/drivers/char/nvram.c
>> @@ -53,7 +53,6 @@
>> #include <asm/nvram.h>
>> #endif
>>
>> -static DEFINE_MUTEX(nvram_mutex);
>> static DEFINE_SPINLOCK(nvram_state_lock);
>> static int nvram_open_cnt; /* #times opened */
>> static int nvram_open_mode; /* special open modes */
>> @@ -310,11 +309,8 @@ static long nvram_misc_ioctl(struct file *file, unsigned int cmd,
>> break;
>> #ifdef CONFIG_PPC32
>> case IOC_NVRAM_SYNC:
>> - if (ppc_md.nvram_sync != NULL) {
>> - mutex_lock(&nvram_mutex);
>> + if (ppc_md.nvram_sync)
>> ppc_md.nvram_sync();
>> - mutex_unlock(&nvram_mutex);
>> - }
>> ret = 0;
>> break;
>> #endif
>> @@ -324,11 +320,8 @@ static long nvram_misc_ioctl(struct file *file, unsigned int cmd,
>> if (!capable(CAP_SYS_ADMIN))
>> return -EACCES;
>>
>> - if (arch_nvram_ops.initialize != NULL) {
>> - mutex_lock(&nvram_mutex);
>> + if (arch_nvram_ops.initialize)
>> ret = arch_nvram_ops.initialize();
>> - mutex_unlock(&nvram_mutex);
>> - }
>> break;
>> case NVRAM_SETCKS:
>> /* just set checksum, contents unchanged (maybe useful after
>> @@ -336,11 +329,8 @@ static long nvram_misc_ioctl(struct file *file, unsigned int cmd,
>> if (!capable(CAP_SYS_ADMIN))
>> return -EACCES;
>>
>> - if (arch_nvram_ops.set_checksum != NULL) {
>> - mutex_lock(&nvram_mutex);
>> + if (arch_nvram_ops.set_checksum)
>> ret = arch_nvram_ops.set_checksum();
>> - mutex_unlock(&nvram_mutex);
>> - }
>> break;
>> #endif /* CONFIG_X86 || CONFIG_M68K */
>> }
>> -- 
>> 2.45.2
>>
> 



^ permalink raw reply

* Re: [PATCH v1 2/8] powerpc/signal64: Untangle setup_tm_sigcontexts() and user_access_begin()
From: David Laight @ 2026-05-22 11:12 UTC (permalink / raw)
  To: Christophe Leroy (CS GROUP)
  Cc: Michael Ellerman, Nicholas Piggin, Madhavan Srinivasan,
	linux-kernel, linuxppc-dev
In-Reply-To: <7c83fe5a596514a02fde37da987a48d638d0d097.1779441786.git.chleroy@kernel.org>

On Fri, 22 May 2026 11:56:02 +0200
"Christophe Leroy (CS GROUP)" <chleroy@kernel.org> wrote:

> Call setup_tm_sigcontexts() before opening user access to avoid
> having to close and open again.
> 
> Signed-off-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
> ---
>  arch/powerpc/kernel/signal_64.c | 22 +++++++++-------------
>  1 file changed, 9 insertions(+), 13 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
> index 86bb5bb4c143..3849af21e1d8 100644
> --- a/arch/powerpc/kernel/signal_64.c
> +++ b/arch/powerpc/kernel/signal_64.c
> @@ -873,6 +873,15 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
>  	if (!MSR_TM_ACTIVE(msr))
>  		prepare_setup_sigcontext(tsk);
>  
> +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
> +	if (MSR_TM_ACTIVE(msr))

Can't that be done without the ugly #ifdef?
I assume MSR_TM_ACTIVE() will be zero - so it will all get optimised away.

-- David

> +		err |= setup_tm_sigcontexts(&frame->uc.uc_mcontext,
> +					    &frame->uc_transact.uc_mcontext,
> +					    tsk, ksig->sig, NULL,
> +					    (unsigned long)ksig->ka.sa.sa_handler,
> +					    msr);
> +
> +#endif
>  	if (!user_write_access_begin(frame, sizeof(*frame)))
>  		goto badframe;
>  
> @@ -889,19 +898,6 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
>  		 * ucontext_t (for transactional state) with its uc_link ptr.
>  		 */
>  		unsafe_put_user(&frame->uc_transact, &frame->uc.uc_link, badframe_block);
> -
> -		user_write_access_end();
> -
> -		err |= setup_tm_sigcontexts(&frame->uc.uc_mcontext,
> -					    &frame->uc_transact.uc_mcontext,
> -					    tsk, ksig->sig, NULL,
> -					    (unsigned long)ksig->ka.sa.sa_handler,
> -					    msr);
> -
> -		if (!user_write_access_begin(&frame->uc.uc_sigmask,
> -					     sizeof(frame->uc.uc_sigmask)))
> -			goto badframe;
> -
>  #endif
>  	} else {
>  		unsafe_put_user(0, &frame->uc.uc_link, badframe_block);



^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox