LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v4 03/14] swiotlb: Add struct swiotlb
From: Claire Chang @ 2021-02-09  6:21 UTC (permalink / raw)
  To: Rob Herring, mpe, Joerg Roedel, Will Deacon, Frank Rowand,
	Konrad Rzeszutek Wilk, boris.ostrovsky, jgross, Christoph Hellwig,
	Marek Szyprowski
  Cc: heikki.krogerus, peterz, grant.likely, paulus, mingo, sstabellini,
	Saravana Kannan, xypron.glpk, Rafael J . Wysocki,
	Bartosz Golaszewski, xen-devel, Thierry Reding, linux-devicetree,
	linuxppc-dev, Nicolas Boichat, Claire Chang, Dan Williams,
	Andy Shevchenko, Greg KH, Randy Dunlap, lkml,
	list@263.net:IOMMU DRIVERS, Jim Quinlan, Robin Murphy, bauerman
In-Reply-To: <20210209062131.2300005-1-tientzu@chromium.org>

Add a new struct, swiotlb, as the IO TLB memory pool descriptor and move
the relevant global variables into it.
This will be useful later to allow for restricted DMA pools.

Signed-off-by: Claire Chang <tientzu@chromium.org>
---
 kernel/dma/swiotlb.c | 327 +++++++++++++++++++++++--------------------
 1 file changed, 172 insertions(+), 155 deletions(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 678490d39e55..28b7bfe7a2a8 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -61,33 +61,43 @@
  * allocate a contiguous 1MB, we're probably in trouble anyway.
  */
 #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
+#define INVALID_PHYS_ADDR (~(phys_addr_t)0)
 
 enum swiotlb_force swiotlb_force;
 
 /*
- * Used to do a quick range check in swiotlb_tbl_unmap_single and
- * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
- * API.
- */
-static phys_addr_t io_tlb_start, io_tlb_end;
-
-/*
- * The number of IO TLB blocks (in groups of 64) between io_tlb_start and
- * io_tlb_end.  This is command line adjustable via setup_io_tlb_npages.
- */
-static unsigned long io_tlb_nslabs;
-
-/*
- * The number of used IO TLB block
- */
-static unsigned long io_tlb_used;
-
-/*
- * This is a free list describing the number of free entries available from
- * each index
+ * struct swiotlb - Software IO TLB Memory Pool Descriptor
+ *
+ * @start:      The start address of the swiotlb memory pool. Used to do a quick
+ *              range check to see if the memory was in fact allocated by this
+ *              API.
+ * @end:        The end address of the swiotlb memory pool. Used to do a quick
+ *              range check to see if the memory was in fact allocated by this
+ *              API.
+ * @nslabs:     The number of IO TLB blocks (in groups of 64) between @start and
+ *              @end. This is command line adjustable via setup_io_tlb_npages.
+ * @used:       The number of used IO TLB block.
+ * @list:       The free list describing the number of free entries available
+ *              from each index.
+ * @index:      The index to start searching in the next round.
+ * @orig_addr:  The original address corresponding to a mapped entry for the
+ *              sync operations.
+ * @lock:       The lock to protect the above data structures in the map and
+ *              unmap calls.
+ * @debugfs:    The dentry to debugfs.
  */
-static unsigned int *io_tlb_list;
-static unsigned int io_tlb_index;
+struct swiotlb {
+	phys_addr_t start;
+	phys_addr_t end;
+	unsigned long nslabs;
+	unsigned long used;
+	unsigned int *list;
+	unsigned int index;
+	phys_addr_t *orig_addr;
+	spinlock_t lock;
+	struct dentry *debugfs;
+};
+static struct swiotlb default_swiotlb;
 
 /*
  * Max segment that we can provide which (if pages are contingous) will
@@ -95,27 +105,17 @@ static unsigned int io_tlb_index;
  */
 static unsigned int max_segment;
 
-/*
- * We need to save away the original address corresponding to a mapped entry
- * for the sync operations.
- */
-#define INVALID_PHYS_ADDR (~(phys_addr_t)0)
-static phys_addr_t *io_tlb_orig_addr;
-
-/*
- * Protect the above data structures in the map and unmap calls
- */
-static DEFINE_SPINLOCK(io_tlb_lock);
-
 static int late_alloc;
 
 static int __init
 setup_io_tlb_npages(char *str)
 {
+	struct swiotlb *swiotlb = &default_swiotlb;
+
 	if (isdigit(*str)) {
-		io_tlb_nslabs = simple_strtoul(str, &str, 0);
+		swiotlb->nslabs = simple_strtoul(str, &str, 0);
 		/* avoid tail segment of size < IO_TLB_SEGSIZE */
-		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+		swiotlb->nslabs = ALIGN(swiotlb->nslabs, IO_TLB_SEGSIZE);
 	}
 	if (*str == ',')
 		++str;
@@ -123,7 +123,7 @@ setup_io_tlb_npages(char *str)
 		swiotlb_force = SWIOTLB_FORCE;
 	} else if (!strcmp(str, "noforce")) {
 		swiotlb_force = SWIOTLB_NO_FORCE;
-		io_tlb_nslabs = 1;
+		swiotlb->nslabs = 1;
 	}
 
 	return 0;
@@ -134,7 +134,7 @@ static bool no_iotlb_memory;
 
 unsigned long swiotlb_nr_tbl(void)
 {
-	return unlikely(no_iotlb_memory) ? 0 : io_tlb_nslabs;
+	return unlikely(no_iotlb_memory) ? 0 : default_swiotlb.nslabs;
 }
 EXPORT_SYMBOL_GPL(swiotlb_nr_tbl);
 
@@ -156,13 +156,14 @@ unsigned long swiotlb_size_or_default(void)
 {
 	unsigned long size;
 
-	size = io_tlb_nslabs << IO_TLB_SHIFT;
+	size = default_swiotlb.nslabs << IO_TLB_SHIFT;
 
 	return size ? size : (IO_TLB_DEFAULT_SIZE);
 }
 
 void __init swiotlb_adjust_size(unsigned long new_size)
 {
+	struct swiotlb *swiotlb = &default_swiotlb;
 	unsigned long size;
 
 	/*
@@ -170,10 +171,10 @@ void __init swiotlb_adjust_size(unsigned long new_size)
 	 * architectures such as those supporting memory encryption to
 	 * adjust/expand SWIOTLB size for their use.
 	 */
-	if (!io_tlb_nslabs) {
+	if (!swiotlb->nslabs) {
 		size = ALIGN(new_size, 1 << IO_TLB_SHIFT);
-		io_tlb_nslabs = size >> IO_TLB_SHIFT;
-		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+		swiotlb->nslabs = size >> IO_TLB_SHIFT;
+		swiotlb->nslabs = ALIGN(swiotlb->nslabs, IO_TLB_SEGSIZE);
 
 		pr_info("SWIOTLB bounce buffer size adjusted to %luMB", size >> 20);
 	}
@@ -181,14 +182,15 @@ void __init swiotlb_adjust_size(unsigned long new_size)
 
 void swiotlb_print_info(void)
 {
-	unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+	struct swiotlb *swiotlb = &default_swiotlb;
+	unsigned long bytes = swiotlb->nslabs << IO_TLB_SHIFT;
 
 	if (no_iotlb_memory) {
 		pr_warn("No low mem\n");
 		return;
 	}
 
-	pr_info("mapped [mem %pa-%pa] (%luMB)\n", &io_tlb_start, &io_tlb_end,
+	pr_info("mapped [mem %pa-%pa] (%luMB)\n", &swiotlb->start, &swiotlb->end,
 	       bytes >> 20);
 }
 
@@ -200,57 +202,61 @@ void swiotlb_print_info(void)
  */
 void __init swiotlb_update_mem_attributes(void)
 {
+	struct swiotlb *swiotlb = &default_swiotlb;
 	void *vaddr;
 	unsigned long bytes;
 
 	if (no_iotlb_memory || late_alloc)
 		return;
 
-	vaddr = phys_to_virt(io_tlb_start);
-	bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT);
+	vaddr = phys_to_virt(swiotlb->start);
+	bytes = PAGE_ALIGN(swiotlb->nslabs << IO_TLB_SHIFT);
 	set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
 	memset(vaddr, 0, bytes);
 }
 
 int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
 {
+	struct swiotlb *swiotlb = &default_swiotlb;
 	unsigned long i, bytes;
 	size_t alloc_size;
 
 	bytes = nslabs << IO_TLB_SHIFT;
 
-	io_tlb_nslabs = nslabs;
-	io_tlb_start = __pa(tlb);
-	io_tlb_end = io_tlb_start + bytes;
+	swiotlb->nslabs = nslabs;
+	swiotlb->start = __pa(tlb);
+	swiotlb->end = swiotlb->start + bytes;
 
 	/*
 	 * Allocate and initialize the free list array.  This array is used
 	 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
-	 * between io_tlb_start and io_tlb_end.
+	 * between swiotlb->start and swiotlb->end.
 	 */
-	alloc_size = PAGE_ALIGN(io_tlb_nslabs * sizeof(int));
-	io_tlb_list = memblock_alloc(alloc_size, PAGE_SIZE);
-	if (!io_tlb_list)
+	alloc_size = PAGE_ALIGN(swiotlb->nslabs * sizeof(int));
+	swiotlb->list = memblock_alloc(alloc_size, PAGE_SIZE);
+	if (!swiotlb->list)
 		panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
 		      __func__, alloc_size, PAGE_SIZE);
 
-	alloc_size = PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t));
-	io_tlb_orig_addr = memblock_alloc(alloc_size, PAGE_SIZE);
-	if (!io_tlb_orig_addr)
+	alloc_size = PAGE_ALIGN(swiotlb->nslabs * sizeof(phys_addr_t));
+	swiotlb->orig_addr = memblock_alloc(alloc_size, PAGE_SIZE);
+	if (!swiotlb->orig_addr)
 		panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
 		      __func__, alloc_size, PAGE_SIZE);
 
-	for (i = 0; i < io_tlb_nslabs; i++) {
-		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
-		io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+	for (i = 0; i < swiotlb->nslabs; i++) {
+		swiotlb->list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
+		swiotlb->orig_addr[i] = INVALID_PHYS_ADDR;
 	}
-	io_tlb_index = 0;
+	swiotlb->index = 0;
 	no_iotlb_memory = false;
 
 	if (verbose)
 		swiotlb_print_info();
 
-	swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT);
+	swiotlb_set_max_segment(swiotlb->nslabs << IO_TLB_SHIFT);
+	spin_lock_init(&swiotlb->lock);
+
 	return 0;
 }
 
@@ -261,26 +267,27 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
 void  __init
 swiotlb_init(int verbose)
 {
+	struct swiotlb *swiotlb = &default_swiotlb;
 	size_t default_size = IO_TLB_DEFAULT_SIZE;
 	unsigned char *vstart;
 	unsigned long bytes;
 
-	if (!io_tlb_nslabs) {
-		io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
-		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+	if (!swiotlb->nslabs) {
+		swiotlb->nslabs = (default_size >> IO_TLB_SHIFT);
+		swiotlb->nslabs = ALIGN(swiotlb->nslabs, IO_TLB_SEGSIZE);
 	}
 
-	bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+	bytes = swiotlb->nslabs << IO_TLB_SHIFT;
 
 	/* Get IO TLB memory from the low pages */
 	vstart = memblock_alloc_low(PAGE_ALIGN(bytes), PAGE_SIZE);
-	if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose))
+	if (vstart && !swiotlb_init_with_tbl(vstart, swiotlb->nslabs, verbose))
 		return;
 
-	if (io_tlb_start) {
-		memblock_free_early(io_tlb_start,
-				    PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
-		io_tlb_start = 0;
+	if (swiotlb->start) {
+		memblock_free_early(swiotlb->start,
+				    PAGE_ALIGN(swiotlb->nslabs << IO_TLB_SHIFT));
+		swiotlb->start = 0;
 	}
 	pr_warn("Cannot allocate buffer");
 	no_iotlb_memory = true;
@@ -294,22 +301,23 @@ swiotlb_init(int verbose)
 int
 swiotlb_late_init_with_default_size(size_t default_size)
 {
-	unsigned long bytes, req_nslabs = io_tlb_nslabs;
+	struct swiotlb *swiotlb = &default_swiotlb;
+	unsigned long bytes, req_nslabs = swiotlb->nslabs;
 	unsigned char *vstart = NULL;
 	unsigned int order;
 	int rc = 0;
 
-	if (!io_tlb_nslabs) {
-		io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
-		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+	if (!swiotlb->nslabs) {
+		swiotlb->nslabs = (default_size >> IO_TLB_SHIFT);
+		swiotlb->nslabs = ALIGN(swiotlb->nslabs, IO_TLB_SEGSIZE);
 	}
 
 	/*
 	 * Get IO TLB memory from the low pages
 	 */
-	order = get_order(io_tlb_nslabs << IO_TLB_SHIFT);
-	io_tlb_nslabs = SLABS_PER_PAGE << order;
-	bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+	order = get_order(swiotlb->nslabs << IO_TLB_SHIFT);
+	swiotlb->nslabs = SLABS_PER_PAGE << order;
+	bytes = swiotlb->nslabs << IO_TLB_SHIFT;
 
 	while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
 		vstart = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
@@ -320,15 +328,15 @@ swiotlb_late_init_with_default_size(size_t default_size)
 	}
 
 	if (!vstart) {
-		io_tlb_nslabs = req_nslabs;
+		swiotlb->nslabs = req_nslabs;
 		return -ENOMEM;
 	}
 	if (order != get_order(bytes)) {
 		pr_warn("only able to allocate %ld MB\n",
 			(PAGE_SIZE << order) >> 20);
-		io_tlb_nslabs = SLABS_PER_PAGE << order;
+		swiotlb->nslabs = SLABS_PER_PAGE << order;
 	}
-	rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs);
+	rc = swiotlb_late_init_with_tbl(vstart, swiotlb->nslabs);
 	if (rc)
 		free_pages((unsigned long)vstart, order);
 
@@ -337,22 +345,25 @@ swiotlb_late_init_with_default_size(size_t default_size)
 
 static void swiotlb_cleanup(void)
 {
-	io_tlb_end = 0;
-	io_tlb_start = 0;
-	io_tlb_nslabs = 0;
+	struct swiotlb *swiotlb = &default_swiotlb;
+
+	swiotlb->end = 0;
+	swiotlb->start = 0;
+	swiotlb->nslabs = 0;
 	max_segment = 0;
 }
 
 int
 swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 {
+	struct swiotlb *swiotlb = &default_swiotlb;
 	unsigned long i, bytes;
 
 	bytes = nslabs << IO_TLB_SHIFT;
 
-	io_tlb_nslabs = nslabs;
-	io_tlb_start = virt_to_phys(tlb);
-	io_tlb_end = io_tlb_start + bytes;
+	swiotlb->nslabs = nslabs;
+	swiotlb->start = virt_to_phys(tlb);
+	swiotlb->end = swiotlb->start + bytes;
 
 	set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
 	memset(tlb, 0, bytes);
@@ -360,39 +371,40 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 	/*
 	 * Allocate and initialize the free list array.  This array is used
 	 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
-	 * between io_tlb_start and io_tlb_end.
+	 * between swiotlb->start and swiotlb->end.
 	 */
-	io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
-	                              get_order(io_tlb_nslabs * sizeof(int)));
-	if (!io_tlb_list)
+	swiotlb->list = (unsigned int *)__get_free_pages(GFP_KERNEL,
+	                              get_order(swiotlb->nslabs * sizeof(int)));
+	if (!swiotlb->list)
 		goto cleanup3;
 
-	io_tlb_orig_addr = (phys_addr_t *)
+	swiotlb->orig_addr = (phys_addr_t *)
 		__get_free_pages(GFP_KERNEL,
-				 get_order(io_tlb_nslabs *
+				 get_order(swiotlb->nslabs *
 					   sizeof(phys_addr_t)));
-	if (!io_tlb_orig_addr)
+	if (!swiotlb->orig_addr)
 		goto cleanup4;
 
-	for (i = 0; i < io_tlb_nslabs; i++) {
-		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
-		io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+	for (i = 0; i < swiotlb->nslabs; i++) {
+		swiotlb->list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
+		swiotlb->orig_addr[i] = INVALID_PHYS_ADDR;
 	}
-	io_tlb_index = 0;
+	swiotlb->index = 0;
 	no_iotlb_memory = false;
 
 	swiotlb_print_info();
 
 	late_alloc = 1;
 
-	swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT);
+	swiotlb_set_max_segment(swiotlb->nslabs << IO_TLB_SHIFT);
+	spin_lock_init(&swiotlb->lock);
 
 	return 0;
 
 cleanup4:
-	free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
-	                                                 sizeof(int)));
-	io_tlb_list = NULL;
+	free_pages((unsigned long)swiotlb->list,
+		   get_order(swiotlb->nslabs * sizeof(int)));
+	swiotlb->list = NULL;
 cleanup3:
 	swiotlb_cleanup();
 	return -ENOMEM;
@@ -400,23 +412,25 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 
 void __init swiotlb_exit(void)
 {
-	if (!io_tlb_orig_addr)
+	struct swiotlb *swiotlb = &default_swiotlb;
+
+	if (!swiotlb->orig_addr)
 		return;
 
 	if (late_alloc) {
-		free_pages((unsigned long)io_tlb_orig_addr,
-			   get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
-		free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
-								 sizeof(int)));
-		free_pages((unsigned long)phys_to_virt(io_tlb_start),
-			   get_order(io_tlb_nslabs << IO_TLB_SHIFT));
+		free_pages((unsigned long)swiotlb->orig_addr,
+			   get_order(swiotlb->nslabs * sizeof(phys_addr_t)));
+		free_pages((unsigned long)swiotlb->list,
+			   get_order(swiotlb->nslabs * sizeof(int)));
+		free_pages((unsigned long)phys_to_virt(swiotlb->start),
+			   get_order(swiotlb->nslabs << IO_TLB_SHIFT));
 	} else {
-		memblock_free_late(__pa(io_tlb_orig_addr),
-				   PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
-		memblock_free_late(__pa(io_tlb_list),
-				   PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
-		memblock_free_late(io_tlb_start,
-				   PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
+		memblock_free_late(__pa(swiotlb->orig_addr),
+				   PAGE_ALIGN(swiotlb->nslabs * sizeof(phys_addr_t)));
+		memblock_free_late(__pa(swiotlb->list),
+				   PAGE_ALIGN(swiotlb->nslabs * sizeof(int)));
+		memblock_free_late(swiotlb->start,
+				   PAGE_ALIGN(swiotlb->nslabs << IO_TLB_SHIFT));
 	}
 	swiotlb_cleanup();
 }
@@ -465,7 +479,8 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
 		size_t mapping_size, size_t alloc_size,
 		enum dma_data_direction dir, unsigned long attrs)
 {
-	dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(hwdev, io_tlb_start);
+	struct swiotlb *swiotlb = &default_swiotlb;
+	dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(hwdev, swiotlb->start);
 	unsigned long flags;
 	phys_addr_t tlb_addr;
 	unsigned int nslots, stride, index, wrap;
@@ -516,13 +531,13 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
 	 * Find suitable number of IO TLB entries size that will fit this
 	 * request and allocate a buffer from that IO TLB pool.
 	 */
-	spin_lock_irqsave(&io_tlb_lock, flags);
+	spin_lock_irqsave(&swiotlb->lock, flags);
 
-	if (unlikely(nslots > io_tlb_nslabs - io_tlb_used))
+	if (unlikely(nslots > swiotlb->nslabs - swiotlb->used))
 		goto not_found;
 
-	index = ALIGN(io_tlb_index, stride);
-	if (index >= io_tlb_nslabs)
+	index = ALIGN(swiotlb->index, stride);
+	if (index >= swiotlb->nslabs)
 		index = 0;
 	wrap = index;
 
@@ -530,7 +545,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
 		while (iommu_is_span_boundary(index, nslots, offset_slots,
 					      max_slots)) {
 			index += stride;
-			if (index >= io_tlb_nslabs)
+			if (index >= swiotlb->nslabs)
 				index = 0;
 			if (index == wrap)
 				goto not_found;
@@ -541,40 +556,40 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
 		 * contiguous buffers, we allocate the buffers from that slot
 		 * and mark the entries as '0' indicating unavailable.
 		 */
-		if (io_tlb_list[index] >= nslots) {
+		if (swiotlb->list[index] >= nslots) {
 			int count = 0;
 
 			for (i = index; i < (int) (index + nslots); i++)
-				io_tlb_list[i] = 0;
-			for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
-				io_tlb_list[i] = ++count;
-			tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT);
+				swiotlb->list[i] = 0;
+			for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && swiotlb->list[i]; i--)
+				swiotlb->list[i] = ++count;
+			tlb_addr = swiotlb->start + (index << IO_TLB_SHIFT);
 
 			/*
 			 * Update the indices to avoid searching in the next
 			 * round.
 			 */
-			io_tlb_index = ((index + nslots) < io_tlb_nslabs
-					? (index + nslots) : 0);
+			swiotlb->index = ((index + nslots) < swiotlb->nslabs
+				      ? (index + nslots) : 0);
 
 			goto found;
 		}
 		index += stride;
-		if (index >= io_tlb_nslabs)
+		if (index >= swiotlb->nslabs)
 			index = 0;
 	} while (index != wrap);
 
 not_found:
-	tmp_io_tlb_used = io_tlb_used;
+	tmp_io_tlb_used = swiotlb->used;
 
-	spin_unlock_irqrestore(&io_tlb_lock, flags);
+	spin_unlock_irqrestore(&swiotlb->lock, flags);
 	if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit())
 		dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
-			 alloc_size, io_tlb_nslabs, tmp_io_tlb_used);
+			 alloc_size, swiotlb->nslabs, tmp_io_tlb_used);
 	return (phys_addr_t)DMA_MAPPING_ERROR;
 found:
-	io_tlb_used += nslots;
-	spin_unlock_irqrestore(&io_tlb_lock, flags);
+	swiotlb->used += nslots;
+	spin_unlock_irqrestore(&swiotlb->lock, flags);
 
 	/*
 	 * Save away the mapping from the original address to the DMA address.
@@ -582,7 +597,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
 	 * needed.
 	 */
 	for (i = 0; i < nslots; i++)
-		io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
+		swiotlb->orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
 	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
 	    (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
 		swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE);
@@ -597,10 +612,11 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
 			      size_t mapping_size, size_t alloc_size,
 			      enum dma_data_direction dir, unsigned long attrs)
 {
+	struct swiotlb *swiotlb = &default_swiotlb;
 	unsigned long flags;
 	int i, count, nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
-	int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
-	phys_addr_t orig_addr = io_tlb_orig_addr[index];
+	int index = (tlb_addr - swiotlb->start) >> IO_TLB_SHIFT;
+	phys_addr_t orig_addr = swiotlb->orig_addr[index];
 
 	/*
 	 * First, sync the memory before unmapping the entry
@@ -616,36 +632,37 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
 	 * While returning the entries to the free list, we merge the entries
 	 * with slots below and above the pool being returned.
 	 */
-	spin_lock_irqsave(&io_tlb_lock, flags);
+	spin_lock_irqsave(&swiotlb->lock, flags);
 	{
 		count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
-			 io_tlb_list[index + nslots] : 0);
+			 swiotlb->list[index + nslots] : 0);
 		/*
 		 * Step 1: return the slots to the free list, merging the
 		 * slots with superceeding slots
 		 */
 		for (i = index + nslots - 1; i >= index; i--) {
-			io_tlb_list[i] = ++count;
-			io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+			swiotlb->list[i] = ++count;
+			swiotlb->orig_addr[i] = INVALID_PHYS_ADDR;
 		}
 		/*
 		 * Step 2: merge the returned slots with the preceding slots,
 		 * if available (non zero)
 		 */
-		for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--)
-			io_tlb_list[i] = ++count;
+		for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && swiotlb->list[i]; i--)
+			swiotlb->list[i] = ++count;
 
-		io_tlb_used -= nslots;
+		swiotlb->used -= nslots;
 	}
-	spin_unlock_irqrestore(&io_tlb_lock, flags);
+	spin_unlock_irqrestore(&swiotlb->lock, flags);
 }
 
 void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
 			     size_t size, enum dma_data_direction dir,
 			     enum dma_sync_target target)
 {
-	int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
-	phys_addr_t orig_addr = io_tlb_orig_addr[index];
+	struct swiotlb *swiotlb = &default_swiotlb;
+	int index = (tlb_addr - swiotlb->start) >> IO_TLB_SHIFT;
+	phys_addr_t orig_addr = swiotlb->orig_addr[index];
 
 	if (orig_addr == INVALID_PHYS_ADDR)
 		return;
@@ -713,31 +730,31 @@ size_t swiotlb_max_mapping_size(struct device *dev)
 bool is_swiotlb_active(void)
 {
 	/*
-	 * When SWIOTLB is initialized, even if io_tlb_start points to physical
-	 * address zero, io_tlb_end surely doesn't.
+	 * When SWIOTLB is initialized, even if swiotlb->start points to
+	 * physical address zero, swiotlb->end surely doesn't.
 	 */
-	return io_tlb_end != 0;
+	return default_swiotlb.end != 0;
 }
 
 bool is_swiotlb_buffer(phys_addr_t paddr)
 {
-	return paddr >= io_tlb_start && paddr < io_tlb_end;
+	return paddr >= default_swiotlb.start && paddr < default_swiotlb.end;
 }
 
 phys_addr_t get_swiotlb_start(void)
 {
-	return io_tlb_start;
+	return default_swiotlb.start;
 }
 
 #ifdef CONFIG_DEBUG_FS
 
 static int __init swiotlb_create_debugfs(void)
 {
-	struct dentry *root;
+	struct swiotlb *swiotlb = &default_swiotlb;
 
-	root = debugfs_create_dir("swiotlb", NULL);
-	debugfs_create_ulong("io_tlb_nslabs", 0400, root, &io_tlb_nslabs);
-	debugfs_create_ulong("io_tlb_used", 0400, root, &io_tlb_used);
+	swiotlb->debugfs = debugfs_create_dir("swiotlb", NULL);
+	debugfs_create_ulong("io_tlb_nslabs", 0400, swiotlb->debugfs, &swiotlb->nslabs);
+	debugfs_create_ulong("io_tlb_used", 0400, swiotlb->debugfs, &swiotlb->used);
 	return 0;
 }
 
-- 
2.30.0.478.g8a0d178c01-goog


^ permalink raw reply related

* [PATCH v4 04/14] swiotlb: Refactor swiotlb_late_init_with_tbl
From: Claire Chang @ 2021-02-09  6:21 UTC (permalink / raw)
  To: Rob Herring, mpe, Joerg Roedel, Will Deacon, Frank Rowand,
	Konrad Rzeszutek Wilk, boris.ostrovsky, jgross, Christoph Hellwig,
	Marek Szyprowski
  Cc: heikki.krogerus, peterz, grant.likely, paulus, mingo, sstabellini,
	Saravana Kannan, xypron.glpk, Rafael J . Wysocki,
	Bartosz Golaszewski, xen-devel, Thierry Reding, linux-devicetree,
	linuxppc-dev, Nicolas Boichat, Claire Chang, Dan Williams,
	Andy Shevchenko, Greg KH, Randy Dunlap, lkml,
	list@263.net:IOMMU DRIVERS, Jim Quinlan, Robin Murphy, bauerman
In-Reply-To: <20210209062131.2300005-1-tientzu@chromium.org>

Refactor swiotlb_late_init_with_tbl to make the code reusable for
restricted DMA pool initialization.

Signed-off-by: Claire Chang <tientzu@chromium.org>
---
 kernel/dma/swiotlb.c | 65 ++++++++++++++++++++++++++++----------------
 1 file changed, 42 insertions(+), 23 deletions(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 28b7bfe7a2a8..dc37951c6924 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -353,20 +353,21 @@ static void swiotlb_cleanup(void)
 	max_segment = 0;
 }
 
-int
-swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
+static int swiotlb_init_tlb_pool(struct swiotlb *swiotlb, phys_addr_t start,
+				size_t size)
 {
-	struct swiotlb *swiotlb = &default_swiotlb;
-	unsigned long i, bytes;
+	unsigned long i;
+	void *vaddr = phys_to_virt(start);
 
-	bytes = nslabs << IO_TLB_SHIFT;
+	size = ALIGN(size, 1 << IO_TLB_SHIFT);
+	swiotlb->nslabs = size >> IO_TLB_SHIFT;
+	swiotlb->nslabs = ALIGN(swiotlb->nslabs, IO_TLB_SEGSIZE);
 
-	swiotlb->nslabs = nslabs;
-	swiotlb->start = virt_to_phys(tlb);
-	swiotlb->end = swiotlb->start + bytes;
+	swiotlb->start = start;
+	swiotlb->end = swiotlb->start + size;
 
-	set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
-	memset(tlb, 0, bytes);
+	set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
+	memset(vaddr, 0, size);
 
 	/*
 	 * Allocate and initialize the free list array.  This array is used
@@ -390,13 +391,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 		swiotlb->orig_addr[i] = INVALID_PHYS_ADDR;
 	}
 	swiotlb->index = 0;
-	no_iotlb_memory = false;
-
-	swiotlb_print_info();
 
-	late_alloc = 1;
-
-	swiotlb_set_max_segment(swiotlb->nslabs << IO_TLB_SHIFT);
 	spin_lock_init(&swiotlb->lock);
 
 	return 0;
@@ -410,6 +405,27 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 	return -ENOMEM;
 }
 
+int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
+{
+	struct swiotlb *swiotlb = &default_swiotlb;
+	unsigned long bytes = nslabs << IO_TLB_SHIFT;
+	int ret;
+
+	ret = swiotlb_init_tlb_pool(swiotlb, virt_to_phys(tlb), bytes);
+	if (ret)
+		return ret;
+
+	no_iotlb_memory = false;
+
+	swiotlb_print_info();
+
+	late_alloc = 1;
+
+	swiotlb_set_max_segment(bytes);
+
+	return 0;
+}
+
 void __init swiotlb_exit(void)
 {
 	struct swiotlb *swiotlb = &default_swiotlb;
@@ -747,17 +763,20 @@ phys_addr_t get_swiotlb_start(void)
 }
 
 #ifdef CONFIG_DEBUG_FS
-
-static int __init swiotlb_create_debugfs(void)
+static void swiotlb_create_debugfs(struct swiotlb *swiotlb, const char *name,
+				   struct dentry *node)
 {
-	struct swiotlb *swiotlb = &default_swiotlb;
-
-	swiotlb->debugfs = debugfs_create_dir("swiotlb", NULL);
+	swiotlb->debugfs = debugfs_create_dir(name, node);
 	debugfs_create_ulong("io_tlb_nslabs", 0400, swiotlb->debugfs, &swiotlb->nslabs);
 	debugfs_create_ulong("io_tlb_used", 0400, swiotlb->debugfs, &swiotlb->used);
-	return 0;
 }
 
-late_initcall(swiotlb_create_debugfs);
+static int __init swiotlb_create_default_debugfs(void)
+{
+	swiotlb_create_debugfs(&default_swiotlb, "swiotlb", NULL);
+
+	return 0;
+}
 
+late_initcall(swiotlb_create_default_debugfs);
 #endif
-- 
2.30.0.478.g8a0d178c01-goog


^ permalink raw reply related

* [PATCH v4 05/14] swiotlb: Add DMA_RESTRICTED_POOL
From: Claire Chang @ 2021-02-09  6:21 UTC (permalink / raw)
  To: Rob Herring, mpe, Joerg Roedel, Will Deacon, Frank Rowand,
	Konrad Rzeszutek Wilk, boris.ostrovsky, jgross, Christoph Hellwig,
	Marek Szyprowski
  Cc: heikki.krogerus, peterz, grant.likely, paulus, mingo, sstabellini,
	Saravana Kannan, xypron.glpk, Rafael J . Wysocki,
	Bartosz Golaszewski, xen-devel, Thierry Reding, linux-devicetree,
	linuxppc-dev, Nicolas Boichat, Claire Chang, Dan Williams,
	Andy Shevchenko, Greg KH, Randy Dunlap, lkml,
	list@263.net:IOMMU DRIVERS, Jim Quinlan, Robin Murphy, bauerman
In-Reply-To: <20210209062131.2300005-1-tientzu@chromium.org>

Add a new Kconfig symbol, DMA_RESTRICTED_POOL, for restricted DMA pools.

Signed-off-by: Claire Chang <tientzu@chromium.org>
---
 kernel/dma/Kconfig | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index 479fc145acfc..97ff9f8dd3c8 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -83,6 +83,20 @@ config SWIOTLB
 	bool
 	select NEED_DMA_MAP_STATE
 
+config DMA_RESTRICTED_POOL
+	bool "DMA Restricted Pool"
+	depends on OF && OF_RESERVED_MEM
+	select SWIOTLB
+	help
+	  This enables support for restricted DMA pools which provide a level of
+	  DMA memory protection on systems with limited hardware protection
+	  capabilities, such as those lacking an IOMMU.
+
+	  For more information see
+	  <Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt>
+	  and <kernel/dma/swiotlb.c>.
+	  If unsure, say "n".
+
 #
 # Should be selected if we can mmap non-coherent mappings to userspace.
 # The only thing that is really required is a way to set an uncached bit
-- 
2.30.0.478.g8a0d178c01-goog


^ permalink raw reply related

* [PATCH v4 06/14] swiotlb: Add restricted DMA pool
From: Claire Chang @ 2021-02-09  6:21 UTC (permalink / raw)
  To: Rob Herring, mpe, Joerg Roedel, Will Deacon, Frank Rowand,
	Konrad Rzeszutek Wilk, boris.ostrovsky, jgross, Christoph Hellwig,
	Marek Szyprowski
  Cc: heikki.krogerus, peterz, grant.likely, paulus, mingo, sstabellini,
	Saravana Kannan, xypron.glpk, Rafael J . Wysocki,
	Bartosz Golaszewski, xen-devel, Thierry Reding, linux-devicetree,
	linuxppc-dev, Nicolas Boichat, Claire Chang, Dan Williams,
	Andy Shevchenko, Greg KH, Randy Dunlap, lkml,
	list@263.net:IOMMU DRIVERS, Jim Quinlan, Robin Murphy, bauerman
In-Reply-To: <20210209062131.2300005-1-tientzu@chromium.org>

Add the initialization function to create restricted DMA pools from
matching reserved-memory nodes.

Signed-off-by: Claire Chang <tientzu@chromium.org>
---
 include/linux/device.h |  4 ++
 kernel/dma/swiotlb.c   | 94 +++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 97 insertions(+), 1 deletion(-)

diff --git a/include/linux/device.h b/include/linux/device.h
index 7619a84f8ce4..08d440627b93 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -415,6 +415,7 @@ struct dev_links_info {
  * @dma_pools:	Dma pools (if dma'ble device).
  * @dma_mem:	Internal for coherent mem override.
  * @cma_area:	Contiguous memory area for dma allocations
+ * @dev_swiotlb: Internal for swiotlb override.
  * @archdata:	For arch-specific additions.
  * @of_node:	Associated device tree node.
  * @fwnode:	Associated device node supplied by platform firmware.
@@ -517,6 +518,9 @@ struct device {
 #ifdef CONFIG_DMA_CMA
 	struct cma *cma_area;		/* contiguous memory area for dma
 					   allocations */
+#endif
+#ifdef CONFIG_DMA_RESTRICTED_POOL
+	struct swiotlb *dev_swiotlb;
 #endif
 	/* arch specific additions */
 	struct dev_archdata	archdata;
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index dc37951c6924..3a17451c5981 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -39,6 +39,13 @@
 #ifdef CONFIG_DEBUG_FS
 #include <linux/debugfs.h>
 #endif
+#ifdef CONFIG_DMA_RESTRICTED_POOL
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/of_reserved_mem.h>
+#include <linux/slab.h>
+#endif
 
 #include <asm/io.h>
 #include <asm/dma.h>
@@ -75,7 +82,8 @@ enum swiotlb_force swiotlb_force;
  *              range check to see if the memory was in fact allocated by this
  *              API.
  * @nslabs:     The number of IO TLB blocks (in groups of 64) between @start and
- *              @end. This is command line adjustable via setup_io_tlb_npages.
+ *              @end. For default swiotlb, this is command line adjustable via
+ *              setup_io_tlb_npages.
  * @used:       The number of used IO TLB block.
  * @list:       The free list describing the number of free entries available
  *              from each index.
@@ -780,3 +788,87 @@ static int __init swiotlb_create_default_debugfs(void)
 
 late_initcall(swiotlb_create_default_debugfs);
 #endif
+
+#ifdef CONFIG_DMA_RESTRICTED_POOL
+static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
+				    struct device *dev)
+{
+	struct swiotlb *swiotlb = rmem->priv;
+	int ret;
+
+	if (dev->dev_swiotlb)
+		return -EBUSY;
+
+	/*
+	 * Multiple devices can share one pool, so the swiotlb struct kept in
+	 * rmem->priv is set up only by the first device attached to it.
+	 */
+	if (!swiotlb) {
+		swiotlb = kzalloc(sizeof(*swiotlb), GFP_KERNEL);
+		if (!swiotlb)
+			return -ENOMEM;
+
+#ifdef CONFIG_ARM
+		/* On ARM, refuse a pool whose base page is not in highmem. */
+		if (!PageHighMem(pfn_to_page(PHYS_PFN(rmem->base)))) {
+			ret = -EINVAL;
+			goto cleanup;
+		}
+#endif /* CONFIG_ARM */
+
+		ret = swiotlb_init_tlb_pool(swiotlb, rmem->base, rmem->size);
+		if (ret)
+			goto cleanup;
+
+		rmem->priv = swiotlb;
+	}
+
+#ifdef CONFIG_DEBUG_FS
+	swiotlb_create_debugfs(swiotlb, rmem->name, default_swiotlb.debugfs);
+#endif /* CONFIG_DEBUG_FS */
+
+	dev->dev_swiotlb = swiotlb;
+
+	return 0;
+
+cleanup:
+	kfree(swiotlb);
+
+	return ret;
+}
+
+static void rmem_swiotlb_device_release(struct reserved_mem *rmem,
+					struct device *dev)
+{
+	if (!dev || !dev->dev_swiotlb)
+		return;
+
+#ifdef CONFIG_DEBUG_FS
+	debugfs_remove_recursive(dev->dev_swiotlb->debugfs);
+#endif /* CONFIG_DEBUG_FS */
+	dev->dev_swiotlb = NULL;
+}
+
+static const struct reserved_mem_ops rmem_swiotlb_ops = {
+	.device_init = rmem_swiotlb_device_init,
+	.device_release = rmem_swiotlb_device_release,
+};
+
+static int __init rmem_swiotlb_setup(struct reserved_mem *rmem)
+{
+	unsigned long node = rmem->fdt_node;
+
+	if (of_get_flat_dt_prop(node, "reusable", NULL) ||
+	    of_get_flat_dt_prop(node, "linux,cma-default", NULL) ||
+	    of_get_flat_dt_prop(node, "linux,dma-default", NULL) ||
+	    of_get_flat_dt_prop(node, "no-map", NULL))
+		return -EINVAL;
+
+	rmem->ops = &rmem_swiotlb_ops;
+	pr_info("Reserved memory: created device swiotlb memory pool at %pa, size %lu MiB\n",
+		&rmem->base, (unsigned long)rmem->size / SZ_1M);
+	return 0;
+}
+
+RESERVEDMEM_OF_DECLARE(dma, "restricted-dma-pool", rmem_swiotlb_setup);
+#endif /* CONFIG_DMA_RESTRICTED_POOL */
-- 
2.30.0.478.g8a0d178c01-goog


^ permalink raw reply related

* [PATCH v4 07/14] swiotlb: Update swiotlb API to gain a struct device argument
From: Claire Chang @ 2021-02-09  6:21 UTC (permalink / raw)
  To: Rob Herring, mpe, Joerg Roedel, Will Deacon, Frank Rowand,
	Konrad Rzeszutek Wilk, boris.ostrovsky, jgross, Christoph Hellwig,
	Marek Szyprowski
  Cc: heikki.krogerus, peterz, grant.likely, paulus, mingo, sstabellini,
	Saravana Kannan, xypron.glpk, Rafael J . Wysocki,
	Bartosz Golaszewski, xen-devel, Thierry Reding, linux-devicetree,
	linuxppc-dev, Nicolas Boichat, Claire Chang, Dan Williams,
	Andy Shevchenko, Greg KH, Randy Dunlap, lkml,
	list@263.net:IOMMU DRIVERS, Jim Quinlan, Robin Murphy, bauerman
In-Reply-To: <20210209062131.2300005-1-tientzu@chromium.org>

Introduce the get_swiotlb() getter and update all callers of
is_swiotlb_active(), is_swiotlb_buffer() and get_swiotlb_start() to gain
a struct device argument.

Signed-off-by: Claire Chang <tientzu@chromium.org>
---
 drivers/iommu/dma-iommu.c | 12 ++++++------
 drivers/xen/swiotlb-xen.c |  4 ++--
 include/linux/swiotlb.h   | 10 +++++-----
 kernel/dma/direct.c       |  8 ++++----
 kernel/dma/direct.h       |  6 +++---
 kernel/dma/swiotlb.c      | 23 +++++++++++++++++------
 6 files changed, 37 insertions(+), 26 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index f659395e7959..abdbe14472cc 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -503,7 +503,7 @@ static void __iommu_dma_unmap_swiotlb(struct device *dev, dma_addr_t dma_addr,
 
 	__iommu_dma_unmap(dev, dma_addr, size);
 
-	if (unlikely(is_swiotlb_buffer(phys)))
+	if (unlikely(is_swiotlb_buffer(dev, phys)))
 		swiotlb_tbl_unmap_single(dev, phys, size,
 				iova_align(iovad, size), dir, attrs);
 }
@@ -580,7 +580,7 @@ static dma_addr_t __iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
 	}
 
 	iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
-	if ((iova == DMA_MAPPING_ERROR) && is_swiotlb_buffer(phys))
+	if ((iova == DMA_MAPPING_ERROR) && is_swiotlb_buffer(dev, phys))
 		swiotlb_tbl_unmap_single(dev, phys, org_size,
 				aligned_size, dir, attrs);
 
@@ -753,7 +753,7 @@ static void iommu_dma_sync_single_for_cpu(struct device *dev,
 	if (!dev_is_dma_coherent(dev))
 		arch_sync_dma_for_cpu(phys, size, dir);
 
-	if (is_swiotlb_buffer(phys))
+	if (is_swiotlb_buffer(dev, phys))
 		swiotlb_tbl_sync_single(dev, phys, size, dir, SYNC_FOR_CPU);
 }
 
@@ -766,7 +766,7 @@ static void iommu_dma_sync_single_for_device(struct device *dev,
 		return;
 
 	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
-	if (is_swiotlb_buffer(phys))
+	if (is_swiotlb_buffer(dev, phys))
 		swiotlb_tbl_sync_single(dev, phys, size, dir, SYNC_FOR_DEVICE);
 
 	if (!dev_is_dma_coherent(dev))
@@ -787,7 +787,7 @@ static void iommu_dma_sync_sg_for_cpu(struct device *dev,
 		if (!dev_is_dma_coherent(dev))
 			arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
 
-		if (is_swiotlb_buffer(sg_phys(sg)))
+		if (is_swiotlb_buffer(dev, sg_phys(sg)))
 			swiotlb_tbl_sync_single(dev, sg_phys(sg), sg->length,
 						dir, SYNC_FOR_CPU);
 	}
@@ -804,7 +804,7 @@ static void iommu_dma_sync_sg_for_device(struct device *dev,
 		return;
 
 	for_each_sg(sgl, sg, nelems, i) {
-		if (is_swiotlb_buffer(sg_phys(sg)))
+		if (is_swiotlb_buffer(dev, sg_phys(sg)))
 			swiotlb_tbl_sync_single(dev, sg_phys(sg), sg->length,
 						dir, SYNC_FOR_DEVICE);
 
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 91f8c68d1a9b..f424d46756b1 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -192,8 +192,8 @@ int __ref xen_swiotlb_init(int verbose, bool early)
 	/*
 	 * IO TLB memory already allocated. Just use it.
 	 */
-	if (is_swiotlb_active()) {
-		xen_io_tlb_start = phys_to_virt(get_swiotlb_start());
+	if (is_swiotlb_active(NULL)) {
+		xen_io_tlb_start = phys_to_virt(get_swiotlb_start(NULL));
 		goto end;
 	}
 
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 041611bf3c2a..f13a52a97382 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -71,16 +71,16 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys,
 #ifdef CONFIG_SWIOTLB
 extern enum swiotlb_force swiotlb_force;
 
-bool is_swiotlb_buffer(phys_addr_t paddr);
+bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr);
 void __init swiotlb_exit(void);
 unsigned int swiotlb_max_segment(void);
 size_t swiotlb_max_mapping_size(struct device *dev);
-bool is_swiotlb_active(void);
-phys_addr_t get_swiotlb_start(void);
+bool is_swiotlb_active(struct device *dev);
+phys_addr_t get_swiotlb_start(struct device *dev);
 void __init swiotlb_adjust_size(unsigned long new_size);
 #else
 #define swiotlb_force SWIOTLB_NO_FORCE
-static inline bool is_swiotlb_buffer(phys_addr_t paddr)
+static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
 {
 	return false;
 }
@@ -96,7 +96,7 @@ static inline size_t swiotlb_max_mapping_size(struct device *dev)
 	return SIZE_MAX;
 }
 
-static inline bool is_swiotlb_active(void)
+static inline bool is_swiotlb_active(struct device *dev)
 {
 	return false;
 }
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 002268262c9a..30ccbc08e229 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -343,7 +343,7 @@ void dma_direct_sync_sg_for_device(struct device *dev,
 	for_each_sg(sgl, sg, nents, i) {
 		phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg));
 
-		if (unlikely(is_swiotlb_buffer(paddr)))
+		if (unlikely(is_swiotlb_buffer(dev, paddr)))
 			swiotlb_tbl_sync_single(dev, paddr, sg->length,
 					dir, SYNC_FOR_DEVICE);
 
@@ -369,7 +369,7 @@ void dma_direct_sync_sg_for_cpu(struct device *dev,
 		if (!dev_is_dma_coherent(dev))
 			arch_sync_dma_for_cpu(paddr, sg->length, dir);
 
-		if (unlikely(is_swiotlb_buffer(paddr)))
+		if (unlikely(is_swiotlb_buffer(dev, paddr)))
 			swiotlb_tbl_sync_single(dev, paddr, sg->length, dir,
 					SYNC_FOR_CPU);
 
@@ -495,7 +495,7 @@ int dma_direct_supported(struct device *dev, u64 mask)
 size_t dma_direct_max_mapping_size(struct device *dev)
 {
 	/* If SWIOTLB is active, use its maximum mapping size */
-	if (is_swiotlb_active() &&
+	if (is_swiotlb_active(dev) &&
 	    (dma_addressing_limited(dev) || swiotlb_force == SWIOTLB_FORCE))
 		return swiotlb_max_mapping_size(dev);
 	return SIZE_MAX;
@@ -504,7 +504,7 @@ size_t dma_direct_max_mapping_size(struct device *dev)
 bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr)
 {
 	return !dev_is_dma_coherent(dev) ||
-		is_swiotlb_buffer(dma_to_phys(dev, dma_addr));
+		is_swiotlb_buffer(dev, dma_to_phys(dev, dma_addr));
 }
 
 /**
diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h
index b98615578737..7b83b1595989 100644
--- a/kernel/dma/direct.h
+++ b/kernel/dma/direct.h
@@ -56,7 +56,7 @@ static inline void dma_direct_sync_single_for_device(struct device *dev,
 {
 	phys_addr_t paddr = dma_to_phys(dev, addr);
 
-	if (unlikely(is_swiotlb_buffer(paddr)))
+	if (unlikely(is_swiotlb_buffer(dev, paddr)))
 		swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE);
 
 	if (!dev_is_dma_coherent(dev))
@@ -73,7 +73,7 @@ static inline void dma_direct_sync_single_for_cpu(struct device *dev,
 		arch_sync_dma_for_cpu_all();
 	}
 
-	if (unlikely(is_swiotlb_buffer(paddr)))
+	if (unlikely(is_swiotlb_buffer(dev, paddr)))
 		swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU);
 
 	if (dir == DMA_FROM_DEVICE)
@@ -113,7 +113,7 @@ static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr,
 	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
 		dma_direct_sync_single_for_cpu(dev, addr, size, dir);
 
-	if (unlikely(is_swiotlb_buffer(phys)))
+	if (unlikely(is_swiotlb_buffer(dev, phys)))
 		swiotlb_tbl_unmap_single(dev, phys, size, size, dir, attrs);
 }
 #endif /* _KERNEL_DMA_DIRECT_H */
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 3a17451c5981..e22e7ae75f1c 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -107,6 +107,11 @@ struct swiotlb {
 };
 static struct swiotlb default_swiotlb;
 
+static inline struct swiotlb *get_swiotlb(struct device *dev)
+{
+	return &default_swiotlb;
+}
+
 /*
  * Max segment that we can provide which (if pages are contingous) will
  * not be bounced (unless SWIOTLB_FORCE is set).
@@ -751,23 +756,29 @@ size_t swiotlb_max_mapping_size(struct device *dev)
 	return ((size_t)1 << IO_TLB_SHIFT) * IO_TLB_SEGSIZE;
 }
 
-bool is_swiotlb_active(void)
+bool is_swiotlb_active(struct device *dev)
 {
+	struct swiotlb *swiotlb = get_swiotlb(dev);
+
 	/*
 	 * When SWIOTLB is initialized, even if swiotlb->start points to
 	 * physical address zero, swiotlb->end surely doesn't.
 	 */
-	return default_swiotlb.end != 0;
+	return swiotlb->end != 0;
 }
 
-bool is_swiotlb_buffer(phys_addr_t paddr)
+bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
 {
-	return paddr >= default_swiotlb.start && paddr < default_swiotlb.end;
+	struct swiotlb *swiotlb = get_swiotlb(dev);
+
+	return paddr >= swiotlb->start && paddr < swiotlb->end;
 }
 
-phys_addr_t get_swiotlb_start(void)
+phys_addr_t get_swiotlb_start(struct device *dev)
 {
-	return default_swiotlb.start;
+	struct swiotlb *swiotlb = get_swiotlb(dev);
+
+	return swiotlb->start;
 }
 
 #ifdef CONFIG_DEBUG_FS
-- 
2.30.0.478.g8a0d178c01-goog


^ permalink raw reply related

* [PATCH v4 08/14] swiotlb: Use restricted DMA pool if available
From: Claire Chang @ 2021-02-09  6:21 UTC (permalink / raw)
  To: Rob Herring, mpe, Joerg Roedel, Will Deacon, Frank Rowand,
	Konrad Rzeszutek Wilk, boris.ostrovsky, jgross, Christoph Hellwig,
	Marek Szyprowski
  Cc: heikki.krogerus, peterz, grant.likely, paulus, mingo, sstabellini,
	Saravana Kannan, xypron.glpk, Rafael J . Wysocki,
	Bartosz Golaszewski, xen-devel, Thierry Reding, linux-devicetree,
	linuxppc-dev, Nicolas Boichat, Claire Chang, Dan Williams,
	Andy Shevchenko, Greg KH, Randy Dunlap, lkml,
	list@263.net:IOMMU DRIVERS, Jim Quinlan, Robin Murphy, bauerman
In-Reply-To: <20210209062131.2300005-1-tientzu@chromium.org>

Regardless of the swiotlb setting, the restricted DMA pool is preferred
if available.

The restricted DMA pools provide a basic level of protection against the
DMA overwriting buffer contents at unexpected times. However, to protect
against general data leakage and system memory corruption, the system
needs to provide a way to lock down the memory access, e.g., MPU.

Signed-off-by: Claire Chang <tientzu@chromium.org>
---
 include/linux/swiotlb.h | 13 +++++++++++++
 kernel/dma/direct.h     |  2 +-
 kernel/dma/swiotlb.c    | 20 +++++++++++++++++---
 3 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index f13a52a97382..76f86c684524 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -71,6 +71,15 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys,
 #ifdef CONFIG_SWIOTLB
 extern enum swiotlb_force swiotlb_force;
 
+#ifdef CONFIG_DMA_RESTRICTED_POOL
+bool is_swiotlb_force(struct device *dev);
+#else
+static inline bool is_swiotlb_force(struct device *dev)
+{
+	return unlikely(swiotlb_force == SWIOTLB_FORCE);
+}
+#endif /* CONFIG_DMA_RESTRICTED_POOL */
+
 bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr);
 void __init swiotlb_exit(void);
 unsigned int swiotlb_max_segment(void);
@@ -80,6 +89,10 @@ phys_addr_t get_swiotlb_start(struct device *dev);
 void __init swiotlb_adjust_size(unsigned long new_size);
 #else
 #define swiotlb_force SWIOTLB_NO_FORCE
+static inline bool is_swiotlb_force(struct device *dev)
+{
+	return false;
+}
 static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
 {
 	return false;
diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h
index 7b83b1595989..b011db1b625d 100644
--- a/kernel/dma/direct.h
+++ b/kernel/dma/direct.h
@@ -87,7 +87,7 @@ static inline dma_addr_t dma_direct_map_page(struct device *dev,
 	phys_addr_t phys = page_to_phys(page) + offset;
 	dma_addr_t dma_addr = phys_to_dma(dev, phys);
 
-	if (unlikely(swiotlb_force == SWIOTLB_FORCE))
+	if (is_swiotlb_force(dev))
 		return swiotlb_map(dev, phys, size, dir, attrs);
 
 	if (unlikely(!dma_capable(dev, dma_addr, size, true))) {
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index e22e7ae75f1c..6fdebde8fb1f 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -40,6 +40,7 @@
 #include <linux/debugfs.h>
 #endif
 #ifdef CONFIG_DMA_RESTRICTED_POOL
+#include <linux/device.h>
 #include <linux/io.h>
 #include <linux/of.h>
 #include <linux/of_fdt.h>
@@ -109,6 +110,10 @@ static struct swiotlb default_swiotlb;
 
 static inline struct swiotlb *get_swiotlb(struct device *dev)
 {
+#ifdef CONFIG_DMA_RESTRICTED_POOL
+	if (dev && dev->dev_swiotlb)
+		return dev->dev_swiotlb;
+#endif
 	return &default_swiotlb;
 }
 
@@ -508,7 +513,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
 		size_t mapping_size, size_t alloc_size,
 		enum dma_data_direction dir, unsigned long attrs)
 {
-	struct swiotlb *swiotlb = &default_swiotlb;
+	struct swiotlb *swiotlb = get_swiotlb(hwdev);
 	dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(hwdev, swiotlb->start);
 	unsigned long flags;
 	phys_addr_t tlb_addr;
@@ -519,7 +524,11 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
 	unsigned long max_slots;
 	unsigned long tmp_io_tlb_used;
 
+#ifdef CONFIG_DMA_RESTRICTED_POOL
+	if (no_iotlb_memory && !hwdev->dev_swiotlb)
+#else
 	if (no_iotlb_memory)
+#endif
 		panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
 
 	if (mem_encrypt_active())
@@ -641,7 +650,7 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
 			      size_t mapping_size, size_t alloc_size,
 			      enum dma_data_direction dir, unsigned long attrs)
 {
-	struct swiotlb *swiotlb = &default_swiotlb;
+	struct swiotlb *swiotlb = get_swiotlb(hwdev);
 	unsigned long flags;
 	int i, count, nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
 	int index = (tlb_addr - swiotlb->start) >> IO_TLB_SHIFT;
@@ -689,7 +698,7 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
 			     size_t size, enum dma_data_direction dir,
 			     enum dma_sync_target target)
 {
-	struct swiotlb *swiotlb = &default_swiotlb;
+	struct swiotlb *swiotlb = get_swiotlb(hwdev);
 	int index = (tlb_addr - swiotlb->start) >> IO_TLB_SHIFT;
 	phys_addr_t orig_addr = swiotlb->orig_addr[index];
 
@@ -801,6 +810,11 @@ late_initcall(swiotlb_create_default_debugfs);
 #endif
 
 #ifdef CONFIG_DMA_RESTRICTED_POOL
+bool is_swiotlb_force(struct device *dev)
+{
+	return unlikely(swiotlb_force == SWIOTLB_FORCE) || dev->dev_swiotlb;
+}
+
 static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
 				    struct device *dev)
 {
-- 
2.30.0.478.g8a0d178c01-goog


^ permalink raw reply related

* [PATCH v4 09/14] swiotlb: Refactor swiotlb_tbl_{map,unmap}_single
From: Claire Chang @ 2021-02-09  6:21 UTC (permalink / raw)
  To: Rob Herring, mpe, Joerg Roedel, Will Deacon, Frank Rowand,
	Konrad Rzeszutek Wilk, boris.ostrovsky, jgross, Christoph Hellwig,
	Marek Szyprowski
  Cc: heikki.krogerus, peterz, grant.likely, paulus, mingo, sstabellini,
	Saravana Kannan, xypron.glpk, Rafael J . Wysocki,
	Bartosz Golaszewski, xen-devel, Thierry Reding, linux-devicetree,
	linuxppc-dev, Nicolas Boichat, Claire Chang, Dan Williams,
	Andy Shevchenko, Greg KH, Randy Dunlap, lkml,
	list@263.net:IOMMU DRIVERS, Jim Quinlan, Robin Murphy, bauerman
In-Reply-To: <20210209062131.2300005-1-tientzu@chromium.org>

Refactor swiotlb_tbl_{map,unmap}_single to make the code reusable for
dev_swiotlb_{alloc,free}.

Signed-off-by: Claire Chang <tientzu@chromium.org>
---
 kernel/dma/swiotlb.c | 116 ++++++++++++++++++++++++++-----------------
 1 file changed, 71 insertions(+), 45 deletions(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 6fdebde8fb1f..f64cbe6e84cc 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -509,14 +509,12 @@ static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr,
 	}
 }
 
-phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
-		size_t mapping_size, size_t alloc_size,
-		enum dma_data_direction dir, unsigned long attrs)
+static int swiotlb_tbl_find_free_region(struct device *hwdev,
+					dma_addr_t tbl_dma_addr,
+					size_t alloc_size, unsigned long attrs)
 {
 	struct swiotlb *swiotlb = get_swiotlb(hwdev);
-	dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(hwdev, swiotlb->start);
 	unsigned long flags;
-	phys_addr_t tlb_addr;
 	unsigned int nslots, stride, index, wrap;
 	int i;
 	unsigned long mask;
@@ -531,15 +529,6 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
 #endif
 		panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
 
-	if (mem_encrypt_active())
-		pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
-
-	if (mapping_size > alloc_size) {
-		dev_warn_once(hwdev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)",
-			      mapping_size, alloc_size);
-		return (phys_addr_t)DMA_MAPPING_ERROR;
-	}
-
 	mask = dma_get_seg_boundary(hwdev);
 
 	tbl_dma_addr &= mask;
@@ -601,7 +590,6 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
 				swiotlb->list[i] = 0;
 			for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && swiotlb->list[i]; i--)
 				swiotlb->list[i] = ++count;
-			tlb_addr = swiotlb->start + (index << IO_TLB_SHIFT);
 
 			/*
 			 * Update the indices to avoid searching in the next
@@ -624,45 +612,20 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
 	if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit())
 		dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
 			 alloc_size, swiotlb->nslabs, tmp_io_tlb_used);
-	return (phys_addr_t)DMA_MAPPING_ERROR;
+	return -ENOMEM;
+
 found:
 	swiotlb->used += nslots;
 	spin_unlock_irqrestore(&swiotlb->lock, flags);
 
-	/*
-	 * Save away the mapping from the original address to the DMA address.
-	 * This is needed when we sync the memory.  Then we sync the buffer if
-	 * needed.
-	 */
-	for (i = 0; i < nslots; i++)
-		swiotlb->orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
-	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
-	    (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
-		swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE);
-
-	return tlb_addr;
+	return index;
 }
 
-/*
- * tlb_addr is the physical address of the bounce buffer to unmap.
- */
-void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
-			      size_t mapping_size, size_t alloc_size,
-			      enum dma_data_direction dir, unsigned long attrs)
+static void swiotlb_tbl_release_region(struct device *hwdev, int index, size_t size)
 {
 	struct swiotlb *swiotlb = get_swiotlb(hwdev);
 	unsigned long flags;
-	int i, count, nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
-	int index = (tlb_addr - swiotlb->start) >> IO_TLB_SHIFT;
-	phys_addr_t orig_addr = swiotlb->orig_addr[index];
-
-	/*
-	 * First, sync the memory before unmapping the entry
-	 */
-	if (orig_addr != INVALID_PHYS_ADDR &&
-	    !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
-	    ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
-		swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_FROM_DEVICE);
+	int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
 
 	/*
 	 * Return the buffer to the free list by setting the corresponding
@@ -694,6 +657,69 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
 	spin_unlock_irqrestore(&swiotlb->lock, flags);
 }
 
+phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
+				   size_t mapping_size, size_t alloc_size,
+				   enum dma_data_direction dir,
+				   unsigned long attrs)
+{
+	struct swiotlb *swiotlb = get_swiotlb(hwdev);
+	dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(hwdev, swiotlb->start);
+	phys_addr_t tlb_addr;
+	unsigned int nslots;
+	int i, index;	/* signed: swiotlb_tbl_find_free_region() can fail */
+
+	if (mem_encrypt_active())
+		pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
+
+	if (mapping_size > alloc_size) {
+		dev_warn_once(hwdev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)",
+			      mapping_size, alloc_size);
+		return (phys_addr_t)DMA_MAPPING_ERROR;
+	}
+
+	index = swiotlb_tbl_find_free_region(hwdev, tbl_dma_addr, alloc_size, attrs);
+	if (index < 0)
+		return (phys_addr_t)DMA_MAPPING_ERROR;
+
+	tlb_addr = swiotlb->start + ((phys_addr_t)index << IO_TLB_SHIFT);
+
+	/*
+	 * Save away the mapping from the original address to the DMA address.
+	 * This is needed when we sync the memory.  Then we sync the buffer if
+	 * needed.
+	 */
+	nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
+	for (i = 0; i < nslots; i++)
+		swiotlb->orig_addr[index + i] = orig_addr + (i << IO_TLB_SHIFT);
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
+	    (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
+		swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE);
+
+	return tlb_addr;
+}
+
+/*
+ * tlb_addr is the physical address of the bounce buffer to unmap.
+ */
+void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
+			      size_t mapping_size, size_t alloc_size,
+			      enum dma_data_direction dir, unsigned long attrs)
+{
+	struct swiotlb *swiotlb = get_swiotlb(hwdev);
+	int index = (tlb_addr - swiotlb->start) >> IO_TLB_SHIFT;
+	phys_addr_t orig_addr = swiotlb->orig_addr[index];
+
+	/*
+	 * First, sync the memory before unmapping the entry
+	 */
+	if (orig_addr != INVALID_PHYS_ADDR &&
+	    !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
+	    ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
+		swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_FROM_DEVICE);
+
+	swiotlb_tbl_release_region(hwdev, index, alloc_size);
+}
+
 void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
 			     size_t size, enum dma_data_direction dir,
 			     enum dma_sync_target target)
-- 
2.30.0.478.g8a0d178c01-goog


^ permalink raw reply related

* [PATCH v4 10/14] dma-direct: Add a new wrapper __dma_direct_free_pages()
From: Claire Chang @ 2021-02-09  6:21 UTC (permalink / raw)
  To: Rob Herring, mpe, Joerg Roedel, Will Deacon, Frank Rowand,
	Konrad Rzeszutek Wilk, boris.ostrovsky, jgross, Christoph Hellwig,
	Marek Szyprowski
  Cc: heikki.krogerus, peterz, grant.likely, paulus, mingo, sstabellini,
	Saravana Kannan, xypron.glpk, Rafael J . Wysocki,
	Bartosz Golaszewski, xen-devel, Thierry Reding, linux-devicetree,
	linuxppc-dev, Nicolas Boichat, Claire Chang, Dan Williams,
	Andy Shevchenko, Greg KH, Randy Dunlap, lkml,
	list@263.net:IOMMU DRIVERS, Jim Quinlan, Robin Murphy, bauerman
In-Reply-To: <20210209062131.2300005-1-tientzu@chromium.org>

Add a new wrapper __dma_direct_free_pages() that will be useful later
for dev_swiotlb_free().

Signed-off-by: Claire Chang <tientzu@chromium.org>
---
 kernel/dma/direct.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 30ccbc08e229..a76a1a2f24da 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -75,6 +75,11 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
 		min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit);
 }
 
+static void __dma_direct_free_pages(struct device *dev, struct page *page, size_t size)
+{
+	dma_free_contiguous(dev, page, size);
+}
+
 static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
 		gfp_t gfp)
 {
@@ -237,7 +242,7 @@ void *dma_direct_alloc(struct device *dev, size_t size,
 			return NULL;
 	}
 out_free_pages:
-	dma_free_contiguous(dev, page, size);
+	__dma_direct_free_pages(dev, page, size);
 	return NULL;
 }
 
@@ -273,7 +278,7 @@ void dma_direct_free(struct device *dev, size_t size,
 	else if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_CLEAR_UNCACHED))
 		arch_dma_clear_uncached(cpu_addr, size);
 
-	dma_free_contiguous(dev, dma_direct_to_page(dev, dma_addr), size);
+	__dma_direct_free_pages(dev, dma_direct_to_page(dev, dma_addr), size);
 }
 
 struct page *dma_direct_alloc_pages(struct device *dev, size_t size,
@@ -310,7 +315,7 @@ struct page *dma_direct_alloc_pages(struct device *dev, size_t size,
 	*dma_handle = phys_to_dma_direct(dev, page_to_phys(page));
 	return page;
 out_free_pages:
-	dma_free_contiguous(dev, page, size);
+	__dma_direct_free_pages(dev, page, size);
 	return NULL;
 }
 
@@ -329,7 +334,7 @@ void dma_direct_free_pages(struct device *dev, size_t size,
 	if (force_dma_unencrypted(dev))
 		set_memory_encrypted((unsigned long)vaddr, 1 << page_order);
 
-	dma_free_contiguous(dev, page, size);
+	__dma_direct_free_pages(dev, page, size);
 }
 
 #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \
-- 
2.30.0.478.g8a0d178c01-goog


^ permalink raw reply related

* [PATCH v4 11/14] swiotlb: Add is_dev_swiotlb_force()
From: Claire Chang @ 2021-02-09  6:21 UTC (permalink / raw)
  To: Rob Herring, mpe, Joerg Roedel, Will Deacon, Frank Rowand,
	Konrad Rzeszutek Wilk, boris.ostrovsky, jgross, Christoph Hellwig,
	Marek Szyprowski
  Cc: heikki.krogerus, peterz, grant.likely, paulus, mingo, sstabellini,
	Saravana Kannan, xypron.glpk, Rafael J . Wysocki,
	Bartosz Golaszewski, xen-devel, Thierry Reding, linux-devicetree,
	linuxppc-dev, Nicolas Boichat, Claire Chang, Dan Williams,
	Andy Shevchenko, Greg KH, Randy Dunlap, lkml,
	list@263.net:IOMMU DRIVERS, Jim Quinlan, Robin Murphy, bauerman
In-Reply-To: <20210209062131.2300005-1-tientzu@chromium.org>

Add is_dev_swiotlb_force(), which returns true if the device has a
restricted DMA pool (i.e. dev->dev_swiotlb is set).

Signed-off-by: Claire Chang <tientzu@chromium.org>
---
 include/linux/swiotlb.h | 9 +++++++++
 kernel/dma/swiotlb.c    | 5 +++++
 2 files changed, 14 insertions(+)

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 76f86c684524..b9f2a250c8da 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -73,11 +73,16 @@ extern enum swiotlb_force swiotlb_force;
 
 #ifdef CONFIG_DMA_RESTRICTED_POOL
 bool is_swiotlb_force(struct device *dev);
+bool is_dev_swiotlb_force(struct device *dev);
 #else
 static inline bool is_swiotlb_force(struct device *dev)
 {
 	return unlikely(swiotlb_force == SWIOTLB_FORCE);
 }
+static inline bool is_dev_swiotlb_force(struct device *dev)
+{
+	return false;
+}
 #endif /* CONFIG_DMA_RESTRICTED_POOL */
 
 bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr);
@@ -93,6 +98,10 @@ static inline bool is_swiotlb_force(struct device *dev)
 {
 	return false;
 }
+static inline bool is_dev_swiotlb_force(struct device *dev)
+{
+	return false;
+}
 static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
 {
 	return false;
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index f64cbe6e84cc..fd9c1bd183ac 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -841,6 +841,11 @@ bool is_swiotlb_force(struct device *dev)
 	return unlikely(swiotlb_force == SWIOTLB_FORCE) || dev->dev_swiotlb;
 }
 
+bool is_dev_swiotlb_force(struct device *dev)
+{
+	return dev->dev_swiotlb;
+}
+
 static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
 				    struct device *dev)
 {
-- 
2.30.0.478.g8a0d178c01-goog


^ permalink raw reply related

* [PATCH v4 12/14] swiotlb: Add restricted DMA alloc/free support.
From: Claire Chang @ 2021-02-09  6:21 UTC (permalink / raw)
  To: Rob Herring, mpe, Joerg Roedel, Will Deacon, Frank Rowand,
	Konrad Rzeszutek Wilk, boris.ostrovsky, jgross, Christoph Hellwig,
	Marek Szyprowski
  Cc: heikki.krogerus, peterz, grant.likely, paulus, mingo, sstabellini,
	Saravana Kannan, xypron.glpk, Rafael J . Wysocki,
	Bartosz Golaszewski, xen-devel, Thierry Reding, linux-devicetree,
	linuxppc-dev, Nicolas Boichat, Claire Chang, Dan Williams,
	Andy Shevchenko, Greg KH, Randy Dunlap, lkml,
	list@263.net:IOMMU DRIVERS, Jim Quinlan, Robin Murphy, bauerman
In-Reply-To: <20210209062131.2300005-1-tientzu@chromium.org>

Add the functions dev_swiotlb_{alloc,free}() to support memory
allocation from the restricted DMA pool.

Signed-off-by: Claire Chang <tientzu@chromium.org>
---
 include/linux/swiotlb.h |  2 ++
 kernel/dma/direct.c     | 30 ++++++++++++++++++++++--------
 kernel/dma/swiotlb.c    | 34 ++++++++++++++++++++++++++++++++++
 3 files changed, 58 insertions(+), 8 deletions(-)

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index b9f2a250c8da..2cd39e102915 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -74,6 +74,8 @@ extern enum swiotlb_force swiotlb_force;
 #ifdef CONFIG_DMA_RESTRICTED_POOL
 bool is_swiotlb_force(struct device *dev);
 bool is_dev_swiotlb_force(struct device *dev);
+struct page *dev_swiotlb_alloc(struct device *dev, size_t size, gfp_t gfp);
+bool dev_swiotlb_free(struct device *dev, struct page *page, size_t size);
 #else
 static inline bool is_swiotlb_force(struct device *dev)
 {
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index a76a1a2f24da..f9a9321f7559 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -12,6 +12,7 @@
 #include <linux/pfn.h>
 #include <linux/vmalloc.h>
 #include <linux/set_memory.h>
+#include <linux/swiotlb.h>
 #include <linux/slab.h>
 #include "direct.h"
 
@@ -77,6 +78,10 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
 
 static void __dma_direct_free_pages(struct device *dev, struct page *page, size_t size)
 {
+#ifdef CONFIG_DMA_RESTRICTED_POOL
+	if (dev_swiotlb_free(dev, page, size))
+		return;
+#endif
 	dma_free_contiguous(dev, page, size);
 }
 
@@ -89,6 +94,12 @@ static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
 
 	WARN_ON_ONCE(!PAGE_ALIGNED(size));
 
+#ifdef CONFIG_DMA_RESTRICTED_POOL
+	page = dev_swiotlb_alloc(dev, size, gfp);
+	if (page)
+		return page;
+#endif
+
 	gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
 					   &phys_limit);
 	page = dma_alloc_contiguous(dev, size, gfp);
@@ -147,7 +158,7 @@ void *dma_direct_alloc(struct device *dev, size_t size,
 		gfp |= __GFP_NOWARN;
 
 	if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
-	    !force_dma_unencrypted(dev)) {
+	    !force_dma_unencrypted(dev) && !is_dev_swiotlb_force(dev)) {
 		page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO);
 		if (!page)
 			return NULL;
@@ -160,8 +171,8 @@ void *dma_direct_alloc(struct device *dev, size_t size,
 	}
 
 	if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) &&
-	    !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
-	    !dev_is_dma_coherent(dev))
+	    !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && !dev_is_dma_coherent(dev) &&
+	    !is_dev_swiotlb_force(dev))
 		return arch_dma_alloc(dev, size, dma_handle, gfp, attrs);
 
 	/*
@@ -171,7 +182,9 @@ void *dma_direct_alloc(struct device *dev, size_t size,
 	if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) &&
 	    !gfpflags_allow_blocking(gfp) &&
 	    (force_dma_unencrypted(dev) ||
-	     (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && !dev_is_dma_coherent(dev))))
+	     (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
+	      !dev_is_dma_coherent(dev))) &&
+	    !is_dev_swiotlb_force(dev))
 		return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp);
 
 	/* we always manually zero the memory once we are done */
@@ -252,15 +265,15 @@ void dma_direct_free(struct device *dev, size_t size,
 	unsigned int page_order = get_order(size);
 
 	if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
-	    !force_dma_unencrypted(dev)) {
+	    !force_dma_unencrypted(dev) && !is_dev_swiotlb_force(dev)) {
 		/* cpu_addr is a struct page cookie, not a kernel address */
 		dma_free_contiguous(dev, cpu_addr, size);
 		return;
 	}
 
 	if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) &&
-	    !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
-	    !dev_is_dma_coherent(dev)) {
+	    !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && !dev_is_dma_coherent(dev) &&
+	    !is_dev_swiotlb_force(dev)) {
 		arch_dma_free(dev, size, cpu_addr, dma_addr, attrs);
 		return;
 	}
@@ -288,7 +301,8 @@ struct page *dma_direct_alloc_pages(struct device *dev, size_t size,
 	void *ret;
 
 	if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) &&
-	    force_dma_unencrypted(dev) && !gfpflags_allow_blocking(gfp))
+	    force_dma_unencrypted(dev) && !gfpflags_allow_blocking(gfp) &&
+	    !is_dev_swiotlb_force(dev))
 		return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp);
 
 	page = __dma_direct_alloc_pages(dev, size, gfp);
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index fd9c1bd183ac..8b77fd64199e 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -836,6 +836,40 @@ late_initcall(swiotlb_create_default_debugfs);
 #endif
 
 #ifdef CONFIG_DMA_RESTRICTED_POOL
+struct page *dev_swiotlb_alloc(struct device *dev, size_t size, gfp_t gfp)
+{
+	struct swiotlb *swiotlb;
+	phys_addr_t tlb_addr;
+	unsigned int index;
+
+	/* dev_swiotlb_alloc can be used only in the context which permits sleeping. */
+	if (!dev->dev_swiotlb || !gfpflags_allow_blocking(gfp))
+		return NULL;
+
+	swiotlb = dev->dev_swiotlb;
+	index = swiotlb_tbl_find_free_region(dev, swiotlb->start, size, 0);
+	if (index < 0)
+		return NULL;
+
+	tlb_addr = swiotlb->start + (index << IO_TLB_SHIFT);
+
+	return pfn_to_page(PFN_DOWN(tlb_addr));
+}
+
+bool dev_swiotlb_free(struct device *dev, struct page *page, size_t size)
+{
+	unsigned int index;
+	phys_addr_t tlb_addr = page_to_phys(page);
+
+	if (!is_swiotlb_buffer(dev, tlb_addr))
+		return false;
+
+	index = (tlb_addr - dev->dev_swiotlb->start) >> IO_TLB_SHIFT;
+	swiotlb_tbl_release_region(dev, index, size);
+
+	return true;
+}
+
 bool is_swiotlb_force(struct device *dev)
 {
 	return unlikely(swiotlb_force == SWIOTLB_FORCE) || dev->dev_swiotlb;
-- 
2.30.0.478.g8a0d178c01-goog


^ permalink raw reply related

* [PATCH v4 13/14] dt-bindings: of: Add restricted DMA pool
From: Claire Chang @ 2021-02-09  6:21 UTC (permalink / raw)
  To: Rob Herring, mpe, Joerg Roedel, Will Deacon, Frank Rowand,
	Konrad Rzeszutek Wilk, boris.ostrovsky, jgross, Christoph Hellwig,
	Marek Szyprowski
  Cc: heikki.krogerus, peterz, grant.likely, paulus, mingo, sstabellini,
	Saravana Kannan, xypron.glpk, Rafael J . Wysocki,
	Bartosz Golaszewski, xen-devel, Thierry Reding, linux-devicetree,
	linuxppc-dev, Nicolas Boichat, Claire Chang, Dan Williams,
	Andy Shevchenko, Greg KH, Randy Dunlap, lkml,
	list@263.net:IOMMU DRIVERS, Jim Quinlan, Robin Murphy, bauerman
In-Reply-To: <20210209062131.2300005-1-tientzu@chromium.org>

Introduce the new compatible string, restricted-dma-pool, for restricted
DMA. One can specify the address and length of the restricted DMA memory
region by restricted-dma-pool in the reserved-memory node.

Signed-off-by: Claire Chang <tientzu@chromium.org>
---
 .../reserved-memory/reserved-memory.txt       | 24 +++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
index e8d3096d922c..fc9a12c2f679 100644
--- a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
+++ b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
@@ -51,6 +51,20 @@ compatible (optional) - standard definition
           used as a shared pool of DMA buffers for a set of devices. It can
           be used by an operating system to instantiate the necessary pool
           management subsystem if necessary.
+        - restricted-dma-pool: This indicates a region of memory meant to be
+          used as a pool of restricted DMA buffers for a set of devices. The
+          memory region would be the only region accessible to those devices.
+          When using this, the no-map and reusable properties must not be set,
+          so the operating system can create a virtual mapping that will be used
+          for synchronization. The main purpose for restricted DMA is to
+          mitigate the lack of DMA access control on systems without an IOMMU,
+          which could result in the DMA accessing the system memory at
+          unexpected times and/or unexpected addresses, possibly leading to data
+          leakage or corruption. The feature on its own provides a basic level
+          of protection against the DMA overwriting buffer contents at
+          unexpected times. However, to protect against general data leakage and
+          system memory corruption, the system needs to provide a way to lock down
+          the memory access, e.g., MPU.
         - vendor specific string in the form <vendor>,[<device>-]<usage>
 no-map (optional) - empty property
     - Indicates the operating system must not create a virtual mapping
@@ -120,6 +134,11 @@ one for multimedia processing (named multimedia-memory@77000000, 64MiB).
 			compatible = "acme,multimedia-memory";
 			reg = <0x77000000 0x4000000>;
 		};
+
+		restricted_dma_mem_reserved: restricted_dma_mem_reserved {
+			compatible = "restricted-dma-pool";
+			reg = <0x50000000 0x400000>;
+		};
 	};
 
 	/* ... */
@@ -138,4 +157,9 @@ one for multimedia processing (named multimedia-memory@77000000, 64MiB).
 		memory-region = <&multimedia_reserved>;
 		/* ... */
 	};
+
+	pcie_device: pcie_device@0,0 {
+		memory-region = <&restricted_dma_mem_reserved>;
+		/* ... */
+	};
 };
-- 
2.30.0.478.g8a0d178c01-goog


^ permalink raw reply related

* [PATCH v4 14/14] of: Add plumbing for restricted DMA pool
From: Claire Chang @ 2021-02-09  6:21 UTC (permalink / raw)
  To: Rob Herring, mpe, Joerg Roedel, Will Deacon, Frank Rowand,
	Konrad Rzeszutek Wilk, boris.ostrovsky, jgross, Christoph Hellwig,
	Marek Szyprowski
  Cc: heikki.krogerus, peterz, grant.likely, paulus, mingo, sstabellini,
	Saravana Kannan, xypron.glpk, Rafael J . Wysocki,
	Bartosz Golaszewski, xen-devel, Thierry Reding, linux-devicetree,
	linuxppc-dev, Nicolas Boichat, Claire Chang, Dan Williams,
	Andy Shevchenko, Greg KH, Randy Dunlap, lkml,
	list@263.net:IOMMU DRIVERS, Jim Quinlan, Robin Murphy, bauerman
In-Reply-To: <20210209062131.2300005-1-tientzu@chromium.org>

If a device is not behind an IOMMU, we look up the device node and set
up the restricted DMA when the restricted-dma-pool is present.

Signed-off-by: Claire Chang <tientzu@chromium.org>
---
 drivers/of/address.c    | 25 +++++++++++++++++++++++++
 drivers/of/device.c     |  3 +++
 drivers/of/of_private.h |  5 +++++
 3 files changed, 33 insertions(+)

diff --git a/drivers/of/address.c b/drivers/of/address.c
index 73ddf2540f3f..b6093c9b135d 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -8,6 +8,7 @@
 #include <linux/logic_pio.h>
 #include <linux/module.h>
 #include <linux/of_address.h>
+#include <linux/of_reserved_mem.h>
 #include <linux/pci.h>
 #include <linux/pci_regs.h>
 #include <linux/sizes.h>
@@ -1094,3 +1095,27 @@ bool of_dma_is_coherent(struct device_node *np)
 	return false;
 }
 EXPORT_SYMBOL_GPL(of_dma_is_coherent);
+
+int of_dma_set_restricted_buffer(struct device *dev)
+{
+	struct device_node *node;
+	int count, i;
+
+	if (!dev->of_node)
+		return 0;
+
+	count = of_property_count_elems_of_size(dev->of_node, "memory-region",
+						sizeof(phandle));
+	for (i = 0; i < count; i++) {
+		node = of_parse_phandle(dev->of_node, "memory-region", i);
+		/* There might be multiple memory regions, but only one
+		 * restricted-dma-pool region is allowed.
+		 */
+		if (of_device_is_compatible(node, "restricted-dma-pool") &&
+		    of_device_is_available(node))
+			return of_reserved_mem_device_init_by_idx(
+				dev, dev->of_node, i);
+	}
+
+	return 0;
+}
diff --git a/drivers/of/device.c b/drivers/of/device.c
index 1122daa8e273..38c631f1fafa 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -186,6 +186,9 @@ int of_dma_configure_id(struct device *dev, struct device_node *np,
 
 	arch_setup_dma_ops(dev, dma_start, size, iommu, coherent);
 
+	if (!iommu)
+		return of_dma_set_restricted_buffer(dev);
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(of_dma_configure_id);
diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h
index d9e6a324de0a..28a2dfa197ba 100644
--- a/drivers/of/of_private.h
+++ b/drivers/of/of_private.h
@@ -161,12 +161,17 @@ struct bus_dma_region;
 #if defined(CONFIG_OF_ADDRESS) && defined(CONFIG_HAS_DMA)
 int of_dma_get_range(struct device_node *np,
 		const struct bus_dma_region **map);
+int of_dma_set_restricted_buffer(struct device *dev);
 #else
 static inline int of_dma_get_range(struct device_node *np,
 		const struct bus_dma_region **map)
 {
 	return -ENODEV;
 }
+static inline int of_dma_get_restricted_buffer(struct device *dev)
+{
+	return -ENODEV;
+}
 #endif
 
 #endif /* _LINUX_OF_PRIVATE_H */
-- 
2.30.0.478.g8a0d178c01-goog


^ permalink raw reply related

* Re: [RFC PATCH v3 0/6] Restricted DMA
From: Claire Chang @ 2021-02-09  6:27 UTC (permalink / raw)
  To: Florian Fainelli
  Cc: heikki.krogerus, peterz, grant.likely, paulus, Frank Rowand,
	mingo, Marek Szyprowski, sstabellini, Saravana Kannan,
	Joerg Roedel, Rafael J . Wysocki, Christoph Hellwig,
	Bartosz Golaszewski, xen-devel, Thierry Reding, linux-devicetree,
	Will Deacon, Konrad Rzeszutek Wilk, Dan Williams, linuxppc-dev,
	Rob Herring, boris.ostrovsky, Andy Shevchenko, jgross,
	Nicolas Boichat, Greg KH, Randy Dunlap, lkml, Tomasz Figa,
	list@263.net:IOMMU DRIVERS, Jim Quinlan, xypron.glpk,
	Robin Murphy, bauerman
In-Reply-To: <7fe99ad2-79a7-9c8b-65ce-ce8353e9d9bf@gmail.com>

v4 here: https://lore.kernel.org/patchwork/cover/1378113/

^ permalink raw reply

* Re: [PATCH v7 32/42] powerpc/64: context tracking move to interrupt wrappers
From: Nicholas Piggin @ 2021-02-09  7:45 UTC (permalink / raw)
  To: Christophe Leroy, linuxppc-dev; +Cc: Athira Rajeev
In-Reply-To: <3cb26edb-c9cc-4f21-0b6d-dcd4bbeed7b3@csgroup.eu>

Excerpts from Christophe Leroy's message of February 9, 2021 3:49 pm:
> 
> 
> Le 30/01/2021 à 14:08, Nicholas Piggin a écrit :
>> This moves exception_enter/exit calls to wrapper functions for
>> synchronous interrupts. More interrupt handlers are covered by
>> this than previously.
> 
> Why did you enclose everything in #ifdef CONFIG_PPC64 ? As far as I understand, before this patch 
> exception_enter() and exception_exit() are called also on PPC32.

PPC32 never selects CONTEXT_TRACKING AFAIKS, but I'm not sure. I worried 
ctx_state would not be no-oped, but if it's all inlined into the same
function then maybe the compiler will eliminate it.

On the other hand I may move some of the wrapper into its own function 
if that helps code size, but we can do something about it then...

Hmm, end result is it shouldn't matter for PPC32 at the moment.

Thanks,
Nick

> 
> Christophe
> 
> 
>> 
>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>> ---
>>   arch/powerpc/include/asm/interrupt.h  |  9 ++++
>>   arch/powerpc/kernel/traps.c           | 74 ++++++---------------------
>>   arch/powerpc/mm/book3s64/hash_utils.c |  3 --
>>   arch/powerpc/mm/fault.c               |  9 +---
>>   4 files changed, 27 insertions(+), 68 deletions(-)
>> 
>> diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
>> index 488bdd5bd922..e65ce3e2b071 100644
>> --- a/arch/powerpc/include/asm/interrupt.h
>> +++ b/arch/powerpc/include/asm/interrupt.h
>> @@ -7,10 +7,16 @@
>>   #include <asm/ftrace.h>
>>   
>>   struct interrupt_state {
>> +#ifdef CONFIG_PPC64
>> +	enum ctx_state ctx_state;
>> +#endif
>>   };
>>   
>>   static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
>>   {
>> +#ifdef CONFIG_PPC64
>> +	state->ctx_state = exception_enter();
>> +#endif
>>   }
>>   
>>   /*
>> @@ -29,6 +35,9 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup
>>    */
>>   static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt_state *state)
>>   {
>> +#ifdef CONFIG_PPC64
>> +	exception_exit(state->ctx_state);
>> +#endif
>>   }
>>   
>>   static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
>> diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
>> index da488e62fb5f..21fd14828827 100644
>> --- a/arch/powerpc/kernel/traps.c
>> +++ b/arch/powerpc/kernel/traps.c
>> @@ -1087,41 +1087,28 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(handle_hmi_exception)
>>   
>>   DEFINE_INTERRUPT_HANDLER(unknown_exception)
>>   {
>> -	enum ctx_state prev_state = exception_enter();
>> -
>>   	printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
>>   	       regs->nip, regs->msr, regs->trap);
>>   
>>   	_exception(SIGTRAP, regs, TRAP_UNK, 0);
>> -
>> -	exception_exit(prev_state);
>>   }
>>   
>>   DEFINE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception)
>>   {
>> -	enum ctx_state prev_state = exception_enter();
>> -
>>   	printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
>>   	       regs->nip, regs->msr, regs->trap);
>>   
>>   	_exception(SIGTRAP, regs, TRAP_UNK, 0);
>> -
>> -	exception_exit(prev_state);
>>   }
>>   
>>   DEFINE_INTERRUPT_HANDLER(instruction_breakpoint_exception)
>>   {
>> -	enum ctx_state prev_state = exception_enter();
>> -
>>   	if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5,
>>   					5, SIGTRAP) == NOTIFY_STOP)
>> -		goto bail;
>> +		return;
>>   	if (debugger_iabr_match(regs))
>> -		goto bail;
>> +		return;
>>   	_exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
>> -
>> -bail:
>> -	exception_exit(prev_state);
>>   }
>>   
>>   DEFINE_INTERRUPT_HANDLER(RunModeException)
>> @@ -1131,8 +1118,6 @@ DEFINE_INTERRUPT_HANDLER(RunModeException)
>>   
>>   DEFINE_INTERRUPT_HANDLER(single_step_exception)
>>   {
>> -	enum ctx_state prev_state = exception_enter();
>> -
>>   	clear_single_step(regs);
>>   	clear_br_trace(regs);
>>   
>> @@ -1141,14 +1126,11 @@ DEFINE_INTERRUPT_HANDLER(single_step_exception)
>>   
>>   	if (notify_die(DIE_SSTEP, "single_step", regs, 5,
>>   					5, SIGTRAP) == NOTIFY_STOP)
>> -		goto bail;
>> +		return;
>>   	if (debugger_sstep(regs))
>> -		goto bail;
>> +		return;
>>   
>>   	_exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);
>> -
>> -bail:
>> -	exception_exit(prev_state);
>>   }
>>   NOKPROBE_SYMBOL(single_step_exception);
>>   
>> @@ -1476,7 +1458,6 @@ static inline int emulate_math(struct pt_regs *regs) { return -1; }
>>   
>>   DEFINE_INTERRUPT_HANDLER(program_check_exception)
>>   {
>> -	enum ctx_state prev_state = exception_enter();
>>   	unsigned int reason = get_reason(regs);
>>   
>>   	/* We can now get here via a FP Unavailable exception if the core
>> @@ -1485,22 +1466,22 @@ DEFINE_INTERRUPT_HANDLER(program_check_exception)
>>   	if (reason & REASON_FP) {
>>   		/* IEEE FP exception */
>>   		parse_fpe(regs);
>> -		goto bail;
>> +		return;
>>   	}
>>   	if (reason & REASON_TRAP) {
>>   		unsigned long bugaddr;
>>   		/* Debugger is first in line to stop recursive faults in
>>   		 * rcu_lock, notify_die, or atomic_notifier_call_chain */
>>   		if (debugger_bpt(regs))
>> -			goto bail;
>> +			return;
>>   
>>   		if (kprobe_handler(regs))
>> -			goto bail;
>> +			return;
>>   
>>   		/* trap exception */
>>   		if (notify_die(DIE_BPT, "breakpoint", regs, 5, 5, SIGTRAP)
>>   				== NOTIFY_STOP)
>> -			goto bail;
>> +			return;
>>   
>>   		bugaddr = regs->nip;
>>   		/*
>> @@ -1512,10 +1493,10 @@ DEFINE_INTERRUPT_HANDLER(program_check_exception)
>>   		if (!(regs->msr & MSR_PR) &&  /* not user-mode */
>>   		    report_bug(bugaddr, regs) == BUG_TRAP_TYPE_WARN) {
>>   			regs->nip += 4;
>> -			goto bail;
>> +			return;
>>   		}
>>   		_exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
>> -		goto bail;
>> +		return;
>>   	}
>>   #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
>>   	if (reason & REASON_TM) {
>> @@ -1536,7 +1517,7 @@ DEFINE_INTERRUPT_HANDLER(program_check_exception)
>>   		 */
>>   		if (user_mode(regs)) {
>>   			_exception(SIGILL, regs, ILL_ILLOPN, regs->nip);
>> -			goto bail;
>> +			return;
>>   		} else {
>>   			printk(KERN_EMERG "Unexpected TM Bad Thing exception "
>>   			       "at %lx (msr 0x%lx) tm_scratch=%llx\n",
>> @@ -1567,7 +1548,7 @@ DEFINE_INTERRUPT_HANDLER(program_check_exception)
>>   	 * pattern to occurrences etc. -dgibson 31/Mar/2003
>>   	 */
>>   	if (!emulate_math(regs))
>> -		goto bail;
>> +		return;
>>   
>>   	/* Try to emulate it if we should. */
>>   	if (reason & (REASON_ILLEGAL | REASON_PRIVILEGED)) {
>> @@ -1575,10 +1556,10 @@ DEFINE_INTERRUPT_HANDLER(program_check_exception)
>>   		case 0:
>>   			regs->nip += 4;
>>   			emulate_single_step(regs);
>> -			goto bail;
>> +			return;
>>   		case -EFAULT:
>>   			_exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
>> -			goto bail;
>> +			return;
>>   		}
>>   	}
>>   
>> @@ -1587,9 +1568,6 @@ DEFINE_INTERRUPT_HANDLER(program_check_exception)
>>   		_exception(SIGILL, regs, ILL_PRVOPC, regs->nip);
>>   	else
>>   		_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
>> -
>> -bail:
>> -	exception_exit(prev_state);
>>   }
>>   NOKPROBE_SYMBOL(program_check_exception);
>>   
>> @@ -1606,14 +1584,12 @@ NOKPROBE_SYMBOL(emulation_assist_interrupt);
>>   
>>   DEFINE_INTERRUPT_HANDLER(alignment_exception)
>>   {
>> -	enum ctx_state prev_state = exception_enter();
>>   	int sig, code, fixed = 0;
>>   	unsigned long  reason;
>>   
>>   	interrupt_cond_local_irq_enable(regs);
>>   
>>   	reason = get_reason(regs);
>> -
>>   	if (reason & REASON_BOUNDARY) {
>>   		sig = SIGBUS;
>>   		code = BUS_ADRALN;
>> @@ -1621,7 +1597,7 @@ DEFINE_INTERRUPT_HANDLER(alignment_exception)
>>   	}
>>   
>>   	if (tm_abort_check(regs, TM_CAUSE_ALIGNMENT | TM_CAUSE_PERSISTENT))
>> -		goto bail;
>> +		return;
>>   
>>   	/* we don't implement logging of alignment exceptions */
>>   	if (!(current->thread.align_ctl & PR_UNALIGN_SIGBUS))
>> @@ -1631,7 +1607,7 @@ DEFINE_INTERRUPT_HANDLER(alignment_exception)
>>   		/* skip over emulated instruction */
>>   		regs->nip += inst_length(reason);
>>   		emulate_single_step(regs);
>> -		goto bail;
>> +		return;
>>   	}
>>   
>>   	/* Operand address was bad */
>> @@ -1647,9 +1623,6 @@ DEFINE_INTERRUPT_HANDLER(alignment_exception)
>>   		_exception(sig, regs, code, regs->dar);
>>   	else
>>   		bad_page_fault(regs, sig);
>> -
>> -bail:
>> -	exception_exit(prev_state);
>>   }
>>   
>>   DEFINE_INTERRUPT_HANDLER(StackOverflow)
>> @@ -1663,41 +1636,28 @@ DEFINE_INTERRUPT_HANDLER(StackOverflow)
>>   
>>   DEFINE_INTERRUPT_HANDLER(stack_overflow_exception)
>>   {
>> -	enum ctx_state prev_state = exception_enter();
>> -
>>   	die("Kernel stack overflow", regs, SIGSEGV);
>> -
>> -	exception_exit(prev_state);
>>   }
>>   
>>   DEFINE_INTERRUPT_HANDLER(kernel_fp_unavailable_exception)
>>   {
>> -	enum ctx_state prev_state = exception_enter();
>> -
>>   	printk(KERN_EMERG "Unrecoverable FP Unavailable Exception "
>>   			  "%lx at %lx\n", regs->trap, regs->nip);
>>   	die("Unrecoverable FP Unavailable Exception", regs, SIGABRT);
>> -
>> -	exception_exit(prev_state);
>>   }
>>   
>>   DEFINE_INTERRUPT_HANDLER(altivec_unavailable_exception)
>>   {
>> -	enum ctx_state prev_state = exception_enter();
>> -
>>   	if (user_mode(regs)) {
>>   		/* A user program has executed an altivec instruction,
>>   		   but this kernel doesn't support altivec. */
>>   		_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
>> -		goto bail;
>> +		return;
>>   	}
>>   
>>   	printk(KERN_EMERG "Unrecoverable VMX/Altivec Unavailable Exception "
>>   			"%lx at %lx\n", regs->trap, regs->nip);
>>   	die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT);
>> -
>> -bail:
>> -	exception_exit(prev_state);
>>   }
>>   
>>   DEFINE_INTERRUPT_HANDLER(vsx_unavailable_exception)
>> diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
>> index d681dc5a7b1c..fb7c10524bcd 100644
>> --- a/arch/powerpc/mm/book3s64/hash_utils.c
>> +++ b/arch/powerpc/mm/book3s64/hash_utils.c
>> @@ -1514,7 +1514,6 @@ EXPORT_SYMBOL_GPL(hash_page);
>>   DECLARE_INTERRUPT_HANDLER_RET(__do_hash_fault);
>>   DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault)
>>   {
>> -	enum ctx_state prev_state = exception_enter();
>>   	unsigned long ea = regs->dar;
>>   	unsigned long dsisr = regs->dsisr;
>>   	unsigned long access = _PAGE_PRESENT | _PAGE_READ;
>> @@ -1563,8 +1562,6 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault)
>>   		err = 0;
>>   	}
>>   
>> -	exception_exit(prev_state);
>> -
>>   	return err;
>>   }
>>   
>> diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
>> index 9c4220efc20f..b26a7643fc6e 100644
>> --- a/arch/powerpc/mm/fault.c
>> +++ b/arch/powerpc/mm/fault.c
>> @@ -564,14 +564,7 @@ NOKPROBE_SYMBOL(__do_page_fault);
>>   
>>   DEFINE_INTERRUPT_HANDLER_RET(do_page_fault)
>>   {
>> -	enum ctx_state prev_state = exception_enter();
>> -	long err;
>> -
>> -	err = __do_page_fault(regs);
>> -
>> -	exception_exit(prev_state);
>> -
>> -	return err;
>> +	return __do_page_fault(regs);
>>   }
>>   NOKPROBE_SYMBOL(do_page_fault);
>>   
>> 
> 

^ permalink raw reply

* Re: [PATCH v5 05/22] powerpc/irq: Add helper to set regs->softe
From: Nicholas Piggin @ 2021-02-09  7:47 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Christophe Leroy, Michael Ellerman,
	msuchanek, Paul Mackerras
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <258ac0c6-ef40-86d4-2ce4-772cfc4a95e5@csgroup.eu>

Excerpts from Christophe Leroy's message of February 9, 2021 3:57 pm:
> 
> 
> Le 09/02/2021 à 02:11, Nicholas Piggin a écrit :
>> Excerpts from Christophe Leroy's message of February 9, 2021 1:10 am:
>>> regs->softe doesn't exist on PPC32.
>>>
>>> Add irq_soft_mask_regs_set_state() helper to set regs->softe.
>>> This helper will void on PPC32.
>>>
>>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
>>> ---
>>>   arch/powerpc/include/asm/hw_irq.h | 11 +++++++++--
>>>   1 file changed, 9 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
>>> index 614957f74cee..ed0c3b049dfd 100644
>>> --- a/arch/powerpc/include/asm/hw_irq.h
>>> +++ b/arch/powerpc/include/asm/hw_irq.h
>>> @@ -38,6 +38,8 @@
>>>   #define PACA_IRQ_MUST_HARD_MASK	(PACA_IRQ_EE)
>>>   #endif
>>>   
>>> +#endif /* CONFIG_PPC64 */
>>> +
>>>   /*
>>>    * flags for paca->irq_soft_mask
>>>    */
>>> @@ -46,8 +48,6 @@
>>>   #define IRQS_PMI_DISABLED	2
>>>   #define IRQS_ALL_DISABLED	(IRQS_DISABLED | IRQS_PMI_DISABLED)
>>>   
>>> -#endif /* CONFIG_PPC64 */
>>> -
>>>   #ifndef __ASSEMBLY__
>>>   
>>>   #ifdef CONFIG_PPC64
>>> @@ -287,6 +287,10 @@ extern void irq_set_pending_from_srr1(unsigned long srr1);
>>>   
>>>   extern void force_external_irq_replay(void);
>>>   
>>> +static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, unsigned long val)
>>> +{
>>> +	regs->softe = val;
>>> +}
>>>   #else /* CONFIG_PPC64 */
>>>   
>>>   static inline unsigned long arch_local_save_flags(void)
>>> @@ -355,6 +359,9 @@ static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
>>>   
>>>   static inline void may_hard_irq_enable(void) { }
>>>   
>>> +static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, unsigned long val)
>>> +{
>>> +}
>>>   #endif /* CONFIG_PPC64 */
>>>   
>>>   #define ARCH_IRQ_INIT_FLAGS	IRQ_NOREQUEST
>> 
>> What I don't like about this where you use it is it kind of pollutes
>> the ppc32 path with this function which is not valid to use.
>> 
>> I would prefer if you had this purely so it could compile with:
>> 
>>    if (IS_ENABLED(CONFIG_PPC64)))
>>        irq_soft_mask_regs_set_state(regs, blah);
>> 
>> And then you could make the ppc32 cause a link error if it did not
>> get eliminated at compile time (e.g., call an undefined function).
>> 
>> You could do the same with the kuap_ functions to change some ifdefs
>> to IS_ENABLED.
>> 
>> That's just my preference but if you prefer this way I guess that's
>> okay.
> 
> I see you didn't change your mind since last April :)
> 
> I'll see what I can do.

If you have more patches in the works and will do some cleanup passes I 
don't mind so much.

Thanks,
Nick

^ permalink raw reply

* Re: [PATCH v5 05/22] powerpc/irq: Add helper to set regs->softe
From: Nicholas Piggin @ 2021-02-09  7:49 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Christophe Leroy, Michael Ellerman,
	msuchanek, Paul Mackerras
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <5987787e-ee80-ed0e-0c34-9884f6aad3c5@csgroup.eu>

Excerpts from Christophe Leroy's message of February 9, 2021 4:18 pm:
> 
> 
> Le 09/02/2021 à 02:11, Nicholas Piggin a écrit :
>> Excerpts from Christophe Leroy's message of February 9, 2021 1:10 am:
>>> regs->softe doesn't exist on PPC32.
>>>
>>> Add irq_soft_mask_regs_set_state() helper to set regs->softe.
>>> This helper will void on PPC32.
>>>
>>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
>>> ---
>> 
>> You could do the same with the kuap_ functions to change some ifdefs
>> to IS_ENABLED.
>> 
>> That's just my preference but if you prefer this way I guess that's
>> okay.
>> 
> 
> 
> That's also my preference on the long term.
> 
> Here it is ephemeral, I have a follow up series implementing interrupt exit/entry in C and getting 
> rid of all the assembly kuap hence getting rid of those ifdefs.

I thought it might have been because you hate ifdef more than most :)
 
> The issue I see when using IS_ENABLED() is that you have to indent to the right, then you interfere 
> with the file history and 'git blame'

Valid point if it's just going to indent back the other way in your next 
series.

> Thanks for reviewing my series and looking forward to your feedback on my series on the interrupt 
> entry/exit that I will likely release later today.

Cool, I'm eager to see them.

Thanks,
Nick

^ permalink raw reply

* Re: [PATCH v5 09/22] powerpc/syscall: Make interrupt.c buildable on PPC32
From: Nicholas Piggin @ 2021-02-09  7:50 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Christophe Leroy, Michael Ellerman,
	msuchanek, Paul Mackerras
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <73fd6e9b-fe99-e804-d681-c0a22b9bef38@csgroup.eu>

Excerpts from Christophe Leroy's message of February 9, 2021 4:02 pm:
> 
> 
> Le 09/02/2021 à 02:27, Nicholas Piggin a écrit :
>> Excerpts from Christophe Leroy's message of February 9, 2021 1:10 am:
>>> To allow building interrupt.c on PPC32, ifdef out specific PPC64
>>> code or use helpers which are available on both PP32 and PPC64
>>>
>>> Modify Makefile to always build interrupt.o
>>>
>>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
>>> ---
>>> v5:
>>> - Also for interrupt exit preparation
>>> - Opted out kuap related code, ppc32 keeps it in ASM for the time being
>>> ---
>>>   arch/powerpc/kernel/Makefile    |  4 ++--
>>>   arch/powerpc/kernel/interrupt.c | 31 ++++++++++++++++++++++++-------
>>>   2 files changed, 26 insertions(+), 9 deletions(-)
>>>
> 
>>> diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
>>> index d6be4f9a67e5..2dac4d2bb1cf 100644
>>> --- a/arch/powerpc/kernel/interrupt.c
>>> +++ b/arch/powerpc/kernel/interrupt.c
>>> @@ -39,7 +39,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
>>>   		BUG_ON(!(regs->msr & MSR_RI));
>>>   	BUG_ON(!(regs->msr & MSR_PR));
>>>   	BUG_ON(!FULL_REGS(regs));
>>> -	BUG_ON(regs->softe != IRQS_ENABLED);
>>> +	BUG_ON(arch_irq_disabled_regs(regs));
>>>   
>>>   #ifdef CONFIG_PPC_PKEY
>>>   	if (mmu_has_feature(MMU_FTR_PKEY)) {
>>> @@ -65,7 +65,9 @@ notrace long system_call_exception(long r3, long r4, long r5,
>>>   			isync();
>>>   	} else
>>>   #endif
>>> +#ifdef CONFIG_PPC64
>>>   		kuap_check_amr();
>>> +#endif
>> 
>> Wouldn't mind trying to get rid of these ifdefs at some point, but
>> there's some kuap / keys changes going on recently so I'm happy enough
>> to let this settle then look at whether we can refactor.
> 
> I have a follow up series that implements interrupts entries/exits in C and that removes all kuap 
> assembly, I will likely release it as RFC later today.
> 
>> 
>>>   
>>>   	account_cpu_user_entry();
>>>   
>>> @@ -318,7 +323,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
>>>   	return ret;
>>>   }
>>>   
>>> -#ifdef CONFIG_PPC_BOOK3S /* BOOK3E not yet using this */
>>> +#ifndef CONFIG_PPC_BOOK3E_64 /* BOOK3E not yet using this */
>>>   notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr)
>>>   {
>>>   #ifdef CONFIG_PPC_BOOK3E
>> 
>> Why are you building this for 32? I don't mind if it's just to keep
>> things similar and make it build for now, but you're not using it yet,
>> right?
> 
> The series using that will follow, I thought it would be worth doing this at once.

Yeah that's fine by me then.

Thanks,
Nick

^ permalink raw reply

* Re: [PATCH v5 17/22] powerpc/syscall: Do not check unsupported scv vector on PPC32
From: Nicholas Piggin @ 2021-02-09  7:56 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Christophe Leroy, Michael Ellerman,
	msuchanek, Paul Mackerras
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <82c4abb1-cb52-e856-b2dd-d7c7d48bd292@csgroup.eu>

Excerpts from Christophe Leroy's message of February 9, 2021 4:13 pm:
> 
> 
> Le 09/02/2021 à 03:00, Nicholas Piggin a écrit :
>> Excerpts from Christophe Leroy's message of February 9, 2021 1:10 am:
>>> Only PPC64 has scv. No need to check the 0x7ff0 trap on PPC32.
>>> For that, add a helper trap_is_unsupported_scv() similar to
>>> trap_is_scv().
>>>
>>> And ignore the scv parameter in syscall_exit_prepare (Save 14 cycles
>>> 346 => 332 cycles)
>>>
>>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
>>> ---
>>> v5: Added a helper trap_is_unsupported_scv()
>>> ---
>>>   arch/powerpc/include/asm/ptrace.h | 5 +++++
>>>   arch/powerpc/kernel/entry_32.S    | 1 -
>>>   arch/powerpc/kernel/interrupt.c   | 7 +++++--
>>>   3 files changed, 10 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
>>> index 58f9dc060a7b..2c842b11a924 100644
>>> --- a/arch/powerpc/include/asm/ptrace.h
>>> +++ b/arch/powerpc/include/asm/ptrace.h
>>> @@ -229,6 +229,11 @@ static inline bool trap_is_scv(struct pt_regs *regs)
>>>   	return (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && TRAP(regs) == 0x3000);
>>>   }
>>>   
>>> +static inline bool trap_is_unsupported_scv(struct pt_regs *regs)
>>> +{
>>> +	return (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && TRAP(regs) == 0x7ff0);
>>> +}
>> 
>> This change is good.
>> 
>>> +
>>>   static inline bool trap_is_syscall(struct pt_regs *regs)
>>>   {
>>>   	return (trap_is_scv(regs) || TRAP(regs) == 0xc00);
>>> diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
>>> index cffe58e63356..7c824e8928d0 100644
>>> --- a/arch/powerpc/kernel/entry_32.S
>>> +++ b/arch/powerpc/kernel/entry_32.S
>>> @@ -344,7 +344,6 @@ transfer_to_syscall:
>>>   
>>>   ret_from_syscall:
>>>   	addi    r4,r1,STACK_FRAME_OVERHEAD
>>> -	li	r5,0
>>>   	bl	syscall_exit_prepare
>> 
>> For this one, I think it would be nice to do the "right" thing and make
>> the function prototypes different on !64S. They could then declare a
>> local const bool scv = 0.
>> 
>> We could have syscall_exit_prepare and syscall_exit_prepare_maybe_scv
>> or something like that, 64s can use the latter one and the former can be
>> a wrapper that passes constant 0 for scv. Then we don't have different
>> prototypes for the same function, but you just have to make the 32-bit
>> version static inline and the 64-bit version exported to asm.
> 
> You can't call a static inline function from ASM, I don't understand you.

I mean

#ifdef CONFIG_PPC_BOOK3S_64
notrace unsigned long syscall_exit_prepare_scv(unsigned long r3,
                                           struct pt_regs *regs,
                                           long scv)
#else
static inline long syscall_exit_prepare_scv(unsigned long r3,
                                           struct pt_regs *regs,
                                           long scv)
#endif

#ifndef CONFIG_PPC_BOOK3S_64
notrace unsigned long syscall_exit_prepare(unsigned long r3,
                                           struct pt_regs *regs)
{
	return syscall_exit_prepare_scv(r3, regs, 0);
}
#endif


> 
> What is wrong for you really here ? Is that the fact we leave scv random, or is that the below 
> IS_ENABLED() ?

That scv arg is random. I know generated code essentially would be no 
different and no possibility of tracing, but would just prefer to call 
the C "correctly" if possible.

> I don't mind keeping the 'li r5,0' before calling the function if you find it cleaner, the real 
> performance gain is with setting scv to 0 below for PPC32 (and maybe it should be set to zero for 
> book3e/64 too ?).

Yes 64e would like this optimisation.

Thanks,
Nick

^ permalink raw reply

* Re: [PATCH v4 01/14] swiotlb: Remove external access to io_tlb_start
From: Christoph Hellwig @ 2021-02-09  8:40 UTC (permalink / raw)
  To: Claire Chang
  Cc: heikki.krogerus, peterz, grant.likely, paulus, Frank Rowand,
	mingo, Marek Szyprowski, sstabellini, Saravana Kannan,
	Joerg Roedel, Rafael J . Wysocki, Christoph Hellwig,
	Bartosz Golaszewski, xen-devel, Thierry Reding, linux-devicetree,
	Will Deacon, Konrad Rzeszutek Wilk, Dan Williams, linuxppc-dev,
	Rob Herring, boris.ostrovsky, Andy Shevchenko, jgross,
	Nicolas Boichat, Greg KH, Randy Dunlap, lkml,
	list@263.net:IOMMU DRIVERS, Jim Quinlan, xypron.glpk,
	Robin Murphy, bauerman
In-Reply-To: <20210209062131.2300005-2-tientzu@chromium.org>

On Tue, Feb 09, 2021 at 02:21:18PM +0800, Claire Chang wrote:
> This can be dropped if Christoph's swiotlb cleanups are landed.
> https://lore.kernel.org/linux-iommu/20210207160934.2955931-1-hch@lst.de/T/#m7124f29b6076d462101fcff6433295157621da09 

FYI, I've also started looking into additional cleanups based on your
struct in this branch, but I'd like to wait for all the previous
changes to settle first:

http://git.infradead.org/users/hch/misc.git/shortlog/refs/heads/swiotlb-struct

^ permalink raw reply

* Re: [PATCH v2 2/7] ASoC: fsl_rpmsg: Add CPU DAI driver for audio base on rpmsg
From: Shengjiu Wang @ 2021-02-09  9:16 UTC (permalink / raw)
  To: Mark Brown
  Cc: open list:OPEN FIRMWARE AND FLATTENED DEVICE TREE BINDINGS,
	alsa-devel, Timur Tabi, Xiubo Li, Fabio Estevam, Shengjiu Wang,
	Takashi Iwai, Liam Girdwood, linux-kernel, Nicolin Chen,
	Rob Herring, linuxppc-dev
In-Reply-To: <20210208115112.GD8645@sirena.org.uk>

On Mon, Feb 8, 2021 at 7:53 PM Mark Brown <broonie@kernel.org> wrote:
>
> On Sun, Feb 07, 2021 at 06:23:50PM +0800, Shengjiu Wang wrote:
>
> > +static int fsl_rpmsg_hw_params(struct snd_pcm_substream *substream,
> > +                            struct snd_pcm_hw_params *params,
> > +                            struct snd_soc_dai *dai)
> > +{
>
> ...
>
> > +     ret = clk_prepare_enable(rpmsg->mclk);
> > +     if (ret)
> > +             dev_err(dai->dev, "failed to enable mclk: %d\n", ret);
> > +
> > +     return ret;
> > +}
> > +
> > +static int fsl_rpmsg_hw_free(struct snd_pcm_substream *substream,
> > +                          struct snd_soc_dai *dai)
> > +{
> > +     struct fsl_rpmsg *rpmsg = snd_soc_dai_get_drvdata(dai);
> > +
> > +     clk_disable_unprepare(rpmsg->mclk);
>
> hw_params() can be called multiple times and there's no need for it to
> be balanced with hw_free(), I'd move this to a different callback (DAPM
> should work well).

Which callback should I use? Is there an example?

best regards
wang shengjiu

^ permalink raw reply

* [PATCH V3] powerpc/perf: Adds support for programming of Thresholding in P10
From: Kajol Jain @ 2021-02-09  9:52 UTC (permalink / raw)
  To: mpe; +Cc: kjain, atrajeev, maddy, linuxppc-dev

Thresholding, a performance monitoring unit feature, can be
used to identify marked instructions which take more than
expected cycles between start event and end event.
Threshold compare (thresh_cmp) bits are programmed in MMCRA
register. In Power9, thresh_cmp bits were part of the
event code. But in case of P10, thresh_cmp are not part of
event code due to inclusion of MMCR3 bits.

This patch adds an option to use the attr.config1 variable
to pass the thresh_cmp value to be programmed in the
MMCRA register. A new ppmu flag called PPMU_HAS_ATTR_CONFIG1
has been added, and this flag is used to indicate the use of
the attr.config1 variable.

The patch extends the parameter list of 'compute_mmcr'
to include power_pmu's 'flags' element, and the parameter list of
get_constraint to include the attr.config1 value. It also extends
the parameter list of power_check_constraints in order to pass
the perf_event list.

As stated by commit ef0e3b650f8d ("powerpc/perf: Fix Threshold
Event Counter Multiplier width for P10"), the constraint bits for
thresh_cmp also need to be increased to 11 bits, which is
handled as part of this patch. We added bit number 53 as part
of the constraint bits of thresh_cmp for power10 to make it an
11-bit field.

Updated layout for p10:

/*
 * Layout of constraint bits:
 *
 *        60        56        52        48        44        40        36        32
 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
 *   [   fab_match   ]         [       thresh_cmp      ] [   thresh_ctl    ] [   ]
 *                                          |                                  |
 *                           [  thresh_cmp bits for p10]           thresh_sel -*
 *
 *        28        24        20        16        12         8         4         0
 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
 *               [ ] |   [ ] |  [  sample ]   [     ]   [6] [5]   [4] [3]   [2] [1]
 *                |  |    |  |                  |
 *      BHRB IFM -*  |    |  |*radix_scope      |      Count of events for each PMC.
 *              EBB -*    |                     |        p1, p2, p3, p4, p5, p6.
 *      L1 I/D qualifier -*                     |
 *                     nc - number of counters -*
 *
 * The PMC fields P1..P6, and NC, are adder fields. As we accumulate constraints
 * we want the low bit of each field to be added to any existing value.
 *
 * Everything else is a value field.
 */

Result:
command#: cat /sys/devices/cpu/format/thresh_cmp
config1:0-17

ex. usage:

command#: perf record -I --weight -d  -e
	 cpu/event=0x67340101EC,thresh_cmp=500/ ./ebizzy -S 2 -t 1 -s 4096
1826636 records/s
real  2.00 s
user  2.00 s
sys   0.00 s
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.038 MB perf.data (61 samples) ]

Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
---
 arch/powerpc/include/asm/perf_event_server.h |  5 +-
 arch/powerpc/perf/core-book3s.c              | 15 +++--
 arch/powerpc/perf/isa207-common.c            | 67 +++++++++++++++++---
 arch/powerpc/perf/isa207-common.h            | 15 +++--
 arch/powerpc/perf/mpc7450-pmu.c              |  5 +-
 arch/powerpc/perf/power10-pmu.c              |  4 +-
 arch/powerpc/perf/power5+-pmu.c              |  5 +-
 arch/powerpc/perf/power5-pmu.c               |  5 +-
 arch/powerpc/perf/power6-pmu.c               |  5 +-
 arch/powerpc/perf/power7-pmu.c               |  5 +-
 arch/powerpc/perf/ppc970-pmu.c               |  5 +-
 11 files changed, 102 insertions(+), 34 deletions(-)

---
Changelog
v2 -> v3
- Removed field 'events_config1' from struct cpu_hw_events
  and directly using attr.config1 field instead as suggested
  by Michael Ellerman.
- Extended the parameter list of the 'power_check_constraints' function
  to also pass the perf_event struct array, as we need to access each
  event's attr.config1 value. This is needed because the
  cpu_hw_events structure is not yet updated when
  'power_check_constraints' is called from 'power_pmu_event_init'.

v1 -> v2
- Add new function 'p10_thresh_cmp_val' to evaluate thresh_cmp
  value.
- Extended the parameter list of get_constraint function
  to include attr.config1 value.
- Added bit number 53 as part of constraint bits of thresh_cmp
  to make it a 11 bit field for power10.
- Updated PPMU_HAS_ATTR_CONFIG1 value to 0x00000800
- Add new field 'events_config1' in struct cpu_hw_events and also
  update this field in all required functions accordingly.
---

diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index 3b7baba01c92..00e7e671bb4b 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -36,9 +36,9 @@ struct power_pmu {
 	unsigned long	test_adder;
 	int		(*compute_mmcr)(u64 events[], int n_ev,
 				unsigned int hwc[], struct mmcr_regs *mmcr,
-				struct perf_event *pevents[]);
+				struct perf_event *pevents[], u32 flags);
 	int		(*get_constraint)(u64 event_id, unsigned long *mskp,
-				unsigned long *valp);
+				unsigned long *valp, u64 event_config1);
 	int		(*get_alternatives)(u64 event_id, unsigned int flags,
 				u64 alt[]);
 	void		(*get_mem_data_src)(union perf_mem_data_src *dsrc,
@@ -83,6 +83,7 @@ struct power_pmu {
 #define PPMU_NO_SIAR		0x00000100 /* Do not use SIAR */
 #define PPMU_ARCH_31		0x00000200 /* Has MMCR3, SIER2 and SIER3 */
 #define PPMU_P10_DD1		0x00000400 /* Is power10 DD1 processor version */
+#define PPMU_HAS_ATTR_CONFIG1	0x00000800 /* Using config1 attribute */
 
 /*
  * Values for flags to get_alternatives()
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 28206b1fe172..85073d9e79fe 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -915,7 +915,7 @@ void perf_event_print_debug(void)
  */
 static int power_check_constraints(struct cpu_hw_events *cpuhw,
 				   u64 event_id[], unsigned int cflags[],
-				   int n_ev)
+				   int n_ev, struct perf_event **event)
 {
 	unsigned long mask, value, nv;
 	unsigned long smasks[MAX_HWEVENTS], svalues[MAX_HWEVENTS];
@@ -938,7 +938,7 @@ static int power_check_constraints(struct cpu_hw_events *cpuhw,
 			event_id[i] = cpuhw->alternatives[i][0];
 		}
 		if (ppmu->get_constraint(event_id[i], &cpuhw->amasks[i][0],
-					 &cpuhw->avalues[i][0]))
+					 &cpuhw->avalues[i][0], event[i]->attr.config1))
 			return -1;
 	}
 	value = mask = 0;
@@ -973,7 +973,8 @@ static int power_check_constraints(struct cpu_hw_events *cpuhw,
 		for (j = 1; j < n_alt[i]; ++j)
 			ppmu->get_constraint(cpuhw->alternatives[i][j],
 					     &cpuhw->amasks[i][j],
-					     &cpuhw->avalues[i][j]);
+					     &cpuhw->avalues[i][j],
+					     event[i]->attr.config1);
 	}
 
 	/* enumerate all possibilities and see if any will work */
@@ -1391,7 +1392,7 @@ static void power_pmu_enable(struct pmu *pmu)
 	memset(&cpuhw->mmcr, 0, sizeof(cpuhw->mmcr));
 
 	if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_events, hwc_index,
-			       &cpuhw->mmcr, cpuhw->event)) {
+			       &cpuhw->mmcr, cpuhw->event, ppmu->flags)) {
 		/* shouldn't ever get here */
 		printk(KERN_ERR "oops compute_mmcr failed\n");
 		goto out;
@@ -1579,7 +1580,7 @@ static int power_pmu_add(struct perf_event *event, int ef_flags)
 
 	if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1))
 		goto out;
-	if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1))
+	if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1, cpuhw->event))
 		goto out;
 	event->hw.config = cpuhw->events[n0];
 
@@ -1789,7 +1790,7 @@ static int power_pmu_commit_txn(struct pmu *pmu)
 	n = cpuhw->n_events;
 	if (check_excludes(cpuhw->event, cpuhw->flags, 0, n))
 		return -EAGAIN;
-	i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n);
+	i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n, cpuhw->event);
 	if (i < 0)
 		return -EAGAIN;
 
@@ -2027,7 +2028,7 @@ static int power_pmu_event_init(struct perf_event *event)
 	local_irq_save(irq_flags);
 	cpuhw = this_cpu_ptr(&cpu_hw_events);
 
-	err = power_check_constraints(cpuhw, events, cflags, n + 1);
+	err = power_check_constraints(cpuhw, events, cflags, n + 1, ctrs);
 
 	if (has_branch_stack(event)) {
 		u64 bhrb_filter = -1;
diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c
index 6ab5b272090a..e4f577da33d8 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -108,12 +108,57 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra)
 		*mmcra |= MMCRA_SDAR_MODE_TLB;
 }
 
+static u64 p10_thresh_cmp_val(u64 value)
+{
+	int exp = 0;
+	u64 result = value;
+
+	if (!value)
+		return value;
+
+	/*
+	 * Incase of P10, thresh_cmp value is not part of raw event code
+	 * and provided via attr.config1 parameter. To program threshold in MMCRA,
+	 * take a 18 bit number N and shift right 2 places and increment
+	 * the exponent E by 1 until the upper 10 bits of N are zero.
+	 * Write E to the threshold exponent and write the lower 8 bits of N
+	 * to the threshold mantissa.
+	 * The max threshold that can be written is 261120.
+	 */
+	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+		if (value > 261120)
+			value = 261120;
+		while ((64 - __builtin_clzl(value)) > 8) {
+			exp++;
+			value >>= 2;
+		}
+
+		/*
+		 * Note that it is invalid to write a mantissa with the
+		 * upper 2 bits of mantissa being zero, unless the
+		 * exponent is also zero.
+		 */
+		if (!(value & 0xC0) && exp)
+			result = 0;
+		else
+			result = (exp << 8) | value;
+	}
+	return result;
+}
+
 static u64 thresh_cmp_val(u64 value)
 {
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		value = p10_thresh_cmp_val(value);
+
+	/*
+	 * Since location of threshold compare bits in MMCRA
+	 * is different for p8, using different shift value.
+	 */
 	if (cpu_has_feature(CPU_FTR_ARCH_300))
 		return value << p9_MMCRA_THR_CMP_SHIFT;
-
-	return value << MMCRA_THR_CMP_SHIFT;
+	else
+		return value << MMCRA_THR_CMP_SHIFT;
 }
 
 static unsigned long combine_from_event(u64 event)
@@ -141,13 +186,13 @@ static bool is_thresh_cmp_valid(u64 event)
 {
 	unsigned int cmp, exp;
 
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		return p10_thresh_cmp_val(event) != 0;
+
 	/*
 	 * Check the mantissa upper two bits are not zero, unless the
 	 * exponent is also zero. See the THRESH_CMP_MANTISSA doc.
-	 * Power10: thresh_cmp is replaced by l2_l3 event select.
 	 */
-	if (cpu_has_feature(CPU_FTR_ARCH_31))
-		return false;
 
 	cmp = (event >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK;
 	exp = cmp >> 7;
@@ -256,7 +301,7 @@ void isa207_get_mem_weight(u64 *weight)
 		*weight = mantissa << (2 * exp);
 }
 
-int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
+int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp, u64 event_config1)
 {
 	unsigned int unit, pmc, cache, ebb;
 	unsigned long mask, value;
@@ -355,9 +400,11 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
 	}
 
 	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
-		if (event_is_threshold(event)) {
+		if (event_is_threshold(event) && is_thresh_cmp_valid(event_config1)) {
 			mask  |= CNST_THRESH_CTL_SEL_MASK;
 			value |= CNST_THRESH_CTL_SEL_VAL(event >> EVENT_THRESH_SHIFT);
+			mask  |= p10_CNST_THRESH_CMP_MASK;
+			value |= p10_CNST_THRESH_CMP_VAL(p10_thresh_cmp_val(event_config1));
 		}
 	} else if (cpu_has_feature(CPU_FTR_ARCH_300))  {
 		if (event_is_threshold(event) && is_thresh_cmp_valid(event)) {
@@ -411,7 +458,7 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
 
 int isa207_compute_mmcr(u64 event[], int n_ev,
 			       unsigned int hwc[], struct mmcr_regs *mmcr,
-			       struct perf_event *pevents[])
+			       struct perf_event *pevents[], u32 flags)
 {
 	unsigned long mmcra, mmcr1, mmcr2, unit, combine, psel, cache, val;
 	unsigned long mmcr3;
@@ -504,6 +551,10 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
 				val = (event[i] >> EVENT_THR_CMP_SHIFT) &
 					EVENT_THR_CMP_MASK;
 				mmcra |= thresh_cmp_val(val);
+			} else if (flags & PPMU_HAS_ATTR_CONFIG1) {
+				val = (pevents[i]->attr.config1 >> p10_EVENT_THR_CMP_SHIFT) &
+					p10_EVENT_THR_CMP_MASK;
+				mmcra |= thresh_cmp_val(val);
 			}
 		}
 
diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h
index 454b32c31440..1af0e8c97ac7 100644
--- a/arch/powerpc/perf/isa207-common.h
+++ b/arch/powerpc/perf/isa207-common.h
@@ -105,6 +105,10 @@
 #define p10_EVENT_RADIX_SCOPE_QUAL_MASK	0x1
 #define p10_MMCR1_RADIX_SCOPE_QUAL_SHIFT	45
 
+/* Event Threshold Compare bit constant for power10 in config1 attribute */
+#define p10_EVENT_THR_CMP_SHIFT        0
+#define p10_EVENT_THR_CMP_MASK 0x3FFFFull
+
 #define p10_EVENT_VALID_MASK		\
 	((p10_SDAR_MODE_MASK   << p10_SDAR_MODE_SHIFT		|	\
 	(p10_EVENT_THRESH_MASK  << EVENT_THRESH_SHIFT)		|	\
@@ -124,8 +128,8 @@
  *        60        56        52        48        44        40        36        32
  * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
  *   [   fab_match   ]         [       thresh_cmp      ] [   thresh_ctl    ] [   ]
- *                                                                             |
- *                                                                 thresh_sel -*
+ *                                          |                                  |
+ *                           [  thresh_cmp bits for p10]           thresh_sel -*
  *
  *        28        24        20        16        12         8         4         0
  * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
@@ -152,6 +156,9 @@
 #define CNST_THRESH_CTL_SEL_VAL(v)	(((v) & 0x7ffull) << 32)
 #define CNST_THRESH_CTL_SEL_MASK	CNST_THRESH_CTL_SEL_VAL(0x7ff)
 
+#define p10_CNST_THRESH_CMP_VAL(v) (((v) & 0x7ffull) << 43)
+#define p10_CNST_THRESH_CMP_MASK   p10_CNST_THRESH_CMP_VAL(0x7ff)
+
 #define CNST_EBB_VAL(v)		(((v) & EVENT_EBB_MASK) << 24)
 #define CNST_EBB_MASK		CNST_EBB_VAL(EVENT_EBB_MASK)
 
@@ -262,10 +269,10 @@
 #define PH(a, b)			(P(LVL, HIT) | P(a, b))
 #define PM(a, b)			(P(LVL, MISS) | P(a, b))
 
-int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp);
+int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp, u64 event_config1);
 int isa207_compute_mmcr(u64 event[], int n_ev,
 				unsigned int hwc[], struct mmcr_regs *mmcr,
-				struct perf_event *pevents[]);
+				struct perf_event *pevents[], u32 flags);
 void isa207_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr);
 int isa207_get_alternatives(u64 event, u64 alt[], int size, unsigned int flags,
 					const unsigned int ev_alt[][MAX_ALT]);
diff --git a/arch/powerpc/perf/mpc7450-pmu.c b/arch/powerpc/perf/mpc7450-pmu.c
index 1919e9df9165..e39b15b79a83 100644
--- a/arch/powerpc/perf/mpc7450-pmu.c
+++ b/arch/powerpc/perf/mpc7450-pmu.c
@@ -148,7 +148,7 @@ static u32 classbits[N_CLASSES - 1][2] = {
 };
 
 static int mpc7450_get_constraint(u64 event, unsigned long *maskp,
-				  unsigned long *valp)
+				  unsigned long *valp, u64 event_config1 __maybe_unused)
 {
 	int pmc, class;
 	u32 mask, value;
@@ -258,7 +258,8 @@ static const u32 pmcsel_mask[N_COUNTER] = {
  */
 static int mpc7450_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[],
 				struct mmcr_regs *mmcr,
-				struct perf_event *pevents[])
+				struct perf_event *pevents[],
+				u32 flags __maybe_unused)
 {
 	u8 event_index[N_CLASSES][N_COUNTER];
 	int n_classevent[N_CLASSES];
diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c
index 79e0206ca454..a901c1348cad 100644
--- a/arch/powerpc/perf/power10-pmu.c
+++ b/arch/powerpc/perf/power10-pmu.c
@@ -216,6 +216,7 @@ PMU_FORMAT_ATTR(invert_bit,     "config:47");
 PMU_FORMAT_ATTR(src_mask,       "config:48-53");
 PMU_FORMAT_ATTR(src_match,      "config:54-59");
 PMU_FORMAT_ATTR(radix_scope,	"config:9");
+PMU_FORMAT_ATTR(thresh_cmp,     "config1:0-17");
 
 static struct attribute *power10_pmu_format_attr[] = {
 	&format_attr_event.attr,
@@ -236,6 +237,7 @@ static struct attribute *power10_pmu_format_attr[] = {
 	&format_attr_src_mask.attr,
 	&format_attr_src_match.attr,
 	&format_attr_radix_scope.attr,
+	&format_attr_thresh_cmp.attr,
 	NULL,
 };
 
@@ -550,7 +552,7 @@ static struct power_pmu power10_pmu = {
 	.get_mem_weight		= isa207_get_mem_weight,
 	.disable_pmc		= isa207_disable_pmc,
 	.flags			= PPMU_HAS_SIER | PPMU_ARCH_207S |
-				  PPMU_ARCH_31,
+				  PPMU_ARCH_31 | PPMU_HAS_ATTR_CONFIG1,
 	.n_generic		= ARRAY_SIZE(power10_generic_events),
 	.generic_events		= power10_generic_events,
 	.cache_events		= &power10_cache_events,
diff --git a/arch/powerpc/perf/power5+-pmu.c b/arch/powerpc/perf/power5+-pmu.c
index 3e64b4a1511f..18732267993a 100644
--- a/arch/powerpc/perf/power5+-pmu.c
+++ b/arch/powerpc/perf/power5+-pmu.c
@@ -132,7 +132,7 @@ static unsigned long unit_cons[PM_LASTUNIT+1][2] = {
 };
 
 static int power5p_get_constraint(u64 event, unsigned long *maskp,
-				  unsigned long *valp)
+				  unsigned long *valp, u64 event_config1 __maybe_unused)
 {
 	int pmc, byte, unit, sh;
 	int bit, fmask;
@@ -451,7 +451,8 @@ static int power5p_marked_instr_event(u64 event)
 
 static int power5p_compute_mmcr(u64 event[], int n_ev,
 				unsigned int hwc[], struct mmcr_regs *mmcr,
-				struct perf_event *pevents[])
+				struct perf_event *pevents[],
+				u32 flags __maybe_unused)
 {
 	unsigned long mmcr1 = 0;
 	unsigned long mmcra = 0;
diff --git a/arch/powerpc/perf/power5-pmu.c b/arch/powerpc/perf/power5-pmu.c
index 017bb19b73fb..cb611c1e7abe 100644
--- a/arch/powerpc/perf/power5-pmu.c
+++ b/arch/powerpc/perf/power5-pmu.c
@@ -136,7 +136,7 @@ static unsigned long unit_cons[PM_LASTUNIT+1][2] = {
 };
 
 static int power5_get_constraint(u64 event, unsigned long *maskp,
-				 unsigned long *valp)
+				 unsigned long *valp, u64 event_config1 __maybe_unused)
 {
 	int pmc, byte, unit, sh;
 	int bit, fmask;
@@ -382,7 +382,8 @@ static int power5_marked_instr_event(u64 event)
 
 static int power5_compute_mmcr(u64 event[], int n_ev,
 			       unsigned int hwc[], struct mmcr_regs *mmcr,
-			       struct perf_event *pevents[])
+			       struct perf_event *pevents[],
+			       u32 flags __maybe_unused)
 {
 	unsigned long mmcr1 = 0;
 	unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
diff --git a/arch/powerpc/perf/power6-pmu.c b/arch/powerpc/perf/power6-pmu.c
index 189974478e9f..69ef38216418 100644
--- a/arch/powerpc/perf/power6-pmu.c
+++ b/arch/powerpc/perf/power6-pmu.c
@@ -173,7 +173,8 @@ static int power6_marked_instr_event(u64 event)
  * Assign PMC numbers and compute MMCR1 value for a set of events
  */
 static int p6_compute_mmcr(u64 event[], int n_ev,
-			   unsigned int hwc[], struct mmcr_regs *mmcr, struct perf_event *pevents[])
+			   unsigned int hwc[], struct mmcr_regs *mmcr, struct perf_event *pevents[],
+			   u32 flags __maybe_unused)
 {
 	unsigned long mmcr1 = 0;
 	unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
@@ -266,7 +267,7 @@ static int p6_compute_mmcr(u64 event[], int n_ev,
  *	32-34	select field: nest (subunit) event selector
  */
 static int p6_get_constraint(u64 event, unsigned long *maskp,
-			     unsigned long *valp)
+			     unsigned long *valp, u64 event_config1 __maybe_unused)
 {
 	int pmc, byte, sh, subunit;
 	unsigned long mask = 0, value = 0;
diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c
index bacfab104a1a..894c17f9a762 100644
--- a/arch/powerpc/perf/power7-pmu.c
+++ b/arch/powerpc/perf/power7-pmu.c
@@ -81,7 +81,7 @@ enum {
  */
 
 static int power7_get_constraint(u64 event, unsigned long *maskp,
-				 unsigned long *valp)
+				 unsigned long *valp, u64 event_config1 __maybe_unused)
 {
 	int pmc, sh, unit;
 	unsigned long mask = 0, value = 0;
@@ -245,7 +245,8 @@ static int power7_marked_instr_event(u64 event)
 
 static int power7_compute_mmcr(u64 event[], int n_ev,
 			       unsigned int hwc[], struct mmcr_regs *mmcr,
-			       struct perf_event *pevents[])
+			       struct perf_event *pevents[],
+			       u32 flags __maybe_unused)
 {
 	unsigned long mmcr1 = 0;
 	unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
diff --git a/arch/powerpc/perf/ppc970-pmu.c b/arch/powerpc/perf/ppc970-pmu.c
index 7d78df97f272..1f8263785286 100644
--- a/arch/powerpc/perf/ppc970-pmu.c
+++ b/arch/powerpc/perf/ppc970-pmu.c
@@ -190,7 +190,7 @@ static unsigned long unit_cons[PM_LASTUNIT+1][2] = {
 };
 
 static int p970_get_constraint(u64 event, unsigned long *maskp,
-			       unsigned long *valp)
+			       unsigned long *valp, u64 event_config1 __maybe_unused)
 {
 	int pmc, byte, unit, sh, spcsel;
 	unsigned long mask = 0, value = 0;
@@ -256,7 +256,8 @@ static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[])
 
 static int p970_compute_mmcr(u64 event[], int n_ev,
 			     unsigned int hwc[], struct mmcr_regs *mmcr,
-			     struct perf_event *pevents[])
+			     struct perf_event *pevents[],
+			     u32 flags __maybe_unused)
 {
 	unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0;
 	unsigned int pmc, unit, byte, psel;
-- 
2.26.2


^ permalink raw reply related

* [RFC PATCH v1 01/41] powerpc/32: Preserve cr1 in exception prolog stack check to fix build error
From: Christophe Leroy @ 2021-02-09  9:56 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, npiggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1612864003.git.christophe.leroy@csgroup.eu>

THREAD_ALIGN_SHIFT = THREAD_SHIFT + 1 = PAGE_SHIFT + 1
Maximum PAGE_SHIFT is 18 for 256k pages so
THREAD_ALIGN_SHIFT is 19 at the maximum.

No need to clobber cr1, it can be preserved when moving r1
into CR when we check stack overflow.

This reduces the number of instructions in Machine Check Exception
prolog and fixes a build failure reported by the kernel test robot
on v5.10 stable when building with RTAS + VMAP_STACK + KVM. That
build failure is due to too many instructions in the prolog hence
not fitting between 0x200 and 0x300. Although the problem doesn't
show up in mainline, it is still worth the change.

Reported-by: kernel test robot <lkp@intel.com>
Fixes: 98bf2d3f4970 ("powerpc/32s: Fix RTAS machine check with VMAP stack")
Cc: stable@vger.kernel.org
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/kernel/head_32.h        | 2 +-
 arch/powerpc/kernel/head_book3s_32.S | 6 ------
 2 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 961b1ce3b6bf..5d4706c14572 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -47,7 +47,7 @@
 	lwz	r1,TASK_STACK-THREAD(r1)
 	addi	r1, r1, THREAD_SIZE - INT_FRAME_SIZE
 1:
-	mtcrf	0x7f, r1
+	mtcrf	0x3f, r1
 	bt	32 - THREAD_ALIGN_SHIFT, stack_overflow
 #else
 	subi	r11, r1, INT_FRAME_SIZE		/* use r1 if kernel */
diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S
index 086970bec32c..727fdab557c9 100644
--- a/arch/powerpc/kernel/head_book3s_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -278,12 +278,6 @@ MachineCheck:
 7:	EXCEPTION_PROLOG_2
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 #ifdef CONFIG_PPC_CHRP
-#ifdef CONFIG_VMAP_STACK
-	mfspr	r4, SPRN_SPRG_THREAD
-	tovirt(r4, r4)
-	lwz	r4, RTAS_SP(r4)
-	cmpwi	cr1, r4, 0
-#endif
 	beq	cr1, machine_check_tramp
 	twi	31, 0, 0
 #else
-- 
2.25.0


^ permalink raw reply related

* [RFC PATCH v1 04/41] powerpc/40x: Save SRR0/SRR1 and r10/r11 earlier in critical exception
From: Christophe Leroy @ 2021-02-09  9:56 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, npiggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1612864003.git.christophe.leroy@csgroup.eu>

In order to be able to switch MMU on in exception prolog, save
SRR0 and SRR1 earlier.

Also save r10 and r11 into stack earlier to better match with the
normal exception prolog.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/kernel/entry_32.S | 9 ---------
 arch/powerpc/kernel/head_40x.S | 8 ++++++++
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 8dea4d3b1d06..1e59d0bb1a6f 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -107,15 +107,6 @@ _ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler)
 #ifdef CONFIG_40x
 	.globl	crit_transfer_to_handler
 crit_transfer_to_handler:
-	lwz	r0,crit_r10@l(0)
-	stw	r0,GPR10(r11)
-	lwz	r0,crit_r11@l(0)
-	stw	r0,GPR11(r11)
-	mfspr	r0,SPRN_SRR0
-	stw	r0,crit_srr0@l(0)
-	mfspr	r0,SPRN_SRR1
-	stw	r0,crit_srr1@l(0)
-
 	/* set the stack limit to the current stack */
 	mfspr	r8,SPRN_SPRG_THREAD
 	lwz	r0,KSP_LIMIT(r8)
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 9cef423d574b..067ae1302c1c 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -103,6 +103,10 @@ _ENTRY(saved_ksp_limit)
 .macro CRITICAL_EXCEPTION_PROLOG
 	stw	r10,crit_r10@l(0)	/* save two registers to work with */
 	stw	r11,crit_r11@l(0)
+	mfspr	r10,SPRN_SRR0
+	mfspr	r11,SPRN_SRR1
+	stw	r10,crit_srr0@l(0)
+	stw	r11,crit_srr1@l(0)
 	mfcr	r10			/* save CR in r10 for now	   */
 	mfspr	r11,SPRN_SRR3		/* check whether user or kernel    */
 	andi.	r11,r11,MSR_PR
@@ -120,6 +124,10 @@ _ENTRY(saved_ksp_limit)
 	stw	r9,GPR9(r11)
 	mflr	r10
 	stw	r10,_LINK(r11)
+	lwz	r10,crit_r10@l(0)
+	lwz	r12,crit_r11@l(0)
+	stw	r10,GPR10(r11)
+	stw	r12,GPR11(r11)
 	mfspr	r12,SPRN_DEAR		/* save DEAR and ESR in the frame  */
 	stw	r12,_DEAR(r11)		/* since they may have had stuff   */
 	mfspr	r9,SPRN_ESR		/* in them at the point where the  */
-- 
2.25.0


^ permalink raw reply related

* [RFC PATCH v1 00/41] powerpc/32: Switch to interrupt entry/exit in C
From: Christophe Leroy @ 2021-02-09  9:56 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, npiggin
  Cc: linuxppc-dev, linux-kernel

This series aims at porting interrupt entry/exit to C on PPC32, using
the work already merged for PPC64.

The first part makes minimal changes to 40x in order to be able to enable
the MMU earlier in exception entry.

The second part prepares and switches interrupt exit to C.

The third part moves more and more things to C, ending with KUAP management.

v1 is boot tested on 8xx and 83xx, releasing it as an RFC to get early feedback.

This series applies on top of the one switching ppc32 syscall entry/exit to C.

The first patch is a bug fix, already submitted but not yet merged, that interacts with the series.

Christophe Leroy (41):
  powerpc/32: Preserve cr1 in exception prolog stack check to fix build
    error
  powerpc/40x: Don't use SPRN_SPRG_SCRATCH0/1 in TLB miss handlers
  powerpc/40x: Change CRITICAL_EXCEPTION_PROLOG macro to a gas macro
  powerpc/40x: Save SRR0/SRR1 and r10/r11 earlier in critical exception
  powerpc/40x: Reorder a few instructions in critical exception prolog
  powerpc/40x: Prepare for enabling MMU in critical exception prolog
  powerpc/40x: Prepare normal exception handler for enabling MMU early
  powerpc/32: Reconcile interrupts in C
  powerpc/32: Entry cpu time accounting in C
  powerpc/32: Handle bookE debugging in C in exception entry
  powerpc/32: Use fast instruction to set MSR RI in exception prolog on
    8xx
  powerpc/32: Remove ksp_limit
  powerpc/32: Always enable data translation in exception prolog
  powerpc/32: Tag DAR in EXCEPTION_PROLOG_2 for the 8xx
  powerpc/32: Enable instruction translation at the same time as data
    translation
  powerpc/32: Statically initialise first emergency context
  powerpc/32: Add vmap_stack_overflow label inside the macro
  powerpc/32: Use START_EXCEPTION() as much as possible
  powerpc/32: Move exception prolog code into .text once MMU is back on
  powerpc/32: Provide a name to exception prolog continuation in virtual
    mode
  powerpc/32: Refactor booke critical registers saving
  powerpc/32: Perform normal function call in exception entry
  powerpc/32: Always save non volatile registers on exception entry
  powerpc/32: Replace ASM exception exit by C exception exit from ppc64
  powerpc/32: Set regs parameter in r3 in transfer_to_handler
  powerpc/32: Remove handle_page_fault()
  powerpc/32: Save trap number on stack in exception prolog
  powerpc/32: Add a prepare_transfer_to_handler macro for exception
    prologs
  powerpc/32: Only restore non volatile registers when required
  powerpc/32: Dismantle EXC_XFER_STD/LITE/TEMPLATE
  powerpc/32: Remove the xfer parameter in EXCEPTION() macro
  powerpc/32: Refactor saving of volatile registers in exception prologs
  powerpc/32: Save remaining registers in exception prolog
  powerpc/32: Set current->thread.regs in C interrupt entry
  powerpc/32: Return directly from power_save_ppc32_restore()
  powerpc/32: Only use prepare_transfer_to_handler function on book3s/32
    and e500
  powerpc/32s: Move KUEP locking/unlocking in C
  powerpc/64s: Make kuap_check_amr() and kuap_get_and_check_amr()
    generic
  powerpc/32s: Create C version of kuap save/restore/check helpers
  powerpc/8xx: Create C version of kuap save/restore/check helpers
  powerpc/32: Manage KUAP in C

 arch/powerpc/include/asm/book3s/32/kup.h     | 126 ++-
 arch/powerpc/include/asm/book3s/64/kup.h     |  24 +-
 arch/powerpc/include/asm/interrupt.h         |  21 +
 arch/powerpc/include/asm/kup.h               |  37 +-
 arch/powerpc/include/asm/nohash/32/kup-8xx.h |  58 +-
 arch/powerpc/include/asm/ppc_asm.h           |  10 -
 arch/powerpc/include/asm/processor.h         |   6 +-
 arch/powerpc/include/asm/ptrace.h            |  13 +-
 arch/powerpc/kernel/asm-offsets.c            |   4 -
 arch/powerpc/kernel/entry_32.S               | 810 ++++---------------
 arch/powerpc/kernel/fpu.S                    |   2 -
 arch/powerpc/kernel/head_32.h                | 197 ++---
 arch/powerpc/kernel/head_40x.S               | 271 ++++---
 arch/powerpc/kernel/head_44x.S               |  10 +-
 arch/powerpc/kernel/head_8xx.S               | 151 ++--
 arch/powerpc/kernel/head_book3s_32.S         | 239 +++---
 arch/powerpc/kernel/head_booke.h             | 188 +++--
 arch/powerpc/kernel/head_fsl_booke.S         |  64 +-
 arch/powerpc/kernel/idle_6xx.S               |  14 +-
 arch/powerpc/kernel/idle_e500.S              |  14 +-
 arch/powerpc/kernel/interrupt.c              |  35 +-
 arch/powerpc/kernel/misc_32.S                |  14 -
 arch/powerpc/kernel/process.c                |   6 +-
 arch/powerpc/kernel/setup_32.c               |   2 +-
 arch/powerpc/kernel/traps.c                  |   9 -
 arch/powerpc/kernel/vector.S                 |   2 -
 arch/powerpc/lib/sstep.c                     |   9 -
 arch/powerpc/mm/book3s32/Makefile            |   1 +
 arch/powerpc/mm/book3s32/hash_low.S          |  14 -
 arch/powerpc/mm/book3s32/kuep.c              |  38 +
 arch/powerpc/mm/fault.c                      |   4 +-
 31 files changed, 875 insertions(+), 1518 deletions(-)
 create mode 100644 arch/powerpc/mm/book3s32/kuep.c

-- 
2.25.0


^ permalink raw reply

* [RFC PATCH v1 03/41] powerpc/40x: Change CRITICAL_EXCEPTION_PROLOG macro to a gas macro
From: Christophe Leroy @ 2021-02-09  9:56 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, npiggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1612864003.git.christophe.leroy@csgroup.eu>

Change CRITICAL_EXCEPTION_PROLOG macro to a gas macro to
remove the ugly ; and \ on each line.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/kernel/head_40x.S | 71 +++++++++++++++++-----------------
 1 file changed, 36 insertions(+), 35 deletions(-)

diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 383238a98f77..9cef423d574b 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -100,42 +100,43 @@ _ENTRY(saved_ksp_limit)
  * Instead we use a couple of words of memory at low physical addresses.
  * This is OK since we don't support SMP on these processors.
  */
-#define CRITICAL_EXCEPTION_PROLOG					     \
-	stw	r10,crit_r10@l(0);	/* save two registers to work with */\
-	stw	r11,crit_r11@l(0);					     \
-	mfcr	r10;			/* save CR in r10 for now	   */\
-	mfspr	r11,SPRN_SRR3;		/* check whether user or kernel    */\
-	andi.	r11,r11,MSR_PR;						     \
-	lis	r11,critirq_ctx@ha;					     \
-	tophys(r11,r11);						     \
-	lwz	r11,critirq_ctx@l(r11);					     \
-	beq	1f;							     \
-	/* COMING FROM USER MODE */					     \
-	mfspr	r11,SPRN_SPRG_THREAD;	/* if from user, start at top of   */\
-	lwz	r11,TASK_STACK-THREAD(r11); /* this thread's kernel stack */\
-1:	addi	r11,r11,THREAD_SIZE-INT_FRAME_SIZE; /* Alloc an excpt frm  */\
-	tophys(r11,r11);						     \
-	stw	r10,_CCR(r11);          /* save various registers	   */\
-	stw	r12,GPR12(r11);						     \
-	stw	r9,GPR9(r11);						     \
-	mflr	r10;							     \
-	stw	r10,_LINK(r11);						     \
-	mfspr	r12,SPRN_DEAR;		/* save DEAR and ESR in the frame  */\
-	stw	r12,_DEAR(r11);		/* since they may have had stuff   */\
-	mfspr	r9,SPRN_ESR;		/* in them at the point where the  */\
-	stw	r9,_ESR(r11);		/* exception was taken		   */\
-	mfspr	r12,SPRN_SRR2;						     \
-	stw	r1,GPR1(r11);						     \
-	mfspr	r9,SPRN_SRR3;						     \
-	stw	r1,0(r11);						     \
-	tovirt(r1,r11);							     \
-	rlwinm	r9,r9,0,14,12;		/* clear MSR_WE (necessary?)	   */\
-	stw	r0,GPR0(r11);						     \
-	lis	r10, STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */\
-	addi	r10, r10, STACK_FRAME_REGS_MARKER@l;			     \
-	stw	r10, 8(r11);						     \
-	SAVE_4GPRS(3, r11);						     \
+.macro CRITICAL_EXCEPTION_PROLOG
+	stw	r10,crit_r10@l(0)	/* save two registers to work with */
+	stw	r11,crit_r11@l(0)
+	mfcr	r10			/* save CR in r10 for now	   */
+	mfspr	r11,SPRN_SRR3		/* check whether user or kernel    */
+	andi.	r11,r11,MSR_PR
+	lis	r11,critirq_ctx@ha
+	tophys(r11,r11)
+	lwz	r11,critirq_ctx@l(r11)
+	beq	1f
+	/* COMING FROM USER MODE */
+	mfspr	r11,SPRN_SPRG_THREAD	/* if from user, start at top of   */
+	lwz	r11,TASK_STACK-THREAD(r11) /* this thread's kernel stack */
+1:	addi	r11,r11,THREAD_SIZE-INT_FRAME_SIZE /* Alloc an excpt frm  */
+	tophys(r11,r11)
+	stw	r10,_CCR(r11)		/* save various registers	   */
+	stw	r12,GPR12(r11)
+	stw	r9,GPR9(r11)
+	mflr	r10
+	stw	r10,_LINK(r11)
+	mfspr	r12,SPRN_DEAR		/* save DEAR and ESR in the frame  */
+	stw	r12,_DEAR(r11)		/* since they may have had stuff   */
+	mfspr	r9,SPRN_ESR		/* in them at the point where the  */
+	stw	r9,_ESR(r11)		/* exception was taken		   */
+	mfspr	r12,SPRN_SRR2
+	stw	r1,GPR1(r11)
+	mfspr	r9,SPRN_SRR3
+	stw	r1,0(r11)
+	tovirt(r1,r11)
+	rlwinm	r9,r9,0,14,12		/* clear MSR_WE (necessary?)	   */
+	stw	r0,GPR0(r11)
+	lis	r10, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
+	addi	r10, r10, STACK_FRAME_REGS_MARKER@l
+	stw	r10, 8(r11)
+	SAVE_4GPRS(3, r11)
 	SAVE_2GPRS(7, r11)
+.endm
 
 	/*
 	 * State at this point:
-- 
2.25.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox