LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH v3 4/6] swiotlb: Add restricted DMA alloc/free support.
From: Claire Chang @ 2021-01-06  3:41 UTC (permalink / raw)
  To: robh+dt, mpe, benh, paulus, joro, will, frowand.list, konrad.wilk,
	boris.ostrovsky, jgross, sstabellini, hch, m.szyprowski,
	robin.murphy
  Cc: heikki.krogerus, peterz, grant.likely, mingo, drinkcat, saravanak,
	xypron.glpk, rafael.j.wysocki, bgolaszewski, xen-devel, treding,
	devicetree, Claire Chang, dan.j.williams, andriy.shevchenko,
	gregkh, rdunlap, linux-kernel, tfiga, iommu, linuxppc-dev,
	bauerman
In-Reply-To: <20210106034124.30560-1-tientzu@chromium.org>

Add the functions, swiotlb_alloc and swiotlb_free to support the
memory allocation from restricted DMA pool.

Signed-off-by: Claire Chang <tientzu@chromium.org>
---
 include/linux/swiotlb.h |   6 ++
 kernel/dma/direct.c     |  12 +++
 kernel/dma/swiotlb.c    | 171 +++++++++++++++++++++++++++++-----------
 3 files changed, 144 insertions(+), 45 deletions(-)

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 5135e5636042..84fe96e40685 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -68,6 +68,12 @@ extern void swiotlb_tbl_sync_single(struct device *hwdev,
 dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys,
 		size_t size, enum dma_data_direction dir, unsigned long attrs);
 
+void *swiotlb_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
+		    unsigned long attrs);
+
+void swiotlb_free(struct device *dev, size_t size, void *vaddr,
+		  dma_addr_t dma_addr, unsigned long attrs);
+
 #ifdef CONFIG_SWIOTLB
 extern enum swiotlb_force swiotlb_force;
 
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 30ccbc08e229..126e9b3354d6 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -137,6 +137,11 @@ void *dma_direct_alloc(struct device *dev, size_t size,
 	void *ret;
 	int err;
 
+#ifdef CONFIG_SWIOTLB
+	if (unlikely(dev->dma_io_tlb_mem))
+		return swiotlb_alloc(dev, size, dma_handle, attrs);
+#endif
+
 	size = PAGE_ALIGN(size);
 	if (attrs & DMA_ATTR_NO_WARN)
 		gfp |= __GFP_NOWARN;
@@ -246,6 +251,13 @@ void dma_direct_free(struct device *dev, size_t size,
 {
 	unsigned int page_order = get_order(size);
 
+#ifdef CONFIG_SWIOTLB
+	if (unlikely(dev->dma_io_tlb_mem)) {
+		swiotlb_free(dev, size, cpu_addr, dma_addr, attrs);
+		return;
+	}
+#endif
+
 	if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
 	    !force_dma_unencrypted(dev)) {
 		/* cpu_addr is a struct page cookie, not a kernel address */
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 1f05af09e61a..ca88ef59435d 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -459,14 +459,13 @@ static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr,
 	}
 }
 
-phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
-		size_t mapping_size, size_t alloc_size,
-		enum dma_data_direction dir, unsigned long attrs)
+static int swiotlb_tbl_find_free_region(struct device *hwdev,
+					dma_addr_t tbl_dma_addr,
+					size_t alloc_size,
+					unsigned long attrs)
 {
 	struct io_tlb_mem *mem = get_io_tlb_mem(hwdev);
-	dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(hwdev, mem->start);
 	unsigned long flags;
-	phys_addr_t tlb_addr;
 	unsigned int nslots, stride, index, wrap;
 	int i;
 	unsigned long mask;
@@ -477,15 +476,6 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
 	if (no_iotlb_memory && !hwdev->dma_io_tlb_mem)
 		panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
 
-	if (mem_encrypt_active())
-		pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
-
-	if (mapping_size > alloc_size) {
-		dev_warn_once(hwdev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)",
-			      mapping_size, alloc_size);
-		return (phys_addr_t)DMA_MAPPING_ERROR;
-	}
-
 	mask = dma_get_seg_boundary(hwdev);
 
 	tbl_dma_addr &= mask;
@@ -547,7 +537,6 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
 				mem->list[i] = 0;
 			for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && mem->list[i]; i--)
 				mem->list[i] = ++count;
-			tlb_addr = mem->start + (index << IO_TLB_SHIFT);
 
 			/*
 			 * Update the indices to avoid searching in the next
@@ -570,45 +559,21 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
 	if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit())
 		dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
 			 alloc_size, mem->nslabs, tmp_io_tlb_used);
-	return (phys_addr_t)DMA_MAPPING_ERROR;
+	return -ENOMEM;
+
 found:
 	mem->used += nslots;
 	spin_unlock_irqrestore(&mem->lock, flags);
 
-	/*
-	 * Save away the mapping from the original address to the DMA address.
-	 * This is needed when we sync the memory.  Then we sync the buffer if
-	 * needed.
-	 */
-	for (i = 0; i < nslots; i++)
-		mem->orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
-	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
-	    (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
-		swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE);
-
-	return tlb_addr;
+	return index;
 }
 
-/*
- * tlb_addr is the physical address of the bounce buffer to unmap.
- */
-void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
-			      size_t mapping_size, size_t alloc_size,
-			      enum dma_data_direction dir, unsigned long attrs)
+static void swiotlb_tbl_release_region(struct device *hwdev, int index,
+				       size_t size)
 {
 	struct io_tlb_mem *mem = get_io_tlb_mem(hwdev);
 	unsigned long flags;
-	int i, count, nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
-	int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT;
-	phys_addr_t orig_addr = mem->orig_addr[index];
-
-	/*
-	 * First, sync the memory before unmapping the entry
-	 */
-	if (orig_addr != INVALID_PHYS_ADDR &&
-	    !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
-	    ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
-		swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_FROM_DEVICE);
+	int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
 
 	/*
 	 * Return the buffer to the free list by setting the corresponding
@@ -640,6 +605,69 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
 	spin_unlock_irqrestore(&mem->lock, flags);
 }
 
+phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
+		size_t mapping_size, size_t alloc_size,
+		enum dma_data_direction dir, unsigned long attrs)
+{
+	struct io_tlb_mem *mem = get_io_tlb_mem(hwdev);
+	dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(hwdev, mem->start);
+	phys_addr_t tlb_addr;
+	unsigned int nslots, index;
+	int i;
+
+	if (mem_encrypt_active())
+		pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
+
+	if (mapping_size > alloc_size) {
+		dev_warn_once(hwdev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)",
+			      mapping_size, alloc_size);
+		return (phys_addr_t)DMA_MAPPING_ERROR;
+	}
+
+	index = swiotlb_tbl_find_free_region(hwdev, tbl_dma_addr, alloc_size,
+					     attrs);
+	if (index < 0)
+		return (phys_addr_t)DMA_MAPPING_ERROR;
+
+	tlb_addr = mem->start + (index << IO_TLB_SHIFT);
+
+	/*
+	 * Save away the mapping from the original address to the DMA address.
+	 * This is needed when we sync the memory.  Then we sync the buffer if
+	 * needed.
+	 */
+	nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
+	for (i = 0; i < nslots; i++)
+		mem->orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
+	    (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
+		swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE);
+
+	return tlb_addr;
+}
+
+/*
+ * tlb_addr is the physical address of the bounce buffer to unmap.
+ */
+void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
+			      size_t mapping_size, size_t alloc_size,
+			      enum dma_data_direction dir, unsigned long attrs)
+{
+	struct io_tlb_mem *mem = get_io_tlb_mem(hwdev);
+	int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT;
+	phys_addr_t orig_addr = mem->orig_addr[index];
+
+	/*
+	 * First, sync the memory before unmapping the entry
+	 */
+	if (orig_addr != INVALID_PHYS_ADDR &&
+	    !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
+	    ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
+		swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_FROM_DEVICE);
+
+	swiotlb_tbl_release_region(hwdev, index, alloc_size);
+}
+
 void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
 			     size_t size, enum dma_data_direction dir,
 			     enum dma_sync_target target)
@@ -706,6 +734,59 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
 	return dma_addr;
 }
 
+void *swiotlb_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
+		    unsigned long attrs)
+{
+	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
+	int index;
+	void *vaddr;
+	phys_addr_t tlb_addr;
+
+	size = PAGE_ALIGN(size);
+	index = swiotlb_tbl_find_free_region(dev, mem->start, size, attrs);
+	if (index < 0)
+		return NULL;
+
+	tlb_addr = mem->start + (index << IO_TLB_SHIFT);
+	*dma_handle = phys_to_dma_unencrypted(dev, tlb_addr);
+
+	if (!dev_is_dma_coherent(dev)) {
+		unsigned long pfn = PFN_DOWN(tlb_addr);
+
+		/* remove any dirty cache lines on the kernel alias */
+		arch_dma_prep_coherent(pfn_to_page(pfn), size);
+
+		/* create a coherent mapping */
+		vaddr = dma_common_contiguous_remap(
+			pfn_to_page(pfn), size,
+			dma_pgprot(dev, PAGE_KERNEL, attrs),
+			__builtin_return_address(0));
+		if (!vaddr) {
+			swiotlb_tbl_release_region(dev, index, size);
+			return NULL;
+		}
+	} else {
+		vaddr = phys_to_virt(tlb_addr);
+	}
+
+	memset(vaddr, 0, size);
+
+	return vaddr;
+}
+
+void swiotlb_free(struct device *dev, size_t size, void *vaddr,
+		  dma_addr_t dma_addr, unsigned long attrs)
+{
+	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
+	unsigned int index;
+
+	if (!dev_is_dma_coherent(dev))
+		vunmap(vaddr);
+
+	index = (dma_addr - mem->start) >> IO_TLB_SHIFT;
+	swiotlb_tbl_release_region(dev, index, PAGE_ALIGN(size));
+}
+
 size_t swiotlb_max_mapping_size(struct device *dev)
 {
 	return ((size_t)1 << IO_TLB_SHIFT) * IO_TLB_SEGSIZE;
-- 
2.29.2.729.g45daf8777d-goog


^ permalink raw reply related

* [RFC PATCH v3 3/6] swiotlb: Use restricted DMA pool if available
From: Claire Chang @ 2021-01-06  3:41 UTC (permalink / raw)
  To: robh+dt, mpe, benh, paulus, joro, will, frowand.list, konrad.wilk,
	boris.ostrovsky, jgross, sstabellini, hch, m.szyprowski,
	robin.murphy
  Cc: heikki.krogerus, peterz, grant.likely, mingo, drinkcat, saravanak,
	xypron.glpk, rafael.j.wysocki, bgolaszewski, xen-devel, treding,
	devicetree, Claire Chang, dan.j.williams, andriy.shevchenko,
	gregkh, rdunlap, linux-kernel, tfiga, iommu, linuxppc-dev,
	bauerman
In-Reply-To: <20210106034124.30560-1-tientzu@chromium.org>

Regardless of swiotlb setting, the restricted DMA pool is preferred if
available.

The restricted DMA pools provide a basic level of protection against
the DMA overwriting buffer contents at unexpected times. However, to
protect against general data leakage and system memory corruption, the
system needs to provide a way to restrict the DMA to a predefined memory
region.

Signed-off-by: Claire Chang <tientzu@chromium.org>
---
 drivers/iommu/dma-iommu.c | 12 ++++++------
 include/linux/swiotlb.h   | 17 +++++++++++------
 kernel/dma/direct.c       |  8 ++++----
 kernel/dma/direct.h       | 10 ++++++----
 kernel/dma/swiotlb.c      | 13 ++++++-------
 5 files changed, 33 insertions(+), 27 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index f0305e6aac1b..1343cc2ef27a 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -516,7 +516,7 @@ static void __iommu_dma_unmap_swiotlb(struct device *dev, dma_addr_t dma_addr,
 
 	__iommu_dma_unmap(dev, dma_addr, size);
 
-	if (unlikely(is_swiotlb_buffer(phys)))
+	if (unlikely(is_swiotlb_buffer(dev, phys)))
 		swiotlb_tbl_unmap_single(dev, phys, size,
 				iova_align(iovad, size), dir, attrs);
 }
@@ -592,7 +592,7 @@ static dma_addr_t __iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
 	}
 
 	iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
-	if ((iova == DMA_MAPPING_ERROR) && is_swiotlb_buffer(phys))
+	if ((iova == DMA_MAPPING_ERROR) && is_swiotlb_buffer(dev, phys))
 		swiotlb_tbl_unmap_single(dev, phys, org_size,
 				aligned_size, dir, attrs);
 
@@ -764,7 +764,7 @@ static void iommu_dma_sync_single_for_cpu(struct device *dev,
 	if (!dev_is_dma_coherent(dev))
 		arch_sync_dma_for_cpu(phys, size, dir);
 
-	if (is_swiotlb_buffer(phys))
+	if (is_swiotlb_buffer(dev, phys))
 		swiotlb_tbl_sync_single(dev, phys, size, dir, SYNC_FOR_CPU);
 }
 
@@ -777,7 +777,7 @@ static void iommu_dma_sync_single_for_device(struct device *dev,
 		return;
 
 	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
-	if (is_swiotlb_buffer(phys))
+	if (is_swiotlb_buffer(dev, phys))
 		swiotlb_tbl_sync_single(dev, phys, size, dir, SYNC_FOR_DEVICE);
 
 	if (!dev_is_dma_coherent(dev))
@@ -798,7 +798,7 @@ static void iommu_dma_sync_sg_for_cpu(struct device *dev,
 		if (!dev_is_dma_coherent(dev))
 			arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
 
-		if (is_swiotlb_buffer(sg_phys(sg)))
+		if (is_swiotlb_buffer(dev, sg_phys(sg)))
 			swiotlb_tbl_sync_single(dev, sg_phys(sg), sg->length,
 						dir, SYNC_FOR_CPU);
 	}
@@ -815,7 +815,7 @@ static void iommu_dma_sync_sg_for_device(struct device *dev,
 		return;
 
 	for_each_sg(sgl, sg, nelems, i) {
-		if (is_swiotlb_buffer(sg_phys(sg)))
+		if (is_swiotlb_buffer(dev, sg_phys(sg)))
 			swiotlb_tbl_sync_single(dev, sg_phys(sg), sg->length,
 						dir, SYNC_FOR_DEVICE);
 
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index a1bbd7788885..5135e5636042 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -2,12 +2,12 @@
 #ifndef __LINUX_SWIOTLB_H
 #define __LINUX_SWIOTLB_H
 
+#include <linux/device.h>
 #include <linux/dma-direction.h>
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/limits.h>
 
-struct device;
 struct page;
 struct scatterlist;
 
@@ -106,9 +106,14 @@ struct io_tlb_mem {
 };
 extern struct io_tlb_mem io_tlb_default_mem;
 
-static inline bool is_swiotlb_buffer(phys_addr_t paddr)
+static inline struct io_tlb_mem *get_io_tlb_mem(struct device *dev)
 {
-	struct io_tlb_mem *mem = &io_tlb_default_mem;
+	return dev->dma_io_tlb_mem ? dev->dma_io_tlb_mem : &io_tlb_default_mem;
+}
+
+static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
+{
+	struct io_tlb_mem *mem = get_io_tlb_mem(dev);
 
 	return paddr >= mem->start && paddr < mem->end;
 }
@@ -116,11 +121,11 @@ static inline bool is_swiotlb_buffer(phys_addr_t paddr)
 void __init swiotlb_exit(void);
 unsigned int swiotlb_max_segment(void);
 size_t swiotlb_max_mapping_size(struct device *dev);
-bool is_swiotlb_active(void);
+bool is_swiotlb_active(struct device *dev);
 void __init swiotlb_adjust_size(unsigned long new_size);
 #else
 #define swiotlb_force SWIOTLB_NO_FORCE
-static inline bool is_swiotlb_buffer(phys_addr_t paddr)
+static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
 {
 	return false;
 }
@@ -136,7 +141,7 @@ static inline size_t swiotlb_max_mapping_size(struct device *dev)
 	return SIZE_MAX;
 }
 
-static inline bool is_swiotlb_active(void)
+static inline bool is_swiotlb_active(struct device *dev)
 {
 	return false;
 }
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 002268262c9a..30ccbc08e229 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -343,7 +343,7 @@ void dma_direct_sync_sg_for_device(struct device *dev,
 	for_each_sg(sgl, sg, nents, i) {
 		phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg));
 
-		if (unlikely(is_swiotlb_buffer(paddr)))
+		if (unlikely(is_swiotlb_buffer(dev, paddr)))
 			swiotlb_tbl_sync_single(dev, paddr, sg->length,
 					dir, SYNC_FOR_DEVICE);
 
@@ -369,7 +369,7 @@ void dma_direct_sync_sg_for_cpu(struct device *dev,
 		if (!dev_is_dma_coherent(dev))
 			arch_sync_dma_for_cpu(paddr, sg->length, dir);
 
-		if (unlikely(is_swiotlb_buffer(paddr)))
+		if (unlikely(is_swiotlb_buffer(dev, paddr)))
 			swiotlb_tbl_sync_single(dev, paddr, sg->length, dir,
 					SYNC_FOR_CPU);
 
@@ -495,7 +495,7 @@ int dma_direct_supported(struct device *dev, u64 mask)
 size_t dma_direct_max_mapping_size(struct device *dev)
 {
 	/* If SWIOTLB is active, use its maximum mapping size */
-	if (is_swiotlb_active() &&
+	if (is_swiotlb_active(dev) &&
 	    (dma_addressing_limited(dev) || swiotlb_force == SWIOTLB_FORCE))
 		return swiotlb_max_mapping_size(dev);
 	return SIZE_MAX;
@@ -504,7 +504,7 @@ size_t dma_direct_max_mapping_size(struct device *dev)
 bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr)
 {
 	return !dev_is_dma_coherent(dev) ||
-		is_swiotlb_buffer(dma_to_phys(dev, dma_addr));
+		is_swiotlb_buffer(dev, dma_to_phys(dev, dma_addr));
 }
 
 /**
diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h
index b98615578737..7188834cc4c7 100644
--- a/kernel/dma/direct.h
+++ b/kernel/dma/direct.h
@@ -56,7 +56,7 @@ static inline void dma_direct_sync_single_for_device(struct device *dev,
 {
 	phys_addr_t paddr = dma_to_phys(dev, addr);
 
-	if (unlikely(is_swiotlb_buffer(paddr)))
+	if (unlikely(is_swiotlb_buffer(dev, paddr)))
 		swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE);
 
 	if (!dev_is_dma_coherent(dev))
@@ -73,7 +73,7 @@ static inline void dma_direct_sync_single_for_cpu(struct device *dev,
 		arch_sync_dma_for_cpu_all();
 	}
 
-	if (unlikely(is_swiotlb_buffer(paddr)))
+	if (unlikely(is_swiotlb_buffer(dev, paddr)))
 		swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU);
 
 	if (dir == DMA_FROM_DEVICE)
@@ -87,8 +87,10 @@ static inline dma_addr_t dma_direct_map_page(struct device *dev,
 	phys_addr_t phys = page_to_phys(page) + offset;
 	dma_addr_t dma_addr = phys_to_dma(dev, phys);
 
-	if (unlikely(swiotlb_force == SWIOTLB_FORCE))
+#ifdef CONFIG_SWIOTLB
+	if (unlikely(swiotlb_force == SWIOTLB_FORCE) || dev->dma_io_tlb_mem)
 		return swiotlb_map(dev, phys, size, dir, attrs);
+#endif
 
 	if (unlikely(!dma_capable(dev, dma_addr, size, true))) {
 		if (swiotlb_force != SWIOTLB_NO_FORCE)
@@ -113,7 +115,7 @@ static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr,
 	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
 		dma_direct_sync_single_for_cpu(dev, addr, size, dir);
 
-	if (unlikely(is_swiotlb_buffer(phys)))
+	if (unlikely(is_swiotlb_buffer(dev, phys)))
 		swiotlb_tbl_unmap_single(dev, phys, size, size, dir, attrs);
 }
 #endif /* _KERNEL_DMA_DIRECT_H */
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 7fb2ac087d23..1f05af09e61a 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -222,7 +222,6 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
 		mem->orig_addr[i] = INVALID_PHYS_ADDR;
 	}
 	mem->index = 0;
-	no_iotlb_memory = false;
 
 	if (verbose)
 		swiotlb_print_info();
@@ -464,7 +463,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
 		size_t mapping_size, size_t alloc_size,
 		enum dma_data_direction dir, unsigned long attrs)
 {
-	struct io_tlb_mem *mem = &io_tlb_default_mem;
+	struct io_tlb_mem *mem = get_io_tlb_mem(hwdev);
 	dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(hwdev, mem->start);
 	unsigned long flags;
 	phys_addr_t tlb_addr;
@@ -475,7 +474,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
 	unsigned long max_slots;
 	unsigned long tmp_io_tlb_used;
 
-	if (no_iotlb_memory)
+	if (no_iotlb_memory && !hwdev->dma_io_tlb_mem)
 		panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
 
 	if (mem_encrypt_active())
@@ -597,7 +596,7 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
 			      size_t mapping_size, size_t alloc_size,
 			      enum dma_data_direction dir, unsigned long attrs)
 {
-	struct io_tlb_mem *mem = &io_tlb_default_mem;
+	struct io_tlb_mem *mem = get_io_tlb_mem(hwdev);
 	unsigned long flags;
 	int i, count, nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
 	int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT;
@@ -645,7 +644,7 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
 			     size_t size, enum dma_data_direction dir,
 			     enum dma_sync_target target)
 {
-	struct io_tlb_mem *mem = &io_tlb_default_mem;
+	struct io_tlb_mem *mem = get_io_tlb_mem(hwdev);
 	int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT;
 	phys_addr_t orig_addr = mem->orig_addr[index];
 
@@ -712,13 +711,13 @@ size_t swiotlb_max_mapping_size(struct device *dev)
 	return ((size_t)1 << IO_TLB_SHIFT) * IO_TLB_SEGSIZE;
 }
 
-bool is_swiotlb_active(void)
+bool is_swiotlb_active(struct device *dev)
 {
 	/*
 	 * When SWIOTLB is initialized, even if mem->start points to physical
 	 * address zero, mem->end surely doesn't.
 	 */
-	return io_tlb_default_mem.end != 0;
+	return io_tlb_default_mem.end != 0 || dev->dma_io_tlb_mem;
 }
 
 #ifdef CONFIG_DEBUG_FS
-- 
2.29.2.729.g45daf8777d-goog


^ permalink raw reply related

* [RFC PATCH v3 2/6] swiotlb: Add restricted DMA pool
From: Claire Chang @ 2021-01-06  3:41 UTC (permalink / raw)
  To: robh+dt, mpe, benh, paulus, joro, will, frowand.list, konrad.wilk,
	boris.ostrovsky, jgross, sstabellini, hch, m.szyprowski,
	robin.murphy
  Cc: heikki.krogerus, peterz, grant.likely, mingo, drinkcat, saravanak,
	xypron.glpk, rafael.j.wysocki, bgolaszewski, xen-devel, treding,
	devicetree, Claire Chang, dan.j.williams, andriy.shevchenko,
	gregkh, rdunlap, linux-kernel, tfiga, iommu, linuxppc-dev,
	bauerman
In-Reply-To: <20210106034124.30560-1-tientzu@chromium.org>

Add the initialization function to create restricted DMA pools from
matching reserved-memory nodes in the device tree.

Signed-off-by: Claire Chang <tientzu@chromium.org>
---
 include/linux/device.h  |   4 ++
 include/linux/swiotlb.h |   7 +-
 kernel/dma/Kconfig      |   1 +
 kernel/dma/swiotlb.c    | 144 ++++++++++++++++++++++++++++++++++------
 4 files changed, 131 insertions(+), 25 deletions(-)

diff --git a/include/linux/device.h b/include/linux/device.h
index 89bb8b84173e..ca6f71ec8871 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -413,6 +413,7 @@ struct dev_links_info {
  * @dma_pools:	Dma pools (if dma'ble device).
  * @dma_mem:	Internal for coherent mem override.
  * @cma_area:	Contiguous memory area for dma allocations
+ * @dma_io_tlb_mem: Internal for swiotlb io_tlb_mem override.
  * @archdata:	For arch-specific additions.
  * @of_node:	Associated device tree node.
  * @fwnode:	Associated device node supplied by platform firmware.
@@ -515,6 +516,9 @@ struct device {
 #ifdef CONFIG_DMA_CMA
 	struct cma *cma_area;		/* contiguous memory area for dma
 					   allocations */
+#endif
+#ifdef CONFIG_SWIOTLB
+	struct io_tlb_mem	*dma_io_tlb_mem;
 #endif
 	/* arch specific additions */
 	struct dev_archdata	archdata;
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index dd8eb57cbb8f..a1bbd7788885 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -76,12 +76,13 @@ extern enum swiotlb_force swiotlb_force;
  *
  * @start:	The start address of the swiotlb memory pool. Used to do a quick
  *		range check to see if the memory was in fact allocated by this
- *		API.
+ *		API. For restricted DMA pool, this is device tree adjustable.
  * @end:	The end address of the swiotlb memory pool. Used to do a quick
  *		range check to see if the memory was in fact allocated by this
- *		API.
+ *		API. For restricted DMA pool, this is device tree adjustable.
  * @nslabs:	The number of IO TLB blocks (in groups of 64) between @start and
- *		@end. This is command line adjustable via setup_io_tlb_npages.
+ *		@end. For default swiotlb, this is command line adjustable via
+ *		setup_io_tlb_npages.
  * @used:	The number of used IO TLB block.
  * @list:	The free list describing the number of free entries available
  *		from each index.
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index 479fc145acfc..131a0a66781b 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -82,6 +82,7 @@ config ARCH_HAS_FORCE_DMA_UNENCRYPTED
 config SWIOTLB
 	bool
 	select NEED_DMA_MAP_STATE
+	select OF_EARLY_FLATTREE
 
 #
 # Should be selected if we can mmap non-coherent mappings to userspace.
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index e4368159f88a..7fb2ac087d23 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -36,6 +36,11 @@
 #include <linux/scatterlist.h>
 #include <linux/mem_encrypt.h>
 #include <linux/set_memory.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/of_reserved_mem.h>
+#include <linux/slab.h>
 #ifdef CONFIG_DEBUG_FS
 #include <linux/debugfs.h>
 #endif
@@ -319,20 +324,21 @@ static void swiotlb_cleanup(void)
 	max_segment = 0;
 }
 
-int
-swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
+static int swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
+				   size_t size)
 {
-	struct io_tlb_mem *mem = &io_tlb_default_mem;
-	unsigned long i, bytes;
+	unsigned long i;
+	void *vaddr = phys_to_virt(start);
 
-	bytes = nslabs << IO_TLB_SHIFT;
+	size = ALIGN(size, 1 << IO_TLB_SHIFT);
+	mem->nslabs = size >> IO_TLB_SHIFT;
+	mem->nslabs = ALIGN(mem->nslabs, IO_TLB_SEGSIZE);
 
-	mem->nslabs = nslabs;
-	mem->start = virt_to_phys(tlb);
-	mem->end = mem->start + bytes;
+	mem->start = start;
+	mem->end = mem->start + size;
 
-	set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
-	memset(tlb, 0, bytes);
+	set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
+	memset(vaddr, 0, size);
 
 	/*
 	 * Allocate and initialize the free list array.  This array is used
@@ -356,13 +362,6 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 		mem->orig_addr[i] = INVALID_PHYS_ADDR;
 	}
 	mem->index = 0;
-	no_iotlb_memory = false;
-
-	swiotlb_print_info();
-
-	late_alloc = 1;
-
-	swiotlb_set_max_segment(mem->nslabs << IO_TLB_SHIFT);
 
 	return 0;
 
@@ -375,6 +374,27 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 	return -ENOMEM;
 }
 
+int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
+{
+	struct io_tlb_mem *mem = &io_tlb_default_mem;
+	unsigned long bytes = nslabs << IO_TLB_SHIFT;
+	int ret;
+
+	ret = swiotlb_init_io_tlb_mem(mem, virt_to_phys(tlb), bytes);
+	if (ret)
+		return ret;
+
+	no_iotlb_memory = false;
+
+	swiotlb_print_info();
+
+	late_alloc = 1;
+
+	swiotlb_set_max_segment(bytes);
+
+	return 0;
+}
+
 void __init swiotlb_exit(void)
 {
 	struct io_tlb_mem *mem = &io_tlb_default_mem;
@@ -703,16 +723,96 @@ bool is_swiotlb_active(void)
 
 #ifdef CONFIG_DEBUG_FS
 
-static int __init swiotlb_create_debugfs(void)
+static void swiotlb_create_debugfs(struct io_tlb_mem *mem, const char *name,
+				   struct dentry *node)
 {
-	struct io_tlb_mem *mem = &io_tlb_default_mem;
-
-	mem->debugfs = debugfs_create_dir("swiotlb", NULL);
+	mem->debugfs = debugfs_create_dir(name, node);
 	debugfs_create_ulong("io_tlb_nslabs", 0400, mem->debugfs, &mem->nslabs);
 	debugfs_create_ulong("io_tlb_used", 0400, mem->debugfs, &mem->used);
+}
+
+static int __init swiotlb_create_default_debugfs(void)
+{
+	swiotlb_create_debugfs(&io_tlb_default_mem, "swiotlb", NULL);
+
 	return 0;
 }
 
-late_initcall(swiotlb_create_debugfs);
+late_initcall(swiotlb_create_default_debugfs);
 
 #endif
+
+static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
+				    struct device *dev)
+{
+	struct io_tlb_mem *mem = rmem->priv;
+	int ret;
+
+	if (dev->dma_io_tlb_mem)
+		return -EBUSY;
+
+	if (!mem) {
+		mem = kzalloc(sizeof(*mem), GFP_KERNEL);
+		if (!mem)
+			return -ENOMEM;
+
+		if (!memremap(rmem->base, rmem->size, MEMREMAP_WB)) {
+			ret = -EINVAL;
+			goto cleanup;
+		}
+
+		ret = swiotlb_init_io_tlb_mem(mem, rmem->base, rmem->size);
+		if (ret)
+			goto cleanup;
+
+		rmem->priv = mem;
+	}
+
+#ifdef CONFIG_DEBUG_FS
+	swiotlb_create_debugfs(mem, dev_name(dev), io_tlb_default_mem.debugfs);
+#endif
+
+	dev->dma_io_tlb_mem = mem;
+
+	return 0;
+
+cleanup:
+	kfree(mem);
+
+	return ret;
+}
+
+static void rmem_swiotlb_device_release(struct reserved_mem *rmem,
+					struct device *dev)
+{
+	if (!dev)
+		return;
+
+#ifdef CONFIG_DEBUG_FS
+	debugfs_remove_recursive(dev->dma_io_tlb_mem->debugfs);
+#endif
+	dev->dma_io_tlb_mem = NULL;
+}
+
+static const struct reserved_mem_ops rmem_swiotlb_ops = {
+	.device_init	= rmem_swiotlb_device_init,
+	.device_release	= rmem_swiotlb_device_release,
+};
+
+static int __init rmem_swiotlb_setup(struct reserved_mem *rmem)
+{
+	unsigned long node = rmem->fdt_node;
+
+	if (of_get_flat_dt_prop(node, "reusable", NULL) ||
+	    of_get_flat_dt_prop(node, "linux,cma-default", NULL) ||
+	    of_get_flat_dt_prop(node, "linux,dma-default", NULL) ||
+	    of_get_flat_dt_prop(node, "no-map", NULL))
+		return -EINVAL;
+
+	rmem->ops = &rmem_swiotlb_ops;
+	pr_info("Reserved memory: created device swiotlb memory pool at %pa, size %ld MiB\n",
+		&rmem->base, (unsigned long)rmem->size / SZ_1M);
+	return 0;
+}
+
+RESERVEDMEM_OF_DECLARE(dma, "restricted-dma-pool", rmem_swiotlb_setup);
-- 
2.29.2.729.g45daf8777d-goog


^ permalink raw reply related

* [RFC PATCH v3 1/6] swiotlb: Add io_tlb_mem struct
From: Claire Chang @ 2021-01-06  3:41 UTC (permalink / raw)
  To: robh+dt, mpe, benh, paulus, joro, will, frowand.list, konrad.wilk,
	boris.ostrovsky, jgross, sstabellini, hch, m.szyprowski,
	robin.murphy
  Cc: heikki.krogerus, peterz, grant.likely, mingo, drinkcat, saravanak,
	xypron.glpk, rafael.j.wysocki, bgolaszewski, xen-devel, treding,
	devicetree, Claire Chang, dan.j.williams, andriy.shevchenko,
	gregkh, rdunlap, linux-kernel, tfiga, iommu, linuxppc-dev,
	bauerman
In-Reply-To: <20210106034124.30560-1-tientzu@chromium.org>

Added a new struct, io_tlb_mem, as the IO TLB memory pool descriptor and
moved relevant global variables into that struct.
This will be useful later to allow for restricted DMA pool.

Signed-off-by: Claire Chang <tientzu@chromium.org>
---
 arch/powerpc/platforms/pseries/svm.c |   4 +-
 drivers/xen/swiotlb-xen.c            |   4 +-
 include/linux/swiotlb.h              |  39 +++-
 kernel/dma/swiotlb.c                 | 292 +++++++++++++--------------
 4 files changed, 178 insertions(+), 161 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c
index 7b739cc7a8a9..2b767f1ca5fd 100644
--- a/arch/powerpc/platforms/pseries/svm.c
+++ b/arch/powerpc/platforms/pseries/svm.c
@@ -55,8 +55,8 @@ void __init svm_swiotlb_init(void)
 	if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, false))
 		return;
 
-	if (io_tlb_start)
-		memblock_free_early(io_tlb_start,
+	if (io_tlb_default_mem.start)
+		memblock_free_early(io_tlb_default_mem.start,
 				    PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
 	panic("SVM: Cannot allocate SWIOTLB buffer");
 }
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 2b385c1b4a99..4d17dff7ffd2 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -192,8 +192,8 @@ int __ref xen_swiotlb_init(int verbose, bool early)
 	/*
 	 * IO TLB memory already allocated. Just use it.
 	 */
-	if (io_tlb_start != 0) {
-		xen_io_tlb_start = phys_to_virt(io_tlb_start);
+	if (io_tlb_default_mem.start != 0) {
+		xen_io_tlb_start = phys_to_virt(io_tlb_default_mem.start);
 		goto end;
 	}
 
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index d9c9fc9ca5d2..dd8eb57cbb8f 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -70,11 +70,46 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys,
 
 #ifdef CONFIG_SWIOTLB
 extern enum swiotlb_force swiotlb_force;
-extern phys_addr_t io_tlb_start, io_tlb_end;
+
+/**
+ * struct io_tlb_mem - IO TLB Memory Pool Descriptor
+ *
+ * @start:	The start address of the swiotlb memory pool. Used to do a quick
+ *		range check to see if the memory was in fact allocated by this
+ *		API.
+ * @end:	The end address of the swiotlb memory pool. Used to do a quick
+ *		range check to see if the memory was in fact allocated by this
+ *		API.
+ * @nslabs:	The number of IO TLB blocks (in groups of 64) between @start and
+ *		@end. This is command line adjustable via setup_io_tlb_npages.
+ * @used:	The number of used IO TLB block.
+ * @list:	The free list describing the number of free entries available
+ *		from each index.
+ * @index:	The index to start searching in the next round.
+ * @orig_addr:	The original address corresponding to a mapped entry for the
+ *		sync operations.
+ * @lock:	The lock to protect the above data structures in the map and
+ *		unmap calls.
+ * @debugfs:	The dentry to debugfs.
+ */
+struct io_tlb_mem {
+	phys_addr_t start;
+	phys_addr_t end;
+	unsigned long nslabs;
+	unsigned long used;
+	unsigned int *list;
+	unsigned int index;
+	phys_addr_t *orig_addr;
+	spinlock_t lock;
+	struct dentry *debugfs;
+};
+extern struct io_tlb_mem io_tlb_default_mem;
 
 static inline bool is_swiotlb_buffer(phys_addr_t paddr)
 {
-	return paddr >= io_tlb_start && paddr < io_tlb_end;
+	struct io_tlb_mem *mem = &io_tlb_default_mem;
+
+	return paddr >= mem->start && paddr < mem->end;
 }
 
 void __init swiotlb_exit(void);
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 7c42df6e6100..e4368159f88a 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -61,33 +61,11 @@
  * allocate a contiguous 1MB, we're probably in trouble anyway.
  */
 #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
+#define INVALID_PHYS_ADDR (~(phys_addr_t)0)
 
 enum swiotlb_force swiotlb_force;
 
-/*
- * Used to do a quick range check in swiotlb_tbl_unmap_single and
- * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
- * API.
- */
-phys_addr_t io_tlb_start, io_tlb_end;
-
-/*
- * The number of IO TLB blocks (in groups of 64) between io_tlb_start and
- * io_tlb_end.  This is command line adjustable via setup_io_tlb_npages.
- */
-static unsigned long io_tlb_nslabs;
-
-/*
- * The number of used IO TLB block
- */
-static unsigned long io_tlb_used;
-
-/*
- * This is a free list describing the number of free entries available from
- * each index
- */
-static unsigned int *io_tlb_list;
-static unsigned int io_tlb_index;
+struct io_tlb_mem io_tlb_default_mem;
 
 /*
  * Max segment that we can provide which (if pages are contingous) will
@@ -95,27 +73,17 @@ static unsigned int io_tlb_index;
  */
 static unsigned int max_segment;
 
-/*
- * We need to save away the original address corresponding to a mapped entry
- * for the sync operations.
- */
-#define INVALID_PHYS_ADDR (~(phys_addr_t)0)
-static phys_addr_t *io_tlb_orig_addr;
-
-/*
- * Protect the above data structures in the map and unmap calls
- */
-static DEFINE_SPINLOCK(io_tlb_lock);
-
 static int late_alloc;
 
 static int __init
 setup_io_tlb_npages(char *str)
 {
+	struct io_tlb_mem *mem = &io_tlb_default_mem;
+
 	if (isdigit(*str)) {
-		io_tlb_nslabs = simple_strtoul(str, &str, 0);
+		mem->nslabs = simple_strtoul(str, &str, 0);
 		/* avoid tail segment of size < IO_TLB_SEGSIZE */
-		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+		mem->nslabs = ALIGN(mem->nslabs, IO_TLB_SEGSIZE);
 	}
 	if (*str == ',')
 		++str;
@@ -123,7 +91,7 @@ setup_io_tlb_npages(char *str)
 		swiotlb_force = SWIOTLB_FORCE;
 	} else if (!strcmp(str, "noforce")) {
 		swiotlb_force = SWIOTLB_NO_FORCE;
-		io_tlb_nslabs = 1;
+		mem->nslabs = 1;
 	}
 
 	return 0;
@@ -134,7 +102,7 @@ static bool no_iotlb_memory;
 
 unsigned long swiotlb_nr_tbl(void)
 {
-	return unlikely(no_iotlb_memory) ? 0 : io_tlb_nslabs;
+	return unlikely(no_iotlb_memory) ? 0 : io_tlb_default_mem.nslabs;
 }
 EXPORT_SYMBOL_GPL(swiotlb_nr_tbl);
 
@@ -156,13 +124,14 @@ unsigned long swiotlb_size_or_default(void)
 {
 	unsigned long size;
 
-	size = io_tlb_nslabs << IO_TLB_SHIFT;
+	size = io_tlb_default_mem.nslabs << IO_TLB_SHIFT;
 
 	return size ? size : (IO_TLB_DEFAULT_SIZE);
 }
 
 void __init swiotlb_adjust_size(unsigned long new_size)
 {
+	struct io_tlb_mem *mem = &io_tlb_default_mem;
 	unsigned long size;
 
 	/*
@@ -170,10 +139,10 @@ void __init swiotlb_adjust_size(unsigned long new_size)
 	 * architectures such as those supporting memory encryption to
 	 * adjust/expand SWIOTLB size for their use.
 	 */
-	if (!io_tlb_nslabs) {
+	if (!mem->nslabs) {
 		size = ALIGN(new_size, 1 << IO_TLB_SHIFT);
-		io_tlb_nslabs = size >> IO_TLB_SHIFT;
-		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+		mem->nslabs = size >> IO_TLB_SHIFT;
+		mem->nslabs = ALIGN(mem->nslabs, IO_TLB_SEGSIZE);
 
 		pr_info("SWIOTLB bounce buffer size adjusted to %luMB", size >> 20);
 	}
@@ -181,14 +150,15 @@ void __init swiotlb_adjust_size(unsigned long new_size)
 
 void swiotlb_print_info(void)
 {
-	unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+	struct io_tlb_mem *mem = &io_tlb_default_mem;
+	unsigned long bytes = mem->nslabs << IO_TLB_SHIFT;
 
 	if (no_iotlb_memory) {
 		pr_warn("No low mem\n");
 		return;
 	}
 
-	pr_info("mapped [mem %pa-%pa] (%luMB)\n", &io_tlb_start, &io_tlb_end,
+	pr_info("mapped [mem %pa-%pa] (%luMB)\n", &mem->start, &mem->end,
 	       bytes >> 20);
 }
 
@@ -200,57 +170,59 @@ void swiotlb_print_info(void)
  */
 void __init swiotlb_update_mem_attributes(void)
 {
+	struct io_tlb_mem *mem = &io_tlb_default_mem;
 	void *vaddr;
 	unsigned long bytes;
 
 	if (no_iotlb_memory || late_alloc)
 		return;
 
-	vaddr = phys_to_virt(io_tlb_start);
-	bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT);
+	vaddr = phys_to_virt(mem->start);
+	bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT);
 	set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
 	memset(vaddr, 0, bytes);
 }
 
 int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
 {
+	struct io_tlb_mem *mem = &io_tlb_default_mem;
 	unsigned long i, bytes;
 	size_t alloc_size;
 
 	bytes = nslabs << IO_TLB_SHIFT;
 
-	io_tlb_nslabs = nslabs;
-	io_tlb_start = __pa(tlb);
-	io_tlb_end = io_tlb_start + bytes;
+	mem->nslabs = nslabs;
+	mem->start = __pa(tlb);
+	mem->end = mem->start + bytes;
 
 	/*
 	 * Allocate and initialize the free list array.  This array is used
 	 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
-	 * between io_tlb_start and io_tlb_end.
+	 * between mem->start and mem->end.
 	 */
-	alloc_size = PAGE_ALIGN(io_tlb_nslabs * sizeof(int));
-	io_tlb_list = memblock_alloc(alloc_size, PAGE_SIZE);
-	if (!io_tlb_list)
+	alloc_size = PAGE_ALIGN(mem->nslabs * sizeof(int));
+	mem->list = memblock_alloc(alloc_size, PAGE_SIZE);
+	if (!mem->list)
 		panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
 		      __func__, alloc_size, PAGE_SIZE);
 
-	alloc_size = PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t));
-	io_tlb_orig_addr = memblock_alloc(alloc_size, PAGE_SIZE);
-	if (!io_tlb_orig_addr)
+	alloc_size = PAGE_ALIGN(mem->nslabs * sizeof(phys_addr_t));
+	mem->orig_addr = memblock_alloc(alloc_size, PAGE_SIZE);
+	if (!mem->orig_addr)
 		panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
 		      __func__, alloc_size, PAGE_SIZE);
 
-	for (i = 0; i < io_tlb_nslabs; i++) {
-		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
-		io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+	for (i = 0; i < mem->nslabs; i++) {
+		mem->list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
+		mem->orig_addr[i] = INVALID_PHYS_ADDR;
 	}
-	io_tlb_index = 0;
+	mem->index = 0;
 	no_iotlb_memory = false;
 
 	if (verbose)
 		swiotlb_print_info();
 
-	swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT);
+	swiotlb_set_max_segment(mem->nslabs << IO_TLB_SHIFT);
 	return 0;
 }
 
@@ -261,26 +233,27 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
 void  __init
 swiotlb_init(int verbose)
 {
+	struct io_tlb_mem *mem = &io_tlb_default_mem;
 	size_t default_size = IO_TLB_DEFAULT_SIZE;
 	unsigned char *vstart;
 	unsigned long bytes;
 
-	if (!io_tlb_nslabs) {
-		io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
-		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+	if (!mem->nslabs) {
+		mem->nslabs = (default_size >> IO_TLB_SHIFT);
+		mem->nslabs = ALIGN(mem->nslabs, IO_TLB_SEGSIZE);
 	}
 
-	bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+	bytes = mem->nslabs << IO_TLB_SHIFT;
 
 	/* Get IO TLB memory from the low pages */
 	vstart = memblock_alloc_low(PAGE_ALIGN(bytes), PAGE_SIZE);
-	if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose))
+	if (vstart && !swiotlb_init_with_tbl(vstart, mem->nslabs, verbose))
 		return;
 
-	if (io_tlb_start) {
-		memblock_free_early(io_tlb_start,
-				    PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
-		io_tlb_start = 0;
+	if (mem->start) {
+		memblock_free_early(mem->start,
+				    PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT));
+		mem->start = 0;
 	}
 	pr_warn("Cannot allocate buffer");
 	no_iotlb_memory = true;
@@ -294,22 +267,23 @@ swiotlb_init(int verbose)
 int
 swiotlb_late_init_with_default_size(size_t default_size)
 {
-	unsigned long bytes, req_nslabs = io_tlb_nslabs;
+	struct io_tlb_mem *mem = &io_tlb_default_mem;
+	unsigned long bytes, req_nslabs = mem->nslabs;
 	unsigned char *vstart = NULL;
 	unsigned int order;
 	int rc = 0;
 
-	if (!io_tlb_nslabs) {
-		io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
-		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+	if (!mem->nslabs) {
+		mem->nslabs = (default_size >> IO_TLB_SHIFT);
+		mem->nslabs = ALIGN(mem->nslabs, IO_TLB_SEGSIZE);
 	}
 
 	/*
 	 * Get IO TLB memory from the low pages
 	 */
-	order = get_order(io_tlb_nslabs << IO_TLB_SHIFT);
-	io_tlb_nslabs = SLABS_PER_PAGE << order;
-	bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+	order = get_order(mem->nslabs << IO_TLB_SHIFT);
+	mem->nslabs = SLABS_PER_PAGE << order;
+	bytes = mem->nslabs << IO_TLB_SHIFT;
 
 	while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
 		vstart = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
@@ -320,15 +294,15 @@ swiotlb_late_init_with_default_size(size_t default_size)
 	}
 
 	if (!vstart) {
-		io_tlb_nslabs = req_nslabs;
+		mem->nslabs = req_nslabs;
 		return -ENOMEM;
 	}
 	if (order != get_order(bytes)) {
 		pr_warn("only able to allocate %ld MB\n",
 			(PAGE_SIZE << order) >> 20);
-		io_tlb_nslabs = SLABS_PER_PAGE << order;
+		mem->nslabs = SLABS_PER_PAGE << order;
 	}
-	rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs);
+	rc = swiotlb_late_init_with_tbl(vstart, mem->nslabs);
 	if (rc)
 		free_pages((unsigned long)vstart, order);
 
@@ -337,22 +311,25 @@ swiotlb_late_init_with_default_size(size_t default_size)
 
 static void swiotlb_cleanup(void)
 {
-	io_tlb_end = 0;
-	io_tlb_start = 0;
-	io_tlb_nslabs = 0;
+	struct io_tlb_mem *mem = &io_tlb_default_mem;
+
+	mem->end = 0;
+	mem->start = 0;
+	mem->nslabs = 0;
 	max_segment = 0;
 }
 
 int
 swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 {
+	struct io_tlb_mem *mem = &io_tlb_default_mem;
 	unsigned long i, bytes;
 
 	bytes = nslabs << IO_TLB_SHIFT;
 
-	io_tlb_nslabs = nslabs;
-	io_tlb_start = virt_to_phys(tlb);
-	io_tlb_end = io_tlb_start + bytes;
+	mem->nslabs = nslabs;
+	mem->start = virt_to_phys(tlb);
+	mem->end = mem->start + bytes;
 
 	set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
 	memset(tlb, 0, bytes);
@@ -360,39 +337,39 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 	/*
 	 * Allocate and initialize the free list array.  This array is used
 	 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
-	 * between io_tlb_start and io_tlb_end.
+	 * between mem->start and mem->end.
 	 */
-	io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
-	                              get_order(io_tlb_nslabs * sizeof(int)));
-	if (!io_tlb_list)
+	mem->list = (unsigned int *)__get_free_pages(GFP_KERNEL,
+	                              get_order(mem->nslabs * sizeof(int)));
+	if (!mem->list)
 		goto cleanup3;
 
-	io_tlb_orig_addr = (phys_addr_t *)
+	mem->orig_addr = (phys_addr_t *)
 		__get_free_pages(GFP_KERNEL,
-				 get_order(io_tlb_nslabs *
+				 get_order(mem->nslabs *
 					   sizeof(phys_addr_t)));
-	if (!io_tlb_orig_addr)
+	if (!mem->orig_addr)
 		goto cleanup4;
 
-	for (i = 0; i < io_tlb_nslabs; i++) {
-		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
-		io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+	for (i = 0; i < mem->nslabs; i++) {
+		mem->list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
+		mem->orig_addr[i] = INVALID_PHYS_ADDR;
 	}
-	io_tlb_index = 0;
+	mem->index = 0;
 	no_iotlb_memory = false;
 
 	swiotlb_print_info();
 
 	late_alloc = 1;
 
-	swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT);
+	swiotlb_set_max_segment(mem->nslabs << IO_TLB_SHIFT);
 
 	return 0;
 
 cleanup4:
-	free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
-	                                                 sizeof(int)));
-	io_tlb_list = NULL;
+	free_pages((unsigned long)mem->list,
+		   get_order(mem->nslabs * sizeof(int)));
+	mem->list = NULL;
 cleanup3:
 	swiotlb_cleanup();
 	return -ENOMEM;
@@ -400,23 +377,25 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 
 void __init swiotlb_exit(void)
 {
-	if (!io_tlb_orig_addr)
+	struct io_tlb_mem *mem = &io_tlb_default_mem;
+
+	if (!mem->orig_addr)
 		return;
 
 	if (late_alloc) {
-		free_pages((unsigned long)io_tlb_orig_addr,
-			   get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
-		free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
-								 sizeof(int)));
-		free_pages((unsigned long)phys_to_virt(io_tlb_start),
-			   get_order(io_tlb_nslabs << IO_TLB_SHIFT));
+		free_pages((unsigned long)mem->orig_addr,
+			   get_order(mem->nslabs * sizeof(phys_addr_t)));
+		free_pages((unsigned long)mem->list,
+			   get_order(mem->nslabs * sizeof(int)));
+		free_pages((unsigned long)phys_to_virt(mem->start),
+			   get_order(mem->nslabs << IO_TLB_SHIFT));
 	} else {
-		memblock_free_late(__pa(io_tlb_orig_addr),
-				   PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
-		memblock_free_late(__pa(io_tlb_list),
-				   PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
-		memblock_free_late(io_tlb_start,
-				   PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
+		memblock_free_late(__pa(mem->orig_addr),
+				   PAGE_ALIGN(mem->nslabs * sizeof(phys_addr_t)));
+		memblock_free_late(__pa(mem->list),
+				   PAGE_ALIGN(mem->nslabs * sizeof(int)));
+		memblock_free_late(mem->start,
+				   PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT));
 	}
 	swiotlb_cleanup();
 }
@@ -465,7 +444,8 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
 		size_t mapping_size, size_t alloc_size,
 		enum dma_data_direction dir, unsigned long attrs)
 {
-	dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(hwdev, io_tlb_start);
+	struct io_tlb_mem *mem = &io_tlb_default_mem;
+	dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(hwdev, mem->start);
 	unsigned long flags;
 	phys_addr_t tlb_addr;
 	unsigned int nslots, stride, index, wrap;
@@ -516,13 +496,13 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
 	 * Find suitable number of IO TLB entries size that will fit this
 	 * request and allocate a buffer from that IO TLB pool.
 	 */
-	spin_lock_irqsave(&io_tlb_lock, flags);
+	spin_lock_irqsave(&mem->lock, flags);
 
-	if (unlikely(nslots > io_tlb_nslabs - io_tlb_used))
+	if (unlikely(nslots > mem->nslabs - mem->used))
 		goto not_found;
 
-	index = ALIGN(io_tlb_index, stride);
-	if (index >= io_tlb_nslabs)
+	index = ALIGN(mem->index, stride);
+	if (index >= mem->nslabs)
 		index = 0;
 	wrap = index;
 
@@ -530,7 +510,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
 		while (iommu_is_span_boundary(index, nslots, offset_slots,
 					      max_slots)) {
 			index += stride;
-			if (index >= io_tlb_nslabs)
+			if (index >= mem->nslabs)
 				index = 0;
 			if (index == wrap)
 				goto not_found;
@@ -541,40 +521,40 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
 		 * contiguous buffers, we allocate the buffers from that slot
 		 * and mark the entries as '0' indicating unavailable.
 		 */
-		if (io_tlb_list[index] >= nslots) {
+		if (mem->list[index] >= nslots) {
 			int count = 0;
 
 			for (i = index; i < (int) (index + nslots); i++)
-				io_tlb_list[i] = 0;
-			for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
-				io_tlb_list[i] = ++count;
-			tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT);
+				mem->list[i] = 0;
+			for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && mem->list[i]; i--)
+				mem->list[i] = ++count;
+			tlb_addr = mem->start + (index << IO_TLB_SHIFT);
 
 			/*
 			 * Update the indices to avoid searching in the next
 			 * round.
 			 */
-			io_tlb_index = ((index + nslots) < io_tlb_nslabs
-					? (index + nslots) : 0);
+			mem->index = ((index + nslots) < mem->nslabs
+				      ? (index + nslots) : 0);
 
 			goto found;
 		}
 		index += stride;
-		if (index >= io_tlb_nslabs)
+		if (index >= mem->nslabs)
 			index = 0;
 	} while (index != wrap);
 
 not_found:
-	tmp_io_tlb_used = io_tlb_used;
+	tmp_io_tlb_used = mem->used;
 
-	spin_unlock_irqrestore(&io_tlb_lock, flags);
+	spin_unlock_irqrestore(&mem->lock, flags);
 	if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit())
 		dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
-			 alloc_size, io_tlb_nslabs, tmp_io_tlb_used);
+			 alloc_size, mem->nslabs, tmp_io_tlb_used);
 	return (phys_addr_t)DMA_MAPPING_ERROR;
 found:
-	io_tlb_used += nslots;
-	spin_unlock_irqrestore(&io_tlb_lock, flags);
+	mem->used += nslots;
+	spin_unlock_irqrestore(&mem->lock, flags);
 
 	/*
 	 * Save away the mapping from the original address to the DMA address.
@@ -582,7 +562,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
 	 * needed.
 	 */
 	for (i = 0; i < nslots; i++)
-		io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
+		mem->orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
 	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
 	    (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
 		swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE);
@@ -597,10 +577,11 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
 			      size_t mapping_size, size_t alloc_size,
 			      enum dma_data_direction dir, unsigned long attrs)
 {
+	struct io_tlb_mem *mem = &io_tlb_default_mem;
 	unsigned long flags;
 	int i, count, nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
-	int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
-	phys_addr_t orig_addr = io_tlb_orig_addr[index];
+	int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT;
+	phys_addr_t orig_addr = mem->orig_addr[index];
 
 	/*
 	 * First, sync the memory before unmapping the entry
@@ -616,36 +597,37 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
 	 * While returning the entries to the free list, we merge the entries
 	 * with slots below and above the pool being returned.
 	 */
-	spin_lock_irqsave(&io_tlb_lock, flags);
+	spin_lock_irqsave(&mem->lock, flags);
 	{
 		count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
-			 io_tlb_list[index + nslots] : 0);
+			 mem->list[index + nslots] : 0);
 		/*
 		 * Step 1: return the slots to the free list, merging the
 		 * slots with superceeding slots
 		 */
 		for (i = index + nslots - 1; i >= index; i--) {
-			io_tlb_list[i] = ++count;
-			io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+			mem->list[i] = ++count;
+			mem->orig_addr[i] = INVALID_PHYS_ADDR;
 		}
 		/*
 		 * Step 2: merge the returned slots with the preceding slots,
 		 * if available (non zero)
 		 */
-		for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--)
-			io_tlb_list[i] = ++count;
+		for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && mem->list[i]; i--)
+			mem->list[i] = ++count;
 
-		io_tlb_used -= nslots;
+		mem->used -= nslots;
 	}
-	spin_unlock_irqrestore(&io_tlb_lock, flags);
+	spin_unlock_irqrestore(&mem->lock, flags);
 }
 
 void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
 			     size_t size, enum dma_data_direction dir,
 			     enum dma_sync_target target)
 {
-	int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
-	phys_addr_t orig_addr = io_tlb_orig_addr[index];
+	struct io_tlb_mem *mem = &io_tlb_default_mem;
+	int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT;
+	phys_addr_t orig_addr = mem->orig_addr[index];
 
 	if (orig_addr == INVALID_PHYS_ADDR)
 		return;
@@ -713,21 +695,21 @@ size_t swiotlb_max_mapping_size(struct device *dev)
 bool is_swiotlb_active(void)
 {
 	/*
-	 * When SWIOTLB is initialized, even if io_tlb_start points to physical
-	 * address zero, io_tlb_end surely doesn't.
+	 * When SWIOTLB is initialized, even if mem->start points to physical
+	 * address zero, mem->end surely doesn't.
 	 */
-	return io_tlb_end != 0;
+	return io_tlb_default_mem.end != 0;
 }
 
 #ifdef CONFIG_DEBUG_FS
 
 static int __init swiotlb_create_debugfs(void)
 {
-	struct dentry *root;
+	struct io_tlb_mem *mem = &io_tlb_default_mem;
 
-	root = debugfs_create_dir("swiotlb", NULL);
-	debugfs_create_ulong("io_tlb_nslabs", 0400, root, &io_tlb_nslabs);
-	debugfs_create_ulong("io_tlb_used", 0400, root, &io_tlb_used);
+	mem->debugfs = debugfs_create_dir("swiotlb", NULL);
+	debugfs_create_ulong("io_tlb_nslabs", 0400, mem->debugfs, &mem->nslabs);
+	debugfs_create_ulong("io_tlb_used", 0400, mem->debugfs, &mem->used);
 	return 0;
 }
 
-- 
2.29.2.729.g45daf8777d-goog

To make this change as mechanical as possible, I didn't fix any
checkpatch.pl ERROR/WARNING.

^ permalink raw reply related

* [RFC PATCH v3 0/6] Restricted DMA
From: Claire Chang @ 2021-01-06  3:41 UTC (permalink / raw)
  To: robh+dt, mpe, benh, paulus, joro, will, frowand.list, konrad.wilk,
	boris.ostrovsky, jgross, sstabellini, hch, m.szyprowski,
	robin.murphy
  Cc: heikki.krogerus, peterz, grant.likely, mingo, drinkcat, saravanak,
	xypron.glpk, rafael.j.wysocki, bgolaszewski, xen-devel, treding,
	devicetree, Claire Chang, dan.j.williams, andriy.shevchenko,
	gregkh, rdunlap, linux-kernel, tfiga, iommu, linuxppc-dev,
	bauerman

This series implements mitigations for lack of DMA access control on
systems without an IOMMU, which could result in the DMA accessing the
system memory at unexpected times and/or unexpected addresses, possibly
leading to data leakage or corruption.

For example, we plan to use the PCI-e bus for Wi-Fi and that PCI-e bus is
not behind an IOMMU. As PCI-e, by design, gives the device full access to
system memory, a vulnerability in the Wi-Fi firmware could easily escalate
to a full system exploit (remote wifi exploits: [1a], [1b] that shows a
full chain of exploits; [2], [3]).

To mitigate the security concerns, we introduce restricted DMA. Restricted
DMA utilizes the existing swiotlb to bounce streaming DMA in and out of a
specially allocated region and does memory allocation from the same region.
The feature on its own provides a basic level of protection against the DMA
overwriting buffer contents at unexpected times. However, to protect
against general data leakage and system memory corruption, the system needs
to provide a way to restrict the DMA to a predefined memory region (this is
usually done at firmware level, e.g. in ATF on some ARM platforms).

[1a] https://googleprojectzero.blogspot.com/2017/04/over-air-exploiting-broadcoms-wi-fi_4.html
[1b] https://googleprojectzero.blogspot.com/2017/04/over-air-exploiting-broadcoms-wi-fi_11.html
[2] https://blade.tencent.com/en/advisories/qualpwn/
[3] https://www.bleepingcomputer.com/news/security/vulnerabilities-found-in-highly-popular-firmware-for-wifi-chips/

Claire Chang (6):
  swiotlb: Add io_tlb_mem struct
  swiotlb: Add restricted DMA pool
  swiotlb: Use restricted DMA pool if available
  swiotlb: Add restricted DMA alloc/free support.
  dt-bindings: of: Add restricted DMA pool
  of: Add plumbing for restricted DMA pool

 .../reserved-memory/reserved-memory.txt       |  24 +
 arch/powerpc/platforms/pseries/svm.c          |   4 +-
 drivers/iommu/dma-iommu.c                     |  12 +-
 drivers/of/address.c                          |  21 +
 drivers/of/device.c                           |   4 +
 drivers/of/of_private.h                       |   5 +
 drivers/xen/swiotlb-xen.c                     |   4 +-
 include/linux/device.h                        |   4 +
 include/linux/swiotlb.h                       |  61 +-
 kernel/dma/Kconfig                            |   1 +
 kernel/dma/direct.c                           |  20 +-
 kernel/dma/direct.h                           |  10 +-
 kernel/dma/swiotlb.c                          | 576 +++++++++++-------
 13 files changed, 514 insertions(+), 232 deletions(-)

-- 
2.29.2.729.g45daf8777d-goog

v3: 
  Using only one reserved memory region for both streaming DMA and memory
  allocation.

v2:
  Building on top of swiotlb.
  https://lore.kernel.org/patchwork/cover/1280705/

v1:
  Using dma_map_ops.
  https://lore.kernel.org/patchwork/cover/1271660/

^ permalink raw reply

* Re: [PATCH v2] net: ethernet: fs_enet: Add missing MODULE_LICENSE
From: David Miller @ 2021-01-06  0:57 UTC (permalink / raw)
  To: mpe; +Cc: andrew, netdev, linux-kernel, linuxppc-dev, kuba
In-Reply-To: <20210105091515.87509-1-mpe@ellerman.id.au>

From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue,  5 Jan 2021 20:15:15 +1100

> Since commit 1d6cd3929360 ("modpost: turn missing MODULE_LICENSE()
> into error") the ppc32_allmodconfig build fails with:
> 
>   ERROR: modpost: missing MODULE_LICENSE() in drivers/net/ethernet/freescale/fs_enet/mii-fec.o
>   ERROR: modpost: missing MODULE_LICENSE() in drivers/net/ethernet/freescale/fs_enet/mii-bitbang.o
> 
> Add the missing MODULE_LICENSEs to fix the build. Both files include a
> copyright header indicating they are GPL v2.
> 
> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>

Applied.

^ permalink raw reply

* Re: [RFC please help] membarrier: Rewrite sync_core_before_usermode()
From: Will Deacon @ 2021-01-05 22:41 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Arnd Bergmann, X86 ML, LKML, Nicholas Piggin, Mathieu Desnoyers,
	Andy Lutomirski, Catalin Marinas, Paul Mackerras, stable,
	linuxppc-dev, linux-arm-kernel
In-Reply-To: <7BFAB97C-1949-46A3-A1E2-DFE108DC7D5E@amacapital.net>

On Tue, Jan 05, 2021 at 08:20:51AM -0800, Andy Lutomirski wrote:
> > On Jan 5, 2021, at 5:26 AM, Will Deacon <will@kernel.org> wrote:
> > Sorry for the slow reply, I was socially distanced from my keyboard.
> > 
> >> On Mon, Dec 28, 2020 at 04:36:11PM -0800, Andy Lutomirski wrote:
> >> On Mon, Dec 28, 2020 at 4:11 PM Nicholas Piggin <npiggin@gmail.com> wrote:
> >>>> +static inline void membarrier_sync_core_before_usermode(void)
> >>>> +{
> >>>> +     /*
> >>>> +      * XXX: I know basically nothing about powerpc cache management.
> >>>> +      * Is this correct?
> >>>> +      */
> >>>> +     isync();
> >>> 
> >>> This is not about memory ordering or cache management, it's about
> >>> pipeline management. Powerpc's return to user mode serializes the
> >>> CPU (aka the hardware thread, _not_ the core; another wrongness of
> >>> the name, but AFAIKS the HW thread is what is required for
> >>> membarrier). So this is wrong, powerpc needs nothing here.
> >> 
> >> Fair enough.  I'm happy to defer to you on the powerpc details.  In
> >> any case, this just illustrates that we need feedback from a person
> >> who knows more about ARM64 than I do.
> > 
> > I think we're in a very similar boat to PowerPC, fwiw. Roughly speaking:
> > 
> >  1. SYNC_CORE does _not_ perform any cache management; that is the
> >     responsibility of userspace, either by executing the relevant
> >     maintenance instructions (arm64) or a system call (arm32). Crucially,
> >     the hardware will ensure that this cache maintenance is broadcast
> >     to all other CPUs.
> 
> Is this guaranteed regardless of any aliases?  That is, if I flush from
> one CPU at one VA and then execute the same physical address from another
> CPU at a different VA, does this still work?

The data side will be fine, but the instruction side can have virtual
aliases. We handle this in flush_ptrace_access() by blowing away the whole
I-cache if we're not physically-indexed, but userspace would be in trouble
if it wanted to handle this situation alone.

> >  2. Even with all the cache maintenance in the world, a CPU could have
> >     speculatively fetched stale instructions into its "pipeline" ahead of
> >     time, and these are _not_ flushed by the broadcast maintenance instructions
> >     in (1). SYNC_CORE provides a means for userspace to discard these stale
> >     instructions.
> > 
> >  3. The context synchronization event on exception entry/exit is
> >     sufficient here. The Arm ARM isn't very good at describing what it
> >     does, because it's in denial about the existence of a pipeline, but
> >     it does have snippets such as:
> > 
> >    (s/PE/CPU/)
> >       | For all types of memory:
> >       | The PE might have fetched the instructions from memory at any time
> >       | since the last Context synchronization event on that PE.
> > 
> >     Interestingly, the architecture recently added a control bit to remove
> >     this synchronisation from exception return, so if we set that then we'd
> >     have a problem with SYNC_CORE and adding an ISB would be necessary (and
> >     we could probable then make kernel->kernel returns cheaper, but I
> >     suspect we're relying on this implicit synchronisation in other places
> >     too).
> > 
> 
> Is ISB just a context synchronization event or does it do more?

That's a good question. Barrier instructions on ARM do tend to get
overloaded with extra behaviours over time, so it could certainly end up
doing the context synchronization event + extra stuff in future. Right now,
the only thing that springs to mind is the spectre-v1 heavy mitigation
barrier of 'DSB; ISB' which, for example, probably doesn't work for 'DSB;
ERET' because the ERET can be treated like a conditional (!) branch.

> On x86, it’s very hard to tell that MFENCE does any more than LOCK, but
> it’s much slower.  And we have LFENCE, which, as documented, doesn’t
> appear to have any semantics at all.  (Or at least it didn’t before
> Spectre.)

I tend to think of ISB as a front-end barrier relating to instruction fetch
whereas DMB, acquire/release and DSB are all back-end barriers relating to
memory accesses. You _can_ use ISB in conjunction with control dependencies
to order a pair of loads (like you can with ISYNC on Power), but it's a
really expensive way to do it.

> > Are you seeing a problem in practice, or did this come up while trying to
> > decipher the semantics of SYNC_CORE?
> 
> It came up while trying to understand the code and work through various
> bugs in it.  The code was written using something approximating x86
> terminology, but it was definitely wrong on x86 (at least if you believe
> the SDM, and I haven’t convinced any architects to say otherwise).

Ok, thanks.

Will

^ permalink raw reply

* Power9 NV linux-next random process hang
From: Qian Cai @ 2021-01-05 22:36 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: linux-kernel

.config: https://cailca.coding.net/public/linux/mm/git/files/master/powerpc.config

Today's linux-next starts to generate random process hang quite easily.
Yesterday's build seems work fine. Sometimes, the process stack seems corrupt
while the process is running 100% CPU with gdb shows it just entered a
subroutine that really can't see why it hangs.

[ 6732.309621][T11627] task:ranbug          state:R  running task     stack:24176 pid: 2893 ppid:  2867 flags:0x00040000 
[ 6732.309779][T11627] Call Trace: 
[ 6732.309826][T11627] [c00000006166fa30] [c00000006166fb60] 0xc00000006166fb60 (unreliable) 

Also, running LTP syscalls ended up hanging with lots of zombie process. Any idea?

root        2023  0.0  0.0      0     0 ?        Zs   14:10   0:00 [login] <defunct>
root       52052  0.0  0.0      0     0 pts/0    Z    15:03   0:00 [recv01] <defunct>
root       52054  0.0  0.0      0     0 pts/0    Z    15:03   0:00 [recvfrom01] <defunct>
root       52056  0.0  0.0      0     0 pts/0    Z    15:03   0:00 [recvmsg01] <defunct>
root       52155  0.0  0.0      0     0 pts/0    Z    15:03   0:00 [rt_sigtimedwait] <defunct>
root       52305  0.0  0.0      0     0 pts/0    Z    15:03   0:00 [semctl01] <defunct>
root       52362  0.0  0.0      0     0 pts/0    Z    15:03   0:00 [send01] <defunct>
root       52386  0.0  0.0      0     0 pts/0    Z    15:03   0:00 [sendfile04] <defunct>
root       52387  0.0  0.0      0     0 pts/0    Z    15:03   0:00 [sendfile04] <defunct>
root       52388  0.0  0.0      0     0 pts/0    Z    15:03   0:00 [sendfile04] <defunct>
root       52389  0.0  0.0      0     0 pts/0    Z    15:03   0:00 [sendfile04] <defunct>
root       52390  0.0  0.0      0     0 pts/0    Z    15:03   0:00 [sendfile04] <defunct>
root       52392  0.0  0.0      0     0 pts/0    Z    15:03   0:00 [sendfile04_64] <defunct>
root       52393  0.0  0.0      0     0 pts/0    Z    15:03   0:00 [sendfile04_64] <defunct>
root       52394  0.0  0.0      0     0 pts/0    Z    15:03   0:00 [sendfile04_64] <defunct>
root       52395  0.0  0.0      0     0 pts/0    Z    15:03   0:00 [sendfile04_64] <defunct>
root       52396  0.0  0.0      0     0 pts/0    Z    15:03   0:00 [sendfile04_64] <defunct>
root       52398  0.0  0.0      0     0 pts/0    Z    15:03   0:00 [sendfile05] <defunct>
root       52400  0.0  0.0      0     0 pts/0    Z    15:03   0:00 [sendfile05_64] <defunct>
root       52415  0.0  0.0      0     0 pts/0    Z    15:04   0:00 [sendmsg01] <defunct>
root       53470  0.0  0.0      0     0 pts/0    Z    15:04   0:00 [sendto01] <defunct>
root       53763  0.0  0.0      0     0 pts/0    Z    15:06   0:00 [setrlimit01] <defunct>
root       53764  0.0  0.0      0     0 pts/0    Z    15:06   0:00 [setrlimit01] <defunct>
root       53765  0.0  0.0      0     0 pts/0    Z    15:06   0:00 [setrlimit01] <defunct>
root       53766  0.0  0.0      0     0 pts/0    Z    15:06   0:00 [setrlimit01] <defunct>
root       53767  0.0  0.0      0     0 pts/0    Z    15:06   0:00 [setrlimit01] <defunct>
root       53768  0.0  0.0      0     0 pts/0    Z    15:06   0:00 [setrlimit01] <defunct>
root       53769  0.0  0.0      0     0 pts/0    Z    15:06   0:00 [setrlimit01] <defunct>
root       53770  0.0  0.0      0     0 pts/0    Z    15:06   0:00 [setrlimit01] <defunct>
root       53771  0.0  0.0      0     0 pts/0    Z    15:06   0:00 [setrlimit01] <defunct>
root       53772  0.0  0.0      0     0 pts/0    Z    15:06   0:00 [setrlimit01] <defunct>
root       53773  0.0  0.0      0     0 pts/0    Z    15:06   0:00 [setrlimit01] <defunct>
root       53774  0.0  0.0      0     0 pts/0    Z    15:06   0:00 [setrlimit01] <defunct>
root       53775  0.0  0.0      0     0 pts/0    Z    15:06   0:00 [setrlimit01] <defunct>
root       53776  0.0  0.0      0     0 pts/0    Z    15:06   0:00 [setrlimit01] <defunct>
root       53777  0.0  0.0      0     0 pts/0    Z    15:06   0:00 [setrlimit01] <defunct>
root       53778  0.0  0.0      0     0 pts/0    Z    15:06   0:00 [setrlimit01] <defunct>
root       53779  0.0  0.0      0     0 pts/0    Z    15:06   0:00 [setrlimit01] <defunct>
root       53780  0.0  0.0      0     0 pts/0    Z    15:06   0:00 [setrlimit01] <defunct>
root       53782  0.0  0.0      0     0 pts/0    Z    15:06   0:00 [setrlimit01] <defunct>
nobody     54290  0.0  0.0      0     0 pts/0    Z    15:07   0:00 [sysctl03] <defunct>
root       56813  0.0  0.0      0     0 pts/0    Z    16:09   0:00 [waitpid03] <defunct>
root       56814  0.0  0.0      0     0 pts/0    Z    16:09   0:00 [waitpid03] <defunct>
root       56815  0.0  0.0      0     0 pts/0    Z    16:09   0:00 [waitpid03] <defunct>
root       56816  0.0  0.0      0     0 pts/0    Z    16:09   0:00 [waitpid03] <defunct>
root       56817  0.0  0.0      0     0 pts/0    Z    16:09   0:00 [waitpid03] <defunct>
root       56818  0.0  0.0      0     0 pts/0    Z    16:09   0:00 [waitpid03] <defunct>
root       56819  0.0  0.0      0     0 pts/0    Z    16:09   0:00 [waitpid03] <defunct>
root       56820  0.0  0.0      0     0 pts/0    Z    16:09   0:00 [waitpid03] <defunct>
root       56821  0.0  0.0      0     0 pts/0    Z    16:09   0:00 [waitpid03] <defunct>
root       56822  0.0  0.0      0     0 pts/0    Z    16:09   0:00 [waitpid03] <defunct>
root       56823  0.0  0.0      0     0 pts/0    Z    16:09   0:00 [waitpid03] <defunct>
root       56825  0.0  0.0      0     0 pts/0    Z    16:09   0:00 [waitpid03] <defunct>
root       56826  0.0  0.0      0     0 pts/0    Z    16:09   0:00 [waitpid03] <defunct>
root       56827  0.0  0.0      0     0 pts/0    Z    16:09   0:00 [waitpid03] <defunct>
root       56828  0.0  0.0      0     0 pts/0    Z    16:09   0:00 [waitpid03] <defunct>
root       56829  0.0  0.0      0     0 pts/0    Z    16:09   0:00 [waitpid03] <defunct>
root       56830  0.0  0.0      0     0 pts/0    Z    16:09   0:00 [waitpid03] <defunct>
root       56831  0.0  0.0      0     0 pts/0    Z    16:09   0:00 [waitpid03] <defunct>
root       56832  0.0  0.0      0     0 pts/0    Z    16:09   0:00 [waitpid03] <defunct>
root       56833  0.0  0.0      0     0 pts/0    Z    16:09   0:00 [waitpid03] <defunct>
root       56834  0.0  0.0      0     0 pts/0    Z    16:09   0:00 [waitpid03] <defunct>
root       56835  0.0  0.0      0     0 pts/0    Z    16:09   0:00 [waitpid03] <defunct>
root       56836  0.0  0.0      0     0 pts/0    Z    16:09   0:00 [waitpid03] <defunct>
root       56838  0.0  0.0      0     0 pts/0    Z    16:09   0:00 [waitpid04] <defunct>
sshd       58675  0.0  0.0      0     0 ?        Z    17:21   0:00 [sshd] <defunct>


^ permalink raw reply

* Re: [PATCH v2 -next] misc: ocxl: use DEFINE_MUTEX() for mutex lock
From: Andrew Donnellan @ 2021-01-05 21:53 UTC (permalink / raw)
  To: Zheng Yongjun, linuxppc-dev, linux-kernel; +Cc: fbarrat, gregkh, arnd
In-Reply-To: <20201224132446.31286-1-zhengyongjun3@huawei.com>

On 25/12/20 12:24 am, Zheng Yongjun wrote:
> mutex lock can be initialized automatically with DEFINE_MUTEX()
> rather than explicitly calling mutex_init().
> 
> Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>

Acked-by: Andrew Donnellan <ajd@linux.ibm.com>


-- 
Andrew Donnellan              OzLabs, ADL Canberra
ajd@linux.ibm.com             IBM Australia Limited

^ permalink raw reply

* Re: [RFC please help] membarrier: Rewrite sync_core_before_usermode()
From: Peter Zijlstra @ 2021-01-05 16:37 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: linuxppc-dev, Arnd Bergmann, X86 ML, LKML, Nicholas Piggin,
	Mathieu Desnoyers, Andy Lutomirski, Catalin Marinas,
	Paul Mackerras, stable, Will Deacon, linux-arm-kernel
In-Reply-To: <7BFAB97C-1949-46A3-A1E2-DFE108DC7D5E@amacapital.net>

On Tue, Jan 05, 2021 at 08:20:51AM -0800, Andy Lutomirski wrote:
> >     Interestingly, the architecture recently added a control bit to remove
> >     this synchronisation from exception return, so if we set that then we'd
> >     have a problem with SYNC_CORE and adding an ISB would be necessary (and
> >     we could probable then make kernel->kernel returns cheaper, but I
> >     suspect we're relying on this implicit synchronisation in other places
> >     too).
> > 
> 
> Is ISB just a context synchronization event or does it do more?

IIRC it just the instruction sync (like power ISYNC).

> On x86, it’s very hard to tell that MFENCE does any more than LOCK,
> but it’s much slower.  And we have LFENCE, which, as documented,
> doesn’t appear to have any semantics at all.  (Or at least it didn’t
> before Spectre.)

AFAIU MFENCE is a completion barrier, while LOCK prefix is not. A bit
like ARM's DSB vs DMB.

It is for this reason that mb() is still MFENCE, while our smp_mb() is a
LOCK prefixed NO-OP.

And yes, LFENCE used to be poorly defined and it was sometimes
understood to be a completion barrier relative to prior LOADs, while it
is now a completion barrier for any prior instruction, and really should
be renamed to IFENCE.



^ permalink raw reply

* Re: [RFC please help] membarrier: Rewrite sync_core_before_usermode()
From: Andy Lutomirski @ 2021-01-05 16:20 UTC (permalink / raw)
  To: Will Deacon
  Cc: Arnd Bergmann, X86 ML, LKML, Nicholas Piggin, Mathieu Desnoyers,
	Andy Lutomirski, Catalin Marinas, Paul Mackerras, stable,
	linuxppc-dev, linux-arm-kernel
In-Reply-To: <20210105132623.GB11108@willie-the-truck>



> On Jan 5, 2021, at 5:26 AM, Will Deacon <will@kernel.org> wrote:
> 
> Hi Andy,
> 
> Sorry for the slow reply, I was socially distanced from my keyboard.
> 
>> On Mon, Dec 28, 2020 at 04:36:11PM -0800, Andy Lutomirski wrote:
>> On Mon, Dec 28, 2020 at 4:11 PM Nicholas Piggin <npiggin@gmail.com> wrote:
>>>> +static inline void membarrier_sync_core_before_usermode(void)
>>>> +{
>>>> +     /*
>>>> +      * XXX: I know basically nothing about powerpc cache management.
>>>> +      * Is this correct?
>>>> +      */
>>>> +     isync();
>>> 
>>> This is not about memory ordering or cache management, it's about
>>> pipeline management. Powerpc's return to user mode serializes the
>>> CPU (aka the hardware thread, _not_ the core; another wrongness of
>>> the name, but AFAIKS the HW thread is what is required for
>>> membarrier). So this is wrong, powerpc needs nothing here.
>> 
>> Fair enough.  I'm happy to defer to you on the powerpc details.  In
>> any case, this just illustrates that we need feedback from a person
>> who knows more about ARM64 than I do.
> 
> I think we're in a very similar boat to PowerPC, fwiw. Roughly speaking:
> 
>  1. SYNC_CORE does _not_ perform any cache management; that is the
>     responsibility of userspace, either by executing the relevant
>     maintenance instructions (arm64) or a system call (arm32). Crucially,
>     the hardware will ensure that this cache maintenance is broadcast
>     to all other CPUs.

Is this guaranteed regardless of any aliases?  That is, if I flush from one CPU at one VA and then execute the same physical address from another CPU at a different VA, does this still work?

> 
>  2. Even with all the cache maintenance in the world, a CPU could have
>     speculatively fetched stale instructions into its "pipeline" ahead of
>     time, and these are _not_ flushed by the broadcast maintenance instructions
>     in (1). SYNC_CORE provides a means for userspace to discard these stale
>     instructions.
> 
>  3. The context synchronization event on exception entry/exit is
>     sufficient here. The Arm ARM isn't very good at describing what it
>     does, because it's in denial about the existence of a pipeline, but
>     it does have snippets such as:
> 
>    (s/PE/CPU/)
>       | For all types of memory:
>       | The PE might have fetched the instructions from memory at any time
>       | since the last Context synchronization event on that PE.
> 
>     Interestingly, the architecture recently added a control bit to remove
>     this synchronisation from exception return, so if we set that then we'd
>     have a problem with SYNC_CORE and adding an ISB would be necessary (and
>     we could probable then make kernel->kernel returns cheaper, but I
>     suspect we're relying on this implicit synchronisation in other places
>     too).
> 

Is ISB just a context synchronization event or does it do more?

On x86, it’s very hard to tell that MFENCE does any more than LOCK, but it’s much slower.  And we have LFENCE, which, as documented, doesn’t appear to have any semantics at all.  (Or at least it didn’t before Spectre.)

> Are you seeing a problem in practice, or did this come up while trying to
> decipher the semantics of SYNC_CORE?

It came up while trying to understand the code and work through various bugs in it.  The code was written using something approximating x86 terminology, but it was definitely wrong on x86 (at least if you believe the SDM, and I haven’t convinced any architects to say otherwise).

Thanks!

> 
> Will

^ permalink raw reply

* Re: [PATCH v2] net: ethernet: fs_enet: Add missing MODULE_LICENSE
From: Andrew Lunn @ 2021-01-05 14:22 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: netdev, linux-kernel, linuxppc-dev, kuba, davem
In-Reply-To: <20210105091515.87509-1-mpe@ellerman.id.au>

On Tue, Jan 05, 2021 at 08:15:15PM +1100, Michael Ellerman wrote:
> Since commit 1d6cd3929360 ("modpost: turn missing MODULE_LICENSE()
> into error") the ppc32_allmodconfig build fails with:
> 
>   ERROR: modpost: missing MODULE_LICENSE() in drivers/net/ethernet/freescale/fs_enet/mii-fec.o
>   ERROR: modpost: missing MODULE_LICENSE() in drivers/net/ethernet/freescale/fs_enet/mii-bitbang.o
> 
> Add the missing MODULE_LICENSEs to fix the build. Both files include a
> copyright header indicating they are GPL v2.
> 
> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>

Reviewed-by: Andrew Lunn <andrew@lunn.ch>

    Andrew

^ permalink raw reply

* Re: [PATCH 01/20] ethernet: ucc_geth: set dev->max_mtu to 1518
From: Andrew Lunn @ 2021-01-05 14:54 UTC (permalink / raw)
  To: Joakim Tjernlund
  Cc: f.fainelli@gmail.com, rasmus.villemoes@prevas.dk,
	vladimir.oltean@nxp.com, linux-kernel@vger.kernel.org,
	murali.policharla@broadcom.com, netdev@vger.kernel.org,
	kuba@kernel.org, leoyang.li@nxp.com,
	linuxppc-dev@lists.ozlabs.org, davem@davemloft.net,
	qiang.zhao@nxp.com
In-Reply-To: <6c56889ce3d0e9fc7a6ca7e7a43091b1ae8cd309.camel@infinera.com>

> Hi Andrew
> 
> I found here: 
> https://patchwork.kernel.org/project/netdevbpf/patch/20201218105538.30563-2-rasmus.villemoes@prevas.dk/
> 
> Seem to be underway but stable isn't included, I think it should be.

Stable should happen, since this was posted with [net,v2,3/3]. David
or Jakub handles stable for netdev. Give it a few more days. If not,
ask Jakub what is happening.

    Andrew

^ permalink raw reply

* Re: [PATCH 01/20] ethernet: ucc_geth: set dev->max_mtu to 1518
From: Joakim Tjernlund @ 2021-01-05 14:44 UTC (permalink / raw)
  To: andrew@lunn.ch
  Cc: f.fainelli@gmail.com, rasmus.villemoes@prevas.dk,
	vladimir.oltean@nxp.com, linux-kernel@vger.kernel.org,
	murali.policharla@broadcom.com, netdev@vger.kernel.org,
	kuba@kernel.org, leoyang.li@nxp.com,
	linuxppc-dev@lists.ozlabs.org, davem@davemloft.net,
	qiang.zhao@nxp.com
In-Reply-To: <X/R4tqny72Bjt28b@lunn.ch>

On Tue, 2021-01-05 at 15:33 +0100, Andrew Lunn wrote:
> On Tue, Jan 05, 2021 at 02:17:42PM +0000, Joakim Tjernlund wrote:
> > On Thu, 2020-12-10 at 02:25 +0100, Andrew Lunn wrote:
> > > On Sat, Dec 05, 2020 at 08:17:24PM +0100, Rasmus Villemoes wrote:
> > > > All the buffers and registers are already set up appropriately for an
> > > > MTU slightly above 1500, so we just need to expose this to the
> > > > networking stack. AFAICT, there's no need to implement .ndo_change_mtu
> > > > when the receive buffers are always set up to support the max_mtu.
> > > > 
> > > > This fixes several warnings during boot on our mpc8309-board with an
> > > > embedded mv88e6250 switch:
> > > > 
> > > > mv88e6085 mdio@e0102120:10: nonfatal error -34 setting MTU 1500 on port 0
> > > > ...
> > > > mv88e6085 mdio@e0102120:10: nonfatal error -34 setting MTU 1500 on port 4
> > > > ucc_geth e0102000.ethernet eth1: error -22 setting MTU to 1504 to include DSA overhead
> > > > 
> > > > The last line explains what the DSA stack tries to do: achieving an MTU
> > > > of 1500 on-the-wire requires that the master netdevice connected to
> > > > the CPU port supports an MTU of 1500+the tagging overhead.
> > > > 
> > > > Fixes: bfcb813203e6 ("net: dsa: configure the MTU for switch ports")
> > > > Cc: Vladimir Oltean <vladimir.oltean@nxp.com>
> > > > Signed-off-by: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
> > > 
> > > Reviewed-by: Andrew Lunn <andrew@lunn.ch>
> > > 
> > >     Andrew
> > 
> > I don't see this in any kernel, seems stuck? Maybe because the series as a whole is not approved?
> 
> Hi Joakim
> 
> When was it posted? If it was while netdev was closed during the merge
> window, you need to repost.
> 
> You should be able to see the status in the netdev patchwork instance
> 
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fpatchwork.kernel.org%2Fproject%2Fnetdevbpf%2Flist%2F&amp;data=04%7C01%7CJoakim.Tjernlund%40infinera.com%7C25615f4c00a44959810208d8b186e496%7C285643de5f5b4b03a1530ae2dc8aaf77%7C1%7C0%7C637454540217112252%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=00l%2FBYyxnAoshH1aZMwEznVFQZXwaYGe3pTo6R3RG3Q%3D&amp;reserved=0
> 
> 	Andrew

Hi Andrew

I found here: 
https://patchwork.kernel.org/project/netdevbpf/patch/20201218105538.30563-2-rasmus.villemoes@prevas.dk/

Seem to be underway but stable isn't included, I think it should be.

 Jocke

^ permalink raw reply

* Re: [PATCH 01/20] ethernet: ucc_geth: set dev->max_mtu to 1518
From: Andrew Lunn @ 2021-01-05 14:33 UTC (permalink / raw)
  To: Joakim Tjernlund
  Cc: f.fainelli@gmail.com, rasmus.villemoes@prevas.dk,
	vladimir.oltean@nxp.com, linux-kernel@vger.kernel.org,
	murali.policharla@broadcom.com, netdev@vger.kernel.org,
	kuba@kernel.org, leoyang.li@nxp.com,
	linuxppc-dev@lists.ozlabs.org, davem@davemloft.net,
	qiang.zhao@nxp.com
In-Reply-To: <33816fa937efc8d4865d95754965e59ccfb75f2c.camel@infinera.com>

On Tue, Jan 05, 2021 at 02:17:42PM +0000, Joakim Tjernlund wrote:
> On Thu, 2020-12-10 at 02:25 +0100, Andrew Lunn wrote:
> > On Sat, Dec 05, 2020 at 08:17:24PM +0100, Rasmus Villemoes wrote:
> > > All the buffers and registers are already set up appropriately for an
> > > MTU slightly above 1500, so we just need to expose this to the
> > > networking stack. AFAICT, there's no need to implement .ndo_change_mtu
> > > when the receive buffers are always set up to support the max_mtu.
> > > 
> > > This fixes several warnings during boot on our mpc8309-board with an
> > > embedded mv88e6250 switch:
> > > 
> > > mv88e6085 mdio@e0102120:10: nonfatal error -34 setting MTU 1500 on port 0
> > > ...
> > > mv88e6085 mdio@e0102120:10: nonfatal error -34 setting MTU 1500 on port 4
> > > ucc_geth e0102000.ethernet eth1: error -22 setting MTU to 1504 to include DSA overhead
> > > 
> > > The last line explains what the DSA stack tries to do: achieving an MTU
> > > of 1500 on-the-wire requires that the master netdevice connected to
> > > the CPU port supports an MTU of 1500+the tagging overhead.
> > > 
> > > Fixes: bfcb813203e6 ("net: dsa: configure the MTU for switch ports")
> > > Cc: Vladimir Oltean <vladimir.oltean@nxp.com>
> > > Signed-off-by: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
> > 
> > Reviewed-by: Andrew Lunn <andrew@lunn.ch>
> > 
> >     Andrew
> 
> I don't see this in any kernel, seems stuck? Maybe because the series as a whole is not approved?

Hi Joakim

When was it posted? If it was while netdev was closed during the merge
window, you need to repost.

You should be able to see the status in the netdev patchwork instance

https://patchwork.kernel.org/project/netdevbpf/list/

	Andrew

^ permalink raw reply

* Re: [PATCH 01/20] ethernet: ucc_geth: set dev->max_mtu to 1518
From: Joakim Tjernlund @ 2021-01-05 14:17 UTC (permalink / raw)
  To: rasmus.villemoes@prevas.dk, andrew@lunn.ch
  Cc: f.fainelli@gmail.com, vladimir.oltean@nxp.com,
	linux-kernel@vger.kernel.org, murali.policharla@broadcom.com,
	netdev@vger.kernel.org, kuba@kernel.org, leoyang.li@nxp.com,
	linuxppc-dev@lists.ozlabs.org, davem@davemloft.net,
	qiang.zhao@nxp.com
In-Reply-To: <20201210012502.GB2638572@lunn.ch>

On Thu, 2020-12-10 at 02:25 +0100, Andrew Lunn wrote:
> On Sat, Dec 05, 2020 at 08:17:24PM +0100, Rasmus Villemoes wrote:
> > All the buffers and registers are already set up appropriately for an
> > MTU slightly above 1500, so we just need to expose this to the
> > networking stack. AFAICT, there's no need to implement .ndo_change_mtu
> > when the receive buffers are always set up to support the max_mtu.
> > 
> > This fixes several warnings during boot on our mpc8309-board with an
> > embedded mv88e6250 switch:
> > 
> > mv88e6085 mdio@e0102120:10: nonfatal error -34 setting MTU 1500 on port 0
> > ...
> > mv88e6085 mdio@e0102120:10: nonfatal error -34 setting MTU 1500 on port 4
> > ucc_geth e0102000.ethernet eth1: error -22 setting MTU to 1504 to include DSA overhead
> > 
> > The last line explains what the DSA stack tries to do: achieving an MTU
> > of 1500 on-the-wire requires that the master netdevice connected to
> > the CPU port supports an MTU of 1500+the tagging overhead.
> > 
> > Fixes: bfcb813203e6 ("net: dsa: configure the MTU for switch ports")
> > Cc: Vladimir Oltean <vladimir.oltean@nxp.com>
> > Signed-off-by: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
> 
> Reviewed-by: Andrew Lunn <andrew@lunn.ch>
> 
>     Andrew

I don't see this in any kernel, seems stuck? Maybe because the series as a whole is not approved?

 Jocke

^ permalink raw reply

* Re: [RFC please help] membarrier: Rewrite sync_core_before_usermode()
From: Will Deacon @ 2021-01-05 13:26 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Arnd Bergmann, X86 ML, LKML, Nicholas Piggin, Mathieu Desnoyers,
	Catalin Marinas, Paul Mackerras, stable, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <CALCETrX4v1KEf6ikVtFg6juh3Z_esJ-+6PLT1A21JJeTVh2k8g@mail.gmail.com>

Hi Andy,

Sorry for the slow reply, I was socially distanced from my keyboard.

On Mon, Dec 28, 2020 at 04:36:11PM -0800, Andy Lutomirski wrote:
> On Mon, Dec 28, 2020 at 4:11 PM Nicholas Piggin <npiggin@gmail.com> wrote:
> > > +static inline void membarrier_sync_core_before_usermode(void)
> > > +{
> > > +     /*
> > > +      * XXX: I know basically nothing about powerpc cache management.
> > > +      * Is this correct?
> > > +      */
> > > +     isync();
> >
> > This is not about memory ordering or cache management, it's about
> > pipeline management. Powerpc's return to user mode serializes the
> > CPU (aka the hardware thread, _not_ the core; another wrongness of
> > the name, but AFAIKS the HW thread is what is required for
> > membarrier). So this is wrong, powerpc needs nothing here.
> 
> Fair enough.  I'm happy to defer to you on the powerpc details.  In
> any case, this just illustrates that we need feedback from a person
> who knows more about ARM64 than I do.

I think we're in a very similar boat to PowerPC, fwiw. Roughly speaking:

  1. SYNC_CORE does _not_ perform any cache management; that is the
     responsibility of userspace, either by executing the relevant
     maintenance instructions (arm64) or a system call (arm32). Crucially,
     the hardware will ensure that this cache maintenance is broadcast
     to all other CPUs.

  2. Even with all the cache maintenance in the world, a CPU could have
     speculatively fetched stale instructions into its "pipeline" ahead of
     time, and these are _not_ flushed by the broadcast maintenance instructions
     in (1). SYNC_CORE provides a means for userspace to discard these stale
     instructions.

  3. The context synchronization event on exception entry/exit is
     sufficient here. The Arm ARM isn't very good at describing what it
     does, because it's in denial about the existence of a pipeline, but
     it does have snippets such as:

	(s/PE/CPU/)
       | For all types of memory:
       | The PE might have fetched the instructions from memory at any time
       | since the last Context synchronization event on that PE.

     Interestingly, the architecture recently added a control bit to remove
     this synchronisation from exception return, so if we set that then we'd
     have a problem with SYNC_CORE and adding an ISB would be necessary (and
     we could probable then make kernel->kernel returns cheaper, but I
     suspect we're relying on this implicit synchronisation in other places
     too).

Are you seeing a problem in practice, or did this come up while trying to
decipher the semantics of SYNC_CORE?

Will

^ permalink raw reply

* [PATCH v2] net: ethernet: fs_enet: Add missing MODULE_LICENSE
From: Michael Ellerman @ 2021-01-05  9:15 UTC (permalink / raw)
  To: netdev; +Cc: andrew, linux-kernel, linuxppc-dev, kuba, davem

Since commit 1d6cd3929360 ("modpost: turn missing MODULE_LICENSE()
into error") the ppc32_allmodconfig build fails with:

  ERROR: modpost: missing MODULE_LICENSE() in drivers/net/ethernet/freescale/fs_enet/mii-fec.o
  ERROR: modpost: missing MODULE_LICENSE() in drivers/net/ethernet/freescale/fs_enet/mii-bitbang.o

Add the missing MODULE_LICENSEs to fix the build. Both files include a
copyright header indicating they are GPL v2.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c | 1 +
 drivers/net/ethernet/freescale/fs_enet/mii-fec.c     | 1 +
 2 files changed, 2 insertions(+)

v2: Use simply "GPL" as pointed out by Andrew:
    https://lore.kernel.org/lkml/X%2FPRX+RziaU3IJGi@lunn.ch/

diff --git a/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c b/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c
index c8e5d889bd81..21de56345503 100644
--- a/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c
+++ b/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c
@@ -223,3 +223,4 @@ static struct platform_driver fs_enet_bb_mdio_driver = {
 };
 
 module_platform_driver(fs_enet_bb_mdio_driver);
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/freescale/fs_enet/mii-fec.c b/drivers/net/ethernet/freescale/fs_enet/mii-fec.c
index 8b51ee142fa3..152f4d83765a 100644
--- a/drivers/net/ethernet/freescale/fs_enet/mii-fec.c
+++ b/drivers/net/ethernet/freescale/fs_enet/mii-fec.c
@@ -224,3 +224,4 @@ static struct platform_driver fs_enet_fec_mdio_driver = {
 };
 
 module_platform_driver(fs_enet_fec_mdio_driver);
+MODULE_LICENSE("GPL");
-- 
2.25.1


^ permalink raw reply related

* Re: [PATCH] net: ethernet: fs_enet: Add missing MODULE_LICENSE
From: Michael Ellerman @ 2021-01-05  9:12 UTC (permalink / raw)
  To: Andrew Lunn; +Cc: netdev, linux-kernel, linuxppc-dev, kuba, davem
In-Reply-To: <X/PRX+RziaU3IJGi@lunn.ch>

Andrew Lunn <andrew@lunn.ch> writes:
> On Tue, Jan 05, 2021 at 01:22:29PM +1100, Michael Ellerman wrote:
>> Since commit 1d6cd3929360 ("modpost: turn missing MODULE_LICENSE()
>> into error") the ppc32_allmodconfig build fails with:
>> 
>>   ERROR: modpost: missing MODULE_LICENSE() in drivers/net/ethernet/freescale/fs_enet/mii-fec.o
>>   ERROR: modpost: missing MODULE_LICENSE() in drivers/net/ethernet/freescale/fs_enet/mii-bitbang.o
>> 
>> Add the missing MODULE_LICENSEs to fix the build. Both files include a
>> copyright header indicating they are GPL v2.
>> 
>> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
>> ---
>>  drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c | 1 +
>>  drivers/net/ethernet/freescale/fs_enet/mii-fec.c     | 1 +
>>  2 files changed, 2 insertions(+)
>> 
>> diff --git a/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c b/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c
>> index c8e5d889bd81..76ac1a9eab58 100644
>> --- a/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c
>> +++ b/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c
>> @@ -223,3 +223,4 @@ static struct platform_driver fs_enet_bb_mdio_driver = {
>>  };
>>  
>>  module_platform_driver(fs_enet_bb_mdio_driver);
>> +MODULE_LICENSE("GPL v2");
>> diff --git a/drivers/net/ethernet/freescale/fs_enet/mii-fec.c b/drivers/net/ethernet/freescale/fs_enet/mii-fec.c
>> index 8b51ee142fa3..407c330b432f 100644
>> --- a/drivers/net/ethernet/freescale/fs_enet/mii-fec.c
>> +++ b/drivers/net/ethernet/freescale/fs_enet/mii-fec.c
>> @@ -224,3 +224,4 @@ static struct platform_driver fs_enet_fec_mdio_driver = {
>>  };
>>  
>>  module_platform_driver(fs_enet_fec_mdio_driver);
>> +MODULE_LICENSE("GPL v2");
>
> Hi Michael
>
> The use of "GPL v2" has been deprecated. Please use just "GPL". There
> is a discussion about this here:
>
> https://lore.kernel.org/patchwork/patch/1036331/
>
> https://www.kernel.org/doc/html/latest/process/license-rules.html#id1

Ah sorry, I just grepped for "GPL v2" and saw lots of examples so copied
that. Will post a new version using "GPL".

cheers

^ permalink raw reply

* [PATCH v3 2/2] KVM: PPC: Book3S HV: Use H_RPT_INVALIDATE in nested KVM
From: Bharata B Rao @ 2021-01-05  9:05 UTC (permalink / raw)
  To: kvm-ppc, linuxppc-dev
  Cc: farosas, aneesh.kumar, npiggin, Bharata B Rao, david
In-Reply-To: <20210105090557.2150104-1-bharata@linux.ibm.com>

In the nested KVM case, replace H_TLB_INVALIDATE by the new hcall
H_RPT_INVALIDATE if available. The availability of this hcall
is determined from "hcall-rpt-invalidate" string in ibm,hypertas-functions
DT property.

Signed-off-by: Bharata B Rao <bharata@linux.ibm.com>
Reviewed-by: Fabiano Rosas <farosas@linux.ibm.com>
---
 arch/powerpc/kvm/book3s_64_mmu_radix.c | 27 +++++++++++++++++++++-----
 arch/powerpc/kvm/book3s_hv_nested.c    | 12 ++++++++++--
 2 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index bb35490400e9..7ea5459022cb 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -21,6 +21,7 @@
 #include <asm/pte-walk.h>
 #include <asm/ultravisor.h>
 #include <asm/kvm_book3s_uvmem.h>
+#include <asm/plpar_wrappers.h>
 
 /*
  * Supported radix tree geometry.
@@ -318,9 +319,19 @@ void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
 	}
 
 	psi = shift_to_mmu_psize(pshift);
-	rb = addr | (mmu_get_ap(psi) << PPC_BITLSHIFT(58));
-	rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(0, 0, 1),
-				lpid, rb);
+
+	if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE)) {
+		rb = addr | (mmu_get_ap(psi) << PPC_BITLSHIFT(58));
+		rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(0, 0, 1),
+					lpid, rb);
+	} else {
+		rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
+					    H_RPTI_TYPE_NESTED |
+					    H_RPTI_TYPE_TLB,
+					    psize_to_rpti_pgsize(psi),
+					    addr, addr + psize);
+	}
+
 	if (rc)
 		pr_err("KVM: TLB page invalidation hcall failed, rc=%ld\n", rc);
 }
@@ -334,8 +345,14 @@ static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned int lpid)
 		return;
 	}
 
-	rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(1, 0, 1),
-				lpid, TLBIEL_INVAL_SET_LPID);
+	if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE))
+		rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(1, 0, 1),
+					lpid, TLBIEL_INVAL_SET_LPID);
+	else
+		rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
+					    H_RPTI_TYPE_NESTED |
+					    H_RPTI_TYPE_PWC, H_RPTI_PAGE_ALL,
+					    0, -1UL);
 	if (rc)
 		pr_err("KVM: TLB PWC invalidation hcall failed, rc=%ld\n", rc);
 }
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 40ed4eb80adb..0ebddb615684 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -19,6 +19,7 @@
 #include <asm/pgalloc.h>
 #include <asm/pte-walk.h>
 #include <asm/reg.h>
+#include <asm/plpar_wrappers.h>
 
 static struct patb_entry *pseries_partition_tb;
 
@@ -402,8 +403,15 @@ static void kvmhv_flush_lpid(unsigned int lpid)
 		return;
 	}
 
-	rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(2, 0, 1),
-				lpid, TLBIEL_INVAL_SET_LPID);
+	if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE))
+		rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(2, 0, 1),
+					lpid, TLBIEL_INVAL_SET_LPID);
+	else
+		rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
+					    H_RPTI_TYPE_NESTED |
+					    H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
+					    H_RPTI_TYPE_PAT,
+					    H_RPTI_PAGE_ALL, 0, -1UL);
 	if (rc)
 		pr_err("KVM: TLB LPID invalidation hcall failed, rc=%ld\n", rc);
 }
-- 
2.26.2


^ permalink raw reply related

* [PATCH v3 1/2] KVM: PPC: Book3S HV: Add support for H_RPT_INVALIDATE
From: Bharata B Rao @ 2021-01-05  9:05 UTC (permalink / raw)
  To: kvm-ppc, linuxppc-dev
  Cc: farosas, aneesh.kumar, npiggin, Bharata B Rao, david
In-Reply-To: <20210105090557.2150104-1-bharata@linux.ibm.com>

Implement H_RPT_INVALIDATE hcall and add KVM capability
KVM_CAP_PPC_RPT_INVALIDATE to indicate the support for the same.

This hcall does two types of TLB invalidations:

1. Process-scoped invalidations for guests with LPCR[GTSE]=0.
   This is currently not used in KVM as GTSE is not usually
   disabled in KVM.
2. Partition-scoped invalidations that an L1 hypervisor does on
   behalf of an L2 guest. This replaces the uses of the existing
   hcall H_TLB_INVALIDATE.

Signed-off-by: Bharata B Rao <bharata@linux.ibm.com>
---
 Documentation/virt/kvm/api.rst                | 17 ++++
 .../include/asm/book3s/64/tlbflush-radix.h    | 18 ++++
 arch/powerpc/include/asm/kvm_book3s.h         |  3 +
 arch/powerpc/include/asm/mmu_context.h        |  7 ++
 arch/powerpc/kvm/book3s_hv.c                  | 56 +++++++++++
 arch/powerpc/kvm/book3s_hv_nested.c           | 96 +++++++++++++++++++
 arch/powerpc/kvm/powerpc.c                    |  3 +
 arch/powerpc/mm/book3s64/radix_tlb.c          | 24 ++++-
 include/uapi/linux/kvm.h                      |  1 +
 9 files changed, 221 insertions(+), 4 deletions(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 70254eaa5229..acf1fb4a0e1d 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -6032,6 +6032,23 @@ KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR exit notifications which user space
 can then handle to implement model specific MSR handling and/or user notifications
 to inform a user that an MSR was not handled.
 
+7.22 KVM_CAP_PPC_RPT_INVALIDATE
+------------------------------
+
+:Capability: KVM_CAP_PPC_RPT_INVALIDATE
+:Architectures: ppc
+:Type: vm
+
+This capability indicates that the kernel is capable of handling
+H_RPT_INVALIDATE hcall.
+
+In order to enable the use of H_RPT_INVALIDATE in the guest,
+user space might have to advertise it for the guest. For example,
+IBM pSeries (sPAPR) guest starts using it if "hcall-rpt-invalidate" is
+present in the "ibm,hypertas-functions" device-tree property.
+
+This capability is always enabled.
+
 8. Other capabilities.
 ======================
 
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index 94439e0cefc9..aace7e9b2397 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -4,6 +4,10 @@
 
 #include <asm/hvcall.h>
 
+#define RIC_FLUSH_TLB 0
+#define RIC_FLUSH_PWC 1
+#define RIC_FLUSH_ALL 2
+
 struct vm_area_struct;
 struct mm_struct;
 struct mmu_gather;
@@ -21,6 +25,20 @@ static inline u64 psize_to_rpti_pgsize(unsigned long psize)
 	return H_RPTI_PAGE_ALL;
 }
 
+static inline int rpti_pgsize_to_psize(unsigned long page_size)
+{
+	if (page_size == H_RPTI_PAGE_4K)
+		return MMU_PAGE_4K;
+	if (page_size == H_RPTI_PAGE_64K)
+		return MMU_PAGE_64K;
+	if (page_size == H_RPTI_PAGE_2M)
+		return MMU_PAGE_2M;
+	if (page_size == H_RPTI_PAGE_1G)
+		return MMU_PAGE_1G;
+	else
+		return MMU_PAGE_64K; /* Default */
+}
+
 static inline int mmu_get_ap(int psize)
 {
 	return mmu_psize_defs[psize].ap;
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index d32ec9ae73bd..0f1c5fa6e8ce 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -298,6 +298,9 @@ void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1);
 void kvmhv_release_all_nested(struct kvm *kvm);
 long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu);
 long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu);
+long kvmhv_h_rpti_nested(struct kvm_vcpu *vcpu, unsigned long lpid,
+			 unsigned long type, unsigned long pg_sizes,
+			 unsigned long start, unsigned long end);
 int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu,
 			  u64 time_limit, unsigned long lpcr);
 void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr);
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index d5821834dba9..17b2995a55ed 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -124,8 +124,15 @@ static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
 
 #if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU)
 extern void radix_kvm_prefetch_workaround(struct mm_struct *mm);
+void do_h_rpt_invalidate(unsigned long pid, unsigned long type,
+			 unsigned long page_size, unsigned long psize,
+			 unsigned long start, unsigned long end);
 #else
 static inline void radix_kvm_prefetch_workaround(struct mm_struct *mm) { }
+static inline void do_h_rpt_invalidate(unsigned long pid, unsigned long type,
+				       unsigned long page_size,
+				       unsigned long psize, unsigned long start,
+				       unsigned long end) { }
 #endif
 
 extern void switch_cop(struct mm_struct *next);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 6f612d240392..8df391b63330 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -904,6 +904,53 @@ static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu)
 	return yield_count;
 }
 
+static long kvmppc_h_rpt_invalidate(struct kvm_vcpu *vcpu,
+				    unsigned long pid, unsigned long target,
+				    unsigned long type, unsigned long pg_sizes,
+				    unsigned long start, unsigned long end)
+{
+	unsigned long psize;
+
+	if (!kvm_is_radix(vcpu->kvm))
+		return H_UNSUPPORTED;
+
+	if (end < start)
+		return H_P5;
+
+	if (type & H_RPTI_TYPE_NESTED) {
+		if (!nesting_enabled(vcpu->kvm))
+			return H_FUNCTION;
+
+		/* Support only cores as target */
+		if (target != H_RPTI_TARGET_CMMU)
+			return H_P2;
+
+		return kvmhv_h_rpti_nested(vcpu, pid,
+					   (type & ~H_RPTI_TYPE_NESTED),
+					    pg_sizes, start, end);
+	}
+
+	if (pg_sizes & H_RPTI_PAGE_64K) {
+		psize = rpti_pgsize_to_psize(pg_sizes & H_RPTI_PAGE_64K);
+		do_h_rpt_invalidate(pid, type, (1UL << 16), psize,
+				    start, end);
+	}
+
+	if (pg_sizes & H_RPTI_PAGE_2M) {
+		psize = rpti_pgsize_to_psize(pg_sizes & H_RPTI_PAGE_2M);
+		do_h_rpt_invalidate(pid, type, (1UL << 21), psize,
+				    start, end);
+	}
+
+	if (pg_sizes & H_RPTI_PAGE_1G) {
+		psize = rpti_pgsize_to_psize(pg_sizes & H_RPTI_PAGE_1G);
+		do_h_rpt_invalidate(pid, type, (1UL << 30), psize,
+				    start, end);
+	}
+
+	return H_SUCCESS;
+}
+
 int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 {
 	unsigned long req = kvmppc_get_gpr(vcpu, 3);
@@ -1112,6 +1159,14 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 		 */
 		ret = kvmppc_h_svm_init_abort(vcpu->kvm);
 		break;
+	case H_RPT_INVALIDATE:
+		ret = kvmppc_h_rpt_invalidate(vcpu, kvmppc_get_gpr(vcpu, 4),
+					      kvmppc_get_gpr(vcpu, 5),
+					      kvmppc_get_gpr(vcpu, 6),
+					      kvmppc_get_gpr(vcpu, 7),
+					      kvmppc_get_gpr(vcpu, 8),
+					      kvmppc_get_gpr(vcpu, 9));
+		break;
 
 	default:
 		return RESUME_HOST;
@@ -1158,6 +1213,7 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd)
 	case H_XIRR_X:
 #endif
 	case H_PAGE_INIT:
+	case H_RPT_INVALIDATE:
 		return 1;
 	}
 
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 33b58549a9aa..40ed4eb80adb 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -1149,6 +1149,102 @@ long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu)
 	return H_SUCCESS;
 }
 
+static long do_tlb_invalidate_nested_tlb(struct kvm_vcpu *vcpu,
+					 unsigned long lpid,
+					 unsigned long page_size,
+					 unsigned long ap,
+					 unsigned long start,
+					 unsigned long end)
+{
+	unsigned long addr = start;
+	int ret;
+
+	do {
+		ret = kvmhv_emulate_tlbie_tlb_addr(vcpu, lpid, ap,
+						   get_epn(addr));
+		if (ret)
+			return ret;
+		addr += page_size;
+	} while (addr < end);
+
+	return ret;
+}
+
+static long do_tlb_invalidate_nested_all(struct kvm_vcpu *vcpu,
+					 unsigned long lpid)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_nested_guest *gp;
+
+	gp = kvmhv_get_nested(kvm, lpid, false);
+	if (gp) {
+		kvmhv_emulate_tlbie_lpid(vcpu, gp, RIC_FLUSH_ALL);
+		kvmhv_put_nested(gp);
+	}
+	return H_SUCCESS;
+}
+
+long kvmhv_h_rpti_nested(struct kvm_vcpu *vcpu, unsigned long lpid,
+			 unsigned long type, unsigned long pg_sizes,
+			 unsigned long start, unsigned long end)
+{
+	struct kvm_nested_guest *gp;
+	long ret;
+	unsigned long psize, ap;
+
+	/*
+	 * If L2 lpid isn't valid, we need to return H_PARAMETER.
+	 *
+	 * However, nested KVM issues a L2 lpid flush call when creating
+	 * partition table entries for L2. This happens even before the
+	 * corresponding shadow lpid is created in HV which happens in
+	 * H_ENTER_NESTED call. Since we can't differentiate this case from
+	 * the invalid case, we ignore such flush requests and return success.
+	 */
+	gp = kvmhv_find_nested(vcpu->kvm, lpid);
+	if (!gp)
+		return H_SUCCESS;
+
+	if ((type & H_RPTI_TYPE_NESTED_ALL) == H_RPTI_TYPE_NESTED_ALL)
+		return do_tlb_invalidate_nested_all(vcpu, lpid);
+
+	if ((type & H_RPTI_TYPE_TLB) == H_RPTI_TYPE_TLB) {
+		if (pg_sizes & H_RPTI_PAGE_64K) {
+			psize = rpti_pgsize_to_psize(pg_sizes & H_RPTI_PAGE_64K);
+			ap = mmu_get_ap(psize);
+
+			ret = do_tlb_invalidate_nested_tlb(vcpu, lpid,
+							   (1UL << 16),
+							   ap, start, end);
+			if (ret)
+				return H_P4;
+		}
+
+		if (pg_sizes & H_RPTI_PAGE_2M) {
+			psize = rpti_pgsize_to_psize(pg_sizes & H_RPTI_PAGE_2M);
+			ap = mmu_get_ap(psize);
+
+			ret = do_tlb_invalidate_nested_tlb(vcpu, lpid,
+							   (1UL << 21),
+							   ap, start, end);
+			if (ret)
+				return H_P4;
+		}
+
+		if (pg_sizes & H_RPTI_PAGE_1G) {
+			psize = rpti_pgsize_to_psize(pg_sizes & H_RPTI_PAGE_1G);
+			ap = mmu_get_ap(psize);
+
+			ret = do_tlb_invalidate_nested_tlb(vcpu, lpid,
+							   (1UL << 30),
+							   ap, start, end);
+			if (ret)
+				return H_P4;
+		}
+	}
+	return H_SUCCESS;
+}
+
 /* Used to convert a nested guest real address to a L1 guest real address */
 static int kvmhv_translate_addr_nested(struct kvm_vcpu *vcpu,
 				       struct kvm_nested_guest *gp,
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index cf52d26f49cd..5388cd4a206a 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -678,6 +678,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		r = hv_enabled && kvmppc_hv_ops->enable_svm &&
 			!kvmppc_hv_ops->enable_svm(NULL);
 		break;
+	case KVM_CAP_PPC_RPT_INVALIDATE:
+		r = 1;
+		break;
 #endif
 	default:
 		r = 0;
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index fb66d154b26c..c120a2ad4684 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -18,10 +18,6 @@
 #include <asm/cputhreads.h>
 #include <asm/plpar_wrappers.h>
 
-#define RIC_FLUSH_TLB 0
-#define RIC_FLUSH_PWC 1
-#define RIC_FLUSH_ALL 2
-
 /*
  * tlbiel instruction for radix, set invalidation
  * i.e., r=1 and is=01 or is=10 or is=11
@@ -1286,4 +1282,24 @@ extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
 	}
 }
 EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
+
+void do_h_rpt_invalidate(unsigned long pid, unsigned long type,
+			 unsigned long page_size, unsigned long psize,
+			 unsigned long start, unsigned long end)
+{
+	if ((type & H_RPTI_TYPE_ALL) == H_RPTI_TYPE_ALL) {
+		_tlbie_pid(pid, RIC_FLUSH_ALL);
+		return;
+	}
+
+	if (type & H_RPTI_TYPE_PWC)
+		_tlbie_pid(pid, RIC_FLUSH_PWC);
+
+	if (!start && end == -1) /* PID */
+		_tlbie_pid(pid, RIC_FLUSH_TLB);
+	else /* EA */
+		_tlbie_va_range(start, end, pid, page_size, psize, false);
+}
+EXPORT_SYMBOL_GPL(do_h_rpt_invalidate);
+
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 886802b8ffba..ae9854830dcc 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1056,6 +1056,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190
 #define KVM_CAP_SYS_HYPERV_CPUID 191
 #define KVM_CAP_DIRTY_LOG_RING 192
+#define KVM_CAP_PPC_RPT_INVALIDATE 193
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
2.26.2


^ permalink raw reply related

* [PATCH v3 0/2] Support for H_RPT_INVALIDATE in PowerPC KVM
From: Bharata B Rao @ 2021-01-05  9:05 UTC (permalink / raw)
  To: kvm-ppc, linuxppc-dev
  Cc: farosas, aneesh.kumar, npiggin, Bharata B Rao, david

This patchset adds support for the new hcall H_RPT_INVALIDATE
and replaces the nested tlb flush calls with this new hcall
if support for the same exists.

Changes in v3:
-------------
- Reused the tlb flush routines from radix_tlb.c
- Ensured fixups are take care of due to the above change.
 
v2: https://lore.kernel.org/linuxppc-dev/20201217033335.GD310465@yekko.fritz.box/T/#t

H_RPT_INVALIDATE
================
Syntax:
int64   /* H_Success: Return code on successful completion */
        /* H_Busy - repeat the call with the same */
        /* H_Parameter, H_P2, H_P3, H_P4, H_P5 : Invalid parameters */
        hcall(const uint64 H_RPT_INVALIDATE, /* Invalidate RPT translation lookaside information */
              uint64 pid,       /* PID/LPID to invalidate */
              uint64 target,    /* Invalidation target */
              uint64 type,      /* Type of lookaside information */
              uint64 pageSizes,     /* Page sizes */
              uint64 start,     /* Start of Effective Address (EA) range (inclusive) */
              uint64 end)       /* End of EA range (exclusive) */

Invalidation targets (target)
-----------------------------
Core MMU        0x01 /* All virtual processors in the partition */
Core local MMU  0x02 /* Current virtual processor */
Nest MMU        0x04 /* All nest/accelerator agents in use by the partition */

A combination of the above can be specified, except core and core local.

Type of translation to invalidate (type)
---------------------------------------
NESTED       0x0001  /* Invalidate nested guest partition-scope */
TLB          0x0002  /* Invalidate TLB */
PWC          0x0004  /* Invalidate Page Walk Cache */
PRT          0x0008  /* Invalidate Process Table Entries if NESTED is clear */
PAT          0x0008  /* Invalidate Partition Table Entries if NESTED is set */

A combination of the above can be specified.

Page size mask (pageSizes)
--------------------------
4K              0x01
64K             0x02
2M              0x04
1G              0x08
All sizes       (-1UL)

A combination of the above can be specified.
All page sizes can be selected with -1.

Semantics: Invalidate radix tree lookaside information
           matching the parameters given.
* Return H_P2, H_P3 or H_P4 if target, type, or pageSizes parameters are
  different from the defined values.
* Return H_PARAMETER if NESTED is set and pid is not a valid nested
  LPID allocated to this partition
* Return H_P5 if (start, end) doesn't form a valid range. Start and end
  should be a valid Quadrant address and  end > start.
* Return H_NotSupported if the partition is not in running in radix
  translation mode.
* May invalidate more translation information than requested.
* If start = 0 and end = -1, set the range to cover all valid addresses.
  Else start and end should be aligned to 4kB (lower 11 bits clear).
* If NESTED is clear, then invalidate process scoped lookaside information.
  Else pid specifies a nested LPID, and the invalidation is performed
  on nested guest partition table and nested guest partition scope real
  addresses.
* If pid = 0 and NESTED is clear, then valid addresses are quadrant 3 and
  quadrant 0 spaces, Else valid addresses are quadrant 0.
* Pages which are fully covered by the range are to be invalidated.
  Those which are partially covered are considered outside invalidation
  range, which allows a caller to optimally invalidate ranges that may
  contain mixed page sizes.
* Return H_SUCCESS on success.

Bharata B Rao (2):
  KVM: PPC: Book3S HV: Add support for H_RPT_INVALIDATE
  KVM: PPC: Book3S HV: Use H_RPT_INVALIDATE in nested KVM

 Documentation/virt/kvm/api.rst                |  17 +++
 .../include/asm/book3s/64/tlbflush-radix.h    |  18 +++
 arch/powerpc/include/asm/kvm_book3s.h         |   3 +
 arch/powerpc/include/asm/mmu_context.h        |   7 ++
 arch/powerpc/kvm/book3s_64_mmu_radix.c        |  27 ++++-
 arch/powerpc/kvm/book3s_hv.c                  |  56 +++++++++
 arch/powerpc/kvm/book3s_hv_nested.c           | 108 +++++++++++++++++-
 arch/powerpc/kvm/powerpc.c                    |   3 +
 arch/powerpc/mm/book3s64/radix_tlb.c          |  24 +++-
 include/uapi/linux/kvm.h                      |   1 +
 10 files changed, 253 insertions(+), 11 deletions(-)

-- 
2.26.2


^ permalink raw reply

* Re: [PATCH v3 5/6] mm/mmu_gather: invalidate TLB correctly on batch allocation failure and flush
From: Greg KH @ 2021-01-05  9:05 UTC (permalink / raw)
  To: Santosh Sivaraj
  Cc: Sasha Levin, Peter Zijlstra, stable, Aneesh Kumar K . V,
	linuxppc-dev
In-Reply-To: <20200312132740.225241-6-santosh@fossix.org>

On Thu, Mar 12, 2020 at 06:57:39PM +0530, Santosh Sivaraj wrote:
> From: Peter Zijlstra <peterz@infradead.org>
> 
> commit 0ed1325967ab5f7a4549a2641c6ebe115f76e228 upstream.
> 
> Architectures for which we have hardware walkers of Linux page table
> should flush TLB on mmu gather batch allocation failures and batch flush.
> Some architectures like POWER supports multiple translation modes (hash
> and radix) and in the case of POWER only radix translation mode needs the
> above TLBI.  This is because for hash translation mode kernel wants to
> avoid this extra flush since there are no hardware walkers of linux page
> table.  With radix translation, the hardware also walks linux page table
> and with that, kernel needs to make sure to TLB invalidate page walk cache
> before page table pages are freed.
> 
> More details in commit d86564a2f085 ("mm/tlb, x86/mm: Support invalidating
> TLB caches for RCU_TABLE_FREE")
> 
> The changes to sparc are to make sure we keep the old behavior since we
> are now removing HAVE_RCU_TABLE_NO_INVALIDATE.  The default value for
> tlb_needs_table_invalidate is to always force an invalidate and sparc can
> avoid the table invalidate.  Hence we define tlb_needs_table_invalidate to
> false for sparc architecture.
> 
> Link: http://lkml.kernel.org/r/20200116064531.483522-3-aneesh.kumar@linux.ibm.com
> Fixes: a46cc7a90fd8 ("powerpc/mm/radix: Improve TLB/PWC flushes")
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> Cc: <stable@vger.kernel.org>  # 4.19
> Signed-off-by: Santosh Sivaraj <santosh@fossix.org>
> [santosh: backported to 4.19 stable]
> ---
>  arch/Kconfig                    |  3 ---
>  arch/powerpc/Kconfig            |  1 -
>  arch/powerpc/include/asm/tlb.h  | 11 +++++++++++
>  arch/sparc/Kconfig              |  1 -
>  arch/sparc/include/asm/tlb_64.h |  9 +++++++++
>  include/asm-generic/tlb.h       | 15 +++++++++++++++
>  mm/memory.c                     | 16 ++++++++--------
>  7 files changed, 43 insertions(+), 13 deletions(-)

As the testing pointed out, this breaks the build on lots of arches:
	https://lore.kernel.org/r/CAEUSe78+F1Q9LFjpf8SQzQa6+Ak4wcPiiNcUVxEcv+KPdrYvBw@mail.gmail.com
	https://lore.kernel.org/r/cff87cd2-8cd5-241e-3a05-a817b1a56b8c@roeck-us.net

so I'm going to drop this whole series and do a -rc2.

If you still want/need this series in 4.19, please make sure it really
works for everyone :)

thanks,

greg k-h

^ permalink raw reply

* Re: [PATCH] net: ethernet: fs_enet: Add missing MODULE_LICENSE
From: Andrew Lunn @ 2021-01-05  2:39 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: netdev, linux-kernel, linuxppc-dev, kuba, davem
In-Reply-To: <20210105022229.54601-1-mpe@ellerman.id.au>

On Tue, Jan 05, 2021 at 01:22:29PM +1100, Michael Ellerman wrote:
> Since commit 1d6cd3929360 ("modpost: turn missing MODULE_LICENSE()
> into error") the ppc32_allmodconfig build fails with:
> 
>   ERROR: modpost: missing MODULE_LICENSE() in drivers/net/ethernet/freescale/fs_enet/mii-fec.o
>   ERROR: modpost: missing MODULE_LICENSE() in drivers/net/ethernet/freescale/fs_enet/mii-bitbang.o
> 
> Add the missing MODULE_LICENSEs to fix the build. Both files include a
> copyright header indicating they are GPL v2.
> 
> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
> ---
>  drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c | 1 +
>  drivers/net/ethernet/freescale/fs_enet/mii-fec.c     | 1 +
>  2 files changed, 2 insertions(+)
> 
> diff --git a/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c b/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c
> index c8e5d889bd81..76ac1a9eab58 100644
> --- a/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c
> +++ b/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c
> @@ -223,3 +223,4 @@ static struct platform_driver fs_enet_bb_mdio_driver = {
>  };
>  
>  module_platform_driver(fs_enet_bb_mdio_driver);
> +MODULE_LICENSE("GPL v2");
> diff --git a/drivers/net/ethernet/freescale/fs_enet/mii-fec.c b/drivers/net/ethernet/freescale/fs_enet/mii-fec.c
> index 8b51ee142fa3..407c330b432f 100644
> --- a/drivers/net/ethernet/freescale/fs_enet/mii-fec.c
> +++ b/drivers/net/ethernet/freescale/fs_enet/mii-fec.c
> @@ -224,3 +224,4 @@ static struct platform_driver fs_enet_fec_mdio_driver = {
>  };
>  
>  module_platform_driver(fs_enet_fec_mdio_driver);
> +MODULE_LICENSE("GPL v2");

Hi Michael

The use of "GPL v2" has been deprecated. Please use just "GPL". There
is a discussion about this here:

https://lore.kernel.org/patchwork/patch/1036331/

https://www.kernel.org/doc/html/latest/process/license-rules.html#id1

	Andrew

^ permalink raw reply

* [Bug 210911] error: implicit declaration of function 'cleanup_cpu_mmu_context' [-Werror=implicit-function-declaration]
From: bugzilla-daemon @ 2021-01-05  2:29 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <bug-210911-206035@https.bugzilla.kernel.org/>

https://bugzilla.kernel.org/show_bug.cgi?id=210911

Michael Ellerman (michael@ellerman.id.au) changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|RESOLVED                    |CLOSED

-- 
You may reply to this email to add a comment.

You are receiving this mail because:
You are watching the assignee of the bug.

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox