* [PATCH 01/10] dma-direct: provide a generic implementation of DMA_ATTR_NON_CONSISTENT
2018-12-08 17:36 make the non-consistent DMA allocator more useful Christoph Hellwig
@ 2018-12-08 17:36 ` Christoph Hellwig
2018-12-08 17:36 ` [PATCH 02/10] arm64/iommu: don't remap contiguous allocations for coherent devices Christoph Hellwig
` (8 subsequent siblings)
9 siblings, 0 replies; 19+ messages in thread
From: Christoph Hellwig @ 2018-12-08 17:36 UTC (permalink / raw)
To: iommu
Cc: linux-mips, linux-parisc, Vineet Gupta, Robin Murphy, dri-devel,
Matwey V. Kornilov, openrisc, Laurent Pinchart, sparclinux,
linux-snps-arc, Ezequiel Garcia, linux-arm-kernel, linux-media
If DMA_ATTR_NON_CONSISTENT is passed in the flags we can always just
use the dma_direct_alloc_pages implementation given that the callers
will take care of any cache maintenance on ownership transfers between
the CPU and the device.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
arch/arc/mm/dma.c | 21 ++++++--------------
arch/mips/mm/dma-noncoherent.c | 5 ++---
arch/openrisc/kernel/dma.c | 23 +++++++++-------------
arch/parisc/kernel/pci-dma.c | 35 ++++++++++++----------------------
kernel/dma/direct.c | 4 ++--
5 files changed, 31 insertions(+), 57 deletions(-)
diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
index db203ff69ccf..135759d4ea8c 100644
--- a/arch/arc/mm/dma.c
+++ b/arch/arc/mm/dma.c
@@ -24,7 +24,6 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
struct page *page;
phys_addr_t paddr;
void *kvaddr;
- bool need_coh = !(attrs & DMA_ATTR_NON_CONSISTENT);
/*
* __GFP_HIGHMEM flag is cleared by upper layer functions
@@ -46,14 +45,10 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
* A coherent buffer needs MMU mapping to enforce non-cachability.
* kvaddr is kernel Virtual address (0x7000_0000 based).
*/
- if (need_coh) {
- kvaddr = ioremap_nocache(paddr, size);
- if (kvaddr == NULL) {
- __free_pages(page, order);
- return NULL;
- }
- } else {
- kvaddr = (void *)(u32)paddr;
+ kvaddr = ioremap_nocache(paddr, size);
+ if (kvaddr == NULL) {
+ __free_pages(page, order);
+ return NULL;
}
/*
@@ -66,9 +61,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
* Currently flush_cache_vmap nukes the L1 cache completely which
* will be optimized as a separate commit
*/
- if (need_coh)
- dma_cache_wback_inv(paddr, size);
-
+ dma_cache_wback_inv(paddr, size);
return kvaddr;
}
@@ -78,9 +71,7 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr,
phys_addr_t paddr = dma_handle;
struct page *page = virt_to_page(paddr);
- if (!(attrs & DMA_ATTR_NON_CONSISTENT))
- iounmap((void __force __iomem *)vaddr);
-
+ iounmap((void __force __iomem *)vaddr);
__free_pages(page, get_order(size));
}
diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c
index cb38461391cb..7576cd7193ba 100644
--- a/arch/mips/mm/dma-noncoherent.c
+++ b/arch/mips/mm/dma-noncoherent.c
@@ -50,7 +50,7 @@ void *arch_dma_alloc(struct device *dev, size_t size,
void *ret;
ret = dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
- if (ret && !(attrs & DMA_ATTR_NON_CONSISTENT)) {
+ if (ret) {
dma_cache_wback_inv((unsigned long) ret, size);
ret = (void *)UNCAC_ADDR(ret);
}
@@ -61,8 +61,7 @@ void *arch_dma_alloc(struct device *dev, size_t size,
void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
dma_addr_t dma_addr, unsigned long attrs)
{
- if (!(attrs & DMA_ATTR_NON_CONSISTENT))
- cpu_addr = (void *)CAC_ADDR((unsigned long)cpu_addr);
+ cpu_addr = (void *)CAC_ADDR((unsigned long)cpu_addr);
dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs);
}
diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c
index 159336adfa2f..483adbb000bb 100644
--- a/arch/openrisc/kernel/dma.c
+++ b/arch/openrisc/kernel/dma.c
@@ -98,15 +98,13 @@ arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
va = (unsigned long)page;
- if ((attrs & DMA_ATTR_NON_CONSISTENT) == 0) {
- /*
- * We need to iterate through the pages, clearing the dcache for
- * them and setting the cache-inhibit bit.
- */
- if (walk_page_range(va, va + size, &walk)) {
- free_pages_exact(page, size);
- return NULL;
- }
+ /*
+ * We need to iterate through the pages, clearing the dcache for
+ * them and setting the cache-inhibit bit.
+ */
+ if (walk_page_range(va, va + size, &walk)) {
+ free_pages_exact(page, size);
+ return NULL;
}
return (void *)va;
@@ -122,11 +120,8 @@ arch_dma_free(struct device *dev, size_t size, void *vaddr,
.mm = &init_mm
};
- if ((attrs & DMA_ATTR_NON_CONSISTENT) == 0) {
- /* walk_page_range shouldn't be able to fail here */
- WARN_ON(walk_page_range(va, va + size, &walk));
- }
-
+ /* walk_page_range shouldn't be able to fail here */
+ WARN_ON(walk_page_range(va, va + size, &walk));
free_pages_exact(vaddr, size);
}
diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c
index 04c48f1ef3fb..6780449e3e8b 100644
--- a/arch/parisc/kernel/pci-dma.c
+++ b/arch/parisc/kernel/pci-dma.c
@@ -421,29 +421,18 @@ static void *pcxl_dma_alloc(struct device *dev, size_t size,
return (void *)vaddr;
}
-static void *pcx_dma_alloc(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs)
+static inline bool cpu_supports_coherent_area(void)
{
- void *addr;
-
- if ((attrs & DMA_ATTR_NON_CONSISTENT) == 0)
- return NULL;
-
- addr = (void *)__get_free_pages(flag, get_order(size));
- if (addr)
- *dma_handle = (dma_addr_t)virt_to_phys(addr);
-
- return addr;
+ return boot_cpu_data.cpu_type == pcxl2 ||
+ boot_cpu_data.cpu_type == pcxl;
}
void *arch_dma_alloc(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
-
- if (boot_cpu_data.cpu_type == pcxl2 || boot_cpu_data.cpu_type == pcxl)
+ if (cpu_supports_coherent_area())
return pcxl_dma_alloc(dev, size, dma_handle, gfp, attrs);
- else
- return pcx_dma_alloc(dev, size, dma_handle, gfp, attrs);
+ return NULL;
}
void arch_dma_free(struct device *dev, size_t size, void *vaddr,
@@ -451,14 +440,14 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr,
{
int order = get_order(size);
- if (boot_cpu_data.cpu_type == pcxl2 || boot_cpu_data.cpu_type == pcxl) {
- size = 1 << (order + PAGE_SHIFT);
- unmap_uncached_pages((unsigned long)vaddr, size);
- pcxl_free_range((unsigned long)vaddr, size);
+ if (WARN_ON_ONCE(!cpu_supports_coherent_area()))
+ return;
- vaddr = __va(dma_handle);
- }
- free_pages((unsigned long)vaddr, get_order(size));
+ size = 1 << (order + PAGE_SHIFT);
+ unmap_uncached_pages((unsigned long)vaddr, size);
+ pcxl_free_range((unsigned long)vaddr, size);
+
+ free_pages((unsigned long)__va(dma_handle), get_order(size));
}
void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 308f88a750c8..4efe1188fd2e 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -206,7 +206,7 @@ void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
void *dma_direct_alloc(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
- if (!dev_is_dma_coherent(dev))
+ if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_NON_CONSISTENT))
return arch_dma_alloc(dev, size, dma_handle, gfp, attrs);
return dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
}
@@ -214,7 +214,7 @@ void *dma_direct_alloc(struct device *dev, size_t size,
void dma_direct_free(struct device *dev, size_t size,
void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs)
{
- if (!dev_is_dma_coherent(dev))
+ if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_NON_CONSISTENT))
arch_dma_free(dev, size, cpu_addr, dma_addr, attrs);
else
dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs);
--
2.19.2
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH 02/10] arm64/iommu: don't remap contiguous allocations for coherent devices
2018-12-08 17:36 make the non-consistent DMA allocator more useful Christoph Hellwig
2018-12-08 17:36 ` [PATCH 01/10] dma-direct: provide a generic implementation of DMA_ATTR_NON_CONSISTENT Christoph Hellwig
@ 2018-12-08 17:36 ` Christoph Hellwig
[not found] ` <20181208173702.15158-3-hch-jcswGhMUV9g@public.gmane.org>
2018-12-08 17:36 ` [PATCH 03/10] arm64/iommu: implement support for DMA_ATTR_NON_CONSISTENT Christoph Hellwig
` (7 subsequent siblings)
9 siblings, 1 reply; 19+ messages in thread
From: Christoph Hellwig @ 2018-12-08 17:36 UTC (permalink / raw)
To: iommu
Cc: linux-mips, linux-parisc, Vineet Gupta, Robin Murphy, dri-devel,
Matwey V. Kornilov, openrisc, Laurent Pinchart, sparclinux,
linux-snps-arc, Ezequiel Garcia, linux-arm-kernel, linux-media
There is no need to have an additional kernel mapping for a contiguous
allocation if the device already is DMA coherent, so skip it.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
arch/arm64/mm/dma-mapping.c | 35 ++++++++++++++++++++++-------------
1 file changed, 22 insertions(+), 13 deletions(-)
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 4c0f498069e8..d39b60113539 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -255,13 +255,18 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
size >> PAGE_SHIFT);
return NULL;
}
+
+ if (coherent) {
+ memset(addr, 0, size);
+ return addr;
+ }
+
addr = dma_common_contiguous_remap(page, size, VM_USERMAP,
prot,
__builtin_return_address(0));
if (addr) {
memset(addr, 0, size);
- if (!coherent)
- __dma_flush_area(page_to_virt(page), iosize);
+ __dma_flush_area(page_to_virt(page), iosize);
} else {
iommu_dma_unmap_page(dev, *handle, iosize, 0, attrs);
dma_release_from_contiguous(dev, page,
@@ -309,7 +314,9 @@ static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
iommu_dma_unmap_page(dev, handle, iosize, 0, attrs);
dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT);
- dma_common_free_remap(cpu_addr, size, VM_USERMAP);
+
+ if (!dev_is_dma_coherent(dev))
+ dma_common_free_remap(cpu_addr, size, VM_USERMAP);
} else if (is_vmalloc_addr(cpu_addr)){
struct vm_struct *area = find_vm_area(cpu_addr);
@@ -336,11 +343,12 @@ static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
return ret;
if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
- /*
- * DMA_ATTR_FORCE_CONTIGUOUS allocations are always remapped,
- * hence in the vmalloc space.
- */
- unsigned long pfn = vmalloc_to_pfn(cpu_addr);
+ unsigned long pfn;
+
+ if (dev_is_dma_coherent(dev))
+ pfn = virt_to_pfn(cpu_addr);
+ else
+ pfn = vmalloc_to_pfn(cpu_addr);
return __swiotlb_mmap_pfn(vma, pfn, size);
}
@@ -359,11 +367,12 @@ static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
struct vm_struct *area = find_vm_area(cpu_addr);
if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
- /*
- * DMA_ATTR_FORCE_CONTIGUOUS allocations are always remapped,
- * hence in the vmalloc space.
- */
- struct page *page = vmalloc_to_page(cpu_addr);
+ struct page *page;
+
+ if (dev_is_dma_coherent(dev))
+ page = virt_to_page(cpu_addr);
+ else
+ page = vmalloc_to_page(cpu_addr);
return __swiotlb_get_sgtable_page(sgt, page, size);
}
--
2.19.2
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH 03/10] arm64/iommu: implement support for DMA_ATTR_NON_CONSISTENT
2018-12-08 17:36 make the non-consistent DMA allocator more useful Christoph Hellwig
2018-12-08 17:36 ` [PATCH 01/10] dma-direct: provide a generic implementation of DMA_ATTR_NON_CONSISTENT Christoph Hellwig
2018-12-08 17:36 ` [PATCH 02/10] arm64/iommu: don't remap contiguous allocations for coherent devices Christoph Hellwig
@ 2018-12-08 17:36 ` Christoph Hellwig
2018-12-08 17:36 ` [PATCH 04/10] arm: implement DMA_ATTR_NON_CONSISTENT Christoph Hellwig
` (6 subsequent siblings)
9 siblings, 0 replies; 19+ messages in thread
From: Christoph Hellwig @ 2018-12-08 17:36 UTC (permalink / raw)
To: iommu
Cc: linux-mips, linux-parisc, Vineet Gupta, Robin Murphy, dri-devel,
Matwey V. Kornilov, openrisc, Laurent Pinchart, sparclinux,
linux-snps-arc, Ezequiel Garcia, linux-arm-kernel, linux-media
DMA_ATTR_NON_CONSISTENT forces contiguous allocations as we don't
want to remap, and is otherwise forced down the same path as if we
were always on a coherent device. No new code required except for
a few conditionals.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
arch/arm64/mm/dma-mapping.c | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index d39b60113539..0010688ca30e 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -240,7 +240,8 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
dma_free_from_pool(addr, size);
addr = NULL;
}
- } else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
+ } else if (attrs & (DMA_ATTR_FORCE_CONTIGUOUS |
+ DMA_ATTR_NON_CONSISTENT)) {
pgprot_t prot = arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs);
struct page *page;
@@ -256,7 +257,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
return NULL;
}
- if (coherent) {
+ if (coherent || (attrs & DMA_ATTR_NON_CONSISTENT)) {
memset(addr, 0, size);
return addr;
}
@@ -309,7 +310,8 @@ static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
if (dma_in_atomic_pool(cpu_addr, size)) {
iommu_dma_unmap_page(dev, handle, iosize, 0, 0);
dma_free_from_pool(cpu_addr, size);
- } else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
+ } else if (attrs & (DMA_ATTR_FORCE_CONTIGUOUS |
+ DMA_ATTR_NON_CONSISTENT)) {
struct page *page = vmalloc_to_page(cpu_addr);
iommu_dma_unmap_page(dev, handle, iosize, 0, attrs);
@@ -342,10 +344,11 @@ static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
return ret;
- if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
+ if (attrs & (DMA_ATTR_FORCE_CONTIGUOUS | DMA_ATTR_NON_CONSISTENT)) {
unsigned long pfn;
- if (dev_is_dma_coherent(dev))
+ if (dev_is_dma_coherent(dev) ||
+ (attrs & DMA_ATTR_NON_CONSISTENT))
pfn = virt_to_pfn(cpu_addr);
else
pfn = vmalloc_to_pfn(cpu_addr);
@@ -366,10 +369,11 @@ static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
struct vm_struct *area = find_vm_area(cpu_addr);
- if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
+ if (attrs & (DMA_ATTR_FORCE_CONTIGUOUS | DMA_ATTR_NON_CONSISTENT)) {
struct page *page;
- if (dev_is_dma_coherent(dev))
+ if (dev_is_dma_coherent(dev) ||
+ (attrs & DMA_ATTR_NON_CONSISTENT))
page = virt_to_page(cpu_addr);
else
page = vmalloc_to_page(cpu_addr);
--
2.19.2
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH 04/10] arm: implement DMA_ATTR_NON_CONSISTENT
2018-12-08 17:36 make the non-consistent DMA allocator more useful Christoph Hellwig
` (2 preceding siblings ...)
2018-12-08 17:36 ` [PATCH 03/10] arm64/iommu: implement support for DMA_ATTR_NON_CONSISTENT Christoph Hellwig
@ 2018-12-08 17:36 ` Christoph Hellwig
[not found] ` <20181208173702.15158-5-hch-jcswGhMUV9g@public.gmane.org>
2018-12-08 17:36 ` [PATCH 05/10] sparc64/iommu: move code around a bit Christoph Hellwig
` (5 subsequent siblings)
9 siblings, 1 reply; 19+ messages in thread
From: Christoph Hellwig @ 2018-12-08 17:36 UTC (permalink / raw)
To: iommu
Cc: linux-mips, linux-parisc, Vineet Gupta, Robin Murphy, dri-devel,
Matwey V. Kornilov, openrisc, Laurent Pinchart, sparclinux,
linux-snps-arc, Ezequiel Garcia, linux-arm-kernel, linux-media
For the iommu ops we can just use the implementation for DMA coherent
devices. For the regular ops we need to mix and match a bit so that
we either use the CMA allocator without remapping, but with a special
error handling case for highmem pages, or the simple allocator.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
arch/arm/mm/dma-mapping.c | 49 ++++++++++++++++++++++++++++-----------
1 file changed, 35 insertions(+), 14 deletions(-)
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 2cfb17bad1e6..b3b66b41c450 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -49,6 +49,7 @@ struct arm_dma_alloc_args {
const void *caller;
bool want_vaddr;
int coherent_flag;
+ bool nonconsistent_flag;
};
struct arm_dma_free_args {
@@ -57,6 +58,7 @@ struct arm_dma_free_args {
void *cpu_addr;
struct page *page;
bool want_vaddr;
+ bool nonconsistent_flag;
};
#define NORMAL 0
@@ -348,7 +350,8 @@ static void __dma_free_buffer(struct page *page, size_t size)
static void *__alloc_from_contiguous(struct device *dev, size_t size,
pgprot_t prot, struct page **ret_page,
const void *caller, bool want_vaddr,
- int coherent_flag, gfp_t gfp);
+ int coherent_flag, bool nonconsistent_flag,
+ gfp_t gfp);
static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
pgprot_t prot, struct page **ret_page,
@@ -405,7 +408,7 @@ static int __init atomic_pool_init(void)
if (dev_get_cma_area(NULL))
ptr = __alloc_from_contiguous(NULL, atomic_pool_size, prot,
&page, atomic_pool_init, true, NORMAL,
- GFP_KERNEL);
+ false, GFP_KERNEL);
else
ptr = __alloc_remap_buffer(NULL, atomic_pool_size, gfp, prot,
&page, atomic_pool_init, true);
@@ -579,7 +582,8 @@ static int __free_from_pool(void *start, size_t size)
static void *__alloc_from_contiguous(struct device *dev, size_t size,
pgprot_t prot, struct page **ret_page,
const void *caller, bool want_vaddr,
- int coherent_flag, gfp_t gfp)
+ int coherent_flag, bool nonconsistent_flag,
+ gfp_t gfp)
{
unsigned long order = get_order(size);
size_t count = size >> PAGE_SHIFT;
@@ -595,12 +599,16 @@ static void *__alloc_from_contiguous(struct device *dev, size_t size,
if (!want_vaddr)
goto out;
+ if (nonconsistent_flag) {
+ if (PageHighMem(page))
+ goto fail;
+ goto out;
+ }
+
if (PageHighMem(page)) {
ptr = __dma_alloc_remap(page, size, GFP_KERNEL, prot, caller);
- if (!ptr) {
- dma_release_from_contiguous(dev, page, count);
- return NULL;
- }
+ if (!ptr)
+ goto fail;
} else {
__dma_remap(page, size, prot);
ptr = page_address(page);
@@ -609,12 +617,15 @@ static void *__alloc_from_contiguous(struct device *dev, size_t size,
out:
*ret_page = page;
return ptr;
+ fail:
+ dma_release_from_contiguous(dev, page, count);
+ return NULL;
}
static void __free_from_contiguous(struct device *dev, struct page *page,
- void *cpu_addr, size_t size, bool want_vaddr)
+ void *cpu_addr, size_t size, bool remapped)
{
- if (want_vaddr) {
+ if (remapped) {
if (PageHighMem(page))
__dma_free_remap(cpu_addr, size);
else
@@ -635,7 +646,11 @@ static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp,
struct page **ret_page)
{
struct page *page;
- /* __alloc_simple_buffer is only called when the device is coherent */
+ /*
+ * __alloc_simple_buffer is only called when the device is coherent,
+ * or if the caller explicitly asked for an allocation that is not
+ * consistent.
+ */
page = __dma_alloc_buffer(dev, size, gfp, COHERENT);
if (!page)
return NULL;
@@ -667,13 +682,15 @@ static void *cma_allocator_alloc(struct arm_dma_alloc_args *args,
return __alloc_from_contiguous(args->dev, args->size, args->prot,
ret_page, args->caller,
args->want_vaddr, args->coherent_flag,
+ args->nonconsistent_flag,
args->gfp);
}
static void cma_allocator_free(struct arm_dma_free_args *args)
{
__free_from_contiguous(args->dev, args->page, args->cpu_addr,
- args->size, args->want_vaddr);
+ args->size,
+ args->want_vaddr || args->nonconsistent_flag);
}
static struct arm_dma_allocator cma_allocator = {
@@ -735,6 +752,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
.caller = caller,
.want_vaddr = ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0),
.coherent_flag = is_coherent ? COHERENT : NORMAL,
+ .nonconsistent_flag = (attrs & DMA_ATTR_NON_CONSISTENT),
};
#ifdef CONFIG_DMA_API_DEBUG
@@ -773,7 +791,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
if (cma)
buf->allocator = &cma_allocator;
- else if (is_coherent)
+ else if (is_coherent || (attrs & DMA_ATTR_NON_CONSISTENT))
buf->allocator = &simple_allocator;
else if (allowblock)
buf->allocator = &remap_allocator;
@@ -874,6 +892,7 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
.cpu_addr = cpu_addr,
.page = page,
.want_vaddr = ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0),
+ .nonconsistent_flag = (attrs & DMA_ATTR_NON_CONSISTENT),
};
buf = arm_dma_buffer_find(cpu_addr);
@@ -1562,7 +1581,8 @@ static void *__arm_iommu_alloc_attrs(struct device *dev, size_t size,
static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
dma_addr_t *handle, gfp_t gfp, unsigned long attrs)
{
- return __arm_iommu_alloc_attrs(dev, size, handle, gfp, attrs, NORMAL);
+ return __arm_iommu_alloc_attrs(dev, size, handle, gfp, attrs,
+ (attrs & DMA_ATTR_NON_CONSISTENT) ? COHERENT : NORMAL);
}
static void *arm_coherent_iommu_alloc_attrs(struct device *dev, size_t size,
@@ -1650,7 +1670,8 @@ void __arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
void arm_iommu_free_attrs(struct device *dev, size_t size,
void *cpu_addr, dma_addr_t handle, unsigned long attrs)
{
- __arm_iommu_free_attrs(dev, size, cpu_addr, handle, attrs, NORMAL);
+ __arm_iommu_free_attrs(dev, size, cpu_addr, handle, attrs,
+ (attrs & DMA_ATTR_NON_CONSISTENT) ? COHERENT : NORMAL);
}
void arm_coherent_iommu_free_attrs(struct device *dev, size_t size,
--
2.19.2
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH 05/10] sparc64/iommu: move code around a bit
2018-12-08 17:36 make the non-consistent DMA allocator more useful Christoph Hellwig
` (3 preceding siblings ...)
2018-12-08 17:36 ` [PATCH 04/10] arm: implement DMA_ATTR_NON_CONSISTENT Christoph Hellwig
@ 2018-12-08 17:36 ` Christoph Hellwig
[not found] ` <20181208173702.15158-6-hch-jcswGhMUV9g@public.gmane.org>
2018-12-08 17:36 ` [PATCH 06/10] sparc64/iommu: implement DMA_ATTR_NON_CONSISTENT Christoph Hellwig
` (4 subsequent siblings)
9 siblings, 1 reply; 19+ messages in thread
From: Christoph Hellwig @ 2018-12-08 17:36 UTC (permalink / raw)
To: iommu
Cc: linux-mips, linux-parisc, Vineet Gupta, Robin Murphy, dri-devel,
Matwey V. Kornilov, openrisc, Laurent Pinchart, sparclinux,
linux-snps-arc, Ezequiel Garcia, linux-arm-kernel, linux-media
Move the alloc / free routines down the file so that we can easily use
the map / unmap helpers to implement non-consistent allocations.
Also drop the _coherent postfix to match the method name.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
arch/sparc/kernel/iommu.c | 135 +++++++++++++++++++-------------------
1 file changed, 67 insertions(+), 68 deletions(-)
diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c
index 0626bae5e3da..4bf0497e0704 100644
--- a/arch/sparc/kernel/iommu.c
+++ b/arch/sparc/kernel/iommu.c
@@ -195,72 +195,6 @@ static inline void iommu_free_ctx(struct iommu *iommu, int ctx)
}
}
-static void *dma_4u_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_addrp, gfp_t gfp,
- unsigned long attrs)
-{
- unsigned long order, first_page;
- struct iommu *iommu;
- struct page *page;
- int npages, nid;
- iopte_t *iopte;
- void *ret;
-
- size = IO_PAGE_ALIGN(size);
- order = get_order(size);
- if (order >= 10)
- return NULL;
-
- nid = dev->archdata.numa_node;
- page = alloc_pages_node(nid, gfp, order);
- if (unlikely(!page))
- return NULL;
-
- first_page = (unsigned long) page_address(page);
- memset((char *)first_page, 0, PAGE_SIZE << order);
-
- iommu = dev->archdata.iommu;
-
- iopte = alloc_npages(dev, iommu, size >> IO_PAGE_SHIFT);
-
- if (unlikely(iopte == NULL)) {
- free_pages(first_page, order);
- return NULL;
- }
-
- *dma_addrp = (iommu->tbl.table_map_base +
- ((iopte - iommu->page_table) << IO_PAGE_SHIFT));
- ret = (void *) first_page;
- npages = size >> IO_PAGE_SHIFT;
- first_page = __pa(first_page);
- while (npages--) {
- iopte_val(*iopte) = (IOPTE_CONSISTENT(0UL) |
- IOPTE_WRITE |
- (first_page & IOPTE_PAGE));
- iopte++;
- first_page += IO_PAGE_SIZE;
- }
-
- return ret;
-}
-
-static void dma_4u_free_coherent(struct device *dev, size_t size,
- void *cpu, dma_addr_t dvma,
- unsigned long attrs)
-{
- struct iommu *iommu;
- unsigned long order, npages;
-
- npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
- iommu = dev->archdata.iommu;
-
- iommu_tbl_range_free(&iommu->tbl, dvma, npages, IOMMU_ERROR_CODE);
-
- order = get_order(size);
- if (order < 10)
- free_pages((unsigned long)cpu, order);
-}
-
static dma_addr_t dma_4u_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t sz,
enum dma_data_direction direction,
@@ -742,6 +676,71 @@ static void dma_4u_sync_sg_for_cpu(struct device *dev,
spin_unlock_irqrestore(&iommu->lock, flags);
}
+static void *dma_4u_alloc(struct device *dev, size_t size,
+ dma_addr_t *dma_addrp, gfp_t gfp, unsigned long attrs)
+{
+ unsigned long order, first_page;
+ struct iommu *iommu;
+ struct page *page;
+ int npages, nid;
+ iopte_t *iopte;
+ void *ret;
+
+ size = IO_PAGE_ALIGN(size);
+ order = get_order(size);
+ if (order >= 10)
+ return NULL;
+
+ nid = dev->archdata.numa_node;
+ page = alloc_pages_node(nid, gfp, order);
+ if (unlikely(!page))
+ return NULL;
+
+ first_page = (unsigned long) page_address(page);
+ memset((char *)first_page, 0, PAGE_SIZE << order);
+
+ iommu = dev->archdata.iommu;
+
+ iopte = alloc_npages(dev, iommu, size >> IO_PAGE_SHIFT);
+
+ if (unlikely(iopte == NULL)) {
+ free_pages(first_page, order);
+ return NULL;
+ }
+
+ *dma_addrp = (iommu->tbl.table_map_base +
+ ((iopte - iommu->page_table) << IO_PAGE_SHIFT));
+ ret = (void *) first_page;
+ npages = size >> IO_PAGE_SHIFT;
+ first_page = __pa(first_page);
+ while (npages--) {
+ iopte_val(*iopte) = (IOPTE_CONSISTENT(0UL) |
+ IOPTE_WRITE |
+ (first_page & IOPTE_PAGE));
+ iopte++;
+ first_page += IO_PAGE_SIZE;
+ }
+
+ return ret;
+}
+
+static void dma_4u_free(struct device *dev, size_t size, void *cpu,
+ dma_addr_t dvma, unsigned long attrs)
+{
+ struct iommu *iommu;
+ unsigned long order, npages;
+
+ npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
+ iommu = dev->archdata.iommu;
+
+ iommu_tbl_range_free(&iommu->tbl, dvma, npages, IOMMU_ERROR_CODE);
+
+ order = get_order(size);
+ if (order < 10)
+ free_pages((unsigned long)cpu, order);
+}
+
+
static int dma_4u_supported(struct device *dev, u64 device_mask)
{
struct iommu *iommu = dev->archdata.iommu;
@@ -758,8 +757,8 @@ static int dma_4u_supported(struct device *dev, u64 device_mask)
}
static const struct dma_map_ops sun4u_dma_ops = {
- .alloc = dma_4u_alloc_coherent,
- .free = dma_4u_free_coherent,
+ .alloc = dma_4u_alloc,
+ .free = dma_4u_free,
.map_page = dma_4u_map_page,
.unmap_page = dma_4u_unmap_page,
.map_sg = dma_4u_map_sg,
--
2.19.2
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH 06/10] sparc64/iommu: implement DMA_ATTR_NON_CONSISTENT
2018-12-08 17:36 make the non-consistent DMA allocator more userful Christoph Hellwig
` (4 preceding siblings ...)
2018-12-08 17:36 ` [PATCH 05/10] sparc64/iommu: move code around a bit Christoph Hellwig
@ 2018-12-08 17:36 ` Christoph Hellwig
2018-12-09 4:58 ` David Miller
2018-12-08 17:36 ` [PATCH 07/10] sparc64/pci_sun4v: move code around a bit Christoph Hellwig
` (3 subsequent siblings)
9 siblings, 1 reply; 19+ messages in thread
From: Christoph Hellwig @ 2018-12-08 17:36 UTC (permalink / raw)
To: iommu
Cc: linux-mips, linux-parisc, Vineet Gupta, Robin Murphy, dri-devel,
Matwey V. Kornilov, openrisc, Laurent Pinchart, sparclinux,
linux-snps-arc, Ezequiel Garcia, linux-arm-kernel, linux-media
Just allocate the memory and use map_page to map the memory.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
arch/sparc/kernel/iommu.c | 33 +++++++++++++++++++++++----------
1 file changed, 23 insertions(+), 10 deletions(-)
diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c
index 4bf0497e0704..4ce24c9dc691 100644
--- a/arch/sparc/kernel/iommu.c
+++ b/arch/sparc/kernel/iommu.c
@@ -699,14 +699,19 @@ static void *dma_4u_alloc(struct device *dev, size_t size,
first_page = (unsigned long) page_address(page);
memset((char *)first_page, 0, PAGE_SIZE << order);
+ if (attrs & DMA_ATTR_NON_CONSISTENT) {
+ *dma_addrp = dma_4u_map_page(dev, page, 0, size,
+ DMA_BIDIRECTIONAL, 0);
+ if (*dma_addrp == DMA_MAPPING_ERROR)
+ goto out_free_page;
+ return page_address(page);
+ }
+
iommu = dev->archdata.iommu;
iopte = alloc_npages(dev, iommu, size >> IO_PAGE_SHIFT);
-
- if (unlikely(iopte == NULL)) {
- free_pages(first_page, order);
- return NULL;
- }
+ if (unlikely(iopte == NULL))
+ goto out_free_page;
*dma_addrp = (iommu->tbl.table_map_base +
((iopte - iommu->page_table) << IO_PAGE_SHIFT));
@@ -722,18 +727,26 @@ static void *dma_4u_alloc(struct device *dev, size_t size,
}
return ret;
+
+out_free_page:
+ free_pages(first_page, order);
+ return NULL;
}
static void dma_4u_free(struct device *dev, size_t size, void *cpu,
dma_addr_t dvma, unsigned long attrs)
{
- struct iommu *iommu;
- unsigned long order, npages;
+ unsigned long order;
- npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
- iommu = dev->archdata.iommu;
+ if (attrs & DMA_ATTR_NON_CONSISTENT) {
+ dma_4u_unmap_page(dev, dvma, size, DMA_BIDIRECTIONAL, 0);
+ } else {
+ struct iommu *iommu = dev->archdata.iommu;
- iommu_tbl_range_free(&iommu->tbl, dvma, npages, IOMMU_ERROR_CODE);
+ iommu_tbl_range_free(&iommu->tbl, dvma,
+ IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT,
+ IOMMU_ERROR_CODE);
+ }
order = get_order(size);
if (order < 10)
--
2.19.2
^ permalink raw reply related [flat|nested] 19+ messages in thread* Re: [PATCH 06/10] sparc64/iommu: implement DMA_ATTR_NON_CONSISTENT
2018-12-08 17:36 ` [PATCH 06/10] sparc64/iommu: implement DMA_ATTR_NON_CONSISTENT Christoph Hellwig
@ 2018-12-09 4:58 ` David Miller
0 siblings, 0 replies; 19+ messages in thread
From: David Miller @ 2018-12-09 4:58 UTC (permalink / raw)
To: hch
Cc: linux-parisc, ezequiel, vgupta, linux-mips, dri-devel, matwey,
iommu, openrisc, laurent.pinchart, sparclinux, linux-snps-arc,
robin.murphy, linux-arm-kernel, linux-media
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 8 Dec 2018 09:36:58 -0800
> Just allocate the memory and use map_page to map the memory.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: David S. Miller <davem@davemloft.net>
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH 07/10] sparc64/pci_sun4v: move code around a bit
2018-12-08 17:36 make the non-consistent DMA allocator more useful Christoph Hellwig
` (5 preceding siblings ...)
2018-12-08 17:36 ` [PATCH 06/10] sparc64/iommu: implement DMA_ATTR_NON_CONSISTENT Christoph Hellwig
@ 2018-12-08 17:36 ` Christoph Hellwig
2018-12-09 4:58 ` David Miller
[not found] ` <20181208173702.15158-1-hch-jcswGhMUV9g@public.gmane.org>
` (2 subsequent siblings)
9 siblings, 1 reply; 19+ messages in thread
From: Christoph Hellwig @ 2018-12-08 17:36 UTC (permalink / raw)
To: iommu
Cc: linux-mips, linux-parisc, Vineet Gupta, Robin Murphy, dri-devel,
Matwey V. Kornilov, openrisc, Laurent Pinchart, sparclinux,
linux-snps-arc, Ezequiel Garcia, linux-arm-kernel, linux-media
Move the alloc / free routines down the file so that we can easily use
the map / unmap helpers to implement non-consistent allocations.
Also drop the _coherent postfix to match the method name.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
arch/sparc/kernel/pci_sun4v.c | 229 +++++++++++++++++-----------------
1 file changed, 114 insertions(+), 115 deletions(-)
diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c
index fa0e42b4cbfb..b95c70136559 100644
--- a/arch/sparc/kernel/pci_sun4v.c
+++ b/arch/sparc/kernel/pci_sun4v.c
@@ -171,87 +171,6 @@ static inline long iommu_batch_end(u64 mask)
return iommu_batch_flush(p, mask);
}
-static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_addrp, gfp_t gfp,
- unsigned long attrs)
-{
- u64 mask;
- unsigned long flags, order, first_page, npages, n;
- unsigned long prot = 0;
- struct iommu *iommu;
- struct atu *atu;
- struct iommu_map_table *tbl;
- struct page *page;
- void *ret;
- long entry;
- int nid;
-
- size = IO_PAGE_ALIGN(size);
- order = get_order(size);
- if (unlikely(order >= MAX_ORDER))
- return NULL;
-
- npages = size >> IO_PAGE_SHIFT;
-
- if (attrs & DMA_ATTR_WEAK_ORDERING)
- prot = HV_PCI_MAP_ATTR_RELAXED_ORDER;
-
- nid = dev->archdata.numa_node;
- page = alloc_pages_node(nid, gfp, order);
- if (unlikely(!page))
- return NULL;
-
- first_page = (unsigned long) page_address(page);
- memset((char *)first_page, 0, PAGE_SIZE << order);
-
- iommu = dev->archdata.iommu;
- atu = iommu->atu;
-
- mask = dev->coherent_dma_mask;
- if (mask <= DMA_BIT_MASK(32))
- tbl = &iommu->tbl;
- else
- tbl = &atu->tbl;
-
- entry = iommu_tbl_range_alloc(dev, tbl, npages, NULL,
- (unsigned long)(-1), 0);
-
- if (unlikely(entry == IOMMU_ERROR_CODE))
- goto range_alloc_fail;
-
- *dma_addrp = (tbl->table_map_base + (entry << IO_PAGE_SHIFT));
- ret = (void *) first_page;
- first_page = __pa(first_page);
-
- local_irq_save(flags);
-
- iommu_batch_start(dev,
- (HV_PCI_MAP_ATTR_READ | prot |
- HV_PCI_MAP_ATTR_WRITE),
- entry);
-
- for (n = 0; n < npages; n++) {
- long err = iommu_batch_add(first_page + (n * PAGE_SIZE), mask);
- if (unlikely(err < 0L))
- goto iommu_map_fail;
- }
-
- if (unlikely(iommu_batch_end(mask) < 0L))
- goto iommu_map_fail;
-
- local_irq_restore(flags);
-
- return ret;
-
-iommu_map_fail:
- local_irq_restore(flags);
- iommu_tbl_range_free(tbl, *dma_addrp, npages, IOMMU_ERROR_CODE);
-
-range_alloc_fail:
- free_pages(first_page, order);
- return NULL;
-}
-
unsigned long dma_4v_iotsb_bind(unsigned long devhandle,
unsigned long iotsb_num,
struct pci_bus *bus_dev)
@@ -316,38 +235,6 @@ static void dma_4v_iommu_demap(struct device *dev, unsigned long devhandle,
local_irq_restore(flags);
}
-static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu,
- dma_addr_t dvma, unsigned long attrs)
-{
- struct pci_pbm_info *pbm;
- struct iommu *iommu;
- struct atu *atu;
- struct iommu_map_table *tbl;
- unsigned long order, npages, entry;
- unsigned long iotsb_num;
- u32 devhandle;
-
- npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
- iommu = dev->archdata.iommu;
- pbm = dev->archdata.host_controller;
- atu = iommu->atu;
- devhandle = pbm->devhandle;
-
- if (dvma <= DMA_BIT_MASK(32)) {
- tbl = &iommu->tbl;
- iotsb_num = 0; /* we don't care for legacy iommu */
- } else {
- tbl = &atu->tbl;
- iotsb_num = atu->iotsb->iotsb_num;
- }
- entry = ((dvma - tbl->table_map_base) >> IO_PAGE_SHIFT);
- dma_4v_iommu_demap(dev, devhandle, dvma, iotsb_num, entry, npages);
- iommu_tbl_range_free(tbl, dvma, npages, IOMMU_ERROR_CODE);
- order = get_order(size);
- if (order < 10)
- free_pages((unsigned long)cpu, order);
-}
-
static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t sz,
enum dma_data_direction direction,
@@ -671,6 +558,118 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
local_irq_restore(flags);
}
+static void *dma_4v_alloc(struct device *dev, size_t size,
+ dma_addr_t *dma_addrp, gfp_t gfp, unsigned long attrs)
+{
+ u64 mask;
+ unsigned long flags, order, first_page, npages, n;
+ unsigned long prot = 0;
+ struct iommu *iommu;
+ struct atu *atu;
+ struct iommu_map_table *tbl;
+ struct page *page;
+ void *ret;
+ long entry;
+ int nid;
+
+ size = IO_PAGE_ALIGN(size);
+ order = get_order(size);
+ if (unlikely(order >= MAX_ORDER))
+ return NULL;
+
+ npages = size >> IO_PAGE_SHIFT;
+
+ if (attrs & DMA_ATTR_WEAK_ORDERING)
+ prot = HV_PCI_MAP_ATTR_RELAXED_ORDER;
+
+ nid = dev->archdata.numa_node;
+ page = alloc_pages_node(nid, gfp, order);
+ if (unlikely(!page))
+ return NULL;
+
+ first_page = (unsigned long) page_address(page);
+ memset((char *)first_page, 0, PAGE_SIZE << order);
+
+ iommu = dev->archdata.iommu;
+ atu = iommu->atu;
+
+ mask = dev->coherent_dma_mask;
+ if (mask <= DMA_BIT_MASK(32))
+ tbl = &iommu->tbl;
+ else
+ tbl = &atu->tbl;
+
+ entry = iommu_tbl_range_alloc(dev, tbl, npages, NULL,
+ (unsigned long)(-1), 0);
+
+ if (unlikely(entry == IOMMU_ERROR_CODE))
+ goto range_alloc_fail;
+
+ *dma_addrp = (tbl->table_map_base + (entry << IO_PAGE_SHIFT));
+ ret = (void *) first_page;
+ first_page = __pa(first_page);
+
+ local_irq_save(flags);
+
+ iommu_batch_start(dev,
+ (HV_PCI_MAP_ATTR_READ | prot |
+ HV_PCI_MAP_ATTR_WRITE),
+ entry);
+
+ for (n = 0; n < npages; n++) {
+ long err = iommu_batch_add(first_page + (n * PAGE_SIZE), mask);
+ if (unlikely(err < 0L))
+ goto iommu_map_fail;
+ }
+
+ if (unlikely(iommu_batch_end(mask) < 0L))
+ goto iommu_map_fail;
+
+ local_irq_restore(flags);
+
+ return ret;
+
+iommu_map_fail:
+ local_irq_restore(flags);
+ iommu_tbl_range_free(tbl, *dma_addrp, npages, IOMMU_ERROR_CODE);
+
+range_alloc_fail:
+ free_pages(first_page, order);
+ return NULL;
+}
+
+static void dma_4v_free(struct device *dev, size_t size, void *cpu,
+ dma_addr_t dvma, unsigned long attrs)
+{
+ struct pci_pbm_info *pbm;
+ struct iommu *iommu;
+ struct atu *atu;
+ struct iommu_map_table *tbl;
+ unsigned long order, npages, entry;
+ unsigned long iotsb_num;
+ u32 devhandle;
+
+ npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
+ iommu = dev->archdata.iommu;
+ pbm = dev->archdata.host_controller;
+ atu = iommu->atu;
+ devhandle = pbm->devhandle;
+
+ if (dvma <= DMA_BIT_MASK(32)) {
+ tbl = &iommu->tbl;
+ iotsb_num = 0; /* we don't care for legacy iommu */
+ } else {
+ tbl = &atu->tbl;
+ iotsb_num = atu->iotsb->iotsb_num;
+ }
+ entry = ((dvma - tbl->table_map_base) >> IO_PAGE_SHIFT);
+ dma_4v_iommu_demap(dev, devhandle, dvma, iotsb_num, entry, npages);
+ iommu_tbl_range_free(tbl, dvma, npages, IOMMU_ERROR_CODE);
+ order = get_order(size);
+ if (order < 10)
+ free_pages((unsigned long)cpu, order);
+}
+
static int dma_4v_supported(struct device *dev, u64 device_mask)
{
struct iommu *iommu = dev->archdata.iommu;
@@ -689,8 +688,8 @@ static int dma_4v_supported(struct device *dev, u64 device_mask)
}
static const struct dma_map_ops sun4v_dma_ops = {
- .alloc = dma_4v_alloc_coherent,
- .free = dma_4v_free_coherent,
+ .alloc = dma_4v_alloc,
+ .free = dma_4v_free,
.map_page = dma_4v_map_page,
.unmap_page = dma_4v_unmap_page,
.map_sg = dma_4v_map_sg,
--
2.19.2
^ permalink raw reply related [flat|nested] 19+ messages in thread* Re: [PATCH 07/10] sparc64/pci_sun4v: move code around a bit
2018-12-08 17:36 ` [PATCH 07/10] sparc64/pci_sun4v: move code around a bit Christoph Hellwig
@ 2018-12-09 4:58 ` David Miller
0 siblings, 0 replies; 19+ messages in thread
From: David Miller @ 2018-12-09 4:58 UTC (permalink / raw)
To: hch
Cc: linux-parisc, ezequiel, vgupta, linux-mips, dri-devel, matwey,
iommu, openrisc, laurent.pinchart, sparclinux, linux-snps-arc,
robin.murphy, linux-arm-kernel, linux-media
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 8 Dec 2018 09:36:59 -0800
> Move the alloc / free routines down the file so that we can easily use
> the map / unmap helpers to implement non-consistent allocations.
>
> Also drop the _coherent postfix to match the method name.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: David S. Miller <davem@davemloft.net>
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 19+ messages in thread
[parent not found: <20181208173702.15158-1-hch-jcswGhMUV9g@public.gmane.org>]
* [PATCH 08/10] sparc64/pci_sun4v: implement DMA_ATTR_NON_CONSISTENT
[not found] ` <20181208173702.15158-1-hch-jcswGhMUV9g@public.gmane.org>
@ 2018-12-08 17:37 ` Christoph Hellwig
2018-12-09 4:58 ` David Miller
0 siblings, 1 reply; 19+ messages in thread
From: Christoph Hellwig @ 2018-12-08 17:37 UTC (permalink / raw)
To: iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA
Cc: linux-mips-u79uwXL29TY76Z2rM5mHXA,
linux-parisc-u79uwXL29TY76Z2rM5mHXA, Vineet Gupta, Robin Murphy,
dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Matwey V. Kornilov,
openrisc-cunTk1MwBs9a3B2Vnqf2dGD2FQJk+8+b, Laurent Pinchart,
sparclinux-u79uwXL29TY76Z2rM5mHXA,
linux-snps-arc-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r, Ezequiel Garcia,
linux-arm-kernel-u79uwXL29TY76Z2rM5mHXA,
linux-media-u79uwXL29TY76Z2rM5mHXA
Just allocate the memory and use map_page to map the memory.
Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
---
arch/sparc/kernel/pci_sun4v.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c
index b95c70136559..24a76ecf2986 100644
--- a/arch/sparc/kernel/pci_sun4v.c
+++ b/arch/sparc/kernel/pci_sun4v.c
@@ -590,6 +590,14 @@ static void *dma_4v_alloc(struct device *dev, size_t size,
first_page = (unsigned long) page_address(page);
memset((char *)first_page, 0, PAGE_SIZE << order);
+ if (attrs & DMA_ATTR_NON_CONSISTENT) {
+ *dma_addrp = dma_4v_map_page(dev, page, 0, size,
+ DMA_BIDIRECTIONAL, 0);
+ if (*dma_addrp == DMA_MAPPING_ERROR)
+ goto range_alloc_fail;
+ return page_address(page);
+ }
+
iommu = dev->archdata.iommu;
atu = iommu->atu;
@@ -649,6 +657,11 @@ static void dma_4v_free(struct device *dev, size_t size, void *cpu,
unsigned long iotsb_num;
u32 devhandle;
+ if (attrs & DMA_ATTR_NON_CONSISTENT) {
+ dma_4v_unmap_page(dev, dvma, size, DMA_BIDIRECTIONAL, 0);
+ goto free_pages;
+ }
+
npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
iommu = dev->archdata.iommu;
pbm = dev->archdata.host_controller;
@@ -665,6 +678,7 @@ static void dma_4v_free(struct device *dev, size_t size, void *cpu,
entry = ((dvma - tbl->table_map_base) >> IO_PAGE_SHIFT);
dma_4v_iommu_demap(dev, devhandle, dvma, iotsb_num, entry, npages);
iommu_tbl_range_free(tbl, dvma, npages, IOMMU_ERROR_CODE);
+free_pages:
order = get_order(size);
if (order < 10)
free_pages((unsigned long)cpu, order);
--
2.19.2
^ permalink raw reply related [flat|nested] 19+ messages in thread* Re: [PATCH 08/10] sparc64/pci_sun4v: implement DMA_ATTR_NON_CONSISTENT
2018-12-08 17:37 ` [PATCH 08/10] sparc64/pci_sun4v: implement DMA_ATTR_NON_CONSISTENT Christoph Hellwig
@ 2018-12-09 4:58 ` David Miller
0 siblings, 0 replies; 19+ messages in thread
From: David Miller @ 2018-12-09 4:58 UTC (permalink / raw)
To: hch
Cc: linux-parisc, ezequiel, vgupta, linux-mips, dri-devel, matwey,
iommu, openrisc, laurent.pinchart, sparclinux, linux-snps-arc,
robin.murphy, linux-arm-kernel, linux-media
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 8 Dec 2018 09:37:00 -0800
> Just allocate the memory and use map_page to map the memory.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: David S. Miller <davem@davemloft.net>
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH 09/10] dma-mapping: skip declared coherent memory for DMA_ATTR_NON_CONSISTENT
2018-12-08 17:36 make the non-consistent DMA allocator more useful Christoph Hellwig
` (7 preceding siblings ...)
[not found] ` <20181208173702.15158-1-hch-jcswGhMUV9g@public.gmane.org>
@ 2018-12-08 17:37 ` Christoph Hellwig
2018-12-08 17:37 ` [PATCH 10/10] Documentation: update the description " Christoph Hellwig
9 siblings, 0 replies; 19+ messages in thread
From: Christoph Hellwig @ 2018-12-08 17:37 UTC (permalink / raw)
To: iommu
Cc: linux-mips, linux-parisc, Vineet Gupta, Robin Murphy, dri-devel,
Matwey V. Kornilov, openrisc, Laurent Pinchart, sparclinux,
linux-snps-arc, Ezequiel Garcia, linux-arm-kernel, linux-media
Memory declared using dma_declare_coherent is ioremapped and thus not
always suitable for our tightened DMA_ATTR_NON_CONSISTENT definition.
Skip it given all the existing callers don't use DMA_ATTR_NON_CONSISTENT
anyway.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
include/linux/dma-mapping.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 7799c2b27849..8c81fa5d1f44 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -521,7 +521,8 @@ static inline void *dma_alloc_attrs(struct device *dev, size_t size,
BUG_ON(!ops);
WARN_ON_ONCE(dev && !dev->coherent_dma_mask);
- if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr))
+ if (!(attrs & DMA_ATTR_NON_CONSISTENT) &&
+ dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr))
return cpu_addr;
/* let the implementation decide on the zone to allocate from: */
--
2.19.2
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH 10/10] Documentation: update the description for DMA_ATTR_NON_CONSISTENT
2018-12-08 17:36 make the non-consistent DMA allocator more useful Christoph Hellwig
` (8 preceding siblings ...)
2018-12-08 17:37 ` [PATCH 09/10] dma-mapping: skip declared coherent memory for DMA_ATTR_NON_CONSISTENT Christoph Hellwig
@ 2018-12-08 17:37 ` Christoph Hellwig
9 siblings, 0 replies; 19+ messages in thread
From: Christoph Hellwig @ 2018-12-08 17:37 UTC (permalink / raw)
To: iommu
Cc: linux-mips, linux-parisc, Vineet Gupta, Robin Murphy, dri-devel,
Matwey V. Kornilov, openrisc, Laurent Pinchart, sparclinux,
linux-snps-arc, Ezequiel Garcia, linux-arm-kernel, linux-media
We got rid of the odd selective consistent or not behavior, and now
want the normal dma_sync_single_* functions to be used for strict
ownership transfers. While dma_cache_sync hasn't been removed from
the tree yet it should not be used in any new caller, so documentation
for it is dropped here.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
Documentation/DMA-API.txt | 30 ++++--------------------------
Documentation/DMA-attributes.txt | 9 +++++----
include/linux/dma-mapping.h | 3 +++
3 files changed, 12 insertions(+), 30 deletions(-)
diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index ac66ae2509a9..c81fe8a4aeec 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -518,20 +518,9 @@ API at all.
dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
gfp_t flag, unsigned long attrs)
-Identical to dma_alloc_coherent() except that when the
-DMA_ATTR_NON_CONSISTENT flags is passed in the attrs argument, the
-platform will choose to return either consistent or non-consistent memory
-as it sees fit. By using this API, you are guaranteeing to the platform
-that you have all the correct and necessary sync points for this memory
-in the driver should it choose to return non-consistent memory.
-
-Note: where the platform can return consistent memory, it will
-guarantee that the sync points become nops.
-
-Warning: Handling non-consistent memory is a real pain. You should
-only use this API if you positively know your driver will be
-required to work on one of the rare (usually non-PCI) architectures
-that simply cannot make consistent memory.
+Similar to dma_alloc_coherent(), except that the behavior can be controlled
+in more detail using the attrs argument. See Documentation/DMA-attributes.txt
+for more details.
::
@@ -540,7 +529,7 @@ that simply cannot make consistent memory.
dma_addr_t dma_handle, unsigned long attrs)
Free memory allocated by the dma_alloc_attrs(). All parameters common
-parameters must identical to those otherwise passed to dma_fre_coherent,
+parameters must identical to those otherwise passed to dma_free_coherent,
and the attrs argument must be identical to the attrs passed to
dma_alloc_attrs().
@@ -560,17 +549,6 @@ memory or doing partial flushes.
into the width returned by this call. It will also always be a power
of two for easy alignment.
-::
-
- void
- dma_cache_sync(struct device *dev, void *vaddr, size_t size,
- enum dma_data_direction direction)
-
-Do a partial sync of memory that was allocated by dma_alloc_attrs() with
-the DMA_ATTR_NON_CONSISTENT flag starting at virtual address vaddr and
-continuing on for size. Again, you *must* observe the cache line
-boundaries when doing this.
-
::
int
diff --git a/Documentation/DMA-attributes.txt b/Documentation/DMA-attributes.txt
index 8f8d97f65d73..2bb3fc0a621b 100644
--- a/Documentation/DMA-attributes.txt
+++ b/Documentation/DMA-attributes.txt
@@ -46,10 +46,11 @@ behavior.
DMA_ATTR_NON_CONSISTENT
-----------------------
-DMA_ATTR_NON_CONSISTENT lets the platform to choose to return either
-consistent or non-consistent memory as it sees fit. By using this API,
-you are guaranteeing to the platform that you have all the correct and
-necessary sync points for this memory in the driver.
+DMA_ATTR_NON_CONSISTENT specifies that the memory returned is not
+required to be consistent. The memory is owned by the device when
+returned from this function, and ownership must be explicitly
+transferred to the CPU using dma_sync_single_for_cpu, and back to the
+device using dma_sync_single_for_device.
DMA_ATTR_NO_KERNEL_MAPPING
--------------------------
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 8c81fa5d1f44..8757ad5087c4 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -432,6 +432,9 @@ dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
#define dma_map_page(d, p, o, s, r) dma_map_page_attrs(d, p, o, s, r, 0)
#define dma_unmap_page(d, a, s, r) dma_unmap_page_attrs(d, a, s, r, 0)
+/*
+ * Don't use in new code, use dma_sync_single_for_{device,cpu} instead.
+ */
static inline void
dma_cache_sync(struct device *dev, void *vaddr, size_t size,
enum dma_data_direction dir)
--
2.19.2
^ permalink raw reply related [flat|nested] 19+ messages in thread