* [RFC PATCH] ARM: add coherent dma ops
@ 2012-08-09 5:37 Rob Herring
2012-08-13 6:15 ` Marek Szyprowski
0 siblings, 1 reply; 3+ messages in thread
From: Rob Herring @ 2012-08-09 5:37 UTC (permalink / raw)
To: linux-arm-kernel
From: Rob Herring <rob.herring@calxeda.com>
arch_is_coherent is problematic as it is a global symbol. This
doesn't work for multi-platform kernels or platforms which can support
per device coherent DMA.
This adds arm_coherent_dma_ops to be used for devices which are connected
coherently (i.e. to the ACP port on Cortex-A9 or A15). The arm_dma_ops
are modified at boot when arch_is_coherent is true.
This does not address arch_is_coherent used in iommu dma ops.
Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
---
Compile tested only.
arch/arm/mm/dma-mapping.c | 89 +++++++++++++++++++++++++++++++++++++++------
1 file changed, 77 insertions(+), 12 deletions(-)
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index c2cdf65..8875cd4 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -73,11 +73,18 @@ static dma_addr_t arm_dma_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size, enum dma_data_direction dir,
struct dma_attrs *attrs)
{
- if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+ if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
__dma_page_cpu_to_dev(page, offset, size, dir);
return pfn_to_dma(dev, page_to_pfn(page)) + offset;
}
+static dma_addr_t arm_coherent_dma_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size, enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+{
+ return pfn_to_dma(dev, page_to_pfn(page)) + offset;
+}
+
/**
* arm_dma_unmap_page - unmap a buffer previously mapped through dma_map_page()
* @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
@@ -96,7 +103,7 @@ static void arm_dma_unmap_page(struct device *dev, dma_addr_t handle,
size_t size, enum dma_data_direction dir,
struct dma_attrs *attrs)
{
- if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+ if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
__dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, handle)),
handle & ~PAGE_MASK, size, dir);
}
@@ -106,8 +113,7 @@ static void arm_dma_sync_single_for_cpu(struct device *dev,
{
unsigned int offset = handle & (PAGE_SIZE - 1);
struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset));
- if (!arch_is_coherent())
- __dma_page_dev_to_cpu(page, offset, size, dir);
+ __dma_page_dev_to_cpu(page, offset, size, dir);
}
static void arm_dma_sync_single_for_device(struct device *dev,
@@ -115,8 +121,7 @@ static void arm_dma_sync_single_for_device(struct device *dev,
{
unsigned int offset = handle & (PAGE_SIZE - 1);
struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset));
- if (!arch_is_coherent())
- __dma_page_cpu_to_dev(page, offset, size, dir);
+ __dma_page_cpu_to_dev(page, offset, size, dir);
}
static int arm_dma_set_mask(struct device *dev, u64 dma_mask);
@@ -138,6 +143,40 @@ struct dma_map_ops arm_dma_ops = {
};
EXPORT_SYMBOL(arm_dma_ops);
+static void *arm_coherent_dma_alloc(struct device *dev, size_t size,
+ dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs);
+static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr,
+ dma_addr_t handle, struct dma_attrs *attrs);
+
+struct dma_map_ops arm_coherent_dma_ops = {
+ .alloc = arm_coherent_dma_alloc,
+ .free = arm_coherent_dma_free,
+ .mmap = arm_dma_mmap,
+ .get_sgtable = arm_dma_get_sgtable,
+ .map_page = arm_coherent_dma_map_page,
+ .map_sg = arm_dma_map_sg,
+ .set_dma_mask = arm_dma_set_mask,
+};
+EXPORT_SYMBOL(arm_coherent_dma_ops);
+
+static int __init dma_map_init(void)
+{
+ if (!arch_is_coherent())
+ return 0;
+
+ arm_dma_ops.map_page = arm_coherent_dma_map_page;
+ arm_dma_ops.unmap_page = NULL;
+ arm_dma_ops.map_sg = NULL;
+ arm_dma_ops.unmap_sg = NULL;
+ arm_dma_ops.sync_single_for_cpu = NULL;
+ arm_dma_ops.sync_single_for_device = NULL;
+ arm_dma_ops.sync_sg_for_cpu = NULL;
+ arm_dma_ops.sync_sg_for_device = NULL;
+ arm_dma_ops.alloc = arm_coherent_dma_alloc;
+ arm_dma_ops.free = arm_coherent_dma_free;
+}
+core_initcall(dma_map_init);
+
static u64 get_coherent_dma_mask(struct device *dev)
{
u64 mask = (u64)arm_dma_limit;
@@ -538,7 +577,7 @@ static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp,
static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
- gfp_t gfp, pgprot_t prot, const void *caller)
+ gfp_t gfp, pgprot_t prot, bool is_coherent, const void *caller)
{
u64 mask = get_coherent_dma_mask(dev);
struct page *page;
@@ -571,7 +610,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
*handle = DMA_ERROR_CODE;
size = PAGE_ALIGN(size);
- if (arch_is_coherent() || nommu())
+ if (is_coherent || nommu())
addr = __alloc_simple_buffer(dev, size, gfp, &page);
else if (gfp & GFP_ATOMIC)
addr = __alloc_from_pool(size, &page);
@@ -599,7 +638,20 @@ void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
if (dma_alloc_from_coherent(dev, size, handle, &memory))
return memory;
- return __dma_alloc(dev, size, handle, gfp, prot,
+ return __dma_alloc(dev, size, handle, gfp, prot, false,
+ __builtin_return_address(0));
+}
+
+static void *arm_coherent_dma_alloc(struct device *dev, size_t size,
+ dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)
+{
+ pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel);
+ void *memory;
+
+ if (dma_alloc_from_coherent(dev, size, handle, &memory))
+ return memory;
+
+ return __dma_alloc(dev, size, handle, gfp, prot, true,
__builtin_return_address(0));
}
@@ -636,8 +688,9 @@ int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
/*
* Free a buffer as defined by the above mapping.
*/
-void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
- dma_addr_t handle, struct dma_attrs *attrs)
+static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
+ dma_addr_t handle, struct dma_attrs *attrs,
+ bool is_coherent)
{
struct page *page = pfn_to_page(dma_to_pfn(dev, handle));
@@ -646,7 +699,7 @@ void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
size = PAGE_ALIGN(size);
- if (arch_is_coherent() || nommu()) {
+ if (is_coherent || nommu()) {
__dma_free_buffer(page, size);
} else if (!IS_ENABLED(CONFIG_CMA)) {
__dma_free_remap(cpu_addr, size);
@@ -662,6 +715,18 @@ void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
}
}
+void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
+ dma_addr_t handle, struct dma_attrs *attrs)
+{
+ __arm_dma_free(dev, size, cpu_addr, handle, attrs, false);
+}
+
+static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr,
+ dma_addr_t handle, struct dma_attrs *attrs)
+{
+ __arm_dma_free(dev, size, cpu_addr, handle, attrs, true);
+}
+
int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
void *cpu_addr, dma_addr_t handle, size_t size,
struct dma_attrs *attrs)
--
1.7.9.5
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [RFC PATCH] ARM: add coherent dma ops
2012-08-09 5:37 [RFC PATCH] ARM: add coherent dma ops Rob Herring
@ 2012-08-13 6:15 ` Marek Szyprowski
2012-08-13 22:35 ` Rob Herring
0 siblings, 1 reply; 3+ messages in thread
From: Marek Szyprowski @ 2012-08-13 6:15 UTC (permalink / raw)
To: linux-arm-kernel
Hi Rob,
On Thursday, August 09, 2012 7:37 AM Rob Herring wrote:
> From: Rob Herring <rob.herring@calxeda.com>
>
> arch_is_coherent is problematic as it is a global symbol. This
> doesn't work for multi-platform kernels or platforms which can support
> per device coherent DMA.
>
> This adds arm_coherent_dma_ops to be used for devices which connected
> coherently (i.e. to the ACP port on Cortex-A9 or A15). The arm_dma_ops
> are modified at boot when arch_is_coherent is true.
Thanks for the patch. I had something similar on my TODO list, but have not had enough
time for it. I like this patch, but I have some comments.
> This does not address arch_is_coherent used in iommu dma ops.
In the initial version we might get rid of the arch_is_coherent() usage in the iommu dma ops and
implement it when real coherent hardware with an IOMMU becomes available.
> Signed-off-by: Rob Herring <rob.herring@calxeda.com>
> Cc: Russell King <linux@arm.linux.org.uk>
> Cc: Marek Szyprowski <m.szyprowski@samsung.com>
> ---
> Compile tested only.
> arch/arm/mm/dma-mapping.c | 89 +++++++++++++++++++++++++++++++++++++++------
> 1 file changed, 77 insertions(+), 12 deletions(-)
>
> diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
> index c2cdf65..8875cd4 100644
> --- a/arch/arm/mm/dma-mapping.c
> +++ b/arch/arm/mm/dma-mapping.c
> @@ -73,11 +73,18 @@ static dma_addr_t arm_dma_map_page(struct device *dev, struct page *page,
> unsigned long offset, size_t size, enum dma_data_direction dir,
> struct dma_attrs *attrs)
> {
> - if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
> + if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
> __dma_page_cpu_to_dev(page, offset, size, dir);
> return pfn_to_dma(dev, page_to_pfn(page)) + offset;
> }
>
> +static dma_addr_t arm_coherent_dma_map_page(struct device *dev, struct page *page,
> + unsigned long offset, size_t size, enum dma_data_direction dir,
> + struct dma_attrs *attrs)
> +{
> + return pfn_to_dma(dev, page_to_pfn(page)) + offset;
> +}
> +
> /**
> * arm_dma_unmap_page - unmap a buffer previously mapped through dma_map_page()
> * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
> @@ -96,7 +103,7 @@ static void arm_dma_unmap_page(struct device *dev, dma_addr_t handle,
> size_t size, enum dma_data_direction dir,
> struct dma_attrs *attrs)
> {
> - if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
> + if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
> __dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, handle)),
> handle & ~PAGE_MASK, size, dir);
> }
> @@ -106,8 +113,7 @@ static void arm_dma_sync_single_for_cpu(struct device *dev,
> {
> unsigned int offset = handle & (PAGE_SIZE - 1);
> struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset));
> - if (!arch_is_coherent())
> - __dma_page_dev_to_cpu(page, offset, size, dir);
> + __dma_page_dev_to_cpu(page, offset, size, dir);
> }
>
> static void arm_dma_sync_single_for_device(struct device *dev,
> @@ -115,8 +121,7 @@ static void arm_dma_sync_single_for_device(struct device *dev,
> {
> unsigned int offset = handle & (PAGE_SIZE - 1);
> struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset));
> - if (!arch_is_coherent())
> - __dma_page_cpu_to_dev(page, offset, size, dir);
> + __dma_page_cpu_to_dev(page, offset, size, dir);
> }
>
> static int arm_dma_set_mask(struct device *dev, u64 dma_mask);
> @@ -138,6 +143,40 @@ struct dma_map_ops arm_dma_ops = {
> };
> EXPORT_SYMBOL(arm_dma_ops);
>
> +static void *arm_coherent_dma_alloc(struct device *dev, size_t size,
> + dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs);
> +static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr,
> + dma_addr_t handle, struct dma_attrs *attrs);
> +
> +struct dma_map_ops arm_coherent_dma_ops = {
> + .alloc = arm_coherent_dma_alloc,
> + .free = arm_coherent_dma_free,
> + .mmap = arm_dma_mmap,
> + .get_sgtable = arm_dma_get_sgtable,
> + .map_page = arm_coherent_dma_map_page,
> + .map_sg = arm_dma_map_sg,
> + .set_dma_mask = arm_dma_set_mask,
> +};
> +EXPORT_SYMBOL(arm_coherent_dma_ops);
> +
> +static int __init dma_map_init(void)
> +{
> + if (!arch_is_coherent())
> + return 0;
> +
> + arm_dma_ops.map_page = arm_coherent_dma_map_page;
> + arm_dma_ops.unmap_page = NULL;
> + arm_dma_ops.map_sg = NULL;
> + arm_dma_ops.unmap_sg = NULL;
> + arm_dma_ops.sync_single_for_cpu = NULL;
> + arm_dma_ops.sync_single_for_device = NULL;
> + arm_dma_ops.sync_sg_for_cpu = NULL;
> + arm_dma_ops.sync_sg_for_device = NULL;
> + arm_dma_ops.alloc = arm_coherent_dma_alloc;
> + arm_dma_ops.free = arm_coherent_dma_free;
> +}
> +core_initcall(dma_map_init);
I would implement it in a bit different way. Overwriting structure entries is not the
cleanest approach and might lead to some misunderstandings. I would rather change
get_dma_ops() function in arch/arm/include/asm/dma-mapping.h to something like this:
static inline struct dma_map_ops *get_dma_ops(struct device *dev)
{
if (dev && dev->archdata.dma_ops)
return dev->archdata.dma_ops;
return !arch_is_coherent() ? &arm_dma_ops : &arm_coherent_dma_ops;
}
This way the code is easy to understand and compiler can easily optimize out the above
check for 99% of architectures which are either coherent or not. In case of partially
coherent architectures, arch_is_coherent() will probably return false and coherent
devices will get their dma_map_ops initialized by platform code.
> +
> static u64 get_coherent_dma_mask(struct device *dev)
> {
> u64 mask = (u64)arm_dma_limit;
> @@ -538,7 +577,7 @@ static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t
> gfp,
>
>
> static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
> - gfp_t gfp, pgprot_t prot, const void *caller)
> + gfp_t gfp, pgprot_t prot, bool is_coherent, const void *caller)
> {
> u64 mask = get_coherent_dma_mask(dev);
> struct page *page;
> @@ -571,7 +610,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t
> *handle,
> *handle = DMA_ERROR_CODE;
> size = PAGE_ALIGN(size);
>
> - if (arch_is_coherent() || nommu())
> + if (is_coherent || nommu())
> addr = __alloc_simple_buffer(dev, size, gfp, &page);
> else if (gfp & GFP_ATOMIC)
> addr = __alloc_from_pool(size, &page);
> @@ -599,7 +638,20 @@ void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
> if (dma_alloc_from_coherent(dev, size, handle, &memory))
> return memory;
>
> - return __dma_alloc(dev, size, handle, gfp, prot,
> + return __dma_alloc(dev, size, handle, gfp, prot, false,
> + __builtin_return_address(0));
> +}
> +
> +static void *arm_coherent_dma_alloc(struct device *dev, size_t size,
> + dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)
> +{
> + pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel);
> + void *memory;
> +
> + if (dma_alloc_from_coherent(dev, size, handle, &memory))
> + return memory;
> +
> + return __dma_alloc(dev, size, handle, gfp, prot, true,
> __builtin_return_address(0));
> }
>
> @@ -636,8 +688,9 @@ int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
> /*
> * Free a buffer as defined by the above mapping.
> */
> -void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
> - dma_addr_t handle, struct dma_attrs *attrs)
> +static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
> + dma_addr_t handle, struct dma_attrs *attrs,
> + bool is_coherent)
> {
> struct page *page = pfn_to_page(dma_to_pfn(dev, handle));
>
> @@ -646,7 +699,7 @@ void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
>
> size = PAGE_ALIGN(size);
>
> - if (arch_is_coherent() || nommu()) {
> + if (is_coherent || nommu()) {
> __dma_free_buffer(page, size);
> } else if (!IS_ENABLED(CONFIG_CMA)) {
> __dma_free_remap(cpu_addr, size);
> @@ -662,6 +715,18 @@ void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
> }
> }
>
> +void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
> + dma_addr_t handle, struct dma_attrs *attrs)
> +{
> + __arm_dma_free(dev, size, cpu_addr, handle, attrs, false);
> +}
> +
> +static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr,
> + dma_addr_t handle, struct dma_attrs *attrs)
> +{
> + __arm_dma_free(dev, size, cpu_addr, handle, attrs, true);
> +}
> +
> int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
> void *cpu_addr, dma_addr_t handle, size_t size,
> struct dma_attrs *attrs)
> --
> 1.7.9.5
Best regards
--
Marek Szyprowski
Samsung Poland R&D Center
^ permalink raw reply [flat|nested] 3+ messages in thread
* [RFC PATCH] ARM: add coherent dma ops
2012-08-13 6:15 ` Marek Szyprowski
@ 2012-08-13 22:35 ` Rob Herring
0 siblings, 0 replies; 3+ messages in thread
From: Rob Herring @ 2012-08-13 22:35 UTC (permalink / raw)
To: linux-arm-kernel
On 08/13/2012 01:15 AM, Marek Szyprowski wrote:
> Hi Rob,
>
> On Thursday, August 09, 2012 7:37 AM Rob Herring wrote:
>
>> From: Rob Herring <rob.herring@calxeda.com>
>>
>> arch_is_coherent is problematic as it is a global symbol. This
>> doesn't work for multi-platform kernels or platforms which can support
>> per device coherent DMA.
>>
>> This adds arm_coherent_dma_ops to be used for devices which connected
>> coherently (i.e. to the ACP port on Cortex-A9 or A15). The arm_dma_ops
>> are modified at boot when arch_is_coherent is true.
>
> Thanks for the patch. I had something similar on my TODO list, but had not enough time for
> it. I like this patch but I have some comments.
>
>> This does not address arch_is_coherent used in iommu dma ops.
>
> In the initial version we might get rid of arch_is_coherent() usage in iommu dma ops and
> implement it when a real coherent hw with iommu will be available.
Well, if you are fine with the overall approach, then I can update iommu
functions too.
>> +static int __init dma_map_init(void)
>> +{
>> + if (!arch_is_coherent())
>> + return 0;
>> +
>> + arm_dma_ops.map_page = arm_coherent_dma_map_page;
>> + arm_dma_ops.unmap_page = NULL;
>> + arm_dma_ops.map_sg = NULL;
>> + arm_dma_ops.unmap_sg = NULL;
>> + arm_dma_ops.sync_single_for_cpu = NULL;
>> + arm_dma_ops.sync_single_for_device = NULL;
>> + arm_dma_ops.sync_sg_for_cpu = NULL;
>> + arm_dma_ops.sync_sg_for_device = NULL;
>> + arm_dma_ops.alloc = arm_coherent_dma_alloc;
>> + arm_dma_ops.free = arm_coherent_dma_free;
>> +}
>> +core_initcall(dma_map_init);
>
> I would implement it in a bit different way. Overwriting structure entries is not the
> cleanest approach and might lead to some misunderstandings. I would rather change
> get_dma_ops() function in arch/arm/include/asm/dma-mapping.h to something like this:
>
> static inline struct dma_map_ops *get_dma_ops(struct device *dev)
> {
> if (dev && dev->archdata.dma_ops)
> return dev->archdata.dma_ops;
> return !arch_is_coherent() ? &arm_dma_ops : &arm_coherent_dma_ops;
> }
>
> This way the code is easy to understand and compiler can easily optimize out the above
> check for 99% of architectures which are either coherent or not. In case of partially
> coherent architectures, arch_is_coherent() will probably return false and coherent
> devices will get their dma_map_ops initialized by platform code.
Yes, that's much better.
Rob
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2012-08-13 22:35 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-08-09 5:37 [RFC PATCH] ARM: add coherent dma ops Rob Herring
2012-08-13 6:15 ` Marek Szyprowski
2012-08-13 22:35 ` Rob Herring
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).