* [PATCH] x86: enable DMA CMA with swiotlb
@ 2013-11-19 23:32 Akinobu Mita
From: Akinobu Mita @ 2013-11-19 23:32 UTC (permalink / raw)
To: linux-kernel, akpm
Cc: Akinobu Mita, Marek Szyprowski, Konrad Rzeszutek Wilk,
Thomas Gleixner, Ingo Molnar, H. Peter Anvin, x86
The DMA Contiguous Memory Allocator support on x86 is disabled when the
swiotlb config option is enabled, so DMA CMA is always disabled on
x86_64, where swiotlb is always enabled. This patch enables DMA CMA
even when the swiotlb config option is enabled.

The contiguous memory allocator on x86 is integrated into
dma_generic_alloc_coherent(), the .alloc callback in nommu_dma_ops for
dma_alloc_coherent().

x86_swiotlb_alloc_coherent(), the .alloc callback in swiotlb_dma_ops,
first tries to allocate with dma_generic_alloc_coherent() and then
calls swiotlb_alloc_coherent() as a fallback.
The main part of supporting DMA CMA with swiotlb is changing
x86_swiotlb_free_coherent(), the .free callback in swiotlb_dma_ops for
dma_free_coherent(), so that it can distinguish memory allocated by
dma_generic_alloc_coherent() from memory allocated by
swiotlb_alloc_coherent(), and release the former with
dma_generic_free_coherent(), which can handle contiguous memory. This
change requires making is_swiotlb_buffer() a global function.
This also requires changing the .free callback in the dma_map_ops for
amd_gart and sta2x11, because those dma_ops also use
dma_generic_alloc_coherent().
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org
Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
---
arch/x86/Kconfig | 2 +-
arch/x86/include/asm/swiotlb.h | 7 +++++++
arch/x86/kernel/amd_gart_64.c | 2 +-
arch/x86/kernel/pci-swiotlb.c | 9 ++++++---
arch/x86/pci/sta2x11-fixup.c | 6 ++----
include/linux/swiotlb.h | 2 ++
lib/swiotlb.c | 2 +-
7 files changed, 20 insertions(+), 10 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e903c71..b15df8b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -39,7 +39,7 @@ config X86
select ARCH_WANT_OPTIONAL_GPIOLIB
select ARCH_WANT_FRAME_POINTERS
select HAVE_DMA_ATTRS
- select HAVE_DMA_CONTIGUOUS if !SWIOTLB
+ select HAVE_DMA_CONTIGUOUS
select HAVE_KRETPROBES
select HAVE_OPTPROBES
select HAVE_KPROBES_ON_FTRACE
diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h
index 977f176..ab05d73 100644
--- a/arch/x86/include/asm/swiotlb.h
+++ b/arch/x86/include/asm/swiotlb.h
@@ -29,4 +29,11 @@ static inline void pci_swiotlb_late_init(void)
static inline void dma_mark_clean(void *addr, size_t size) {}
+extern void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flags,
+ struct dma_attrs *attrs);
+extern void x86_swiotlb_free_coherent(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_addr,
+ struct dma_attrs *attrs);
+
#endif /* _ASM_X86_SWIOTLB_H */
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index b574b29..8e3842f 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -512,7 +512,7 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_addr, struct dma_attrs *attrs)
{
gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, NULL);
- free_pages((unsigned long)vaddr, get_order(size));
+ dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs);
}
static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr)
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 6c483ba..77dd0ad 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -14,7 +14,7 @@
#include <asm/iommu_table.h>
int swiotlb __read_mostly;
-static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
+void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
dma_addr_t *dma_handle, gfp_t flags,
struct dma_attrs *attrs)
{
@@ -28,11 +28,14 @@ static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags);
}
-static void x86_swiotlb_free_coherent(struct device *dev, size_t size,
+void x86_swiotlb_free_coherent(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_addr,
struct dma_attrs *attrs)
{
- swiotlb_free_coherent(dev, size, vaddr, dma_addr);
+ if (is_swiotlb_buffer(dma_to_phys(dev, dma_addr)))
+ swiotlb_free_coherent(dev, size, vaddr, dma_addr);
+ else
+ dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs);
}
static struct dma_map_ops swiotlb_dma_ops = {
diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
index 9d8a509..5ceda85 100644
--- a/arch/x86/pci/sta2x11-fixup.c
+++ b/arch/x86/pci/sta2x11-fixup.c
@@ -173,9 +173,7 @@ static void *sta2x11_swiotlb_alloc_coherent(struct device *dev,
{
void *vaddr;
- vaddr = dma_generic_alloc_coherent(dev, size, dma_handle, flags, attrs);
- if (!vaddr)
- vaddr = swiotlb_alloc_coherent(dev, size, dma_handle, flags);
+ vaddr = x86_swiotlb_alloc_coherent(dev, size, dma_handle, flags, attrs);
*dma_handle = p2a(*dma_handle, to_pci_dev(dev));
return vaddr;
}
@@ -183,7 +181,7 @@ static void *sta2x11_swiotlb_alloc_coherent(struct device *dev,
/* We have our own dma_ops: the same as swiotlb but from alloc (above) */
static struct dma_map_ops sta2x11_dma_ops = {
.alloc = sta2x11_swiotlb_alloc_coherent,
- .free = swiotlb_free_coherent,
+ .free = x86_swiotlb_free_coherent,
.map_page = swiotlb_map_page,
.unmap_page = swiotlb_unmap_page,
.map_sg = swiotlb_map_sg_attrs,
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index a5ffd32..e7a018e 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -116,4 +116,6 @@ static inline void swiotlb_free(void) { }
#endif
extern void swiotlb_print_info(void);
+extern int is_swiotlb_buffer(phys_addr_t paddr);
+
#endif /* __LINUX_SWIOTLB_H */
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index fe978e0..6e4a798 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -369,7 +369,7 @@ void __init swiotlb_free(void)
io_tlb_nslabs = 0;
}
-static int is_swiotlb_buffer(phys_addr_t paddr)
+int is_swiotlb_buffer(phys_addr_t paddr)
{
return paddr >= io_tlb_start && paddr < io_tlb_end;
}
--
1.8.3.2
* Re: [PATCH] x86: enable DMA CMA with swiotlb
From: Konrad Rzeszutek Wilk @ 2013-11-20 14:00 UTC (permalink / raw)
To: Akinobu Mita
Cc: linux-kernel, akpm, Marek Szyprowski, Thomas Gleixner,
Ingo Molnar, H. Peter Anvin, x86
On Wed, Nov 20, 2013 at 08:32:19AM +0900, Akinobu Mita wrote:
> The DMA Contiguous Memory Allocator support on x86 is disabled when the
> swiotlb config option is enabled, so DMA CMA is always disabled on
> x86_64, where swiotlb is always enabled. This patch enables DMA CMA
> even when the swiotlb config option is enabled.
>
> The contiguous memory allocator on x86 is integrated into
> dma_generic_alloc_coherent(), the .alloc callback in nommu_dma_ops for
> dma_alloc_coherent().
>
> x86_swiotlb_alloc_coherent(), the .alloc callback in swiotlb_dma_ops,
> first tries to allocate with dma_generic_alloc_coherent() and then
> calls swiotlb_alloc_coherent() as a fallback.
>
> The main part of supporting DMA CMA with swiotlb is changing
> x86_swiotlb_free_coherent(), the .free callback in swiotlb_dma_ops for
> dma_free_coherent(), so that it can distinguish memory allocated by
> dma_generic_alloc_coherent() from memory allocated by
> swiotlb_alloc_coherent(), and release the former with
> dma_generic_free_coherent(), which can handle contiguous memory. This
> change requires making is_swiotlb_buffer() a global function.
>
> This also requires changing the .free callback in the dma_map_ops for
> amd_gart and sta2x11, because those dma_ops also use
> dma_generic_alloc_coherent().
>
> Cc: Marek Szyprowski <m.szyprowski@samsung.com>
> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: "H. Peter Anvin" <hpa@zytor.com>
> Cc: x86@kernel.org
> Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
> ---
> arch/x86/Kconfig | 2 +-
> arch/x86/include/asm/swiotlb.h | 7 +++++++
> arch/x86/kernel/amd_gart_64.c | 2 +-
> arch/x86/kernel/pci-swiotlb.c | 9 ++++++---
> arch/x86/pci/sta2x11-fixup.c | 6 ++----
> include/linux/swiotlb.h | 2 ++
> lib/swiotlb.c | 2 +-
Looks OK, but before this goes anywhere I need to run regression tests
on IA64 and the other users of SWIOTLB.
> 7 files changed, 20 insertions(+), 10 deletions(-)
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index e903c71..b15df8b 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -39,7 +39,7 @@ config X86
> select ARCH_WANT_OPTIONAL_GPIOLIB
> select ARCH_WANT_FRAME_POINTERS
> select HAVE_DMA_ATTRS
> - select HAVE_DMA_CONTIGUOUS if !SWIOTLB
> + select HAVE_DMA_CONTIGUOUS
> select HAVE_KRETPROBES
> select HAVE_OPTPROBES
> select HAVE_KPROBES_ON_FTRACE
> diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h
> index 977f176..ab05d73 100644
> --- a/arch/x86/include/asm/swiotlb.h
> +++ b/arch/x86/include/asm/swiotlb.h
> @@ -29,4 +29,11 @@ static inline void pci_swiotlb_late_init(void)
>
> static inline void dma_mark_clean(void *addr, size_t size) {}
>
> +extern void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
> + dma_addr_t *dma_handle, gfp_t flags,
> + struct dma_attrs *attrs);
> +extern void x86_swiotlb_free_coherent(struct device *dev, size_t size,
> + void *vaddr, dma_addr_t dma_addr,
> + struct dma_attrs *attrs);
> +
> #endif /* _ASM_X86_SWIOTLB_H */
> diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
> index b574b29..8e3842f 100644
> --- a/arch/x86/kernel/amd_gart_64.c
> +++ b/arch/x86/kernel/amd_gart_64.c
> @@ -512,7 +512,7 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr,
> dma_addr_t dma_addr, struct dma_attrs *attrs)
> {
> gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, NULL);
> - free_pages((unsigned long)vaddr, get_order(size));
> + dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs);
> }
>
> static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr)
> diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
> index 6c483ba..77dd0ad 100644
> --- a/arch/x86/kernel/pci-swiotlb.c
> +++ b/arch/x86/kernel/pci-swiotlb.c
> @@ -14,7 +14,7 @@
> #include <asm/iommu_table.h>
> int swiotlb __read_mostly;
>
> -static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
> +void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
> dma_addr_t *dma_handle, gfp_t flags,
> struct dma_attrs *attrs)
> {
> @@ -28,11 +28,14 @@ static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
> return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags);
> }
>
> -static void x86_swiotlb_free_coherent(struct device *dev, size_t size,
> +void x86_swiotlb_free_coherent(struct device *dev, size_t size,
> void *vaddr, dma_addr_t dma_addr,
> struct dma_attrs *attrs)
> {
> - swiotlb_free_coherent(dev, size, vaddr, dma_addr);
> + if (is_swiotlb_buffer(dma_to_phys(dev, dma_addr)))
> + swiotlb_free_coherent(dev, size, vaddr, dma_addr);
> + else
> + dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs);
> }
>
> static struct dma_map_ops swiotlb_dma_ops = {
> diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
> index 9d8a509..5ceda85 100644
> --- a/arch/x86/pci/sta2x11-fixup.c
> +++ b/arch/x86/pci/sta2x11-fixup.c
> @@ -173,9 +173,7 @@ static void *sta2x11_swiotlb_alloc_coherent(struct device *dev,
> {
> void *vaddr;
>
> - vaddr = dma_generic_alloc_coherent(dev, size, dma_handle, flags, attrs);
> - if (!vaddr)
> - vaddr = swiotlb_alloc_coherent(dev, size, dma_handle, flags);
> + vaddr = x86_swiotlb_alloc_coherent(dev, size, dma_handle, flags, attrs);
> *dma_handle = p2a(*dma_handle, to_pci_dev(dev));
> return vaddr;
> }
> @@ -183,7 +181,7 @@ static void *sta2x11_swiotlb_alloc_coherent(struct device *dev,
> /* We have our own dma_ops: the same as swiotlb but from alloc (above) */
> static struct dma_map_ops sta2x11_dma_ops = {
> .alloc = sta2x11_swiotlb_alloc_coherent,
> - .free = swiotlb_free_coherent,
> + .free = x86_swiotlb_free_coherent,
> .map_page = swiotlb_map_page,
> .unmap_page = swiotlb_unmap_page,
> .map_sg = swiotlb_map_sg_attrs,
> diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
> index a5ffd32..e7a018e 100644
> --- a/include/linux/swiotlb.h
> +++ b/include/linux/swiotlb.h
> @@ -116,4 +116,6 @@ static inline void swiotlb_free(void) { }
> #endif
>
> extern void swiotlb_print_info(void);
> +extern int is_swiotlb_buffer(phys_addr_t paddr);
> +
> #endif /* __LINUX_SWIOTLB_H */
> diff --git a/lib/swiotlb.c b/lib/swiotlb.c
> index fe978e0..6e4a798 100644
> --- a/lib/swiotlb.c
> +++ b/lib/swiotlb.c
> @@ -369,7 +369,7 @@ void __init swiotlb_free(void)
> io_tlb_nslabs = 0;
> }
>
> -static int is_swiotlb_buffer(phys_addr_t paddr)
> +int is_swiotlb_buffer(phys_addr_t paddr)
> {
> return paddr >= io_tlb_start && paddr < io_tlb_end;
> }
> --
> 1.8.3.2
>
* Re: [PATCH] x86: enable DMA CMA with swiotlb
From: Akinobu Mita @ 2013-11-20 23:35 UTC (permalink / raw)
To: Konrad Rzeszutek Wilk
Cc: LKML, Andrew Morton, Marek Szyprowski, Thomas Gleixner,
Ingo Molnar, H. Peter Anvin, x86
2013/11/20 Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>:
> On Wed, Nov 20, 2013 at 08:32:19AM +0900, Akinobu Mita wrote:
>> The DMA Contiguous Memory Allocator support on x86 is disabled when the
>> swiotlb config option is enabled, so DMA CMA is always disabled on
>> x86_64, where swiotlb is always enabled. This patch enables DMA CMA
>> even when the swiotlb config option is enabled.
>>
>> The contiguous memory allocator on x86 is integrated into
>> dma_generic_alloc_coherent(), the .alloc callback in nommu_dma_ops for
>> dma_alloc_coherent().
>>
>> x86_swiotlb_alloc_coherent(), the .alloc callback in swiotlb_dma_ops,
>> first tries to allocate with dma_generic_alloc_coherent() and then
>> calls swiotlb_alloc_coherent() as a fallback.
>>
>> The main part of supporting DMA CMA with swiotlb is changing
>> x86_swiotlb_free_coherent(), the .free callback in swiotlb_dma_ops for
>> dma_free_coherent(), so that it can distinguish memory allocated by
>> dma_generic_alloc_coherent() from memory allocated by
>> swiotlb_alloc_coherent(), and release the former with
>> dma_generic_free_coherent(), which can handle contiguous memory. This
>> change requires making is_swiotlb_buffer() a global function.
>>
>> This also requires changing the .free callback in the dma_map_ops for
>> amd_gart and sta2x11, because those dma_ops also use
>> dma_generic_alloc_coherent().
>>
>> Cc: Marek Szyprowski <m.szyprowski@samsung.com>
>> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
>> Cc: Thomas Gleixner <tglx@linutronix.de>
>> Cc: Ingo Molnar <mingo@redhat.com>
>> Cc: "H. Peter Anvin" <hpa@zytor.com>
>> Cc: x86@kernel.org
>> Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
>> ---
>> arch/x86/Kconfig | 2 +-
>> arch/x86/include/asm/swiotlb.h | 7 +++++++
>> arch/x86/kernel/amd_gart_64.c | 2 +-
>> arch/x86/kernel/pci-swiotlb.c | 9 ++++++---
>> arch/x86/pci/sta2x11-fixup.c | 6 ++----
>> include/linux/swiotlb.h | 2 ++
>> lib/swiotlb.c | 2 +-
>
> Looks OK, but before this goes anywhere I need to run regression tests
> on IA64 and the other users of SWIOTLB.
Thanks a lot for your review.
* Re: [PATCH] x86: enable DMA CMA with swiotlb
From: H. Peter Anvin @ 2013-11-20 23:47 UTC (permalink / raw)
To: Akinobu Mita, linux-kernel, akpm
Cc: Marek Szyprowski, Konrad Rzeszutek Wilk, Thomas Gleixner,
Ingo Molnar, x86
On 11/19/2013 03:32 PM, Akinobu Mita wrote:
> The DMA Contiguous Memory Allocator support on x86 is disabled when the
> swiotlb config option is enabled, so DMA CMA is always disabled on
> x86_64, where swiotlb is always enabled. This patch enables DMA CMA
> even when the swiotlb config option is enabled.
What is the use case?
-hpa
* Re: [PATCH] x86: enable DMA CMA with swiotlb
From: Akinobu Mita @ 2013-11-21 15:02 UTC (permalink / raw)
To: H. Peter Anvin
Cc: LKML, Andrew Morton, Marek Szyprowski, Konrad Rzeszutek Wilk,
Thomas Gleixner, Ingo Molnar, x86
2013/11/21 H. Peter Anvin <hpa@zytor.com>:
> On 11/19/2013 03:32 PM, Akinobu Mita wrote:
>> The DMA Contiguous Memory Allocator support on x86 is disabled when the
>> swiotlb config option is enabled, so DMA CMA is always disabled on
>> x86_64, where swiotlb is always enabled. This patch enables DMA CMA
>> even when the swiotlb config option is enabled.
>
> What is the use case?
I want to use DMA CMA for the feature that requires a single huge chunk
(about 64MB) of host memory. Please refer to page 3 of 24 in:
http://www.jedec.org/sites/default/files/T_Fujisawa_MF_2013.pdf
* Re: [PATCH] x86: enable DMA CMA with swiotlb
From: H. Peter Anvin @ 2013-11-21 18:11 UTC (permalink / raw)
To: Akinobu Mita
Cc: LKML, Andrew Morton, Marek Szyprowski, Konrad Rzeszutek Wilk,
Thomas Gleixner, Ingo Molnar, x86
On 11/21/2013 07:02 AM, Akinobu Mita wrote:
>
> I want to use DMA CMA for the feature that requires a single huge chunk
> (about 64MB) of host memory.
That is tautological. "I want to use the Contiguous Memory Allocator to
allocate contiguous memory."
>> Please refer to page 3 of 24 in:
> http://www.jedec.org/sites/default/files/T_Fujisawa_MF_2013.pdf
But yes, it makes sense that a device using host memory would not be
able to leverage swiotlb, because the software simply has no clue what
the DMA transactions are. In that sense it is fundamentally different
from a real IOTLB.
-hpa
* Re: [PATCH] x86: enable DMA CMA with swiotlb
From: Marek Szyprowski @ 2013-11-22 9:15 UTC (permalink / raw)
To: Akinobu Mita, linux-kernel, akpm
Cc: Konrad Rzeszutek Wilk, Thomas Gleixner, Ingo Molnar,
H. Peter Anvin, x86
Hello,
On 2013-11-20 00:32, Akinobu Mita wrote:
> The DMA Contiguous Memory Allocator support on x86 is disabled when the
> swiotlb config option is enabled, so DMA CMA is always disabled on
> x86_64, where swiotlb is always enabled. This patch enables DMA CMA
> even when the swiotlb config option is enabled.
>
> The contiguous memory allocator on x86 is integrated into
> dma_generic_alloc_coherent(), the .alloc callback in nommu_dma_ops for
> dma_alloc_coherent().
>
> x86_swiotlb_alloc_coherent(), the .alloc callback in swiotlb_dma_ops,
> first tries to allocate with dma_generic_alloc_coherent() and then
> calls swiotlb_alloc_coherent() as a fallback.
>
> The main part of supporting DMA CMA with swiotlb is changing
> x86_swiotlb_free_coherent(), the .free callback in swiotlb_dma_ops for
> dma_free_coherent(), so that it can distinguish memory allocated by
> dma_generic_alloc_coherent() from memory allocated by
> swiotlb_alloc_coherent(), and release the former with
> dma_generic_free_coherent(), which can handle contiguous memory. This
> change requires making is_swiotlb_buffer() a global function.
>
> This also requires changing the .free callback in the dma_map_ops for
> amd_gart and sta2x11, because those dma_ops also use
> dma_generic_alloc_coherent().
>
> Cc: Marek Szyprowski <m.szyprowski@samsung.com>
> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: "H. Peter Anvin" <hpa@zytor.com>
> Cc: x86@kernel.org
> Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Acked-by: Marek Szyprowski <m.szyprowski@samsung.com>
> ---
> arch/x86/Kconfig | 2 +-
> arch/x86/include/asm/swiotlb.h | 7 +++++++
> arch/x86/kernel/amd_gart_64.c | 2 +-
> arch/x86/kernel/pci-swiotlb.c | 9 ++++++---
> arch/x86/pci/sta2x11-fixup.c | 6 ++----
> include/linux/swiotlb.h | 2 ++
> lib/swiotlb.c | 2 +-
> 7 files changed, 20 insertions(+), 10 deletions(-)
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index e903c71..b15df8b 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -39,7 +39,7 @@ config X86
> select ARCH_WANT_OPTIONAL_GPIOLIB
> select ARCH_WANT_FRAME_POINTERS
> select HAVE_DMA_ATTRS
> - select HAVE_DMA_CONTIGUOUS if !SWIOTLB
> + select HAVE_DMA_CONTIGUOUS
> select HAVE_KRETPROBES
> select HAVE_OPTPROBES
> select HAVE_KPROBES_ON_FTRACE
> diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h
> index 977f176..ab05d73 100644
> --- a/arch/x86/include/asm/swiotlb.h
> +++ b/arch/x86/include/asm/swiotlb.h
> @@ -29,4 +29,11 @@ static inline void pci_swiotlb_late_init(void)
>
> static inline void dma_mark_clean(void *addr, size_t size) {}
>
> +extern void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
> + dma_addr_t *dma_handle, gfp_t flags,
> + struct dma_attrs *attrs);
> +extern void x86_swiotlb_free_coherent(struct device *dev, size_t size,
> + void *vaddr, dma_addr_t dma_addr,
> + struct dma_attrs *attrs);
> +
> #endif /* _ASM_X86_SWIOTLB_H */
> diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
> index b574b29..8e3842f 100644
> --- a/arch/x86/kernel/amd_gart_64.c
> +++ b/arch/x86/kernel/amd_gart_64.c
> @@ -512,7 +512,7 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr,
> dma_addr_t dma_addr, struct dma_attrs *attrs)
> {
> gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, NULL);
> - free_pages((unsigned long)vaddr, get_order(size));
> + dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs);
> }
>
> static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr)
> diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
> index 6c483ba..77dd0ad 100644
> --- a/arch/x86/kernel/pci-swiotlb.c
> +++ b/arch/x86/kernel/pci-swiotlb.c
> @@ -14,7 +14,7 @@
> #include <asm/iommu_table.h>
> int swiotlb __read_mostly;
>
> -static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
> +void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
> dma_addr_t *dma_handle, gfp_t flags,
> struct dma_attrs *attrs)
> {
> @@ -28,11 +28,14 @@ static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
> return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags);
> }
>
> -static void x86_swiotlb_free_coherent(struct device *dev, size_t size,
> +void x86_swiotlb_free_coherent(struct device *dev, size_t size,
> void *vaddr, dma_addr_t dma_addr,
> struct dma_attrs *attrs)
> {
> - swiotlb_free_coherent(dev, size, vaddr, dma_addr);
> + if (is_swiotlb_buffer(dma_to_phys(dev, dma_addr)))
> + swiotlb_free_coherent(dev, size, vaddr, dma_addr);
> + else
> + dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs);
> }
>
> static struct dma_map_ops swiotlb_dma_ops = {
> diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
> index 9d8a509..5ceda85 100644
> --- a/arch/x86/pci/sta2x11-fixup.c
> +++ b/arch/x86/pci/sta2x11-fixup.c
> @@ -173,9 +173,7 @@ static void *sta2x11_swiotlb_alloc_coherent(struct device *dev,
> {
> void *vaddr;
>
> - vaddr = dma_generic_alloc_coherent(dev, size, dma_handle, flags, attrs);
> - if (!vaddr)
> - vaddr = swiotlb_alloc_coherent(dev, size, dma_handle, flags);
> + vaddr = x86_swiotlb_alloc_coherent(dev, size, dma_handle, flags, attrs);
> *dma_handle = p2a(*dma_handle, to_pci_dev(dev));
> return vaddr;
> }
> @@ -183,7 +181,7 @@ static void *sta2x11_swiotlb_alloc_coherent(struct device *dev,
> /* We have our own dma_ops: the same as swiotlb but from alloc (above) */
> static struct dma_map_ops sta2x11_dma_ops = {
> .alloc = sta2x11_swiotlb_alloc_coherent,
> - .free = swiotlb_free_coherent,
> + .free = x86_swiotlb_free_coherent,
> .map_page = swiotlb_map_page,
> .unmap_page = swiotlb_unmap_page,
> .map_sg = swiotlb_map_sg_attrs,
> diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
> index a5ffd32..e7a018e 100644
> --- a/include/linux/swiotlb.h
> +++ b/include/linux/swiotlb.h
> @@ -116,4 +116,6 @@ static inline void swiotlb_free(void) { }
> #endif
>
> extern void swiotlb_print_info(void);
> +extern int is_swiotlb_buffer(phys_addr_t paddr);
> +
> #endif /* __LINUX_SWIOTLB_H */
> diff --git a/lib/swiotlb.c b/lib/swiotlb.c
> index fe978e0..6e4a798 100644
> --- a/lib/swiotlb.c
> +++ b/lib/swiotlb.c
> @@ -369,7 +369,7 @@ void __init swiotlb_free(void)
> io_tlb_nslabs = 0;
> }
>
> -static int is_swiotlb_buffer(phys_addr_t paddr)
> +int is_swiotlb_buffer(phys_addr_t paddr)
> {
> return paddr >= io_tlb_start && paddr < io_tlb_end;
> }
Best regards
--
Marek Szyprowski
Samsung R&D Institute Poland
* Re: [PATCH] x86: enable DMA CMA with swiotlb
From: Akinobu Mita @ 2013-11-22 12:27 UTC (permalink / raw)
To: H. Peter Anvin
Cc: LKML, Andrew Morton, Marek Szyprowski, Konrad Rzeszutek Wilk,
Thomas Gleixner, Ingo Molnar, x86
2013/11/22 H. Peter Anvin <hpa@zytor.com>:
> On 11/21/2013 07:02 AM, Akinobu Mita wrote:
>>
>> I want to use DMA CMA for the feature that requires a single huge chunk
>> (about 64MB) of host memory.
>
> That is tautological. "I want to use the Contiguous Memory Allocator to
> allocate contiguous memory."
>
>> Please refer 3 of 24 in:
>> http://www.jedec.org/sites/default/files/T_Fujisawa_MF_2013.pdf
>
> But yes, it makes sense that a device using host memory would not be
> able to leverage swiotlb, because the software simply has no clue what
> the DMA transactions are. In that sense it is fundamentally different
> from a real IOTLB.
I'm currently working on integrating DMA CMA with intel-iommu, which
I can test. Regardless of which dma_ops is actually used, I would
like to allocate large contiguous memory with dma_alloc_coherent()
and tell the device its base address.

We could use mem= or crashkernel= to reserve host memory instead, but
that is complicated and does not work with a real IOMMU.
* Re: [PATCH] x86: enable DMA CMA with swiotlb
From: H. Peter Anvin @ 2013-11-22 15:02 UTC (permalink / raw)
To: Akinobu Mita
Cc: LKML, Andrew Morton, Marek Szyprowski, Konrad Rzeszutek Wilk,
Thomas Gleixner, Ingo Molnar, x86
On 11/22/2013 04:27 AM, Akinobu Mita wrote:
>
> I'm currently working for integrating DMA CMA with intel-iommu which
> I can test. Regardless of which dma ops is actually used, I would
> like to allocate large contiguous memory with dma_alloc_coherent()
> and tell the base address to the device.
>
> We can use mem= or crashkernel= to reserve host memory, but it is
> complicated and unables to work with real IOMMU.
>
If you have a real iotlb, why would you need contiguous memory?
-hpa
* Re: [PATCH] x86: enable DMA CMA with swiotlb
From: Andi Kleen @ 2013-11-22 21:19 UTC (permalink / raw)
To: H. Peter Anvin
Cc: Akinobu Mita, LKML, Andrew Morton, Marek Szyprowski,
Konrad Rzeszutek Wilk, Thomas Gleixner, Ingo Molnar, x86
"H. Peter Anvin" <hpa@zytor.com> writes:
>
> If you have a real iotlb, why would you need contiguous memory?
iotlb is often slow.
Another use case is allocating 1GB pages at runtime.
-Andi
--
ak@linux.intel.com -- Speaking for myself only
* Re: [PATCH] x86: enable DMA CMA with swiotlb
From: Akinobu Mita @ 2013-11-23 13:59 UTC (permalink / raw)
To: H. Peter Anvin
Cc: LKML, Andrew Morton, Marek Szyprowski, Konrad Rzeszutek Wilk,
Thomas Gleixner, Ingo Molnar, x86, Andi Kleen
2013/11/23 H. Peter Anvin <hpa@zytor.com>:
> On 11/22/2013 04:27 AM, Akinobu Mita wrote:
>>
>> I'm currently working on integrating DMA CMA with intel-iommu, which
>> I can test. Regardless of which dma_ops is actually used, I would
>> like to allocate large contiguous memory with dma_alloc_coherent()
>> and tell the device its base address.
>>
>> We could use mem= or crashkernel= to reserve host memory instead, but
>> that is complicated and does not work with a real IOMMU.
>>
>
> If you have a real iotlb, why would you need contiguous memory?
Because allocating large memory with dma_alloc_coherent() fails under
intel-iommu: intel_alloc_coherent() allocates memory with alloc_pages()
and then maps it with __intel_map_single().

What I'm trying to change in intel-iommu is to add the same scheme used
by dma_generic_alloc_coherent(), i.e. first try to allocate memory with
dma_alloc_from_contiguous(), with alloc_pages() as a fallback.