All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ben Skeggs <skeggsb-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
To: Alexandre Courbot
	<acourbot-DDmLM1+adcrQT0dZR+AlfA@public.gmane.org>,
	Ben Skeggs <bskeggs-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>,
	Dave Airlie <airlied-cv59FeDIM0c@public.gmane.org>
Cc: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org,
	Andrew Morton
	<akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org>,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Guenter Roeck <linux-0h96xk9xTtrk1uMJSBkQmQ@public.gmane.org>
Subject: Re: [PATCH] instmem/gk20a: use DMA API CPU mapping
Date: Thu, 12 Nov 2015 08:49:43 +1000	[thread overview]
Message-ID: <5643C607.60204@gmail.com> (raw)
In-Reply-To: <1447229271-22901-1-git-send-email-acourbot-DDmLM1+adcrQT0dZR+AlfA@public.gmane.org>


[-- Attachment #1.1: Type: text/plain, Size: 12532 bytes --]

On 11/11/2015 06:07 PM, Alexandre Courbot wrote:
> Commit 69c4938249fb ("drm/nouveau/instmem/gk20a: use direct CPU access")
> tried to be smart while using the DMA-API by managing the CPU mappings of
> buffers allocated with the DMA-API by itself. In doing so, it relied
> on dma_to_phys() which is an architecture-private function not
> available everywhere. This broke the build on several architectures.
> 
> Since there is no reliable and portable way to obtain the physical
> address of a DMA-API buffer, stop trying to be smart and just use the
> CPU mapping that the DMA-API can provide. This means that buffers will
> be CPU-mapped for all their life as opposed to when we need them, but
> anyway using the DMA-API here is a fallback for when no IOMMU is
> available so we should not expect optimal behavior.
> 
> This makes the IOMMU and DMA-API implementations of instmem diverge
> enough that we should maybe put them into separate files...
> 
> Signed-off-by: Alexandre Courbot <acourbot-DDmLM1+adcrQT0dZR+AlfA@public.gmane.org>
> ---
> Ben/Dave, since Dave's fix has reached mainline and builds are not
> broken anymore, we can proceed one of two ways:
> 
> 1) Ben merges this for 4.4 and let it flow for -rc2
> 2) I send another fix against the kernel tree
I just spoke to Dave, and I'll take this in my tree for 4.5 if
everything works fine with the temporary hack fix.  Does that sound OK
to you?

Ben.
> 
> Which one shall I do?
> 
>  drm/nouveau/nvkm/subdev/instmem/gk20a.c | 148 +++++++++++++-------------------
>  lib/include/nvif/os.h                   |   6 --
>  2 files changed, 62 insertions(+), 92 deletions(-)
> 
> diff --git a/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drm/nouveau/nvkm/subdev/instmem/gk20a.c
> index 681b2541229a..4c20fec64d96 100644
> --- a/drm/nouveau/nvkm/subdev/instmem/gk20a.c
> +++ b/drm/nouveau/nvkm/subdev/instmem/gk20a.c
> @@ -56,9 +56,6 @@ struct gk20a_instobj {
>  
>  	/* CPU mapping */
>  	u32 *vaddr;
> -	struct list_head vaddr_node;
> -	/* How many clients are using vaddr? */
> -	u32 use_cpt;
>  };
>  #define gk20a_instobj(p) container_of((p), struct gk20a_instobj, memory)
>  
> @@ -68,7 +65,6 @@ struct gk20a_instobj {
>  struct gk20a_instobj_dma {
>  	struct gk20a_instobj base;
>  
> -	u32 *cpuaddr;
>  	dma_addr_t handle;
>  	struct nvkm_mm_node r;
>  };
> @@ -81,6 +77,11 @@ struct gk20a_instobj_dma {
>  struct gk20a_instobj_iommu {
>  	struct gk20a_instobj base;
>  
> +	/* to link into gk20a_instmem::vaddr_lru */
> +	struct list_head vaddr_node;
> +	/* how many clients are using vaddr? */
> +	u32 use_cpt;
> +
>  	/* will point to the higher half of pages */
>  	dma_addr_t *dma_addrs;
>  	/* array of base.mem->size pages (+ dma_addr_ts) */
> @@ -109,8 +110,6 @@ struct gk20a_instmem {
>  
>  	/* Only used by DMA API */
>  	struct dma_attrs attrs;
> -
> -	void __iomem * (*cpu_map)(struct nvkm_memory *);
>  };
>  #define gk20a_instmem(p) container_of((p), struct gk20a_instmem, base)
>  
> @@ -132,46 +131,19 @@ gk20a_instobj_size(struct nvkm_memory *memory)
>  	return (u64)gk20a_instobj(memory)->mem.size << 12;
>  }
>  
> -static void __iomem *
> -gk20a_instobj_cpu_map_dma(struct nvkm_memory *memory)
> -{
> -	struct gk20a_instobj_dma *node = gk20a_instobj_dma(memory);
> -	struct device *dev = node->base.imem->base.subdev.device->dev;
> -	int npages = nvkm_memory_size(memory) >> 12;
> -	struct page *pages[npages];
> -	int i;
> -
> -	/* phys_to_page does not exist on all platforms... */
> -	pages[0] = pfn_to_page(dma_to_phys(dev, node->handle) >> PAGE_SHIFT);
> -	for (i = 1; i < npages; i++)
> -		pages[i] = pages[0] + i;
> -
> -	return vmap(pages, npages, VM_MAP, pgprot_writecombine(PAGE_KERNEL));
> -}
> -
> -static void __iomem *
> -gk20a_instobj_cpu_map_iommu(struct nvkm_memory *memory)
> -{
> -	struct gk20a_instobj_iommu *node = gk20a_instobj_iommu(memory);
> -	int npages = nvkm_memory_size(memory) >> 12;
> -
> -	return vmap(node->pages, npages, VM_MAP,
> -		    pgprot_writecombine(PAGE_KERNEL));
> -}
> -
>  /*
>   * Recycle the vaddr of obj. Must be called with gk20a_instmem::lock held.
>   */
>  static void
> -gk20a_instobj_recycle_vaddr(struct gk20a_instobj *obj)
> +gk20a_instobj_iommu_recycle_vaddr(struct gk20a_instobj_iommu *obj)
>  {
> -	struct gk20a_instmem *imem = obj->imem;
> +	struct gk20a_instmem *imem = obj->base.imem;
>  	/* there should not be any user left... */
>  	WARN_ON(obj->use_cpt);
>  	list_del(&obj->vaddr_node);
> -	vunmap(obj->vaddr);
> -	obj->vaddr = NULL;
> -	imem->vaddr_use -= nvkm_memory_size(&obj->memory);
> +	vunmap(obj->base.vaddr);
> +	obj->base.vaddr = NULL;
> +	imem->vaddr_use -= nvkm_memory_size(&obj->base.memory);
>  	nvkm_debug(&imem->base.subdev, "vaddr used: %x/%x\n", imem->vaddr_use,
>  		   imem->vaddr_max);
>  }
> @@ -187,17 +159,30 @@ gk20a_instmem_vaddr_gc(struct gk20a_instmem *imem, const u64 size)
>  		if (list_empty(&imem->vaddr_lru))
>  			break;
>  
> -		gk20a_instobj_recycle_vaddr(list_first_entry(&imem->vaddr_lru,
> -					     struct gk20a_instobj, vaddr_node));
> +		gk20a_instobj_iommu_recycle_vaddr(
> +				list_first_entry(&imem->vaddr_lru,
> +				struct gk20a_instobj_iommu, vaddr_node));
>  	}
>  }
>  
>  static void __iomem *
> -gk20a_instobj_acquire(struct nvkm_memory *memory)
> +gk20a_instobj_acquire_dma(struct nvkm_memory *memory)
>  {
>  	struct gk20a_instobj *node = gk20a_instobj(memory);
>  	struct gk20a_instmem *imem = node->imem;
>  	struct nvkm_ltc *ltc = imem->base.subdev.device->ltc;
> +
> +	nvkm_ltc_flush(ltc);
> +
> +	return node->vaddr;
> +}
> +
> +static void __iomem *
> +gk20a_instobj_acquire_iommu(struct nvkm_memory *memory)
> +{
> +	struct gk20a_instobj_iommu *node = gk20a_instobj_iommu(memory);
> +	struct gk20a_instmem *imem = node->base.imem;
> +	struct nvkm_ltc *ltc = imem->base.subdev.device->ltc;
>  	const u64 size = nvkm_memory_size(memory);
>  	unsigned long flags;
>  
> @@ -205,7 +190,7 @@ gk20a_instobj_acquire(struct nvkm_memory *memory)
>  
>  	spin_lock_irqsave(&imem->lock, flags);
>  
> -	if (node->vaddr) {
> +	if (node->base.vaddr) {
>  		if (!node->use_cpt) {
>  			/* remove from LRU list since mapping in use again */
>  			list_del(&node->vaddr_node);
> @@ -216,9 +201,10 @@ gk20a_instobj_acquire(struct nvkm_memory *memory)
>  	/* try to free some address space if we reached the limit */
>  	gk20a_instmem_vaddr_gc(imem, size);
>  
> -	node->vaddr = imem->cpu_map(memory);
> -
> -	if (!node->vaddr) {
> +	/* map the pages */
> +	node->base.vaddr = vmap(node->pages, size >> PAGE_SHIFT, VM_MAP,
> +				pgprot_writecombine(PAGE_KERNEL));
> +	if (!node->base.vaddr) {
>  		nvkm_error(&imem->base.subdev, "cannot map instobj - "
>  			   "this is not going to end well...\n");
>  		goto out;
> @@ -232,15 +218,25 @@ out:
>  	node->use_cpt++;
>  	spin_unlock_irqrestore(&imem->lock, flags);
>  
> -	return node->vaddr;
> +	return node->base.vaddr;
>  }
>  
>  static void
> -gk20a_instobj_release(struct nvkm_memory *memory)
> +gk20a_instobj_release_dma(struct nvkm_memory *memory)
>  {
>  	struct gk20a_instobj *node = gk20a_instobj(memory);
>  	struct gk20a_instmem *imem = node->imem;
>  	struct nvkm_ltc *ltc = imem->base.subdev.device->ltc;
> +
> +	nvkm_ltc_invalidate(ltc);
> +}
> +
> +static void
> +gk20a_instobj_release_iommu(struct nvkm_memory *memory)
> +{
> +	struct gk20a_instobj_iommu *node = gk20a_instobj_iommu(memory);
> +	struct gk20a_instmem *imem = node->base.imem;
> +	struct nvkm_ltc *ltc = imem->base.subdev.device->ltc;
>  	unsigned long flags;
>  
>  	spin_lock_irqsave(&imem->lock, flags);
> @@ -284,27 +280,6 @@ gk20a_instobj_map(struct nvkm_memory *memory, struct nvkm_vma *vma, u64 offset)
>  	nvkm_vm_map_at(vma, offset, &node->mem);
>  }
>  
> -/*
> - * Clear the CPU mapping of an instobj if it exists
> - */
> -static void
> -gk20a_instobj_dtor(struct gk20a_instobj *node)
> -{
> -	struct gk20a_instmem *imem = node->imem;
> -	unsigned long flags;
> -
> -	spin_lock_irqsave(&imem->lock, flags);
> -
> -	/* vaddr has already been recycled */
> -	if (!node->vaddr)
> -		goto out;
> -
> -	gk20a_instobj_recycle_vaddr(node);
> -
> -out:
> -	spin_unlock_irqrestore(&imem->lock, flags);
> -}
> -
>  static void *
>  gk20a_instobj_dtor_dma(struct nvkm_memory *memory)
>  {
> @@ -312,12 +287,10 @@ gk20a_instobj_dtor_dma(struct nvkm_memory *memory)
>  	struct gk20a_instmem *imem = node->base.imem;
>  	struct device *dev = imem->base.subdev.device->dev;
>  
> -	gk20a_instobj_dtor(&node->base);
> -
> -	if (unlikely(!node->cpuaddr))
> +	if (unlikely(!node->base.vaddr))
>  		goto out;
>  
> -	dma_free_attrs(dev, node->base.mem.size << PAGE_SHIFT, node->cpuaddr,
> +	dma_free_attrs(dev, node->base.mem.size << PAGE_SHIFT, node->base.vaddr,
>  		       node->handle, &imem->attrs);
>  
>  out:
> @@ -331,13 +304,20 @@ gk20a_instobj_dtor_iommu(struct nvkm_memory *memory)
>  	struct gk20a_instmem *imem = node->base.imem;
>  	struct device *dev = imem->base.subdev.device->dev;
>  	struct nvkm_mm_node *r;
> +	unsigned long flags;
>  	int i;
>  
> -	gk20a_instobj_dtor(&node->base);
> -
>  	if (unlikely(list_empty(&node->base.mem.regions)))
>  		goto out;
>  
> +	spin_lock_irqsave(&imem->lock, flags);
> +
> +	/* vaddr has already been recycled */
> +	if (node->base.vaddr)
> +		gk20a_instobj_iommu_recycle_vaddr(node);
> +
> +	spin_unlock_irqrestore(&imem->lock, flags);
> +
>  	r = list_first_entry(&node->base.mem.regions, struct nvkm_mm_node,
>  			     rl_entry);
>  
> @@ -368,8 +348,8 @@ gk20a_instobj_func_dma = {
>  	.target = gk20a_instobj_target,
>  	.addr = gk20a_instobj_addr,
>  	.size = gk20a_instobj_size,
> -	.acquire = gk20a_instobj_acquire,
> -	.release = gk20a_instobj_release,
> +	.acquire = gk20a_instobj_acquire_dma,
> +	.release = gk20a_instobj_release_dma,
>  	.rd32 = gk20a_instobj_rd32,
>  	.wr32 = gk20a_instobj_wr32,
>  	.map = gk20a_instobj_map,
> @@ -381,8 +361,8 @@ gk20a_instobj_func_iommu = {
>  	.target = gk20a_instobj_target,
>  	.addr = gk20a_instobj_addr,
>  	.size = gk20a_instobj_size,
> -	.acquire = gk20a_instobj_acquire,
> -	.release = gk20a_instobj_release,
> +	.acquire = gk20a_instobj_acquire_iommu,
> +	.release = gk20a_instobj_release_iommu,
>  	.rd32 = gk20a_instobj_rd32,
>  	.wr32 = gk20a_instobj_wr32,
>  	.map = gk20a_instobj_map,
> @@ -402,10 +382,10 @@ gk20a_instobj_ctor_dma(struct gk20a_instmem *imem, u32 npages, u32 align,
>  
>  	nvkm_memory_ctor(&gk20a_instobj_func_dma, &node->base.memory);
>  
> -	node->cpuaddr = dma_alloc_attrs(dev, npages << PAGE_SHIFT,
> -					&node->handle, GFP_KERNEL,
> -					&imem->attrs);
> -	if (!node->cpuaddr) {
> +	node->base.vaddr = dma_alloc_attrs(dev, npages << PAGE_SHIFT,
> +					   &node->handle, GFP_KERNEL,
> +					   &imem->attrs);
> +	if (!node->base.vaddr) {
>  		nvkm_error(subdev, "cannot allocate DMA memory\n");
>  		return -ENOMEM;
>  	}
> @@ -611,18 +591,14 @@ gk20a_instmem_new(struct nvkm_device *device, int index,
>  		imem->mm = &tdev->iommu.mm;
>  		imem->domain = tdev->iommu.domain;
>  		imem->iommu_pgshift = tdev->iommu.pgshift;
> -		imem->cpu_map = gk20a_instobj_cpu_map_iommu;
>  		imem->iommu_bit = tdev->func->iommu_bit;
>  
>  		nvkm_info(&imem->base.subdev, "using IOMMU\n");
>  	} else {
>  		init_dma_attrs(&imem->attrs);
> -		/* We will access the memory through our own mapping */
>  		dma_set_attr(DMA_ATTR_NON_CONSISTENT, &imem->attrs);
>  		dma_set_attr(DMA_ATTR_WEAK_ORDERING, &imem->attrs);
>  		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &imem->attrs);
> -		dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &imem->attrs);
> -		imem->cpu_map = gk20a_instobj_cpu_map_dma;
>  
>  		nvkm_info(&imem->base.subdev, "using DMA API\n");
>  	}
> diff --git a/lib/include/nvif/os.h b/lib/include/nvif/os.h
> index 2df30489179a..e8c06cb254dc 100644
> --- a/lib/include/nvif/os.h
> +++ b/lib/include/nvif/os.h
> @@ -943,12 +943,6 @@ dma_unmap_page(struct device *pdev, dma_addr_t addr, int size, unsigned flags)
>  {
>  }
>  
> -static inline phys_addr_t
> -dma_to_phys(struct device *dev, dma_addr_t addr)
> -{
> -	return 0;
> -}
> -
>  /******************************************************************************
>   * PCI
>   *****************************************************************************/
> 


[-- Attachment #1.2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

WARNING: multiple messages have this Message-ID (diff)
From: Ben Skeggs <skeggsb@gmail.com>
To: Alexandre Courbot <acourbot@nvidia.com>,
	Ben Skeggs <bskeggs@redhat.com>, Dave Airlie <airlied@linux.ie>
Cc: nouveau@lists.freedesktop.org, linux-kernel@vger.kernel.org,
	Andrew Morton <akpm@linux-foundation.org>,
	Guenter Roeck <linux@roeck-us.net>
Subject: Re: [Nouveau] [PATCH] instmem/gk20a: use DMA API CPU mapping
Date: Thu, 12 Nov 2015 08:49:43 +1000	[thread overview]
Message-ID: <5643C607.60204@gmail.com> (raw)
In-Reply-To: <1447229271-22901-1-git-send-email-acourbot@nvidia.com>

[-- Attachment #1: Type: text/plain, Size: 12503 bytes --]

On 11/11/2015 06:07 PM, Alexandre Courbot wrote:
> Commit 69c4938249fb ("drm/nouveau/instmem/gk20a: use direct CPU access")
> tried to be smart while using the DMA-API by managing the CPU mappings of
> buffers allocated with the DMA-API by itself. In doing so, it relied
> on dma_to_phys() which is an architecture-private function not
> available everywhere. This broke the build on several architectures.
> 
> Since there is no reliable and portable way to obtain the physical
> address of a DMA-API buffer, stop trying to be smart and just use the
> CPU mapping that the DMA-API can provide. This means that buffers will
> be CPU-mapped for all their life as opposed to when we need them, but
> anyway using the DMA-API here is a fallback for when no IOMMU is
> available so we should not expect optimal behavior.
> 
> This makes the IOMMU and DMA-API implementations of instmem diverge
> enough that we should maybe put them into separate files...
> 
> Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
> ---
> Ben/Dave, since Dave's fix has reached mainline and builds are not
> broken anymore, we can proceed one of two ways:
> 
> 1) Ben merges this for 4.4 and let it flow for -rc2
> 2) I send another fix against the kernel tree
I just spoke to Dave, and I'll take this in my tree for 4.5 if
everything works fine with the temporary hack fix.  Does that sound OK
to you?

Ben.
> 
> Which one shall I do?
> 
>  drm/nouveau/nvkm/subdev/instmem/gk20a.c | 148 +++++++++++++-------------------
>  lib/include/nvif/os.h                   |   6 --
>  2 files changed, 62 insertions(+), 92 deletions(-)
> 
> diff --git a/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drm/nouveau/nvkm/subdev/instmem/gk20a.c
> index 681b2541229a..4c20fec64d96 100644
> --- a/drm/nouveau/nvkm/subdev/instmem/gk20a.c
> +++ b/drm/nouveau/nvkm/subdev/instmem/gk20a.c
> @@ -56,9 +56,6 @@ struct gk20a_instobj {
>  
>  	/* CPU mapping */
>  	u32 *vaddr;
> -	struct list_head vaddr_node;
> -	/* How many clients are using vaddr? */
> -	u32 use_cpt;
>  };
>  #define gk20a_instobj(p) container_of((p), struct gk20a_instobj, memory)
>  
> @@ -68,7 +65,6 @@ struct gk20a_instobj {
>  struct gk20a_instobj_dma {
>  	struct gk20a_instobj base;
>  
> -	u32 *cpuaddr;
>  	dma_addr_t handle;
>  	struct nvkm_mm_node r;
>  };
> @@ -81,6 +77,11 @@ struct gk20a_instobj_dma {
>  struct gk20a_instobj_iommu {
>  	struct gk20a_instobj base;
>  
> +	/* to link into gk20a_instmem::vaddr_lru */
> +	struct list_head vaddr_node;
> +	/* how many clients are using vaddr? */
> +	u32 use_cpt;
> +
>  	/* will point to the higher half of pages */
>  	dma_addr_t *dma_addrs;
>  	/* array of base.mem->size pages (+ dma_addr_ts) */
> @@ -109,8 +110,6 @@ struct gk20a_instmem {
>  
>  	/* Only used by DMA API */
>  	struct dma_attrs attrs;
> -
> -	void __iomem * (*cpu_map)(struct nvkm_memory *);
>  };
>  #define gk20a_instmem(p) container_of((p), struct gk20a_instmem, base)
>  
> @@ -132,46 +131,19 @@ gk20a_instobj_size(struct nvkm_memory *memory)
>  	return (u64)gk20a_instobj(memory)->mem.size << 12;
>  }
>  
> -static void __iomem *
> -gk20a_instobj_cpu_map_dma(struct nvkm_memory *memory)
> -{
> -	struct gk20a_instobj_dma *node = gk20a_instobj_dma(memory);
> -	struct device *dev = node->base.imem->base.subdev.device->dev;
> -	int npages = nvkm_memory_size(memory) >> 12;
> -	struct page *pages[npages];
> -	int i;
> -
> -	/* phys_to_page does not exist on all platforms... */
> -	pages[0] = pfn_to_page(dma_to_phys(dev, node->handle) >> PAGE_SHIFT);
> -	for (i = 1; i < npages; i++)
> -		pages[i] = pages[0] + i;
> -
> -	return vmap(pages, npages, VM_MAP, pgprot_writecombine(PAGE_KERNEL));
> -}
> -
> -static void __iomem *
> -gk20a_instobj_cpu_map_iommu(struct nvkm_memory *memory)
> -{
> -	struct gk20a_instobj_iommu *node = gk20a_instobj_iommu(memory);
> -	int npages = nvkm_memory_size(memory) >> 12;
> -
> -	return vmap(node->pages, npages, VM_MAP,
> -		    pgprot_writecombine(PAGE_KERNEL));
> -}
> -
>  /*
>   * Recycle the vaddr of obj. Must be called with gk20a_instmem::lock held.
>   */
>  static void
> -gk20a_instobj_recycle_vaddr(struct gk20a_instobj *obj)
> +gk20a_instobj_iommu_recycle_vaddr(struct gk20a_instobj_iommu *obj)
>  {
> -	struct gk20a_instmem *imem = obj->imem;
> +	struct gk20a_instmem *imem = obj->base.imem;
>  	/* there should not be any user left... */
>  	WARN_ON(obj->use_cpt);
>  	list_del(&obj->vaddr_node);
> -	vunmap(obj->vaddr);
> -	obj->vaddr = NULL;
> -	imem->vaddr_use -= nvkm_memory_size(&obj->memory);
> +	vunmap(obj->base.vaddr);
> +	obj->base.vaddr = NULL;
> +	imem->vaddr_use -= nvkm_memory_size(&obj->base.memory);
>  	nvkm_debug(&imem->base.subdev, "vaddr used: %x/%x\n", imem->vaddr_use,
>  		   imem->vaddr_max);
>  }
> @@ -187,17 +159,30 @@ gk20a_instmem_vaddr_gc(struct gk20a_instmem *imem, const u64 size)
>  		if (list_empty(&imem->vaddr_lru))
>  			break;
>  
> -		gk20a_instobj_recycle_vaddr(list_first_entry(&imem->vaddr_lru,
> -					     struct gk20a_instobj, vaddr_node));
> +		gk20a_instobj_iommu_recycle_vaddr(
> +				list_first_entry(&imem->vaddr_lru,
> +				struct gk20a_instobj_iommu, vaddr_node));
>  	}
>  }
>  
>  static void __iomem *
> -gk20a_instobj_acquire(struct nvkm_memory *memory)
> +gk20a_instobj_acquire_dma(struct nvkm_memory *memory)
>  {
>  	struct gk20a_instobj *node = gk20a_instobj(memory);
>  	struct gk20a_instmem *imem = node->imem;
>  	struct nvkm_ltc *ltc = imem->base.subdev.device->ltc;
> +
> +	nvkm_ltc_flush(ltc);
> +
> +	return node->vaddr;
> +}
> +
> +static void __iomem *
> +gk20a_instobj_acquire_iommu(struct nvkm_memory *memory)
> +{
> +	struct gk20a_instobj_iommu *node = gk20a_instobj_iommu(memory);
> +	struct gk20a_instmem *imem = node->base.imem;
> +	struct nvkm_ltc *ltc = imem->base.subdev.device->ltc;
>  	const u64 size = nvkm_memory_size(memory);
>  	unsigned long flags;
>  
> @@ -205,7 +190,7 @@ gk20a_instobj_acquire(struct nvkm_memory *memory)
>  
>  	spin_lock_irqsave(&imem->lock, flags);
>  
> -	if (node->vaddr) {
> +	if (node->base.vaddr) {
>  		if (!node->use_cpt) {
>  			/* remove from LRU list since mapping in use again */
>  			list_del(&node->vaddr_node);
> @@ -216,9 +201,10 @@ gk20a_instobj_acquire(struct nvkm_memory *memory)
>  	/* try to free some address space if we reached the limit */
>  	gk20a_instmem_vaddr_gc(imem, size);
>  
> -	node->vaddr = imem->cpu_map(memory);
> -
> -	if (!node->vaddr) {
> +	/* map the pages */
> +	node->base.vaddr = vmap(node->pages, size >> PAGE_SHIFT, VM_MAP,
> +				pgprot_writecombine(PAGE_KERNEL));
> +	if (!node->base.vaddr) {
>  		nvkm_error(&imem->base.subdev, "cannot map instobj - "
>  			   "this is not going to end well...\n");
>  		goto out;
> @@ -232,15 +218,25 @@ out:
>  	node->use_cpt++;
>  	spin_unlock_irqrestore(&imem->lock, flags);
>  
> -	return node->vaddr;
> +	return node->base.vaddr;
>  }
>  
>  static void
> -gk20a_instobj_release(struct nvkm_memory *memory)
> +gk20a_instobj_release_dma(struct nvkm_memory *memory)
>  {
>  	struct gk20a_instobj *node = gk20a_instobj(memory);
>  	struct gk20a_instmem *imem = node->imem;
>  	struct nvkm_ltc *ltc = imem->base.subdev.device->ltc;
> +
> +	nvkm_ltc_invalidate(ltc);
> +}
> +
> +static void
> +gk20a_instobj_release_iommu(struct nvkm_memory *memory)
> +{
> +	struct gk20a_instobj_iommu *node = gk20a_instobj_iommu(memory);
> +	struct gk20a_instmem *imem = node->base.imem;
> +	struct nvkm_ltc *ltc = imem->base.subdev.device->ltc;
>  	unsigned long flags;
>  
>  	spin_lock_irqsave(&imem->lock, flags);
> @@ -284,27 +280,6 @@ gk20a_instobj_map(struct nvkm_memory *memory, struct nvkm_vma *vma, u64 offset)
>  	nvkm_vm_map_at(vma, offset, &node->mem);
>  }
>  
> -/*
> - * Clear the CPU mapping of an instobj if it exists
> - */
> -static void
> -gk20a_instobj_dtor(struct gk20a_instobj *node)
> -{
> -	struct gk20a_instmem *imem = node->imem;
> -	unsigned long flags;
> -
> -	spin_lock_irqsave(&imem->lock, flags);
> -
> -	/* vaddr has already been recycled */
> -	if (!node->vaddr)
> -		goto out;
> -
> -	gk20a_instobj_recycle_vaddr(node);
> -
> -out:
> -	spin_unlock_irqrestore(&imem->lock, flags);
> -}
> -
>  static void *
>  gk20a_instobj_dtor_dma(struct nvkm_memory *memory)
>  {
> @@ -312,12 +287,10 @@ gk20a_instobj_dtor_dma(struct nvkm_memory *memory)
>  	struct gk20a_instmem *imem = node->base.imem;
>  	struct device *dev = imem->base.subdev.device->dev;
>  
> -	gk20a_instobj_dtor(&node->base);
> -
> -	if (unlikely(!node->cpuaddr))
> +	if (unlikely(!node->base.vaddr))
>  		goto out;
>  
> -	dma_free_attrs(dev, node->base.mem.size << PAGE_SHIFT, node->cpuaddr,
> +	dma_free_attrs(dev, node->base.mem.size << PAGE_SHIFT, node->base.vaddr,
>  		       node->handle, &imem->attrs);
>  
>  out:
> @@ -331,13 +304,20 @@ gk20a_instobj_dtor_iommu(struct nvkm_memory *memory)
>  	struct gk20a_instmem *imem = node->base.imem;
>  	struct device *dev = imem->base.subdev.device->dev;
>  	struct nvkm_mm_node *r;
> +	unsigned long flags;
>  	int i;
>  
> -	gk20a_instobj_dtor(&node->base);
> -
>  	if (unlikely(list_empty(&node->base.mem.regions)))
>  		goto out;
>  
> +	spin_lock_irqsave(&imem->lock, flags);
> +
> +	/* vaddr has already been recycled */
> +	if (node->base.vaddr)
> +		gk20a_instobj_iommu_recycle_vaddr(node);
> +
> +	spin_unlock_irqrestore(&imem->lock, flags);
> +
>  	r = list_first_entry(&node->base.mem.regions, struct nvkm_mm_node,
>  			     rl_entry);
>  
> @@ -368,8 +348,8 @@ gk20a_instobj_func_dma = {
>  	.target = gk20a_instobj_target,
>  	.addr = gk20a_instobj_addr,
>  	.size = gk20a_instobj_size,
> -	.acquire = gk20a_instobj_acquire,
> -	.release = gk20a_instobj_release,
> +	.acquire = gk20a_instobj_acquire_dma,
> +	.release = gk20a_instobj_release_dma,
>  	.rd32 = gk20a_instobj_rd32,
>  	.wr32 = gk20a_instobj_wr32,
>  	.map = gk20a_instobj_map,
> @@ -381,8 +361,8 @@ gk20a_instobj_func_iommu = {
>  	.target = gk20a_instobj_target,
>  	.addr = gk20a_instobj_addr,
>  	.size = gk20a_instobj_size,
> -	.acquire = gk20a_instobj_acquire,
> -	.release = gk20a_instobj_release,
> +	.acquire = gk20a_instobj_acquire_iommu,
> +	.release = gk20a_instobj_release_iommu,
>  	.rd32 = gk20a_instobj_rd32,
>  	.wr32 = gk20a_instobj_wr32,
>  	.map = gk20a_instobj_map,
> @@ -402,10 +382,10 @@ gk20a_instobj_ctor_dma(struct gk20a_instmem *imem, u32 npages, u32 align,
>  
>  	nvkm_memory_ctor(&gk20a_instobj_func_dma, &node->base.memory);
>  
> -	node->cpuaddr = dma_alloc_attrs(dev, npages << PAGE_SHIFT,
> -					&node->handle, GFP_KERNEL,
> -					&imem->attrs);
> -	if (!node->cpuaddr) {
> +	node->base.vaddr = dma_alloc_attrs(dev, npages << PAGE_SHIFT,
> +					   &node->handle, GFP_KERNEL,
> +					   &imem->attrs);
> +	if (!node->base.vaddr) {
>  		nvkm_error(subdev, "cannot allocate DMA memory\n");
>  		return -ENOMEM;
>  	}
> @@ -611,18 +591,14 @@ gk20a_instmem_new(struct nvkm_device *device, int index,
>  		imem->mm = &tdev->iommu.mm;
>  		imem->domain = tdev->iommu.domain;
>  		imem->iommu_pgshift = tdev->iommu.pgshift;
> -		imem->cpu_map = gk20a_instobj_cpu_map_iommu;
>  		imem->iommu_bit = tdev->func->iommu_bit;
>  
>  		nvkm_info(&imem->base.subdev, "using IOMMU\n");
>  	} else {
>  		init_dma_attrs(&imem->attrs);
> -		/* We will access the memory through our own mapping */
>  		dma_set_attr(DMA_ATTR_NON_CONSISTENT, &imem->attrs);
>  		dma_set_attr(DMA_ATTR_WEAK_ORDERING, &imem->attrs);
>  		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &imem->attrs);
> -		dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &imem->attrs);
> -		imem->cpu_map = gk20a_instobj_cpu_map_dma;
>  
>  		nvkm_info(&imem->base.subdev, "using DMA API\n");
>  	}
> diff --git a/lib/include/nvif/os.h b/lib/include/nvif/os.h
> index 2df30489179a..e8c06cb254dc 100644
> --- a/lib/include/nvif/os.h
> +++ b/lib/include/nvif/os.h
> @@ -943,12 +943,6 @@ dma_unmap_page(struct device *pdev, dma_addr_t addr, int size, unsigned flags)
>  {
>  }
>  
> -static inline phys_addr_t
> -dma_to_phys(struct device *dev, dma_addr_t addr)
> -{
> -	return 0;
> -}
> -
>  /******************************************************************************
>   * PCI
>   *****************************************************************************/
> 


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

  parent reply	other threads:[~2015-11-11 22:49 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-11-11  8:07 [PATCH] instmem/gk20a: use DMA API CPU mapping Alexandre Courbot
2015-11-11  8:07 ` Alexandre Courbot
     [not found] ` <1447229271-22901-1-git-send-email-acourbot-DDmLM1+adcrQT0dZR+AlfA@public.gmane.org>
2015-11-11 22:49   ` Ben Skeggs [this message]
2015-11-11 22:49     ` [Nouveau] " Ben Skeggs
     [not found]     ` <5643C607.60204-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2015-11-12  2:07       ` Alexandre Courbot
2015-11-12  2:07         ` [Nouveau] " Alexandre Courbot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5643C607.60204@gmail.com \
    --to=skeggsb-re5jqeeqqe8avxtiumwx3w@public.gmane.org \
    --cc=acourbot-DDmLM1+adcrQT0dZR+AlfA@public.gmane.org \
    --cc=airlied-cv59FeDIM0c@public.gmane.org \
    --cc=akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org \
    --cc=bskeggs-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org \
    --cc=linux-0h96xk9xTtrk1uMJSBkQmQ@public.gmane.org \
    --cc=linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.